Skip to content

Commit

Permalink
Merge pull request #188 from scottclowe/rf_neuropil
Browse files Browse the repository at this point in the history
RF: Refactor neuropil.separate
  • Loading branch information
scottclowe committed Jun 26, 2021
2 parents 7a4dcf5 + 78385ee commit 3a70ce0
Show file tree
Hide file tree
Showing 2 changed files with 200 additions and 165 deletions.
264 changes: 139 additions & 125 deletions fissa/neuropil.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,22 +10,31 @@

import numpy as np
import numpy.random as rand
from sklearn.decomposition import FastICA, NMF, PCA
import sklearn.decomposition


def separate(
S, sep_method='nmf', n=None, maxiter=10000, tol=1e-4,
random_state=892, maxtries=10, W0=None, H0=None, alpha=0.1):
"""For the signals in S, finds the independent signals underlying it,
using ica or nmf.
S,
sep_method="nmf",
n=None,
maxiter=10000,
tol=1e-4,
random_state=892,
maxtries=10,
W0=None,
H0=None,
alpha=0.1,
):
"""
Find independent signals, sorted by matching score against the first input signal.
Parameters
----------
S : :term:`array_like`
S : :term:`array_like` shaped (signals, observations)
2-d array containing mixed input signals.
Each column of `S` should be a different signal, and each row an
observation of the signals. For ``S[i,j]``, `j` = each signal,
``i`` = signal content.
observation of the signals. For ``S[i, j]``, ``j`` is a signal, and
``i`` is an observation.
The first column, ``j = 0``, is considered the primary signal and the
one for which we will try to extract a decontaminated equivalent.
Expand All @@ -36,13 +45,14 @@ def separate(
- ``"nmf"``: Non-negative Matrix Factorization
n : int, optional
How many components to estimate. If ``None`` (default), use PCA to
estimate how many components would explain at least 99% of the
variance and adopt this value for ``n``.
How many components to estimate. If ``None`` (default), for the NMF
method, ``n`` is the number of input signals; for the ICA method,
we use PCA to estimate how many components would explain at least 99%
of the variance and adopt this value for ``n``.
maxiter : int, optional
Number of maximally allowed iterations. Default is ``500``.
Number of maximally allowed iterations. Default is ``10000``.
tol : float, optional
Error tolerance for termination. Default is ``1e-5``.
Error tolerance for termination. Default is ``1e-4``.
random_state : int or None, optional
Initial state for the random number generator. Set to ``None`` to use
the numpy.random default. Default seed is ``892``.
Expand All @@ -62,28 +72,34 @@ def separate(
Returns
-------
S_sep : numpy.ndarray
S_sep : :class:`numpy.ndarray` shaped (signals, observations)
The raw separated traces.
S_matched : numpy.ndarray
S_matched : :class:`numpy.ndarray` shaped (signals, observations)
The separated traces matched to the primary signal, in order
of matching quality (see Methods below).
A_sep : numpy.ndarray
of matching quality (see Notes below).
A_sep : :class:`numpy.ndarray` shaped (signals, signals)
Mixing matrix.
convergence : dict
Metadata for the convergence result, with keys:
- ``"random_state"``: seed for ICA initiation
- ``"iterations"``: number of iterations needed for convergence
- ``"max_iterations"``: maximum number of iterations allowed
- ``"converged"``: whether the algorithm converged or not (`bool`)
Metadata for the convergence result, with the following keys and
values:
``convergence["random_state"]``
Seed for estimator initiation.
``convergence["iterations"]``
Number of iterations needed for convergence.
``convergence["max_iterations"]``
Maximum number of iterations allowed.
``convergence["converged"]``
Whether the algorithm converged or not (:class:`bool`).
Notes
-----
Concept by Scott Lowe and Sander Keemink.
Normalize the columns in estimated mixing matrix A so that
``sum(column) = 1``.
This results in a relative score of how strongly each separated signal
is represented in each ROI signal.
To identify which independent signal matches the primary signal best,
we first normalize the columns in the output mixing matrix `A` such that
``sum(A[:, separated]) = 1``. This results in a relative score of how
strongly each raw signal contributes to each separated signal. From this,
we find the separated signal to which the ROI trace makes the largest
(relative) contribution.
See Also
--------
Expand All @@ -102,112 +118,112 @@ def separate(

# estimate number of signals to find, if not given
if n is None:
if sep_method == 'ica':
if sep_method.lower() == "ica":
# Perform PCA
pca = PCA(whiten=False)
pca = sklearn.decomposition.PCA(whiten=False)
pca.fit(S.T)

# find number of components with at least x percent explained var
n = sum(pca.explained_variance_ratio_ > 0.01)
else:
n = S.shape[0]

if sep_method == 'ica':
# Use sklearn's implementation of ICA.
for i_try in range(maxtries):

if sep_method.lower() in {"ica", "fastica"}:
# Use sklearn's implementation of ICA.

for ith_try in range(maxtries):
# Make an instance of the FastICA class. We can do whitening of
# the data now.
ica = FastICA(n_components=n, whiten=True, max_iter=maxiter,
tol=tol, random_state=random_state)
estimator = sklearn.decomposition.FastICA(
n_components=n,
whiten=True,
max_iter=maxiter,
tol=tol,
random_state=random_state,
)

# Perform ICA and find separated signals
S_sep = ica.fit_transform(S.T)

# check if max number of iterations was reached
if ica.n_iter_ < maxiter:
print((
'ICA converged after {} iterations.'
).format(ica.n_iter_))
break
print((
'Attempt {} failed to converge at {} iterations.'
).format(ith_try + 1, ica.n_iter_))
if ith_try + 1 < maxtries:
print('Trying a new random state.')
# Change to a new random_state
if random_state is not None:
random_state = (random_state + 1) % 2**32

if ica.n_iter_ == maxiter:
print((
'Warning: maximum number of allowed tries reached at {} '
'iterations for {} tries of different random seed states.'
).format(ica.n_iter_, ith_try + 1))

A_sep = ica.mixing_

elif sep_method == 'nmf':
for ith_try in range(maxtries):
# nSignals = nRegions +1
# ICA = FastICA(n_components=nSignals)
# ica = ICA.fit_transform(mixed.T) # Reconstruct signals
# A_ica = ICA.mixing_ # Get estimated mixing matrix
#
#
S_sep = estimator.fit_transform(S.T)

elif sep_method.lower() in {"nmf", "nnmf"}:

# Make an instance of the sklearn NMF class
if W0 is None and H0 is None:
nmf = NMF(
init='nndsvdar', n_components=n,
alpha=alpha, l1_ratio=0.5,
tol=tol, max_iter=maxiter, random_state=random_state)

# Perform NMF and find separated signals
S_sep = nmf.fit_transform(S.T)

else:
nmf = NMF(
init='custom', n_components=n,
alpha=alpha, l1_ratio=0.5,
tol=tol, max_iter=maxiter, random_state=random_state)

# Perform NMF and find separated signals
S_sep = nmf.fit_transform(S.T, W=W0, H=H0)

# check if max number of iterations was reached
if nmf.n_iter_ < maxiter - 1:
print((
'NMF converged after {} iterations.'
).format(nmf.n_iter_ + 1))
break
print((
'Attempt {} failed to converge at {} iterations.'
).format(ith_try, nmf.n_iter_ + 1))
if ith_try + 1 < maxtries:
print('Trying a new random state.')
# Change to a new random_state
if random_state is not None:
random_state = (random_state + 1) % 2**32

if nmf.n_iter_ == maxiter - 1:
print((
'Warning: maximum number of allowed tries reached at {} '
'iterations for {} tries of different random seed states.'
).format(nmf.n_iter_ + 1, ith_try + 1))

A_sep = nmf.components_.T
estimator = sklearn.decomposition.NMF(
init="nndsvdar" if W0 is None and H0 is None else "custom",
n_components=n,
alpha=alpha,
l1_ratio=0.5,
tol=tol,
max_iter=maxiter,
random_state=random_state,
)

# Perform NMF and find separated signals
S_sep = estimator.fit_transform(S.T, W=W0, H=H0)

elif hasattr(sklearn.decomposition, sep_method):

print(
"Using ad hoc signal decomposition method"
" sklearn.decomposition.{}. Only NMF and ICA are officially"
" supported.".format(sep_method)
)

# Load up arbitrary decomposition algorithm from sklearn
estimator = getattr(sklearn.decomposition, sep_method)(
n_components=n,
tol=tol,
max_iter=maxiter,
random_state=random_state,
)
S_sep = estimator.fit_transform(S.T)

else:
raise ValueError('Unknown separation method "{}".'.format(sep_method))

# check if max number of iterations was reached
if estimator.n_iter_ < maxiter:
print(
"{} converged after {} iterations.".format(
repr(estimator).split("(")[0], estimator.n_iter_
)
)
break

print(
"Attempt {} failed to converge at {} iterations.".format(
i_try + 1, estimator.n_iter_
)
)
if i_try + 1 < maxtries:
print("Trying a new random state.")
# Change to a new random_state
if random_state is not None:
random_state = (random_state + 1) % 2 ** 32

if estimator.n_iter_ == maxiter:
print(
"Warning: maximum number of allowed tries reached at {} iterations"
" for {} tries of different random seed states.".format(
estimator.n_iter_, i_try + 1
)
)

if hasattr(estimator, "mixing_"):
A_sep = estimator.mixing_
else:
raise ValueError('Unknown separation method "{}".'.format(sep_method))

# make empty matched structure
S_matched = np.zeros(np.shape(S_sep))
A_sep = estimator.components_.T

# Normalize the columns in A so that sum(column)=1 (can be done in one line
# too).
# This results in a relative score of how strongly each separated signal
# is represented in each ROI signal.
#
# Our mixing matrix is shaped (input/raw, output/separated). For each
# separated (output) signal, we find how much weighting each input (raw)
# signal contributes to that separated signal, relative to the other input
# signals.
A = abs(np.copy(A_sep))
for j in range(n):
if np.sum(A[:, j]) != 0:
Expand All @@ -216,25 +232,23 @@ def separate(
# get the scores for the somatic signal
scores = A[0, :]

# get the order of scores
# Rank the separated signals in descending ordering of their score.
# The separated signal to which the somatic signal makes up the largest
# contribution is sorted first.
order = np.argsort(scores)[::-1]

# order the signals according to their scores
# Order the signals according to their scores, and scale the magnitude
# back to the original magnitude.
S_matched = np.zeros_like(S_sep)
for j in range(n):
s_ = A_sep[0, order[j]] * S_sep[:, order[j]]
S_matched[:, j] = s_
S_matched[:, j] = A_sep[0, order[j]] * S_sep[:, order[j]]

# save the algorithm convergence info
convergence = {}
convergence['max_iterations'] = maxiter
if sep_method == 'ica':
convergence['random_state'] = random_state
convergence['iterations'] = ica.n_iter_
convergence['converged'] = not ica.n_iter_ == maxiter
elif sep_method == 'nmf':
convergence['random_state'] = random_state
convergence['iterations'] = nmf.n_iter_
convergence['converged'] = not nmf.n_iter_ == maxiter
convergence["max_iterations"] = maxiter
convergence["random_state"] = random_state
convergence["iterations"] = estimator.n_iter_
convergence["converged"] = estimator.n_iter_ != maxiter

# scale back to raw magnitudes
S_matched *= median
Expand Down

0 comments on commit 3a70ce0

Please sign in to comment.