Added decomposition classes for SVD and Hermitian ED.
Moved ICA to decomposition module.
Updated code related to ICA.

Signed-off-by: Timothy Click <tcthepoet@yahoo.com>
Timothy Click authored and Timothy Click committed Jan 8, 2019
1 parent a2ad640 commit 95be2b1
Showing 6 changed files with 619 additions and 32 deletions.
2 changes: 1 addition & 1 deletion src/fluctmatch/commands/cmd_sca.py
@@ -27,7 +27,7 @@
from sklearn.decomposition import PCA
from ..analysis.paramtable import ParamTable
from ..analysis import fluctsca
-from fluctmatch.fluctsca import ica
+from fluctmatch.decomposition import ica


@click.command(
52 changes: 52 additions & 0 deletions src/fluctmatch/decomposition/__init__.py
@@ -0,0 +1,52 @@
# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding: utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
#
# fluctmatch --- https://github.com/tclick/python-fluctmatch
# Copyright (c) 2015-2017 The fluctmatch Development Team and contributors
# (see the file AUTHORS for the full list of names)
#
# Released under the New BSD license.
#
# Please cite your use of fluctmatch in published work:
#
# Timothy H. Click, Nixon Raj, and Jhih-Wei Chu.
# Calculation of Enzyme Fluctuograms from All-Atom Molecular Dynamics
# Simulation. Meth Enzymology. 578 (2016), 327-342,
# doi:10.1016/bs.mie.2016.05.024.
#
from __future__ import (
absolute_import,
division,
print_function,
unicode_literals,
)

from future.utils import (
native_str,
raise_from,
with_metaclass,
)
from future.builtins import (
ascii,
bytes,
chr,
dict,
filter,
hex,
input,
map,
next,
oct,
open,
pow,
range,
round,
str,
super,
zip,
)

import numpy as np
import pandas as pd


148 changes: 148 additions & 0 deletions src/fluctmatch/decomposition/eigh.py
@@ -0,0 +1,148 @@
# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding: utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
#
# fluctmatch --- https://github.com/tclick/python-fluctmatch
# Copyright (c) 2015-2017 The fluctmatch Development Team and contributors
# (see the file AUTHORS for the full list of names)
#
# Released under the New BSD license.
#
# Please cite your use of fluctmatch in published work:
#
# Timothy H. Click, Nixon Raj, and Jhih-Wei Chu.
# Calculation of Enzyme Fluctuograms from All-Atom Molecular Dynamics
# Simulation. Meth Enzymology. 578 (2016), 327-342,
# doi:10.1016/bs.mie.2016.05.024.
#

import numpy as np
from scipy import linalg

from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils import check_array

class Eigh(BaseEstimator, TransformerMixin):
    """Hermitian eigenvalue decomposition.

    This transformer performs eigenvalue decomposition (ED) of
    Hermitian square matrices. The eigenvalues and eigenvectors are
    sorted in descending order of the eigenvalues.

    Parameters
    ----------
    copy : bool (default True)
        If False, data passed to fit are overwritten and running
        fit(X).transform(X) will not yield the expected results;
        use fit_transform(X) instead.

    Attributes
    ----------
    components_ : array, shape (n_components, n_components)
        The transpose of the eigenvector matrix.
    explained_variance_ : array, shape (n_components,)
        Same as the eigenvalues.
    explained_variance_ratio_ : array, shape (n_components,)
        Fraction of the total variance explained by each of the
        selected components.
    eigenvalues_ : array, shape (n_components,)
        The eigenvalues corresponding to each of the selected components.

    Examples
    --------
    >>> import numpy as np
    >>> from fluctmatch.decomposition.eigh import Eigh
    >>> X = np.array([[2., 1.], [1., 2.]])
    >>> eigh = Eigh()
    >>> eigh.fit(X)
    Eigh(copy=True)
    >>> print(eigh.eigenvalues_)
    [3. 1.]
    >>> print(eigh.explained_variance_ratio_)
    [0.75 0.25]

    Notes
    -----
    Eigendecomposition suffers from a problem called "sign indeterminacy",
    which means the sign of the ``components_`` and the output from
    transform depend on the algorithm and random state. To work around
    this, fit instances of this class to data once, then keep the
    instance around to do transformations.
    """
    def __init__(self, copy=True):
        self.copy = copy

    def fit(self, X: np.ndarray, y=None) -> object:
        """Fit the model with X.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data, where n_samples is the number of samples
            and n_features is the number of features.
        y : Ignored

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        self._fit(X)
        return self

    def transform(self, X: np.ndarray, y=None) -> np.ndarray:
        """Fit the model with X and return the sorted eigenvectors.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data, where n_samples is the number of samples
            and n_features is the number of features.
        y : Ignored

        Returns
        -------
        X_new : array-like, shape (n_samples, n_components)
            The eigenvectors, one per column, sorted in descending
            order of their eigenvalues. Note that this method refits
            the model on X.
        """
        V = self._fit(X)
        return V
    def _fit(self, X: np.ndarray) -> np.ndarray:
        """Perform the Hermitian eigenvalue decomposition and sort the
        eigenpairs in descending order of the eigenvalues."""

        # check_array rejects sparse input by default, which is more
        # informative here than a failure inside the solver.
        X: np.ndarray = check_array(X, dtype=[np.float64, np.float32],
                                    ensure_2d=True, copy=self.copy)
        # scipy.sparse.linalg.eigsh (ARPACK) cannot return all n
        # eigenpairs of an n x n matrix, so use the dense Hermitian
        # solver scipy.linalg.eigh instead.
        L, V = linalg.eigh(X)
        idx: np.ndarray = np.argsort(L)[::-1]
        self.eigenvalues_: np.ndarray = L[idx].copy()
        self.explained_variance_: np.ndarray = L[idx].copy()
        total_eigv: float = L.sum()
        self.explained_variance_ratio_: np.ndarray = L[idx] / total_eigv
        V: np.ndarray = V[:, idx]
        self.components_ = V.T
        return V

    def inverse_transform(self, X: np.ndarray) -> np.ndarray:
        """Transform X back to its original space.

        Returns an array X_original whose transform would be X.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_components)
            New data.

        Returns
        -------
        X_original : array, shape (n_samples, n_features)
            Note that this is always a dense array.
        """
        X: np.ndarray = check_array(X)
        return np.dot(X * self.eigenvalues_, self.components_)
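For context, a minimal usage sketch of the new Eigh transformer (the matrix below is made up for illustration; fit_transform comes from sklearn's TransformerMixin):

import numpy as np
from fluctmatch.decomposition.eigh import Eigh

# A small symmetric (Hermitian) matrix standing in for a covariance matrix.
cov = np.array([[4.0, 2.0, 0.6],
                [2.0, 3.0, 0.4],
                [0.6, 0.4, 1.0]])

eigh = Eigh()
V = eigh.fit_transform(cov)            # eigenvectors, one per column
print(eigh.eigenvalues_)               # sorted in descending order
print(eigh.explained_variance_ratio_)  # sums to 1 for a positive-definite input

# V @ diag(eigenvalues) @ V.T reconstructs the original Hermitian matrix,
# which is exactly what inverse_transform computes for X = V.
assert np.allclose((V * eigh.eigenvalues_) @ V.T, cov)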
src/fluctmatch/decomposition/ica.py
@@ -32,16 +32,14 @@
as_float_array, check_array, check_is_fitted, check_random_state, FLOAT_DTYPES
)

-from fluctmatch.lib.center import Center2D

logger: logging.Logger = logging.getLogger(__name__)
logging.captureWarnings(True)


def _infomax(X, l_rate: float=None, weights: np.ndarray=None, block: float=None,
w_change: float=1e-12, anneal_deg: float=60., anneal_step: float=0.9,
extended: bool=True, n_subgauss: int=1, kurt_size: int=6000,
-             ext_blocks: int=1, max_iter:int =200,
+             ext_blocks: int=1, max_iter: int=200, whiten: bool=True,
random_state: np.random.RandomState=None, blowup: float=1e4,
blowup_fac: float=0.5, n_small_angle: int=20, use_bias: bool=True,
verbose: bool=None) -> np.ndarray:
@@ -149,7 +147,27 @@ def _infomax(X, l_rate: float=None, weights: np.ndarray=None, block: float=None,

# check data shape
n_samples, n_features = X.shape
-    n_features_square = n_features ** 2
+    n_features_square = np.square(n_features)

+    if whiten:
+        # Center the columns (i.e., the variables)
+        X_mean = X.mean(axis=0)
+        X -= X_mean[np.newaxis, :]
+
+        # Whitening and preprocessing by PCA; see (6.33) p.140
+        u, d, _ = linalg.svd(X, full_matrices=False)
+        del _, d
+        # The columns of u are orthonormal, so rescaling them by
+        # sqrt(n_samples) makes the rows of X1 white (identity covariance).
+        X1 = u * np.sqrt(n_samples)
+        del u
+    else:
+        # X must be cast to floats to avoid typing issues with numpy
+        # 2.0 and the line below
+        X1 = as_float_array(X, copy=False)  # copy has been taken care of

# check input parameters
# heuristic default - may need adjustment for large or tiny data sets
@@ -204,7 +222,7 @@ def _infomax(X, l_rate: float=None, weights: np.ndarray=None, block: float=None,
# ICA training block
# loop across block samples
for t in range(0, lastt, block):
-            u = np.dot(X[permute[t:t + block], :], weights)
+            u = np.dot(X1[permute[t:t + block], :], weights)
u += np.dot(bias, onesrow).T

if extended:
@@ -581,33 +599,19 @@ def fit(self, data: np.ndarray):
self.n_samples, n_features = data.shape
random_state = check_random_state(self.random_state)

-        pca = PCA(n_components=self.n_components, whiten=True,
-                  copy=True, svd_solver='full')

# take care of ICA
if self.method == 'fastica':
-            ica = FastICA(whiten=False, random_state=random_state,
+            ica = FastICA(whiten=self.whiten, random_state=random_state,
**self.fit_params)
-            pipe_data = [pca, ica] if self.whiten else [ica,]
-            pipeline = make_pipeline(*pipe_data)
-            pipeline.fit(data)
+            ica.fit(data)
self.components_ = ica.components_
self.mixing_ = ica.mixing_
elif self.method in ('infomax', 'extended-infomax'):
-            if self.whiten:
-                data = pca.fit_transform(data)
self.components_ = _infomax(data, random_state=random_state,
+                                        whiten=self.whiten,
**self.fit_params)[:self.n_components]
-            if self.whiten:
-                self.components_ /= np.sqrt(pca.explained_variance_)[None, :]  # whitening
self.mixing_ = linalg.pinv(self.components_)

-        # the things to store for PCA
-        if self.whiten:
-            self.mean_ = pca.mean_
-            self.pca_components_ = pca.components_
-            self.pca_explained_variance_ = pca.explained_variance_

return self

def transform(self, data: np.ndarray, copy: bool=True) -> np.ndarray:
@@ -616,11 +620,6 @@ def transform(self, data: np.ndarray, copy: bool=True) -> np.ndarray:

data = check_array(data, copy=copy, dtype=FLOAT_DTYPES)

-        # Apply first PCA
-        if self.whiten:
-            data -= self.mean_
-            data = np.dot(data, self.pca_components_.T)

# Apply unmixing to low dimension PCA
sources = np.dot(data, self.components_.T)
return sources
@@ -630,9 +629,6 @@ def inverse_transform(self, data: np.ndarray, copy: bool=True) -> np.ndarray:

data: np.ndarray = check_array(data, copy=copy, dtype=FLOAT_DTYPES)
data = np.dot(data, self.mixing_.T)
-        if self.whiten:
-            data = np.dot(data, self.pca_components_)
-            data += self.mean_
return data

def copy(self) -> object:
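For reference, a standalone sketch of the SVD-based whitening that this commit moves into `_infomax` (random toy data made up for this example, not from the package): after centering the variables, the rescaled left singular vectors have identity covariance.

import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(500, 4) @ rng.randn(4, 4)  # correlated toy data, shape (n_samples, n_features)

X = X - X.mean(axis=0)                   # center the columns (the variables)

# PCA whitening via SVD: with X = u @ np.diag(d) @ vt, the columns of u are
# orthonormal, so X1 = u * sqrt(n_samples) has sample covariance equal to
# the identity matrix.
u, d, vt = np.linalg.svd(X, full_matrices=False)
X1 = u * np.sqrt(X.shape[0])

assert np.allclose(X1.T @ X1 / X.shape[0], np.eye(4))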
