diff --git a/setup.py b/setup.py index e84bc88..4f77c4e 100644 --- a/setup.py +++ b/setup.py @@ -88,7 +88,6 @@ def read(*names, **kwargs): ], install_requires=[ "click", - "future", "MDAnalysis", "MDAnalysisTests", "numpy", diff --git a/src/fluctmatch/decomposition/__init__.py b/src/fluctmatch/decomposition/__init__.py index 8bf062f..5ec8559 100644 --- a/src/fluctmatch/decomposition/__init__.py +++ b/src/fluctmatch/decomposition/__init__.py @@ -1,52 +1,41 @@ -# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding: utf-8 -*- -# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# -*- coding: utf-8 -*- # -# pysca --- https://github.com/tclick/python-pysca -# Copyright (c) 2015-2017 The pySCA Development Team and contributors -# (see the file AUTHORS for the full list of names) +# python-fluctmatch - +# Copyright (c) 2019 Timothy H. Click, Ph.D. # -# Released under the New BSD license. +# All rights reserved. # -# Please cite your use of fluctmatch in published work: +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: # -# Timothy H. Click, Nixon Raj, and Jhih-Wei Chu. -# Calculation of Enzyme Fluctuograms from All-Atom Molecular Dynamics -# Simulation. Meth Enzymology. 578 (2016), 327-342, -# doi:10.1016/bs.mie.2016.05.024. +# Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. 
# -from __future__ import ( - absolute_import, - division, - print_function, - unicode_literals, -) - -from future.utils import ( - native_str, - raise_from, - with_metaclass, -) -from future.builtins import ( - ascii, - bytes, - chr, - dict, - filter, - hex, - input, - map, - next, - oct, - open, - pow, - range, - round, - str, - super, - zip, -) - -import numpy as np -import pandas as pd - +# Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# Neither the name of the author nor the names of its contributors may be used +# to endorse or promote products derived from this software without specific +# prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR +# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Timothy H. Click, Nixon Raj, and Jhih-Wei Chu. +# Calculation of Enzyme Fluctuograms from All-Atom Molecular Dynamics +# Simulation. Meth Enzymology. 578 (2016), 327-342, +# doi:10.1016/bs.mie.2016.05.024. 
+from .eigh import Eigh +from .ica import ICA +from .ipca import IPCA +from .svd import SVD diff --git a/src/fluctmatch/decomposition/eigh.py b/src/fluctmatch/decomposition/eigh.py index fb6ecd4..d75098b 100644 --- a/src/fluctmatch/decomposition/eigh.py +++ b/src/fluctmatch/decomposition/eigh.py @@ -1,19 +1,39 @@ -# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding: utf-8 -*- -# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# -*- coding: utf-8 -*- # -# fluctmatch --- https://github.com/tclick/python-fluctmatch -# Copyright (c) 2015-2017 The fluctmatch Development Team and contributors -# (see the file AUTHORS for the full list of names) +# python-fluctmatch - +# Copyright (c) 2019 Timothy H. Click, Ph.D. # -# Released under the New BSD license. +# All rights reserved. # -# Please cite your use of fluctmatch in published work: +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: # -# Timothy H. Click, Nixon Raj, and Jhih-Wei Chu. -# Calculation of Enzyme Fluctuograms from All-Atom Molecular Dynamics -# Simulation. Meth Enzymology. 578 (2016), 327-342, -# doi:10.1016/bs.mie.2016.05.024. +# Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. # +# Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# Neither the name of the author nor the names of its contributors may be used +# to endorse or promote products derived from this software without specific +# prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR +# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Timothy H. Click, Nixon Raj, and Jhih-Wei Chu. +# Calculation of Enzyme Fluctuograms from All-Atom Molecular Dynamics +# Simulation. Meth Enzymology. 578 (2016), 327-342, +# doi:10.1016/bs.mie.2016.05.024. import numpy as np from scipy.sparse import linalg diff --git a/src/fluctmatch/decomposition/ica.py b/src/fluctmatch/decomposition/ica.py index d960c1b..c9878eb 100644 --- a/src/fluctmatch/decomposition/ica.py +++ b/src/fluctmatch/decomposition/ica.py @@ -1,11 +1,39 @@ -# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding: utf-8 -*- -# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# -*- coding: utf-8 -*- # -# fluctmatch --- https://github.com/tclick/python-fluctmatch -# Copyright (c) 2015-2017 The fluctmatch Development Team and contributors -# (see the file AUTHORS for the full list of names) +# python-fluctmatch - +# Copyright (c) 2019 Timothy H. Click, Ph.D. # -# Released under the New BSD license. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# Neither the name of the author nor the names of its contributors may be used +# to endorse or promote products derived from this software without specific +# prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR +# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Timothy H. Click, Nixon Raj, and Jhih-Wei Chu. +# Calculation of Enzyme Fluctuograms from All-Atom Molecular Dynamics +# Simulation. Meth Enzymology. 578 (2016), 327-342, +# doi:10.1016/bs.mie.2016.05.024. # # Please cite your use of fluctmatch in published work: # @@ -20,14 +48,14 @@ # C. Brodbeck, R. Goj, M. Jas, T. Brooks, L. Parkkonen, M. 
Hämäläinen, # MEG and EEG data analysis with MNE-Python, Frontiers in Neuroscience, # Volume 7, 2013, ISSN 1662-453X, + import logging from copy import deepcopy import numpy as np from scipy import linalg from sklearn.base import BaseEstimator, TransformerMixin -from sklearn.decomposition import PCA, FastICA -from sklearn.pipeline import make_pipeline +from sklearn.decomposition import FastICA from sklearn.utils.validation import ( as_float_array, check_array, check_is_fitted, check_random_state, FLOAT_DTYPES ) @@ -403,26 +431,16 @@ class ICA(BaseEstimator, TransformerMixin): """Signal decomposition using Independent Component Analysis (ICA). This object can be used to estimate ICA components and then remove some - from Raw or Epochs for data exploration or artifact correction. + for data exploration or artifact correction. Caveat! If supplying a noise covariance, keep track of the projections - available in the cov or in the raw object. For example, if you are - interested in EOG or ECG artifacts, EOG and ECG projections should be - temporally removed before fitting ICA, for example:: - - >> projs, raw.info['projs'] = raw.info['projs'], [] - >> ica.fit(raw) - >> raw.info['projs'] = projs + available in the cov or in the raw object. .. note:: Methods currently implemented are FastICA (default), Infomax, - Extended Infomax. Infomax can be quite sensitive to - differences in floating point arithmetic. Extended Infomax seems - to be more stable in this respect enhancing reproducibility and - stability of results. - - .. warning:: ICA is sensitive to low-frequency drifts and therefore - requires the data to be high-pass filtered prior to fitting. - Typically, a cutoff frequency of 1 Hz is recommended. + Extended Infomax. Infomax can be quite sensitive to differences in + floating point arithmetic. Extended Infomax seems to be more + stable in this respect enhancing reproducibility and stability of + results. 
Parameters ---------- diff --git a/src/fluctmatch/decomposition/ipca.py b/src/fluctmatch/decomposition/ipca.py new file mode 100644 index 0000000..3ca7fda --- /dev/null +++ b/src/fluctmatch/decomposition/ipca.py @@ -0,0 +1,146 @@ +# python-fluctmatch - +# Copyright (c) 2019 Timothy H. Click, Ph.D. +# +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# Neither the name of the author nor the names of its contributors may be used +# to endorse or promote products derived from this software without specific +# prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR +# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +"""Performs independent principal component analysis (see [1]_). + +References +---------- +.. [1] Yao, F.; Coquery, J.; Le Cao, K.-A. 2012. 
Independent Principal + Component Analysis for biologically meaningful dimension reduction of + large biological data sets. BMC Bioinformatics 13 (1). +""" + +from typing import Union + +import numpy as np +from scipy import linalg, stats +from sklearn.decomposition.base import BaseEstimator, TransformerMixin +from sklearn.decomposition import PCA +from sklearn.preprocessing import StandardScaler +from sklearn.pipeline import make_pipeline, Pipeline +from sklearn.utils.validation import check_array, check_is_fitted + +from .ica import ICA + + +class IPCA(BaseEstimator, TransformerMixin): + """Signal decomposition using Independent Principal Component Analysis (IPCA). + + This object can be used to estimate ICA components and then remove some + from Raw or Epochs for data exploration or artifact correction. + + .. note:: Methods currently implemented are FastICA (default), Infomax, + Extended Infomax. Infomax can be quite sensitive to differences in + floating point arithmetic. Extended Infomax seems to be more + stable in this respect enhancing reproducibility and stability of + results. + + Parameters + ---------- + n_components : int | float | None + Number of components to extract. If None no dimension reduction + is performed. + whiten : boolean, optional + If whiten is false, the data is already considered to be + whitened, and no whitening is performed. + random_state : None | int | instance of np.random.RandomState + Random state to initialize ICA estimation for reproducible results. + method : {'fastica', 'infomax', 'extended-infomax'} + The ICA method to use. Defaults to 'fastica'. For reference, see [2]_, + [3]_, and [4]_. + max_iter : int + The maximum number of iterations. Defaults to 1000. + copy : bool + Forwarded as ``copy`` to the PCA step in ``fit`` and to + ``check_array`` in ``transform``. Defaults to True; see those + functions for the exact copy semantics. 
+ + Attributes + ---------- + components_ : ndarray, shape (`n_components`, `n_features`) + If fit, the principal axes copied from the PCA step (``pca.components_``). + + References + ---------- + .. [2] Hyvärinen, A., 1999. Fast and robust fixed-point algorithms for + independent component analysis. IEEE transactions on Neural + Networks, 10(3), pp.626-634. + + .. [3] Bell, A.J., Sejnowski, T.J., 1995. An information-maximization + approach to blind separation and blind deconvolution. Neural + computation, 7(6), pp.1129-1159. + + .. [4] Lee, T.W., Girolami, M., Sejnowski, T.J., 1999. Independent + component analysis using an extended infomax algorithm for mixed + subgaussian and supergaussian sources. Neural computation, 11(2), + pp.417-441. + """ + def __init__(self, n_components: Union[int, float, str]=None, + whiten: bool=True, max_iter: int=1000, copy=True, + method: str= "fastica", + random_state: np.random.RandomState=None): + self.n_components: Union[int, float, str] = n_components + self.whiten: bool = whiten + self.max_iter: int = max_iter + self.copy: bool = copy + self.method: str = method + self.random_state: np.random.RandomState = random_state + + def fit(self, X: np.ndarray, y=None) -> "IPCA": + scale: StandardScaler = StandardScaler(with_std=self.whiten) + pca: PCA = PCA(n_components=self.n_components, svd_solver="full", + copy=self.copy) + pca_pipeline: Pipeline = make_pipeline(scale, pca) + self.pca_projection_: np.ndarray = pca_pipeline.fit_transform(X) + self.components_: np.ndarray = pca.components_ + self.singular_values_: np.ndarray = pca.singular_values_ + self.explained_variance_: np.ndarray = pca.explained_variance_ + self.explained_variance_ratio_: np.ndarray = pca.explained_variance_ratio_ + return self + + def transform(self, X: np.ndarray) -> np.ndarray: + check_is_fitted(self, "components_") + + X: np.ndarray = check_array(X, copy=self.copy) + X = StandardScaler().fit_transform(X) + scale: StandardScaler = StandardScaler() + ica: ICA = 
ICA(whiten=False, 
method=self.method, max_iter=self.max_iter, + random_state=self.random_state) + ica_pipeline: Pipeline = make_pipeline(scale, ica) + S: np.ndarray = ica_pipeline.fit_transform(self.components_.T) + + # Sort signals by descending kurtosis; keep those with |kurtosis| >= 1. + kurtosis: np.ndarray = stats.kurtosis(S) + idx: np.ndarray = np.argsort(-kurtosis) + self.kurtosis_: np.ndarray = kurtosis[idx] + S: np.ndarray = S[:, idx][:, np.where(np.abs(self.kurtosis_) >= 1.)[0]] + S /= linalg.norm(S, ord=2) + self.signal_ = S.copy() + return S diff --git a/src/fluctmatch/decomposition/svd.py b/src/fluctmatch/decomposition/svd.py index 6ed5d7d..f832676 100644 --- a/src/fluctmatch/decomposition/svd.py +++ b/src/fluctmatch/decomposition/svd.py @@ -1,29 +1,48 @@ -# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding: utf-8 -*- -# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# -*- coding: utf-8 -*- # -# fluctmatch --- https://github.com/tclick/python-fluctmatch -# Copyright (c) 2015-2017 The fluctmatch Development Team and contributors -# (see the file AUTHORS for the full list of names) +# python-fluctmatch - +# Copyright (c) 2019 Timothy H. Click, Ph.D. # -# Released under the New BSD license. +# All rights reserved. # -# Please cite your use of fluctmatch in published work: +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: # -# Timothy H. Click, Nixon Raj, and Jhih-Wei Chu. -# Calculation of Enzyme Fluctuograms from All-Atom Molecular Dynamics -# Simulation. Meth Enzymology. 578 (2016), 327-342, -# doi:10.1016/bs.mie.2016.05.024. +# Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. # +# Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. 
+# +# Neither the name of the author nor the names of its contributors may be used +# to endorse or promote products derived from this software without specific +# prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR +# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Timothy H. Click, Nixon Raj, and Jhih-Wei Chu. +# Calculation of Enzyme Fluctuograms from All-Atom Molecular Dynamics +# Simulation. Meth Enzymology. 578 (2016), 327-342, +# doi:10.1016/bs.mie.2016.05.024. 
+ import logging -from typing import Optional, Tuple, Union import numpy as np from scipy import linalg -from sklearn.base import BaseEstimator, TransformerMixin from sklearn.decomposition.base import _BasePCA from sklearn.utils import check_array, safe_sqr from sklearn.utils.validation import check_is_fitted -from sklearn.utils.extmath import fast_logdet, svd_flip +from sklearn.utils.extmath import svd_flip logger = logging.getLogger(__name__) diff --git a/src/fluctmatch/fluctsca/finddims.py b/src/fluctmatch/fluctsca/finddims.py index 81015de..5586cb4 100644 --- a/src/fluctmatch/fluctsca/finddims.py +++ b/src/fluctmatch/fluctsca/finddims.py @@ -17,9 +17,10 @@ import numpy as np from numpy.random import RandomState from sklearn.base import BaseEstimator, TransformerMixin -from sklearn.preprocessing import scale, StandardScaler +from sklearn.preprocessing import StandardScaler from sklearn.pipeline import make_pipeline, Pipeline -from sklearn.utils.validation import check_array, check_is_fitted, FLOAT_DTYPES +from sklearn.utils.validation import (check_array, FLOAT_DTYPES, + check_random_state, check_is_fitted) from ..decomposition.svd import SVD @@ -30,48 +31,47 @@ class FindDims(BaseEstimator, TransformerMixin): """ def __init__(self, whiten: bool=True, max_iter: int=100, stddev: int=2, random_state: RandomState=None, - tol: float=0.99, algorithm="auto"): + tol: float=0.99): self.whiten: bool=whiten self.max_iter: int= max_iter self.stddev: int= stddev self.random_state: RandomState= random_state self.tol: float= tol - self.algorithm: str= algorithm def fit(self, X: np.ndarray) -> "FindDims": X: np.ndarray = check_array(X, copy=True, dtype=FLOAT_DTYPES) + random_state = check_random_state(self.random_state) + Xt = X.T n_samples, n_features = X.shape scaler: StandardScaler = StandardScaler() - svd: SVD = SVD(random_state=self.random_state, - iterated_power=self.max_iter, - algorithm=self.algorithm) + svd: SVD = SVD() pipeline: Pipeline = ( make_pipeline(scaler, 
svd) if self.whiten else make_pipeline(svd) ) - scaler.fit(X) - self.mean_: np.ndarray = np.tile(scaler.mean_[None, :], (n_samples, 1)) - self.std_: np.ndarray = np.tile(scaler.var_[None, :], (n_samples, 1)) + scaler.fit(Xt) + self.mean_: np.ndarray = np.tile(scaler.mean_[None, :], (n_features, 1)).T + self.std_: np.ndarray = np.tile(scaler.var_[None, :], (n_features, 1)).T self.positive_: bool = np.all(X >= 0.) - self.random_: np.ndarray = np.empty((self.max_iter, n_features), + self.random_: np.ndarray = np.empty((self.max_iter, np.min(X.shape)), dtype=X.dtype) for _ in range(self.max_iter): - Y: np.ndarray = np.random.normal(self.mean_, self.std_) + Y: np.ndarray = random_state.normal(self.mean_, self.std_) if self.positive_: Y[Y < 0.] = 0. pipeline.fit(Y) - self.random_[_, :] = svd.explained_variance_.copy() + self.random_[_, :] = svd.singular_values_.copy() return self def transform(self, X: np.ndarray) -> int: + check_is_fitted(self, ["random_"]) + scaler: StandardScaler = StandardScaler() - svd: SVD = SVD(random_state=self.random_state, - iterated_power=self.max_iter, - algorithm=self.algorithm) + svd: SVD = SVD() pipeline: Pipeline = ( make_pipeline(scaler, svd) if self.whiten @@ -79,16 +79,14 @@ def transform(self, X: np.ndarray) -> int: ) pipeline.fit(X) - self.eigenvector_: np.ndarray = svd.explained_variance_.copy() if self.whiten: - eigenvector_: np.ndarray = svd.explained_variance_ + self.eigenvector_ = eigenvector_ = svd.singular_values_ mean: np.ndarray = self.random_.mean(axis=1)[1] std: np.ndarray = self.random_.std(axis=1)[1] value: float = mean + ((self.stddev + 1) * std) n_components: int = eigenvector_[eigenvector_ > value].size else: - explained_ratio: np.ndarray = svd.explained_variance_ratio_.cumsum() + self.eigenvector_ = explained_ratio = svd.explained_variance_ratio_.cumsum() n_components: int = explained_ratio[explained_ratio <= self.tol].size - self.n_components: int = n_components return n_components