From a52b99cef1e0243251e5d48b960a50a2909beff7 Mon Sep 17 00:00:00 2001
From: SIKAI ZHANG <34108862+MatthewSZhang@users.noreply.github.com>
Date: Tue, 24 Sep 2024 10:35:12 +0800
Subject: [PATCH] FEAT add ssc function

---
Review notes (ignored by `git am`):
- ssc() now accepts a 1-D y instead of failing on y.shape[1].
- ssc() reduces with np.sum rather than the builtin sum.
- The blob hashes on the `index` lines are those of the original commit;
  they do not describe the edited file contents.

 doc/index.rst                                |  3 +-
 fastcan/__init__.py                          |  2 +
 fastcan/_ssc.py                              | 53 +++++++++++++++++++
 .../{test_correlation.py => test_fastcan.py} |  1 -
 tests/test_ssc.py                            | 41 ++++++++++++++
 5 files changed, 98 insertions(+), 2 deletions(-)
 create mode 100644 fastcan/_ssc.py
 rename tests/{test_correlation.py => test_fastcan.py} (99%)
 create mode 100644 tests/test_ssc.py

diff --git a/doc/index.rst b/doc/index.rst
index 5af4147..c7163c7 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -12,12 +12,13 @@
 
 .. currentmodule:: fastcan
 
-FastCan Class
+API Reference
 ~~~~~~~~~~~~~
 .. autosummary::
    :toctree: generated/
 
    FastCan
+   ssc
 
 ...................
 
diff --git a/fastcan/__init__.py b/fastcan/__init__.py
index 4a8a395..cf177b5 100644
--- a/fastcan/__init__.py
+++ b/fastcan/__init__.py
@@ -3,7 +3,9 @@
 """
 
 from ._fastcan import FastCan
+from ._ssc import ssc
 
 __all__ = [
     "FastCan",
+    "ssc",
 ]
diff --git a/fastcan/_ssc.py b/fastcan/_ssc.py
new file mode 100644
index 0000000..d365074
--- /dev/null
+++ b/fastcan/_ssc.py
@@ -0,0 +1,53 @@
+"""Sum of squared canonical correlation coefficients."""
+
+import numpy as np
+from sklearn.cross_decomposition import CCA
+from sklearn.utils import check_X_y
+from sklearn.utils._param_validation import validate_params
+
+
+@validate_params(
+    {
+        "X": ["array-like"],
+        "y": ["array-like"],
+    },
+    prefer_skip_nested_validation=True,
+)
+def ssc(X, y):
+    """Sum of the squared canonical correlation coefficients.
+
+    Parameters
+    ----------
+    X : array-like of shape (n_samples, n_features)
+        Feature matrix.
+
+    y : array-like of shape (n_samples,) or (n_samples, n_outputs)
+        Target matrix. A 1-D target is treated as a single output column.
+
+    Returns
+    -------
+    ssc : float
+        Sum of the squared canonical correlation coefficients.
+
+    Examples
+    --------
+    >>> from fastcan import ssc
+    >>> X = [[1], [-1], [0]]
+    >>> y = [[0], [1], [-1]]
+    >>> ssc(X, y)
+    np.float64(0.25)
+    """
+    X, y = check_X_y(
+        X, y, dtype=float, ensure_2d=True, multi_output=True, ensure_min_samples=2
+    )
+    # check_X_y(multi_output=True) returns a 1-D y unchanged, but y.shape[1] is
+    # read below, so promote it to a single-column matrix.
+    if y.ndim == 1:
+        y = y.reshape(-1, 1)
+    n_components = min(X.shape[1], y.shape[1])
+    cca = CCA(n_components=n_components)
+    X_c, y_c = cca.fit_transform(X, y)
+    # corrcoef stacks the X and y scores into one (2k, 2k) matrix; the k-th
+    # superdiagonal holds the correlation of each paired canonical variate.
+    corrcoef = np.diagonal(np.corrcoef(X_c, y_c, rowvar=False), offset=n_components)
+    return np.sum(corrcoef**2)
diff --git a/tests/test_correlation.py b/tests/test_fastcan.py
similarity index 99%
rename from tests/test_correlation.py
rename to tests/test_fastcan.py
index c6efe44..0be407d 100644
--- a/tests/test_correlation.py
+++ b/tests/test_fastcan.py
@@ -1,4 +1,3 @@
-# pylint: skip-file
 """Test FastCan"""
 
 import numpy as np
diff --git a/tests/test_ssc.py b/tests/test_ssc.py
new file mode 100644
index 0000000..7e6160e
--- /dev/null
+++ b/tests/test_ssc.py
@@ -0,0 +1,41 @@
+"""Test ssc"""
+
+import numpy as np
+from numpy.testing import assert_almost_equal
+from sklearn.linear_model import LinearRegression
+
+from fastcan import ssc
+
+
+def test_pearson_r():
+    """Test Pearson's correlation."""
+    rng = np.random.default_rng(12345)
+    X = rng.random(100)
+    y = rng.random(100)
+    r2 = ssc(X.reshape(-1, 1), y.reshape(-1, 1))
+    gtruth_r2 = np.corrcoef(X, y)[0, 1] ** 2
+    assert_almost_equal(actual=r2, desired=gtruth_r2)
+
+
+def test_multi_r():
+    """Test multiple correlation."""
+    rng = np.random.default_rng(12345)
+    X = rng.random((100, 10))
+    y = rng.random(100)
+    r2 = ssc(X, y.reshape(-1, 1))
+    gtruth_r2 = LinearRegression().fit(X, y).score(X, y)
+    assert_almost_equal(actual=r2, desired=gtruth_r2)
+
+    X = rng.random(100)
+    y = rng.random((100, 10))
+    r2 = ssc(X.reshape(-1, 1), y)
+    gtruth_r2 = LinearRegression().fit(y, X).score(y, X)
+    assert_almost_equal(actual=r2, desired=gtruth_r2)
+
+
+def test_1d_y():
+    """Test that a 1-D target matches its single-column form."""
+    rng = np.random.default_rng(12345)
+    X = rng.random((100, 3))
+    y = rng.random(100)
+    assert_almost_equal(actual=ssc(X, y), desired=ssc(X, y.reshape(-1, 1)))