In [197]:
import numpy as np

def partial_corr(X, Y):
    n = X.shape[0]
    cX = X - np.sum(X, axis=0, keepdims=True) / n
    cY = Y - np.sum(Y, axis=0, keepdims=True) / n
    cov_XY = 1. / (n - 1) * np.dot(cX.T, cY)
    var_X = 1. / (n - 1) * np.sum(cX**2, axis=0)
    var_Y = 1. / (n - 1) * np.sum(cY**2, axis=0)
    return cov_XY / np.sqrt(var_X[:, None] * var_Y[None, :])

def mean_center_scale(x):
    x = x - x.mean(0)
    return x / x.std(0)

class SCCA:
    """Implementation of sparce canonical correlation
    Reference: Parkhomenko et al.: Sparse Canonical Correlation Analysis
    This is a work in progress, not expected to work yet.
    """
    def __init__(self, X, Y, c1=0.00005, c2=0.00005, lu=0.00025, lv=0.000005):
        self.X = mean_center_scale(X)
        self.Y = mean_center_scale(Y)
        self.p = np.shape(X)[1]
        self.q = np.shape(Y)[1]
        self.c1 = c1
        self.c2 = c2
        self.lu = lu
        self.lv = lv

    def _K(self):
        xx = partial_corr(self.X, self.X)
        yy = partial_corr(self.Y, self.Y)
        xy = partial_corr(self.X, self.Y)
        xxi = np.linalg.inv(xx)
        yyi = np.linalg.inv(yy)
        return np.dot(xxi, xy).dot(yyi)

    def _norm(self, w):
        lengthw = np.linalg.norm(w)
        return w/lengthw

    def _soft_thresh(self, w, t):
        if t == 'u':
            w = (np.abs(w) - .5*self.lu) + np.sign(w)
        elif t == 'v':
            w = (np.abs(w) - .5*self.lv) + np.sign(w)
        return w

    def fit(self):
        u = np.random.sample(self.p)
        v = np.random.sample(self.q)
        K = self._K()

        i = 0
        
        while not (np.linalg.norm(u, ord=1) <= self.c1) and \
              not (np.linalg.norm(v, ord=1) <= self.c2):
                
            u = np.dot(K, v)
            u = self._norm(u)
            u = self._soft_thresh(u, 'u')
            u = self._norm(u)
            v = np.dot(K.T, u)
            v = self._norm(v)
            v = self._soft_thresh(v, 'v')
            v = self._norm(v)
            i += 1
            
            if i > 100000:
                break

        return {'K':K, 'u':u, 'v':v, 'i':i}

In [198]:
np.random.seed(1)
X = np.random.random((50,10))
Y = np.random.random((50,200))

In [203]:
from sklearn.datasets import make_multilabel_classification

In [204]:
X, Y = make_multilabel_classification(n_classes=2, n_labels=1,
                                      allow_unlabeled=True,
                                      random_state=1)

In [205]:
test = SCCA(X=X, Y=Y)

In [206]:
res = test.fit()

In [207]:
res

{'K': array([[-0.09256498, -0.01504999],
        [ 0.13691358, -0.33751055],
        [-0.08221492, -0.07064599],
        [ 0.00065494, -0.03195981],
        [ 0.0642199 ,  0.10329273],
        [-0.09500602,  0.18799675],
        [-0.24397874,  0.22625151],
        [ 0.0152739 , -0.10489335],
        [-0.02068866, -0.11487397],
        [ 0.06909904,  0.08751363],
        [-0.14909766,  0.02816326],
        [ 0.11456296,  0.13568287],
        [-0.0578814 , -0.28829252],
        [-0.09993971,  0.12685537],
        [-0.02455644, -0.20476883],
        [ 0.19678833, -0.05459793],
        [ 0.17967123, -0.0506575 ],
        [ 0.09393062,  0.02650467],
        [-0.12452442,  0.12273439],
        [ 0.18729089, -0.23080287]]),
 'i': 100001,
 'u': array([-0.20123622, -0.1336051 , -0.18361469, -0.21425528,  0.27517599,
         0.2727518 ,  0.25593289, -0.19135095, -0.18047042,  0.27057087,
        -0.20503924,  0.2969449 , -0.11132771,  0.2500434 , -0.14777366,
         0.24618376,  0.24408972,  

array([[ 5.,  3.,  2., ...,  2.,  2.,  3.],
       [ 4.,  2.,  3., ...,  5.,  2.,  1.],
       [ 0.,  0.,  3., ...,  2.,  4.,  3.],
       ..., 
       [ 1.,  2.,  2., ...,  6.,  2.,  4.],
       [ 0.,  0.,  1., ...,  2.,  2.,  2.],
       [ 2.,  2.,  4., ...,  1.,  2.,  2.]])