## Why is regularized CCA needed?
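Regularized CCA is needed in place of plain CCA when there are two views of the data and the number of samples is small compared to the number of features in either view. In that regime the within-view covariance matrices are singular or ill-conditioned, so unregularized CCA overfits and can report spuriously perfect correlations; the regularization term stabilizes the solution.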

In [12]:
import numpy as np

In [13]:
# Fixed seed so that Restart Kernel -> Run All regenerates exactly the same data.
SEED = 42
rng = np.random.default_rng(SEED)

samples = 6
n_train = 4  # first 4 rows are used for training, the remaining 2 for testing

# Two latent signals that are shared by both views.
latvar1 = rng.standard_normal(samples)
latvar2 = rng.standard_normal(samples)

# View-specific independent noise: view 1 has 7 features, view 2 has 8.
indep1 = rng.standard_normal((samples, 7))
indep2 = rng.standard_normal((samples, 8))

# Create two datasets, with each dimension composed as a sum of 75% one of the latent variables and 25% independent component
# `a` alternates latvar1, latvar2, ... row-wise and has shape (14, samples);
# its first 7 (resp. 8) rows, transposed, give the latent part of each view.
a = np.array((latvar1, latvar2) * 7)
data1 = 0.25 * indep1 + 0.75 * a[:7].T
data2 = 0.25 * indep2 + 0.75 * a[:8].T

# Row-wise train/test split, identical for both views so the samples stay paired.
train1 = data1[:n_train]
train2 = data2[:n_train]
test1 = data1[n_train:]
test2 = data2[n_train:]

In [14]:
import rcca
from sklearn.preprocessing import StandardScaler

In [15]:
# Show the raw (unscaled) training rows of view 1, then view 2.
print(train1, train2, sep="\n")

[[-0.72616411 -1.09465342 -0.83486294 -0.74594287 -0.44173743 -1.35370751
  -1.44399677]
 [ 1.10619009  1.62198266  0.70738313  1.4291668   1.10374839  1.16517865
   0.77589611]
 [-1.08718659  0.55269356 -0.26636467  0.6160399  -1.14936629  0.29172956
  -1.11744266]
 [ 0.07302111  0.55743761  0.09457326  0.65024627  0.22873065  0.82386078
   0.31943011]]
[[-0.83805559 -0.90587929 -0.98939598 -1.00142378 -0.96218901 -0.91641909
  -1.09727994 -1.0181483 ]
 [ 0.98666932  0.88464573  1.81403892  0.54822069  0.84917476  0.78568102
   0.57291968  0.30360778]
 [-0.59746003  0.0060637  -0.58799486 -0.14507019 -1.25073987  0.04398425
  -0.6500493  -0.12880329]
 [ 0.59217356  0.68290409  0.00958009  0.7825918  -0.09696398  0.5475091
   0.33683746  0.98728202]]


In [16]:
# One scaler per view: the two views have different feature counts (7 vs 8),
# and each fitted scaler is needed later to apply the *training* statistics to
# the matching held-out view, e.g. sc1.transform(test1) / sc2.transform(test2).
sc1 = StandardScaler()
sc2 = StandardScaler()
t1 = sc1.fit_transform(train1)
t2 = sc2.fit_transform(train2)

# Backward compatibility: a single `sc` used to be refitted on each view in
# turn, so it ended up holding the view-2 statistics. Keep that name alive.
sc = sc2

In [17]:
# Show the standardized training rows of view 1, then view 2.
print(t1, t2, sep="\n")

[[-0.67391964 -1.54819536 -1.35695821 -1.57551684 -0.45312978 -1.63984178
  -1.15178275]
 [ 1.5015488   1.24823521  1.39651441  1.20309764  1.40404458  0.96542181
   1.22121878]
 [-1.10254475  0.14753838 -0.34198113  0.16436097 -1.30347091  0.06202047
  -0.80270582]
 [ 0.27491559  0.15242177  0.30242494  0.20805822  0.35255611  0.6123995
   0.73326978]]
[[-1.13330805 -1.53331638 -0.97998527 -1.50864538 -0.72838087 -1.57974697
  -1.29069487 -1.44969387]
 [ 1.23310141  1.0257892   1.63414196  0.7231984   1.48157184  1.02675446
   1.13722477  0.36804825]
 [-0.82128972 -0.22992303 -0.60568958 -0.27529961 -1.08042709 -0.10903872
  -0.64056895 -0.22662411]
 [ 0.72149635  0.73745021 -0.0484671   1.06074659  0.32723613  0.66203123
   0.79403905  1.30826973]]


In [18]:
# Regularized CCA with two components, fitted on the pair of standardized
# training views. The fit call stays last so the cell still displays its result.
rcca_settings = {"n_comp": 2, "reg_param": 0.6}
cca = rcca.RCCA(**rcca_settings)
cca.fit([t1, t2])

Training RCCA with regularization parameter = 0.6 and 2 components


<rcca.RCCA at 0x21f431d3308>

In [19]:
# Project both standardized training views with the fitted model. The displayed
# result is a list with one (4, 2) array per view: 4 training rows, 2 components.
cca.transform([t1,t2])

[array([[-0.8659283 ,  0.61039765],
        [ 0.92323521,  0.28466866],
        [-0.35311979, -0.81557902],
        [ 0.29581288, -0.0794873 ]]),
 array([[-0.87475415,  0.55043539],
        [ 0.8618747 ,  0.43010532],
        [-0.39346037, -0.56955951],
        [ 0.40633983, -0.4109812 ]])]

In [20]:
# Displays an array of two values, one per requested component (n_comp = 2).
# NOTE(review): presumably the canonical correlations on the training views — confirm against rcca.
cca.variates 

array([0.99515106, 0.9094706 ])

In [21]:
# Per-view weights: the displayed list holds a (7, 2) array for view 1 and an
# (8, 2) array for view 2, i.e. one row per input feature, one column per component.
cca.weights

[array([[ 0.10009401,  0.31651832],
        [ 0.08334018, -0.1019833 ],
        [ 0.10321871,  0.00719266],
        [ 0.08691663, -0.14046065],
        [ 0.1015408 ,  0.34760041],
        [ 0.11598686, -0.33139967],
        [ 0.1356443 , -0.05933635]]),
 array([[ 0.09351357,  0.20454562],
        [ 0.093828  , -0.20957072],
        [ 0.14054833,  0.09802608],
        [ 0.06237321, -0.16852906],
        [ 0.10884123,  0.42698958],
        [ 0.09815587, -0.28854462],
        [ 0.09041389,  0.07199727],
        [ 0.02899651, -0.17302547]])]

In [22]:
# Displays the same values that cca.transform([t1, t2]) returned for the training views.
# NOTE(review): presumably the components stored on the model during fit — confirm against rcca.
cca.c_comp

[array([[-0.8659283 ,  0.61039765],
        [ 0.92323521,  0.28466866],
        [-0.35311979, -0.81557902],
        [ 0.29581288, -0.0794873 ]]),
 array([[-0.87475415,  0.55043539],
        [ 0.8618747 ,  0.43010532],
        [-0.39346037, -0.56955951],
        [ 0.40633983, -0.4109812 ]])]