In [None]:
import numpy as np
from sklearn.cross_decomposition import CCA
import matplotlib.pyplot as plt


In [None]:
# Read the preprocessed arrays from disk. For an .npz archive np.load returns
# a lazy NpzFile that holds the file open; the `with` block closes it once
# every array needed below has been pulled into memory.
with np.load('processed_data.npz') as saved_data:
    X = saved_data['X']
    X_sym = saved_data['X_sym']
    Y = saved_data['Y']
    Z = saved_data['Z']
    pids = saved_data['pids']

# Sizes taken from the loaded arrays: M is the first-axis length of X,
# p and q are the second-axis lengths of X and Y respectively.
# NOTE(review): presumably M = number of samples, p/q = feature counts — confirm.
M = X.shape[0]
p = X.shape[1]; q = Y.shape[1]

In [None]:
# Leading portion of the rows used for fitting: int(split * M) + 1 rows.
split = 0.8
m_train = 1 + int(split * M)

# The same leading-row slice is applied to both views. The x view takes
# index 0 along the last axis of X_sym; the y view takes the rows of Z.
train_rows = slice(None, m_train)
x_train = X_sym[train_rows, :, 0]
y_train = Z[train_rows, :]

# Fit a 3-component CCA on the training rows, then compute the training
# scores for both views with the fitted model.
cca = CCA(n_components=3)
x_c, y_c = cca.fit(x_train, y_train).transform(x_train, y_train)

In [None]:
# Project the held-out rows (everything from row m_train onward) with the
# CCA model fitted on the training rows.
x_test = X_sym[m_train:, :, 0]; y_test = Z[m_train:, :]
x_tc, y_tc = cca.transform(x_test, y_test)

# One panel per canonical component: x-scores (u) against y-scores (v),
# training rows in blue and held-out rows in red.
fig, axes = plt.subplots(1, 2, figsize=(15, 6))
panel_titles = ['Projection onto First Canonical Vectors',
                'Projection onto Second Canonical Vectors']
for k, (ax, title) in enumerate(zip(axes, panel_titles)):
    ax.set_title(title)
    ax.scatter(x_c[:, k], y_c[:, k], c='b', label='Training Data')
    ax.scatter(x_tc[:, k], y_tc[:, k], c='r', label='Test Data')
    ax.set_xlabel('u'); ax.set_ylabel('v')
    # The scatter `label=` values are only drawn when a legend is requested.
    ax.legend()
fig.savefig('CCA_results.png')


## Sanity check

In [None]:
# Baseline check: A and B are drawn independently from a standard normal,
# so there is no relationship built into the data.
A = np.random.randn(100, 20)
B = np.random.randn(100, 5)

# Fit a 2-component CCA on the pair and compute the scores for both views.
cca_t = CCA(n_components=2)
A_c, B_c = cca_t.fit(A, B).transform(A, B)

# First canonical component: A-scores against B-scores.
plt.figure()
plt.scatter(A_c[:, 0], B_c[:, 0])

# Second canonical component, in its own figure.
plt.figure()
plt.scatter(A_c[:, 1], B_c[:, 1])

In [None]:
# Check with a known linear relationship: B is an exact linear map of A
# (B = A @ C), so paired canonical scores are expected to line up closely.
C = np.random.randn(20,5)
A = np.random.randn(100,20)
B = A @ C
cca_t = CCA(n_components=2)
# Fit on the first 80 rows, with A as the first view and B as the second.
A_c, B_c = cca_t.fit_transform(A[:80,:], B[:80,:])

# Project the remaining 20 rows with the fitted model.
A_ct, B_ct = cca_t.transform(A[80:,:], B[80:,:])

# First canonical component: fitted rows in blue, held-out rows in red.
plt.figure()
plt.scatter(A_c[:,0], B_c[:,0], c='b'); plt.scatter(A_ct[:,0], B_ct[:,0], c='r')
# Second canonical component: both fitted and held-out scores use column 1.
plt.figure()
plt.scatter(A_c[:,1], B_c[:,1], c='b'); plt.scatter(A_ct[:,1], B_ct[:,1], c='r')