In [336]:
import pandas as pd

from sklearn.preprocessing import StandardScaler
from sklearn.cross_decomposition import CCA
from sklearn.datasets import load_diabetes

# Load the diabetes dataset
data = load_diabetes()
X = pd.DataFrame(data.data, columns=data.feature_names)# Split data into two sets: Demographic (age, sex) and medical indicators (BMI, BP, etc.)
X1 = X[['age', 'sex']]  # Set 1: Demographic variables
X2 = X[['bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']]  # Set 2: Medical variables# Standardizing the features
scaler = StandardScaler()
X1 = scaler.fit_transform(X1)
X2 = scaler.fit_transform(X2)

print(data.feature_names)

['age', 'sex', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']


In [337]:
import numpy as np

nb_samples = 50
z = np.random.randn(nb_samples, 1)
W1 = np.random.randn(1, 10)
W2 = np.random.randn(1, 15)
X1 = z @ W1
X2 = z @ W2
X = np.concatenate((X1, X2), axis=1)


In [338]:
# Create a CCA instance
cca = CCA(n_components=1)  # We’ll find 2 canonical components for simplicity# Fit the model
cca.fit(X1, X2)# Transform the data based on the CCA
X1_c, X2_c = cca.transform(X1, X2)# Display the canonical correlations
print("Canonical correlations:")
print(cca.score(X1, X2))

Canonical correlations:
0.9663264951887851


In [339]:
# Get the canonical weights for each variable
print("Canonical weights for X1 (demographics):", cca.x_weights_)
print("Canonical weights for X2 (medical indicators):", cca.y_weights_)

Canonical weights for X1 (demographics): [[-0.31622777]
 [ 0.31622777]
 [ 0.31622777]
 [ 0.31622777]
 [-0.31622777]
 [ 0.31622777]
 [ 0.31622777]
 [ 0.31622777]
 [ 0.31622777]
 [-0.31622777]]
Canonical weights for X2 (medical indicators): [[-0.25819889]
 [-0.25819889]
 [ 0.25819889]
 [ 0.25819889]
 [ 0.25819889]
 [-0.25819889]
 [ 0.25819889]
 [-0.25819889]
 [ 0.25819889]
 [-0.25819889]
 [-0.25819889]
 [-0.25819889]
 [-0.25819889]
 [-0.25819889]
 [ 0.25819889]]


In [340]:
import plotly.graph_objects as go

fig = go.Figure()
fig.add_trace(go.Scatter(
    x=X1_c[:, 0],
    y=X2_c[:, 0],
    mode='markers',
    marker=dict(size=10, color='blue'), 
    name='Points 2D'
))

fig.update_layout(
    xaxis_title = "Projection of X1",
    yaxis_title = "Projection of X2"
)


fig.show()

In [341]:
import os
import sys

notebook_dir = os.path.dirname(os.path.abspath("__file__"))
src_path = os.path.join(notebook_dir, "..", "code")
if src_path not in sys.path:
    sys.path.append(src_path)

from CCA import EM_for_PCCA
d_A, d_B = 10, 15
#W_0 = np.random.randn(d_A+d_B, 1)
W_0 = np.random.rand(d_A+d_B, 1)
#W_0 = np.zeros((d_A+d_B, 1))

phi_0 = np.eye(d_A+d_B)
W, phi, M = EM_for_PCCA(X, d_A, d_B, nb_components=1, W_0=W_0, phi_0=phi_0, max_iter = 10)

 20%|██        | 2/10 [00:00<00:00, 77.92it/s]


LinAlgError: Singular matrix

In [308]:
print(W_0)
print(W)
print(M)

[[0.94231466]
 [0.24733533]
 [0.42362313]
 [0.54122073]
 [0.58901068]
 [0.77422378]
 [0.87234321]
 [0.87990065]
 [0.08174275]
 [0.41122548]
 [0.40867269]
 [0.69837081]
 [0.12192993]
 [0.26621849]
 [0.35925725]
 [0.62372191]
 [0.48923138]
 [0.44235728]
 [0.80245903]
 [0.34900936]
 [0.07050456]
 [0.03260166]
 [0.43288169]
 [0.88870478]
 [0.74774318]]
[[ 1.19389444]
 [-0.06092274]
 [-0.23375943]
 [ 6.71786575]
 [-2.21827297]
 [ 0.65890052]
 [-2.02812347]
 [-3.99960527]
 [ 0.11734397]
 [ 3.76683712]
 [ 3.33618823]
 [-0.48755422]
 [-0.37161375]
 [ 1.09886313]
 [-2.18917235]
 [-3.37066685]
 [-2.54033016]
 [ 6.61059912]
 [-0.07044318]
 [ 0.44491127]
 [-0.95175125]
 [-9.3079557 ]
 [-4.77490321]
 [-0.83545552]
 [-1.24589407]]
[[0.0736582]]


In [309]:
cov = np.cov(X)
print(np.linalg.inv(cov))

[[-6.73683090e+13 -1.15198285e+15 -6.85338200e+14 ...  3.75709584e+14
   1.64208955e+14 -6.31333413e+14]
 [-4.21200230e+14 -1.51267645e+15 -7.80735244e+14 ...  5.62047540e+14
  -3.47599257e+14 -6.24059037e+15]
 [ 1.12344731e+14  3.61001320e+14  2.77257900e+14 ... -4.91768700e+14
  -5.89089372e+14  2.98973829e+15]
 ...
 [ 3.60406976e+14 -2.76370705e+15 -3.71571923e+14 ... -6.54765707e+14
  -1.83497028e+15 -1.11751059e+16]
 [-8.89808492e+14  3.73014967e+15  7.48917836e+13 ...  5.05248028e+13
   2.48117169e+14  1.03960732e+16]
 [-2.85008056e+15  1.09128925e+15  5.08928572e+14 ... -7.23448284e+15
  -3.70164682e+15  1.51266643e+16]]


In [310]:
def transform(X, M, nb_components, eps=1e-10):
    X1 = X[:, :d_A]
    X2 = X[:, d_A:]
    mu_1 = np.mean(X1, axis=0)
    mu_2 = np.mean(X2, axis=0)
    sigma_1 = np.cov((X1-mu_1).T)
    sigma_2 = np.cov((X2-mu_2).T)
    eigval1, eigvec1 = np.linalg.eigh(sigma_1)
    eigval2, eigvec2 = np.linalg.eigh(sigma_2)
    U1 = np.diag(1/np.sqrt(eigval1 + eps)) @ eigvec1
    U2 = np.diag(1/np.sqrt(eigval2+eps)) @ eigvec2
    print(eigval1)
    print(eigval2)
    M1, M2 = M, M

    proj1, proj2 = [], []
    for i in range(X.shape[0]):
        proj1.append(M1.T @ U1[:, :nb_components].T @ (X1[i] - mu_1))
        proj2.append(M2.T @ U2[:, :nb_components].T @ (X2[i] - mu_2))

    return proj1, proj2

In [342]:
X1_c, X2_c = transform(X, M, nb_components=1)
X1_c = np.array(X1_c)
X2_c = np.array(X2_c)

fig = go.Figure()
fig.add_trace(go.Scatter(
    x=X1_c[:, 0],
    y=X2_c[:, 0],
    mode='markers',
    marker=dict(size=10, color='blue'), 
    name='Points 2D'
))

fig.update_layout(
    xaxis_title = "Projection of X1",
    yaxis_title = "Projection of X2"
)


fig.show()

[-2.36315125e-15 -1.80072240e-16 -4.89972733e-17 -4.80423240e-18
  5.09005062e-19  2.06581828e-17  4.27105503e-16  9.49124541e-16
  1.74259621e-15  1.11537197e+01]
[-6.89971146e-16 -5.18878031e-16 -2.11954697e-16 -1.35464540e-16
 -1.03128605e-16 -2.72637615e-17 -1.09210997e-17  6.63075875e-18
  1.04298375e-17  7.08292428e-17  1.60254187e-16  3.10920113e-16
  5.18422857e-16  7.58005400e-16  6.14966090e+00]
