In [None]:
%matplotlib inline

import numpy as np
import matplotlib.pyplot as plt

## Original Space
This joint space captures the semantics behind the points.
It is the space that CCA should recover.  
The two colors represent two different "Themes", for example "Airplanes" and "Cars".

In [None]:
# Seed NumPy's global generator so the sampled clusters (and every plot and
# error value derived from them) are identical on each Restart & Run All.
SEED = 0
np.random.seed(SEED)

# Theme 1: 2-D Gaussian centred at the origin.
mean_1 = np.array([0, 0])
sigma_1 = np.array([[1, 0.2], [0.2, 2]])
# Theme 2: 2-D Gaussian centred at (5, 5).
mean_2 = np.array([5, 5])
sigma_2 = np.array([[2, 0.2], [0.2, 1]])

# Draw 100 points per theme; each result is a (100, 2) array.
data_1 = np.random.multivariate_normal(mean_1, sigma_1, 100)
data_2 = np.random.multivariate_normal(mean_2, sigma_2, 100)

# Scatter both themes in the original space: theme 1 as red circles,
# theme 2 as blue circles.
plt.plot(data_1.T[0], data_1.T[1], 'ro')
plt.plot(data_2.T[0], data_2.T[1], 'bo')
plt.title("Original space")
# Fixed viewing window [xmin, xmax, ymin, ymax] shared by both clusters.
plt.axis([-5, 10, -5, 10])
plt.show()

## Visual space
Let's create a first mapping from the original space to a "visual space"

In [None]:
def visual_mapping(x, y):
    """Map original-space coordinates to the "visual" view.

    The two output features are the sum ``x + y`` and the difference
    ``x - y``. They are stacked and transposed, so for 1-D inputs of
    length n the result has shape (n, 2): one row per point.
    """
    total = x + y
    difference = x - y
    return np.transpose(np.array([total, difference]))

In [None]:
# Send each theme through the visual mapping; data_k.T unpacks into the
# x-coordinates and y-coordinates of the original points.
data_1_visual = visual_mapping(*data_1.T)
data_2_visual = visual_mapping(*data_2.T)

# Squares mark the visual view: theme 1 in red, theme 2 in blue.
plt.plot(data_1_visual.T[0], data_1_visual.T[1], 'rs')
plt.plot(data_2_visual.T[0], data_2_visual.T[1], 'bs')
plt.title('Visual space')
plt.show()

## Textual space
Similarly, create a second mapping from the original space to a "textual space"

In [None]:
def textual_mapping(x, y):
    """Map original-space coordinates to the "textual" view.

    The two output features are ``x * cos(y)`` and ``y * sin(x)``, a
    nonlinear mix of both coordinates. They are stacked and transposed,
    so for 1-D inputs of length n the result has shape (n, 2).
    """
    first_feature = x * np.cos(y)
    second_feature = y * np.sin(x)
    return np.transpose(np.array([first_feature, second_feature]))

In [None]:
data_1_textual = textual_mapping(data_1[:, 0], data_1[:, 1])
data_2_textual = textual_mapping(data_2[:, 0], data_2[:, 1])

plt.plot(data_1_textual[:, 0], data_1_textual[:, 1], 'r^')
plt.plot(data_2_textual[:, 0], data_2_textual[:, 1], 'b^')
plt.title('Textual space')
plt.show()

# CCA

$\newcommand{\norm}[1]{\|#1\|}$

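We want to find projection matrices $W_1$ and $W_2$ that bring the visual features $V$ and the textual features $T$ as close together as possible in a shared latent space:
$$ \min_{W_1, W_2} \norm{V W_1 - T W_2}_F \quad \text{s.t.} \quad W_1^\top V^\top V W_1 = W_2^\top T^\top T W_2 = I $$
The constraints (stated for centered $V$ and $T$) rule out the trivial solution $W_1 = W_2 = 0$.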

In [None]:
def apply_cca(V, T, title='Joint latent space'):
    """Fit a 2-component CCA on two paired views and plot the latent space.

    Parameters
    ----------
    V : array-like, one row per sample
        Features of the first ("visual") view.
    T : array-like, one row per sample
        Features of the second ("textual") view; row i is paired with
        row i of ``V``.
    title : str
        Title forwarded to ``evaluate_latent_space``.

    Notes
    -----
    This function reads two notebook-level names that must exist before it
    is called: ``CCA`` (imported from ``sklearn.cross_decomposition``) and
    ``labels`` (the per-sample theme ids handed to
    ``evaluate_latent_space``).
    """
    d = 2 # Dimension of the final joint latent space
    cca = CCA(n_components=d, scale=False)
    cca.fit(V, T)

    # Compute features in the new latent space with the fitted estimator.
    # transform() subtracts the training means and applies the rotation
    # matrices (x_rotations_ / y_rotations_). Multiplying the raw data by
    # x_weights_ / y_weights_ skips the centering, and the weights only
    # coincide with the rotations for the first component.
    V_latent, T_latent = cca.transform(V, T)

    evaluate_latent_space(V_latent, T_latent, labels, title)

def evaluate_latent_space(V_latent, T_latent, labels, title='Joint latent space'):
    '''Score and plot two views projected into a shared latent space.

    The score is ``np.linalg.norm(V_latent - T_latent)``, shown in the
    figure title. Samples are split by theme using ``labels`` (entries
    equal to 1 or 2) and the first two latent dimensions are scattered:
    squares for the visual view, triangles for the textual view, red for
    theme 1 and blue for theme 2.
    '''
    error = np.linalg.norm(V_latent - T_latent)

    # (view, theme id, marker, legend entry), listed in drawing order.
    layout = (
        (V_latent, 1, 'rs', '1 (from visual)'),
        (T_latent, 1, 'r^', '1 (from textual)'),
        (V_latent, 2, 'bs', '2 (from visual)'),
        (T_latent, 2, 'b^', '2 (from textual)'),
    )
    # Select each theme's rows up front, before any figure is created.
    series = [(view[labels == theme], marker, legend_entry)
              for view, theme, marker, legend_entry in layout]

    plt.figure(figsize=(12, 8))
    for points, marker, legend_entry in series:
        plt.plot(points[:, 0], points[:, 1], marker, label=legend_entry)
    plt.title('%s\n Error: %.2f' % (title, error), fontsize=20)
    plt.legend(numpoints=1, loc='lower left')
    plt.show()

In [None]:
from sklearn.cross_decomposition import CCA

# Concatenate all features into matrices (theme-1 rows first, then theme 2)
V = np.concatenate((data_1_visual, data_2_visual)) # Visual features
T = np.concatenate((data_1_textual, data_2_textual)) # Textual features

# Theme id per row: 1 for the leading theme-1 rows, 2 for the rest.
# Theme-2 rows start right after the theme-1 rows, so the split index is the
# theme-1 sample count (not the theme-2 count, which only matches when both
# clusters have the same size).
labels = np.ones(V.shape[0])
labels[data_1_visual.shape[0]:] = 2

# Apply CCA
apply_cca(V,T)

# Apply CCA with nonlinear kernel mappings: augment each 2-D view with the
# cosine and sine of both coordinates (6 features per sample).
phi = lambda x,y : np.array([x,y,np.cos(x),np.cos(y),np.sin(x),np.sin(y)]).T
V = phi(V[:,0],V[:,1])
T = phi(T[:,0],T[:,1])
apply_cca(V,T)