In [1]:
from sklearn.cross_decomposition import CCA
from sklearn.decomposition import PCA
from tqdm import tqdm_notebook as tqdm
import _pickle as cPickle
import numpy as np

In [2]:
def load_pickle(path):
    """Read the pickle file at ``path`` and return the object stored in it.

    The file is opened in binary mode and is closed again before the
    function returns.

    Note: unpickling can execute arbitrary code, so only point this at
    files from a trusted source.
    """
    with open(path, 'rb') as handle:
        return cPickle.load(handle)

def flatten_(value_):
    """Flatten every item of ``value_`` to 1-D and stack the results.

    Parameters
    ----------
    value_ : iterable of array-like
        One entry per sample (list, tuple, or an already stacked ndarray
        whose first axis indexes the samples).

    Returns
    -------
    numpy.ndarray
        ``np.array`` built from the raveled items; 2-D with one row per
        sample when all items hold the same number of elements.

    Unlike an in-place rewrite of ``value_[i]``, this does not modify the
    caller's container, so it also works for tuples and ndarray batches.
    """
    # Build a fresh list instead of overwriting the caller's elements.
    flat_rows = [np.asarray(item).ravel() for item in value_]
    return np.array(flat_rows)

def compute_cca(path_v, path_r, pca_components=50, random_state=0):
    """Print and return the first canonical correlation of two pickled data sets.

    Both pickles are loaded with ``load_pickle``, flattened to one row per
    sample with ``flatten_``, reduced independently with PCA, and then
    passed to a one-component CCA. The score is the Pearson correlation
    between the two resulting canonical variates.

    Parameters
    ----------
    path_v, path_r : str
        Paths of the two pickle files to compare. Both must contain the
        same number of samples.
    pca_components : int, default 50
        Number of principal components kept for each data set before CCA.
    random_state : int or None, default 0
        Seed forwarded to both PCA estimators so that repeated runs give
        the same score when PCA picks a randomized SVD solver.

    Returns
    -------
    float
        Correlation between the first pair of canonical variates.

    Raises
    ------
    ValueError
        If the two data sets contain a different number of samples.

    Notes
    -----
    The CCA is fitted and scored on the same samples, so the value is an
    in-sample correlation.
    """
    # load values and flatten each sample to a single row
    array_v = flatten_(load_pickle(path_v))
    array_r = flatten_(load_pickle(path_r))

    # Fail fast: CCA needs paired samples, and PCA on the full data is costly.
    if len(array_v) != len(array_r):
        raise ValueError(
            "Both data sets must have the same number of samples, got "
            "%d and %d" % (len(array_v), len(array_r))
        )

    print("Data dimensions before PCA : ", array_v.shape)

    # PCA: one independent estimator per data set, so neither fit is
    # overwritten by the other.
    pca_v = PCA(n_components=pca_components, random_state=random_state)
    pca_r = PCA(n_components=pca_components, random_state=random_state)
    array_v = pca_v.fit_transform(array_v)
    array_r = pca_r.fit_transform(array_r)

    print("Data dimensions after PCA : ", array_v.shape)

    # CCA: keep only the first pair of canonical variates
    cca = CCA(n_components=1)
    V_c, R_c = cca.fit_transform(array_v, array_r)

    # CCA score: off-diagonal entry of the 2x2 correlation matrix
    result = np.corrcoef(V_c.T, R_c.T)[0, 1]

    print("Canonical-Correlation Analysis score : ", result)
    return result

In [3]:
# Virtual-environment images: compare the *_v and *_r extracted-value pickles.
enet_v, enet_r = (
    "./virtual_data/extracted_values_v.pkl",
    "./virtual_data/extracted_values_r.pkl",
)
compute_cca(path_v=enet_v, path_r=enet_r)

Data dimensions before PCA :  (1000, 345600)
Data dimensions after PCA :  (1000, 50)
Canonical-Correlation Analysis score :  0.9463777252633526


In [4]:
# Real-world-environment images: compare the *_v and *_r extracted-value pickles.
enet_v, enet_r = (
    "./realworld_data/extracted_values_v.pkl",
    "./realworld_data/extracted_values_r.pkl",
)
compute_cca(path_v=enet_v, path_r=enet_r)

Data dimensions before PCA :  (1000, 345600)
Data dimensions after PCA :  (1000, 50)
Canonical-Correlation Analysis score :  0.9731857649361263
