In [1]:
import warnings
warnings.filterwarnings("ignore")

In [2]:
import pandas as pd
import numpy as np
from glob import glob
import seaborn as sns
from matplotlib import pyplot as plt
sns.set_palette("colorblind")

from scipy.io import loadmat
from sklearn.neural_network import MLPRegressor

In [3]:
data_loc = 'data/'
result_loc = 'results/'
main_results_loc = 'results/'

In [4]:
import seaborn as sns
palette = sns.color_palette("Set1")

import matplotlib 
matplotlib.rc('xtick', labelsize=20) 
matplotlib.rc('ytick', labelsize=20) 
plt.rcParams.update({'font.size': 22})

In [5]:
mse_all = {}

for noise_sigma in [0.5, 0.6, 0.7, 0.8]:
    mode = 'basic'

    list_of_suffixes = ['CCA_HD']
   
    
    n = 100;
    p = 200;
    q = 200;
    d = 5;

    i = 0
    sparse_mse = {}

    for sigma in [noise_sigma]: 
            for itr in range(1, 51):  

                # 1. Load the matrices corresponding to X and Y
                data_loaded = loadmat(data_loc + mode + '/' + #str(int(sparsity*100)) + '_' + 
                                      str(n)+ '_' + str(p) + '_' + str(q) + '_' + str(d) +
                                      '_' + str(int(sigma*100)) + '_' + str(itr) + '.mat')


                X_train = np.array(data_loaded['X_train'])
                X_valid = np.array(data_loaded['X_valid'])
                X_test  = np.array(data_loaded['X_test'])

                Y_train = np.array(data_loaded['Y_train'])
                Y_valid = np.array(data_loaded['Y_valid'])
                Y_test  = np.array(data_loaded['Y_test'])
        
                w_train = np.array(data_loaded['w_train'])
                w_valid = np.array(data_loaded['w_valid'])
                w_test  = np.array(data_loaded['w_test'])
                
                # 4. Train the MLP regressor on these concatenated vectors, training on 'train' and tuning on 'valid'
                # 5. Run prediction on test set and note the MSE.

                regr = MLPRegressor(hidden_layer_sizes=(50), 
                                    random_state=1, max_iter=1000).fit(X_train.T, w_train)
                w_test_pred = regr.predict(X_test.T)
#                 curr_mse = (mean_squared_error(w_test, w_test_pred))
                curr_mse = 1/w_test.shape[0] * np.sum((w_test[0,:] - w_test_pred)**2)
                sparse_mse[i] = [mode, n, p, q, d, sigma, itr, 'Modality 1', curr_mse]
                i = i+1

                regr = MLPRegressor(hidden_layer_sizes=(50), random_state=1, max_iter=1000).fit(Y_train.T, w_train)
                w_test_pred = regr.predict(Y_test.T)
#                 curr_mse = (mean_squared_error(w_test, w_test_pred))
                curr_mse = 1/w_test.shape[0] * np.sum((w_test[0,:] - w_test_pred)**2)
                sparse_mse[i] = [mode, n, p, q, d, sigma, itr, 'Modality 2', curr_mse]
                i = i+1

                train_concat = np.concatenate((X_train, Y_train), axis=0).T
                valid_concat = np.concatenate((X_valid, Y_valid), axis=0).T
                test_concat = np.concatenate((X_test, Y_test), axis=0).T

                # 4. Train the MLP regressor on these concatenated vectors, training on 'train' and tuning on 'valid'
                # 5. Run prediction on test set and note the MSE.

                regr = MLPRegressor(hidden_layer_sizes=(100), random_state=1, max_iter=1000).fit(train_concat, w_train)
                w_test_pred = regr.predict(test_concat)
#                 curr_mse = (mean_squared_error(w_test, w_test_pred))
                curr_mse = 1/w_test.shape[0] * np.sum((w_test[0,:] - w_test_pred)**2)
                sparse_mse[i] = [mode, n, p, q, d, sigma, itr, 'Concat', curr_mse]
                i = i+1

                X_train = np.array(data_loaded['X_train'])
                X_valid = np.array(data_loaded['X_valid'])
                X_test  = np.array(data_loaded['X_test'])

                Y_train = np.array(data_loaded['Y_train'])
                Y_valid = np.array(data_loaded['Y_valid'])
                Y_test  = np.array(data_loaded['Y_test'])
                
                weights_loaded = loadmat(result_loc + mode + '/' + #str(int(sparsity*100)) + '_' + 
                                      str(n)+ '_' + str(p) + '_' + str(q) + '_' + str(d) +
                                      '_' + str(int(sigma*100)) + '_' + str(itr) + '.mat')
                
                for suffix in list_of_suffixes:
                    U_pred = np.array(weights_loaded['U_' + suffix])
                    V_pred = np.array(weights_loaded['V_' + suffix])

                    train_gen = U_pred.T.dot(X_train).T
                    valid_gen = U_pred.T.dot(X_valid).T
                    test_gen = U_pred.T.dot(X_test).T

                    train_img = V_pred.T.dot(Y_train).T
                    valid_img = V_pred.T.dot(Y_valid).T
                    test_img = V_pred.T.dot(Y_test).T

                    train_concat = np.concatenate((U_pred.T.dot(X_train), V_pred.T.dot(Y_train)), axis=0).T
                    valid_concat = np.concatenate((U_pred.T.dot(X_valid), V_pred.T.dot(Y_valid)), axis=0).T
                    test_concat = np.concatenate((U_pred.T.dot(X_test), V_pred.T.dot(Y_test)), axis=0).T

                    regr = MLPRegressor(hidden_layer_sizes=(100), random_state=1, max_iter=1000).fit(train_concat, w_train)
                    w_test_pred = regr.predict(test_concat)
                    curr_mse = 1/w_test.shape[0] * np.sum((w_test[0,:] - w_test_pred)**2)
                    sparse_mse[i] = [mode, n, p, q, d, sigma, itr, suffix, curr_mse]
                    i = i+1
    mse_all[noise_sigma] = sparse_mse

In [7]:
for noise_sigma in  [0.5, 0.6, 0.7, 0.8]:
    if noise_sigma == 0.5:
        results_df = pd.DataFrame.from_dict(mse_all[noise_sigma], orient='index')
        results_df = results_df.rename(columns={0:'simulation',
                                   1:'n', 
                                   2:'p',
                                   3:'q',
                                   4:'d',
                                   5:'sigma',
                                   6:'itr',
                                   7:'method',
                                   8:'mse'})
    
    else:
        temp_df = pd.DataFrame.from_dict(mse_all[noise_sigma], orient='index')
        temp_df = temp_df.rename(columns={0:'simulation',
                                   1:'n', 
                                   2:'p',
                                   3:'q',
                                   4:'d',
                                   5:'sigma',
                                   6:'itr',
                                   7:'method',
                                   8:'mse'})
        
        results_df = pd.concat([results_df, temp_df], axis=0)
        
unknown_methods = ["Modality 1", "Modality 2", "Concat", "CCA_HD"]
results_df = results_df[results_df["method"].isin(unknown_methods)]
results_df["mse"] = results_df["mse"] #*100

print_row = ["Noise $\sigma$"]
print_row.extend(unknown_methods)
print(" & ".join(print_row))

for noise_sigma in [0.5, 0.6, 0.7, 0.8]:
    print_row = [str("{:.2f}".format(noise_sigma))]
    for method in unknown_methods:
        curr_df = results_df[results_df["method"] == method]
        curr_df = curr_df[curr_df["sigma"] == noise_sigma]
        print_row.append(str("{:.3f}".format(curr_df['mse'].mean())) + " $\pm$ " + 
              str("{:.2f}".format(curr_df['mse'].std())))
    print(" & ".join(print_row) + "\\\\")

Noise $\sigma$ & Modality 1 & Modality 2 & Concat & CCA_HD
0.50 & 11.525 $\pm$ 2.55 & 11.713 $\pm$ 2.75 & 12.148 $\pm$ 2.76 & 11.472 $\pm$ 3.03\\
0.60 & 13.010 $\pm$ 2.98 & 13.093 $\pm$ 3.08 & 13.866 $\pm$ 3.01 & 12.061 $\pm$ 2.71\\
0.70 & 13.821 $\pm$ 3.77 & 13.922 $\pm$ 3.45 & 14.253 $\pm$ 3.51 & 12.040 $\pm$ 3.34\\
0.80 & 14.815 $\pm$ 4.02 & 15.160 $\pm$ 4.25 & 16.061 $\pm$ 4.29 & 11.992 $\pm$ 4.00\\
