In [1]:
import warnings
# Install a global "ignore" filter: from here on every Python warning is
# suppressed for the rest of the session, including any raised while the
# models in the later cells are being fitted.
warnings.filterwarnings("ignore")

In [2]:
import pandas as pd
import numpy as np
from scipy.io import loadmat
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error

In [3]:
# Directory prefixes. File paths are assembled by plain string concatenation,
# so every prefix has to keep its trailing slash.
data_loc = "data/"             # prefix of the simulated data files read with loadmat
result_loc = "results/"        # prefix of the learnt-weight files read with loadmat
main_results_loc = "results/"  # not referenced by the cells visible in this notebook section


In [4]:
# Downstream-prediction benchmark on the 'sparse' simulation.
#
# For every noise level and every replicate:
#   1. Load the train/test matrices X and Y and the response w from data_loc.
#   2. Load the learnt weight matrices U_<suffix> and V_<suffix> from result_loc.
#   3. Build the feature sets: raw X, raw Y, raw [X; Y], and for each suffix the
#      projections U^T.X, V^T.Y and their concatenation [U^T.X; V^T.Y].
#   4. Fit an MLP regressor (fixed architecture, fixed seed) on each training
#      feature set and record the MSE of its predictions on the test set.
#
# The validation split stored in the .mat files is not read: no hyper-parameter
# is tuned in this cell.
# NOTE(review): the matrices are transposed before fitting, so samples are
# presumably stored as columns in the .mat files -- confirm against the generator.
#
# Result: sparse_mse_all[noise_sigma] is a dict {row index: [mode, n, p, q, d,
# sigma, itr, method label, test MSE]}.
sparse_mse_all = {}

for noise_sigma in [0.1]:
    mode = 'sparse'

    list_of_suffixes = ['SCCA_HD', 'GNSCCA_HD',
                        'SCCA_PD', 'GNSCCA_PD',
                        'SCCA_OPD', 'GNSCCA_OPD']

    # Simulation dimensions; in this cell they only enter the file names and
    # the recorded result rows.
    n = 100
    p = 200
    q = 200
    d = 5

    i = 0  # running row index into sparse_mse
    sparse_mse = {}
    # Sparsity level paired with each noise level.
    sparsity_sigma = {0.1: 0.25, 0.25: 0.5}

    for sparsity in [sparsity_sigma[noise_sigma]]:
        for sigma in [noise_sigma]:
            for itr in range(1, 11):
                # The data file and the weight file share the same stem.
                # round() before int() avoids float truncation in the name
                # (e.g. int(0.29 * 100) == 28).
                file_stem = '_'.join(str(part) for part in (
                    int(round(sparsity * 100)), n, p, q, d,
                    int(round(sigma * 100)), itr))

                data_loaded = loadmat(data_loc + mode + '/' + file_stem + '.mat')
                weights_loaded = loadmat(result_loc + mode + '/' + file_stem + '.mat')

                X_train = np.asarray(data_loaded['X_train'])
                X_test = np.asarray(data_loaded['X_test'])

                Y_train = np.asarray(data_loaded['Y_train'])
                Y_test = np.asarray(data_loaded['Y_test'])

                w_train = np.asarray(data_loaded['w_train'])
                w_test = np.asarray(data_loaded['w_test'])

                # Each entry: (method label, training features, test features,
                # number of hidden units). Single-view inputs use 50 hidden
                # units, concatenated inputs use 100.
                feature_sets = [
                    ('Genomics', X_train.T, X_test.T, 50),
                    ('Imaging', Y_train.T, Y_test.T, 50),
                    ('Concat',
                     np.concatenate((X_train, Y_train), axis=0).T,
                     np.concatenate((X_test, Y_test), axis=0).T,
                     100),
                ]

                for suffix in list_of_suffixes:
                    U_pred = np.asarray(weights_loaded['U_' + suffix])
                    V_pred = np.asarray(weights_loaded['V_' + suffix])

                    # Project each view once and reuse the result for both the
                    # single-view and the concatenated feature sets.
                    proj_x_train = U_pred.T.dot(X_train)
                    proj_x_test = U_pred.T.dot(X_test)
                    proj_y_train = V_pred.T.dot(Y_train)
                    proj_y_test = V_pred.T.dot(Y_test)

                    feature_sets += [
                        (suffix + '-GEN', proj_x_train.T, proj_x_test.T, 50),
                        (suffix + '-IMG', proj_y_train.T, proj_y_test.T, 50),
                        (suffix,
                         np.concatenate((proj_x_train, proj_y_train), axis=0).T,
                         np.concatenate((proj_x_test, proj_y_test), axis=0).T,
                         100),
                    ]

                for method_name, train_features, test_features, hidden_units in feature_sets:
                    # A fresh, identically seeded model per feature set keeps
                    # every fit independent of the others.
                    regr = MLPRegressor(hidden_layer_sizes=(hidden_units,), random_state=1,
                                        max_iter=1000).fit(train_features, w_train)
                    w_test_pred = regr.predict(test_features)
                    curr_mse = mean_squared_error(w_test, w_test_pred)
                    sparse_mse[i] = [mode, n, p, q, d, sigma, itr, method_name, curr_mse]
                    i = i + 1
    sparse_mse_all[noise_sigma] = sparse_mse

In [5]:
# Downstream-prediction benchmark on the 'graph' simulation.
#
# For every noise level and every replicate:
#   1. Load the train/test matrices X and Y and the response w from data_loc.
#   2. Load the learnt weight matrices U_<suffix> and V_<suffix> from result_loc.
#   3. Build the feature sets: raw X, raw Y, raw [X; Y], and for each suffix the
#      projections U^T.X, V^T.Y and their concatenation [U^T.X; V^T.Y].
#   4. Fit an MLP regressor (fixed architecture, fixed seed) on each training
#      feature set and record the MSE of its predictions on the test set.
#
# The validation split stored in the .mat files is not read: no hyper-parameter
# is tuned in this cell.
# NOTE(review): the matrices are transposed before fitting, so samples are
# presumably stored as columns in the .mat files -- confirm against the generator.
#
# Result: graph_mse_all[noise_sigma] is a dict {row index: [mode, n, p, q, d,
# sigma, itr, method label, test MSE]}.
graph_mse_all = {}

for noise_sigma in [0.1]:
    mode = 'graph'

    list_of_suffixes = ['SCCA_HD', 'GNSCCA_HD',
                        'SCCA_PD', 'GNSCCA_PD',
                        'SCCA_OPD', 'GNSCCA_OPD']

    # Simulation dimensions; in this cell they only enter the file names and
    # the recorded result rows.
    n = 100
    p = 200
    q = 200
    d = 5

    i = 0  # running row index into graph_mse
    graph_mse = {}

    for sigma in [noise_sigma]:
        for itr in range(1, 11):
            # The data file and the weight file share the same stem (no
            # sparsity prefix for this simulation). round() before int()
            # avoids float truncation in the name (e.g. int(0.29 * 100) == 28).
            file_stem = '_'.join(str(part) for part in (
                n, p, q, d, int(round(sigma * 100)), itr))

            data_loaded = loadmat(data_loc + mode + '/' + file_stem + '.mat')
            weights_loaded = loadmat(result_loc + mode + '/' + file_stem + '.mat')

            X_train = np.asarray(data_loaded['X_train'])
            X_test = np.asarray(data_loaded['X_test'])

            Y_train = np.asarray(data_loaded['Y_train'])
            Y_test = np.asarray(data_loaded['Y_test'])

            w_train = np.asarray(data_loaded['w_train'])
            w_test = np.asarray(data_loaded['w_test'])

            # Each entry: (method label, training features, test features,
            # number of hidden units). Single-view inputs use 50 hidden units,
            # concatenated inputs use 100.
            feature_sets = [
                ('Genomics', X_train.T, X_test.T, 50),
                ('Imaging', Y_train.T, Y_test.T, 50),
                ('Concat',
                 np.concatenate((X_train, Y_train), axis=0).T,
                 np.concatenate((X_test, Y_test), axis=0).T,
                 100),
            ]

            for suffix in list_of_suffixes:
                U_pred = np.asarray(weights_loaded['U_' + suffix])
                V_pred = np.asarray(weights_loaded['V_' + suffix])

                # Project each view once and reuse the result for both the
                # single-view and the concatenated feature sets.
                proj_x_train = U_pred.T.dot(X_train)
                proj_x_test = U_pred.T.dot(X_test)
                proj_y_train = V_pred.T.dot(Y_train)
                proj_y_test = V_pred.T.dot(Y_test)

                feature_sets += [
                    (suffix + '-GEN', proj_x_train.T, proj_x_test.T, 50),
                    (suffix + '-IMG', proj_y_train.T, proj_y_test.T, 50),
                    (suffix,
                     np.concatenate((proj_x_train, proj_y_train), axis=0).T,
                     np.concatenate((proj_x_test, proj_y_test), axis=0).T,
                     100),
                ]

            for method_name, train_features, test_features, hidden_units in feature_sets:
                # A fresh, identically seeded model per feature set keeps every
                # fit independent of the others.
                regr = MLPRegressor(hidden_layer_sizes=(hidden_units,), random_state=1,
                                    max_iter=1000).fit(train_features, w_train)
                w_test_pred = regr.predict(test_features)
                curr_mse = mean_squared_error(w_test, w_test_pred)
                graph_mse[i] = [mode, n, p, q, d, sigma, itr, method_name, curr_mse]
                i = i + 1
    graph_mse_all[noise_sigma] = graph_mse

In [9]:
# Print LaTeX table rows for the 'sparse' simulation.
#
# Reads sparse_mse_all and graph_mse_all (filled by the experiment cells), stacks
# them into results_df, and prints "mean $\pm$ std" of the test MSE over the
# replicates. The MSE is multiplied by 100 before formatting to two decimals.
for noise_sigma in [0.1]:
    print(noise_sigma)

    # Column order matches the positional layout of the recorded result rows.
    result_columns = ['simulation', 'n', 'p', 'q', 'd', 'sigma', 'itr', 'method', 'mse']
    sparse_df = pd.DataFrame.from_dict(sparse_mse_all[noise_sigma], orient='index',
                                       columns=result_columns)
    graph_df = pd.DataFrame.from_dict(graph_mse_all[noise_sigma], orient='index',
                                      columns=result_columns)

    results_df = pd.concat([sparse_df, graph_df], axis=0, ignore_index=True)

    # Raw string: '\p' is not a valid escape sequence in a normal string literal.
    mean_std_fmt = r'{:.2f} $\pm$ {:.2f}'

    # Baselines trained directly on the observed features.
    for simulation_type in ['sparse']:
        temp_df = results_df[(results_df['simulation'] == simulation_type) &
                             (results_df['sigma'] == noise_sigma)]
        for method in ['Genomics', 'Imaging', 'Concat']:
            new_df = temp_df[temp_df['method'] == method]
            curr_row = (method + " & " +
                        mean_std_fmt.format(new_df['mse'].mean() * 100,
                                            new_df['mse'].std() * 100) +
                        ' \\\\')
            print(curr_row)
    print('\\hline')

    # Names printed in the table header; methods without an entry are printed as-is.
    display_names = {'GCCA': 'GN-SCCA-PG', 'GNSCCA': 'GN-SCCA'}

    # One table per method: a row per feature set ('-GEN', '-IMG', concatenated),
    # a column per deflation type.
    for method in ['SCCA', 'GNSCCA']:
        method_print = display_names.get(method, method)

        print(" & ".join([method_print, 'HD', 'PD', 'OPD']))

        for add_suffix in ['-GEN', '-IMG', '']:
            curr_row = [add_suffix]

            for deflation_type in ['HD', 'PD', 'OPD']:
                for simulation_type in ['sparse']:
                    temp_df = results_df[(results_df['simulation'] == simulation_type) &
                                         (results_df['sigma'] == noise_sigma) &
                                         (results_df['d'] == 5)]
                    new_df = temp_df[temp_df['method'] == (method + '_' + deflation_type + add_suffix)]
                    curr_row.append(mean_std_fmt.format(new_df['mse'].mean() * 100,
                                                        new_df['mse'].std() * 100))

            print(' & '.join(curr_row) + ' \\\\')
        print('\\hline')
        print('\n')

0.1
Genomics & 36.62 $\pm$ 3.89 \\
Imaging & 39.14 $\pm$ 5.88 \\
Concat & 45.41 $\pm$ 9.11 \\
\hline
SCCA & HD & PD & OPD
-GEN & 30.01 $\pm$ 15.87 & 14.02 $\pm$ 9.95 & 18.12 $\pm$ 10.39 \\
-IMG & 37.56 $\pm$ 18.67 & 17.56 $\pm$ 10.79 & 21.48 $\pm$ 10.74 \\
 & 28.15 $\pm$ 16.70 & 14.36 $\pm$ 11.70 & 17.61 $\pm$ 13.56 \\
\hline


GN-SCCA & HD & PD & OPD
-GEN & 18.03 $\pm$ 8.81 & 16.41 $\pm$ 12.62 & 15.75 $\pm$ 7.53 \\
-IMG & 14.45 $\pm$ 7.42 & 16.25 $\pm$ 10.18 & 19.41 $\pm$ 13.18 \\
 & 10.46 $\pm$ 4.18 & 13.26 $\pm$ 9.36 & 13.30 $\pm$ 7.60 \\
\hline




In [10]:
# Print LaTeX table rows for the 'graph' simulation.
#
# Reads sparse_mse_all and graph_mse_all (filled by the experiment cells), stacks
# them into results_df, and prints "mean $\pm$ std" of the test MSE over the
# replicates. The MSE is multiplied by 100 before formatting to two decimals.
# Apart from those two result dicts, every name used here is assigned inside
# this cell, so it does not rely on variables left over from other cells.
for noise_sigma in [0.1]:
    print(noise_sigma)

    # Column order matches the positional layout of the recorded result rows.
    result_columns = ['simulation', 'n', 'p', 'q', 'd', 'sigma', 'itr', 'method', 'mse']
    sparse_df = pd.DataFrame.from_dict(sparse_mse_all[noise_sigma], orient='index',
                                       columns=result_columns)
    graph_df = pd.DataFrame.from_dict(graph_mse_all[noise_sigma], orient='index',
                                      columns=result_columns)

    results_df = pd.concat([sparse_df, graph_df], axis=0, ignore_index=True)

    # Raw string: '\p' is not a valid escape sequence in a normal string literal.
    mean_std_fmt = r'{:.2f} $\pm$ {:.2f}'

    # Baselines trained directly on the observed features.
    for simulation_type in ['graph']:
        temp_df = results_df[(results_df['simulation'] == simulation_type) &
                             (results_df['sigma'] == noise_sigma)]
        for method in ['Genomics', 'Imaging', 'Concat']:
            new_df = temp_df[temp_df['method'] == method]
            curr_row = (method + " & " +
                        mean_std_fmt.format(new_df['mse'].mean() * 100,
                                            new_df['mse'].std() * 100) +
                        ' \\\\')
            print(curr_row)
    print('\\hline')

    # Names printed in the table header; methods without an entry are printed as-is.
    display_names = {'GCCA': 'GN-SCCA-PG', 'GNSCCA': 'GN-SCCA'}

    # One table per method: a row per feature set ('-GEN', '-IMG', concatenated),
    # a column per deflation type.
    for method in ['SCCA', 'GNSCCA']:
        method_print = display_names.get(method, method)

        print(" & ".join([method_print, 'HD', 'PD', 'OPD']))

        for add_suffix in ['-GEN', '-IMG', '']:
            curr_row = [add_suffix]

            for deflation_type in ['HD', 'PD', 'OPD']:
                for simulation_type in ['graph']:
                    temp_df = results_df[(results_df['simulation'] == simulation_type) &
                                         (results_df['sigma'] == noise_sigma) &
                                         (results_df['d'] == 5)]
                    new_df = temp_df[temp_df['method'] == (method + '_' + deflation_type + add_suffix)]
                    curr_row.append(mean_std_fmt.format(new_df['mse'].mean() * 100,
                                                        new_df['mse'].std() * 100))

            print(' & '.join(curr_row) + ' \\\\')
        print('\\hline')
        print('\n')

0.1
Genomics & 74.46 $\pm$ 16.25 \\
Imaging & 80.94 $\pm$ 9.52 \\
Concat & 75.88 $\pm$ 9.80 \\
\hline
SCCA & HD & PD & OPD
-GEN & 19.77 $\pm$ 7.64 & 19.14 $\pm$ 13.11 & 23.51 $\pm$ 15.16 \\
-IMG & 19.69 $\pm$ 10.78 & 19.17 $\pm$ 13.65 & 23.46 $\pm$ 13.35 \\
 & 18.51 $\pm$ 9.19 & 16.19 $\pm$ 9.59 & 19.34 $\pm$ 12.71 \\
\hline


GN-SCCA & HD & PD & OPD
-GEN & 30.47 $\pm$ 14.84 & 37.05 $\pm$ 9.10 & 33.84 $\pm$ 12.84 \\
-IMG & 27.13 $\pm$ 16.27 & 35.55 $\pm$ 11.48 & 32.26 $\pm$ 15.36 \\
 & 22.39 $\pm$ 10.86 & 30.43 $\pm$ 10.55 & 28.20 $\pm$ 12.41 \\
\hline


