In [None]:
#setup - one-time tasks (run from terminal, not jupyter)
pip install cca-zoo

In [3]:
#import libraries
import pandas as pd
import numpy as np
import scipy
import os

userhome = os.path.expanduser('~')

In [4]:
# load residualized and scaled clinical features (from R script)
#replication
clin_file = userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/replication/data/Data_prep/clin_features_scaled.csv'

clin_features = pd.read_csv(clin_file)
clin_features = clin_features.to_numpy()
print(type(clin_features))
print(clin_features.shape)

<class 'numpy.ndarray'>
(2926, 8)


In [5]:
# load residualized and scaled imaging features (from R script)
#replication
rsFC_file = userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/replication/data/Data_prep/rsFC_features_scaled.csv'

rsFC_feat_chunks = pd.read_csv(rsFC_file, chunksize=1000)
rsFC_features = pd.concat(rsFC_feat_chunks)
#rsFC_features.sample(10)
rsFC_features = rsFC_features.to_numpy()
print(type(rsFC_features))
print(rsFC_features.shape)

<class 'numpy.ndarray'>
(2926, 61776)


In [13]:
# load selected rsFC features
#discovery
stable_features_file = userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/discovery/results/discovery_stable_feats_95_Pearson05.csv'
#replication
#stable_features_file = userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/replication/results/replication_stable_feats_95_Pearson05.csv'

stable_features = pd.read_csv(stable_features_file, header=None)
stable_features = stable_features.to_numpy()
stable_features = np.transpose(stable_features)
print(type(stable_features))
print(stable_features.shape)

<class 'numpy.ndarray'>
(1, 7095)


In [14]:
# initialize arrays to store optimal hyperparams and scores from each CV loop
eCCA_eachfold_c1 = np.empty((10,27), dtype=object) 
eCCA_eachfold_c2 = np.empty((10,27), dtype=object) 
eCCA_eachfold_l1_ratio = np.empty((10,27), dtype=object)
eCCA_eachfold_score_TRAIN = np.empty((10,27), dtype=object) 
eCCA_eachfold_score_TEST = np.empty((10,27), dtype=object) 

print(eCCA_eachfold_c1)

[[None None None None None None None None None None None None None None
  None None None None None None None None None None None None None]
 [None None None None None None None None None None None None None None
  None None None None None None None None None None None None None]
 [None None None None None None None None None None None None None None
  None None None None None None None None None None None None None]
 [None None None None None None None None None None None None None None
  None None None None None None None None None None None None None]
 [None None None None None None None None None None None None None None
  None None None None None None None None None None None None None]
 [None None None None None None None None None None None None None None
  None None None None None None None None None None None None None]
 [None None None None None None None None None None None None None None
  None None None None None None None None None None None None None]
 [None None None Non

In [15]:
#hyperparam optimization
from cca_zoo.models import ElasticCCA

eCCA_model = ElasticCCA(max_iter=100000)
eCCA_param_grid = {"c" : [[1e-3,1e-2,1e-1], [1e-2,1e-1,3e-1]], "l1_ratio" : [0.25,0.5,0.75]}
# c is lasso alpha for each view
# l1_ratio is l1 ratio in lasso subproblems

for foldind in range(0,10):
    #foldind = 0 #remember python starts indexing at 0, while R and Matlab start at 1

    # load 10-fold cross validation (CV) splits
    #replication
    thisfold_file = userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/replication/data/PartitionsEnCCA/partitions2_rep01fold' + str(foldind + 1) + r'.csv'
    print(thisfold_file)
    thisfold_file = pd.read_csv(thisfold_file)
    thisfold_file = thisfold_file.to_numpy()
    print(type(thisfold_file))
    print(thisfold_file.shape)

    # for each CV fold, extract only the selected participants and features
    # NB: put this in loop later once troubleshooting complete
    # NB: row and col indices were generated in R/Matlab, which index from 1. python indexes from 0
    thisfold_clin_features = clin_features[(thisfold_file - 1), :]
    #print(thisfold_clin_features.shape)
    thisfold_clin_features = np.squeeze(thisfold_clin_features)
    print(thisfold_clin_features.shape)
    #print(thisfold_clin_features)

    thisfold_rsFC_features = rsFC_features[(thisfold_file - 1), stable_features.astype(int)-1]
    print(thisfold_rsFC_features.shape)

    #thisfoldfeats = CVfold_features[foldind, :][~np.isnan(CVfold_features[foldind, :])]
    #thisfold_rsFC_features = rsFC_features[(thisfold_file - 1), thisfoldfeats.astype(int)-1]
    #print(thisfold_rsFC_features.shape)
    #print(thisfold_rsFC_features)


    #test set for this CV fold
    thisfold_clin_features_TEST = np.delete(clin_features,thisfold_file-1, axis=0)
    print(thisfold_clin_features_TEST.shape)
    thisfold_rsFC_features_TEST = np.delete(rsFC_features,thisfold_file-1, axis=0)
    thisfold_rsFC_features_TEST = thisfold_rsFC_features_TEST[:, stable_features.astype(int) - 1]
    thisfold_rsFC_features_TEST = np.squeeze(thisfold_rsFC_features_TEST)
    #thisfold_rsFC_features_TEST = thisfold_rsFC_features_TEST[:, thisfoldfeats.astype(int) - 1]
    print(thisfold_rsFC_features_TEST.shape)
    
    paramind = 0
    for thisc1 in eCCA_param_grid["c"][0]:
        for thisc2 in eCCA_param_grid["c"][1]:
            for thisl1_ratio in eCCA_param_grid["l1_ratio"]:
                eCCA_fit = ElasticCCA(c = (thisc1, thisc2), l1_ratio = thisl1_ratio, latent_dims=1).fit([thisfold_clin_features, thisfold_rsFC_features])
                eCCA_eachfold_c1[foldind,paramind] = thisc1
                eCCA_eachfold_c2[foldind,paramind] = thisc2
                eCCA_eachfold_l1_ratio[foldind,paramind] = thisl1_ratio
                print('params: ' + str(thisc1) + ', ' + str(thisc2) + ', ' + str(thisl1_ratio))
                eCCA_eachfold_score_TRAIN[foldind,paramind] = eCCA_fit.score((thisfold_clin_features, thisfold_rsFC_features))[0]
                print('CCA score (train): ' + str(eCCA_eachfold_score_TRAIN[foldind,paramind]))
                eCCA_eachfold_score_TEST[foldind,paramind] = eCCA_fit.score((thisfold_clin_features_TEST, thisfold_rsFC_features_TEST))[0]
                print('CCA score (test): ' + str(eCCA_eachfold_score_TEST[foldind,paramind]))

                paramind = paramind + 1

/Users/marshlab2/Desktop/OCS_bigdata_ML_local/FINAL/replication/data/PartitionsEnCCA/partitions2_rep01fold1.csv
<class 'numpy.ndarray'>
(2634, 1)
(2634, 8)
(2634, 7095)
(292, 8)
(292, 7095)




params: 0.001, 0.01, 0.25
CCA score (train): 0.9799550538846034
CCA score (test): 0.013357523997242104




params: 0.001, 0.01, 0.5
CCA score (train): 0.9509973758034038
CCA score (test): 0.024015047696634095




params: 0.001, 0.01, 0.75
CCA score (train): 0.9194562969829381
CCA score (test): 0.02387254337830269




params: 0.001, 0.1, 0.25
CCA score (train): 0.7019420549239501
CCA score (test): 0.06494228628703813




params: 0.001, 0.1, 0.5
CCA score (train): 0.4715353595328504
CCA score (test): 0.08034762000959406




params: 0.001, 0.1, 0.75
CCA score (train): 0.3175106970626145
CCA score (test): 0.10798699701271675




params: 0.001, 0.3, 0.25
CCA score (train): 0.3247269524423122
CCA score (test): 0.12444584244389345




params: 0.001, 0.3, 0.5
CCA score (train): 0.13232180424104056
CCA score (test): 0.08597932757408677


  - np.outer(score, score) @ residual / np.dot(score, score).item()


params: 0.001, 0.3, 0.75


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


CCA score (train): nan
CCA score (test): nan




params: 0.01, 0.01, 0.25
CCA score (train): 0.9798927465018545
CCA score (test): 0.013559102295631398




params: 0.01, 0.01, 0.5
CCA score (train): 0.9504561191158023
CCA score (test): 0.029059335097993078




params: 0.01, 0.01, 0.75
CCA score (train): 0.9190002845600049
CCA score (test): 0.02491663518597864




params: 0.01, 0.1, 0.25
CCA score (train): 0.6990334963252147
CCA score (test): 0.07845811393921442




params: 0.01, 0.1, 0.5
CCA score (train): 0.4704563236482022
CCA score (test): 0.10910731790413197




params: 0.01, 0.1, 0.75
CCA score (train): 0.3173713944615486
CCA score (test): 0.13494631597653717




params: 0.01, 0.3, 0.25
CCA score (train): 0.3222301772003795
CCA score (test): 0.12501566422496047




params: 0.01, 0.3, 0.5
CCA score (train): 0.13240997474892024
CCA score (test): 0.0870423873770807


  - np.outer(score, score) @ residual / np.dot(score, score).item()


params: 0.01, 0.3, 0.75


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


CCA score (train): nan
CCA score (test): nan




params: 0.1, 0.01, 0.25
CCA score (train): 0.9785027830358444
CCA score (test): -0.006136543134060868




params: 0.1, 0.01, 0.5
CCA score (train): 0.9464832161558592
CCA score (test): 0.023226432571726185




params: 0.1, 0.01, 0.75
CCA score (train): 0.908877150169213
CCA score (test): -0.025336165215354978




params: 0.1, 0.1, 0.25
CCA score (train): 0.6893176427070307
CCA score (test): 0.05858600877003117




params: 0.1, 0.1, 0.5
CCA score (train): 0.4421444851748335
CCA score (test): 0.09987000197796037




params: 0.1, 0.1, 0.75
CCA score (train): 0.31219361796752776
CCA score (test): 0.07943119655256825




params: 0.1, 0.3, 0.25
CCA score (train): 0.31073837925739056
CCA score (test): 0.13248368284664602




params: 0.1, 0.3, 0.5
CCA score (train): 0.11356617246171274
CCA score (test): 0.07239435000105865


  - np.outer(score, score) @ residual / np.dot(score, score).item()


params: 0.1, 0.3, 0.75


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


CCA score (train): nan
CCA score (test): nan
/Users/marshlab2/Desktop/OCS_bigdata_ML_local/FINAL/replication/data/PartitionsEnCCA/partitions2_rep01fold2.csv
<class 'numpy.ndarray'>
(2633, 1)
(2633, 8)
(2633, 7095)
(293, 8)
(293, 7095)




params: 0.001, 0.01, 0.25
CCA score (train): 0.9817929801327527
CCA score (test): 0.021822279437953007




params: 0.001, 0.01, 0.5
CCA score (train): 0.9529861775525306
CCA score (test): 0.018427258205131025




params: 0.001, 0.01, 0.75
CCA score (train): 0.9202651276309204
CCA score (test): 0.03899354407002109




params: 0.001, 0.1, 0.25
CCA score (train): 0.6926704617412056
CCA score (test): -0.00036620678165255605




params: 0.001, 0.1, 0.5
CCA score (train): 0.46897833913129405
CCA score (test): 0.0718568661598038




params: 0.001, 0.1, 0.75
CCA score (train): 0.33675027094093735
CCA score (test): 0.04000342232571574




params: 0.001, 0.3, 0.25
CCA score (train): 0.34665967436248124
CCA score (test): 0.04201501406959407




params: 0.001, 0.3, 0.5
CCA score (train): 0.14997222954302902
CCA score (test): 0.007270660414080421


  - np.outer(score, score) @ residual / np.dot(score, score).item()


params: 0.001, 0.3, 0.75


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


CCA score (train): nan
CCA score (test): nan




params: 0.01, 0.01, 0.25
CCA score (train): 0.9814524352983693
CCA score (test): 0.02283531872582545




params: 0.01, 0.01, 0.5
CCA score (train): 0.9526812493014167
CCA score (test): 0.014161055912492726




params: 0.01, 0.01, 0.75
CCA score (train): 0.919832892591864
CCA score (test): 0.029429752703832524




params: 0.01, 0.1, 0.25
CCA score (train): 0.6913181072770995
CCA score (test): 0.004415235917835103




params: 0.01, 0.1, 0.5
CCA score (train): 0.4659750239480056
CCA score (test): 0.061215073398919184




params: 0.01, 0.1, 0.75
CCA score (train): 0.33638211245869454
CCA score (test): 0.036540261802811225




params: 0.01, 0.3, 0.25
CCA score (train): 0.3447989319367104
CCA score (test): 0.03859629608133286




params: 0.01, 0.3, 0.5
CCA score (train): 0.13506335485566456
CCA score (test): 0.0035493447421299784


  - np.outer(score, score) @ residual / np.dot(score, score).item()


params: 0.01, 0.3, 0.75


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


CCA score (train): nan
CCA score (test): nan




params: 0.1, 0.01, 0.25
CCA score (train): 0.9798468620478435
CCA score (test): 0.029381851045878093




params: 0.1, 0.01, 0.5
CCA score (train): 0.9487255196513789
CCA score (test): 0.06660026043901746




params: 0.1, 0.01, 0.75
CCA score (train): 0.9108876976001126
CCA score (test): 0.09774407307663457




params: 0.1, 0.1, 0.25
CCA score (train): 0.6839472548648005
CCA score (test): 0.012157561152701835




params: 0.1, 0.1, 0.5
CCA score (train): 0.45017974497391
CCA score (test): 0.055103149627817816




params: 0.1, 0.1, 0.75
CCA score (train): 0.31801325455376706
CCA score (test): 0.03247618210884218




params: 0.1, 0.3, 0.25
CCA score (train): 0.33465766388054696
CCA score (test): 0.03873963538040348




params: 0.1, 0.3, 0.5
CCA score (train): 0.12781531496139031
CCA score (test): -0.003211407537715205


  - np.outer(score, score) @ residual / np.dot(score, score).item()


params: 0.1, 0.3, 0.75


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


CCA score (train): nan
CCA score (test): nan
/Users/marshlab2/Desktop/OCS_bigdata_ML_local/FINAL/replication/data/PartitionsEnCCA/partitions2_rep01fold3.csv
<class 'numpy.ndarray'>
(2633, 1)
(2633, 8)
(2633, 7095)
(293, 8)
(293, 7095)




params: 0.001, 0.01, 0.25
CCA score (train): 0.9805457645593552
CCA score (test): -0.013135545939806814




params: 0.001, 0.01, 0.5
CCA score (train): 0.95257540119233
CCA score (test): -0.010680010246299387




params: 0.001, 0.01, 0.75
CCA score (train): 0.920304362476625
CCA score (test): -0.01591629856827459




params: 0.001, 0.1, 0.25
CCA score (train): 0.6998105470206188
CCA score (test): 0.02160931188668358




params: 0.001, 0.1, 0.5
CCA score (train): 0.479021796711083
CCA score (test): 0.008910771899363512




params: 0.001, 0.1, 0.75
CCA score (train): 0.3411254890001043
CCA score (test): 0.014223771112437333




params: 0.001, 0.3, 0.25
CCA score (train): 0.35384348830918455
CCA score (test): 0.01707468762955866




params: 0.001, 0.3, 0.5
CCA score (train): 0.158015161417413
CCA score (test): 0.026145718176643573


  - np.outer(score, score) @ residual / np.dot(score, score).item()


params: 0.001, 0.3, 0.75


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


CCA score (train): nan
CCA score (test): nan




params: 0.01, 0.01, 0.25
CCA score (train): 0.9803195661092574
CCA score (test): -0.015991550378981145




params: 0.01, 0.01, 0.5
CCA score (train): 0.9522622845317708
CCA score (test): -0.01042952391208285




params: 0.01, 0.01, 0.75
CCA score (train): 0.9202238845115032
CCA score (test): -0.015069714958606317




params: 0.01, 0.1, 0.25
CCA score (train): 0.6989664274690939
CCA score (test): 0.020798872878843078




params: 0.01, 0.1, 0.5
CCA score (train): 0.4789305444374512
CCA score (test): 0.0025860117797786852




params: 0.01, 0.1, 0.75
CCA score (train): 0.3418397465022094
CCA score (test): 0.005837461438495062




params: 0.01, 0.3, 0.25
CCA score (train): 0.3533735323554972
CCA score (test): 0.0008173838569964786




params: 0.01, 0.3, 0.5
CCA score (train): 0.14949492472453874
CCA score (test): 0.03166249052772052


  - np.outer(score, score) @ residual / np.dot(score, score).item()


params: 0.01, 0.3, 0.75


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


CCA score (train): nan
CCA score (test): nan




params: 0.1, 0.01, 0.25
CCA score (train): 0.9788168669564337
CCA score (test): -0.03386905514948779




params: 0.1, 0.01, 0.5
CCA score (train): 0.9459327691250701
CCA score (test): 0.024435197139822007




params: 0.1, 0.01, 0.75
CCA score (train): 0.9166235063554078
CCA score (test): -0.006359806430416115




params: 0.1, 0.1, 0.25
CCA score (train): 0.6941871337004042
CCA score (test): 0.01474148224060512




params: 0.1, 0.1, 0.5
CCA score (train): 0.4657461968328245
CCA score (test): 0.019506185078807192




params: 0.1, 0.1, 0.75
CCA score (train): 0.3070533173506942
CCA score (test): 0.02043425127223175




params: 0.1, 0.3, 0.25
CCA score (train): 0.34076321306816904
CCA score (test): 0.010289212082495336




params: 0.1, 0.3, 0.5
CCA score (train): 0.14314828401598656
CCA score (test): 0.03807699968879086


  - np.outer(score, score) @ residual / np.dot(score, score).item()


params: 0.1, 0.3, 0.75


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


CCA score (train): nan
CCA score (test): nan
/Users/marshlab2/Desktop/OCS_bigdata_ML_local/FINAL/replication/data/PartitionsEnCCA/partitions2_rep01fold4.csv
<class 'numpy.ndarray'>
(2633, 1)
(2633, 8)
(2633, 7095)
(293, 8)
(293, 7095)




params: 0.001, 0.01, 0.25
CCA score (train): 0.980342067456939
CCA score (test): 0.1119321687887016




params: 0.001, 0.01, 0.5
CCA score (train): 0.9524766403706384
CCA score (test): 0.13059538381044877




params: 0.001, 0.01, 0.75
CCA score (train): 0.9202703682346054
CCA score (test): 0.12625803534308955




params: 0.001, 0.1, 0.25
CCA score (train): 0.6961425493625879
CCA score (test): 0.16225807874938036




params: 0.001, 0.1, 0.5
CCA score (train): 0.4823662801899764
CCA score (test): 0.17204878983167649




params: 0.001, 0.1, 0.75
CCA score (train): 0.31391503053820924
CCA score (test): 0.17118995092634703




params: 0.001, 0.3, 0.25
CCA score (train): 0.3286314997690003
CCA score (test): 0.18703356698244322




params: 0.001, 0.3, 0.5
CCA score (train): 0.13438115886976743
CCA score (test): 0.12114970554400561


  - np.outer(score, score) @ residual / np.dot(score, score).item()


params: 0.001, 0.3, 0.75


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


CCA score (train): nan
CCA score (test): nan




params: 0.01, 0.01, 0.25
CCA score (train): 0.9801623390290102
CCA score (test): 0.11085660630255689




params: 0.01, 0.01, 0.5
CCA score (train): 0.9522289323045006
CCA score (test): 0.13254459190388257




params: 0.01, 0.01, 0.75
CCA score (train): 0.9189567868778006
CCA score (test): 0.13321696246017334




params: 0.01, 0.1, 0.25
CCA score (train): 0.696825825336755
CCA score (test): 0.1673236742145754




params: 0.01, 0.1, 0.5
CCA score (train): 0.48093504241871177
CCA score (test): 0.1791658937339058




params: 0.01, 0.1, 0.75
CCA score (train): 0.3256802006268189
CCA score (test): 0.17788242343363048




params: 0.01, 0.3, 0.25
CCA score (train): 0.3302376774652336
CCA score (test): 0.19160643307502823




params: 0.01, 0.3, 0.5
CCA score (train): 0.13010569429869934
CCA score (test): 0.12171660996000244


  - np.outer(score, score) @ residual / np.dot(score, score).item()


params: 0.01, 0.3, 0.75


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


CCA score (train): nan
CCA score (test): nan




params: 0.1, 0.01, 0.25
CCA score (train): 0.9788284336494759
CCA score (test): 0.07460377931719653




params: 0.1, 0.01, 0.5
CCA score (train): 0.9489363335170715
CCA score (test): 0.05515244460751845




params: 0.1, 0.01, 0.75
CCA score (train): 0.9135226994414019
CCA score (test): 0.05395075997325516




params: 0.1, 0.1, 0.25
CCA score (train): 0.6901080228580245
CCA score (test): 0.177226067829916




params: 0.1, 0.1, 0.5
CCA score (train): 0.46941094028692154
CCA score (test): 0.15815369238547605




params: 0.1, 0.1, 0.75
CCA score (train): 0.2968682694455729
CCA score (test): 0.13047059043252673




params: 0.1, 0.3, 0.25
CCA score (train): 0.3186557853224201
CCA score (test): 0.18362127730160926




params: 0.1, 0.3, 0.5
CCA score (train): 0.1067358208702407
CCA score (test): 0.04208050671691099


  - np.outer(score, score) @ residual / np.dot(score, score).item()


params: 0.1, 0.3, 0.75


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


CCA score (train): nan
CCA score (test): nan
/Users/marshlab2/Desktop/OCS_bigdata_ML_local/FINAL/replication/data/PartitionsEnCCA/partitions2_rep01fold5.csv
<class 'numpy.ndarray'>
(2632, 1)
(2632, 8)
(2632, 7095)
(294, 8)
(294, 7095)




params: 0.001, 0.01, 0.25
CCA score (train): 0.97856940399684
CCA score (test): -0.08295447475955364




params: 0.001, 0.01, 0.5
CCA score (train): 0.94952769827646
CCA score (test): -0.04650107080134935




params: 0.001, 0.01, 0.75
CCA score (train): 0.9161132209455736
CCA score (test): -0.044444145475542096




params: 0.001, 0.1, 0.25
CCA score (train): 0.6976560387333652
CCA score (test): -0.038268688345697854




params: 0.001, 0.1, 0.5
CCA score (train): 0.48132423557656256
CCA score (test): -0.01601070412475325




params: 0.001, 0.1, 0.75
CCA score (train): 0.34728837354873754
CCA score (test): -0.011834264616307877




params: 0.001, 0.3, 0.25
CCA score (train): 0.3549899234521019
CCA score (test): -0.025841025464140688




params: 0.001, 0.3, 0.5
CCA score (train): 0.14673030253518693
CCA score (test): -0.05522051100615022


  - np.outer(score, score) @ residual / np.dot(score, score).item()


params: 0.001, 0.3, 0.75


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


CCA score (train): nan
CCA score (test): nan




params: 0.01, 0.01, 0.25
CCA score (train): 0.9784963322689844
CCA score (test): -0.07688212209632983




params: 0.01, 0.01, 0.5
CCA score (train): 0.948520059613116
CCA score (test): -0.053268967388458766




params: 0.01, 0.01, 0.75
CCA score (train): 0.9144679899377302
CCA score (test): -0.043755098519970836




params: 0.01, 0.1, 0.25
CCA score (train): 0.6947464412078634
CCA score (test): -0.04433964313294925




params: 0.01, 0.1, 0.5
CCA score (train): 0.48158823821327146
CCA score (test): -0.016181957370725142




params: 0.01, 0.1, 0.75
CCA score (train): 0.34387079501185225
CCA score (test): 0.0035721002999031803




params: 0.01, 0.3, 0.25
CCA score (train): 0.3544870631987185
CCA score (test): -0.021927783836182657




params: 0.01, 0.3, 0.5
CCA score (train): 0.1453700690125035
CCA score (test): -0.047638267824613334


  - np.outer(score, score) @ residual / np.dot(score, score).item()


params: 0.01, 0.3, 0.75


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


CCA score (train): nan
CCA score (test): nan




params: 0.1, 0.01, 0.25
CCA score (train): 0.9776931276314109
CCA score (test): -0.047209645398683886




params: 0.1, 0.01, 0.5
CCA score (train): 0.9478703776669675
CCA score (test): -0.02274765094711051




params: 0.1, 0.01, 0.75
CCA score (train): 0.913031506990134
CCA score (test): -0.019585748471934616




params: 0.1, 0.1, 0.25
CCA score (train): 0.6897467048161245
CCA score (test): -0.017153749042968403




params: 0.1, 0.1, 0.5
CCA score (train): 0.4657772648392089
CCA score (test): 0.051615062135435874




params: 0.1, 0.1, 0.75
CCA score (train): 0.30292678039642373
CCA score (test): 0.017954605857058548




params: 0.1, 0.3, 0.25
CCA score (train): 0.3440128520447563
CCA score (test): 0.008354720936031512




params: 0.1, 0.3, 0.5
CCA score (train): 0.14287623958337003
CCA score (test): -0.02945444223210658


  - np.outer(score, score) @ residual / np.dot(score, score).item()


params: 0.1, 0.3, 0.75


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


CCA score (train): nan
CCA score (test): nan
/Users/marshlab2/Desktop/OCS_bigdata_ML_local/FINAL/replication/data/PartitionsEnCCA/partitions2_rep01fold6.csv
<class 'numpy.ndarray'>
(2634, 1)
(2634, 8)
(2634, 7095)
(292, 8)
(292, 7095)




params: 0.001, 0.01, 0.25
CCA score (train): 0.9814665660232444
CCA score (test): -0.05539614164831119




params: 0.001, 0.01, 0.5
CCA score (train): 0.9536111522888839
CCA score (test): -0.07239521516257497




params: 0.001, 0.01, 0.75
CCA score (train): 0.9240316978975884
CCA score (test): -0.08271533565088984




params: 0.001, 0.1, 0.25
CCA score (train): 0.7095496833109345
CCA score (test): -0.062383884203463746




params: 0.001, 0.1, 0.5
CCA score (train): 0.4774815211372514
CCA score (test): -0.016522028305543834




params: 0.001, 0.1, 0.75
CCA score (train): 0.32384506385794065
CCA score (test): 0.043710058163272336




params: 0.001, 0.3, 0.25
CCA score (train): 0.33337344578640216
CCA score (test): 0.043807698676091134




params: 0.001, 0.3, 0.5
CCA score (train): 0.1428332076623513
CCA score (test): 0.02429490655255151


  - np.outer(score, score) @ residual / np.dot(score, score).item()


params: 0.001, 0.3, 0.75


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


CCA score (train): nan
CCA score (test): nan




params: 0.01, 0.01, 0.25
CCA score (train): 0.9813425762147816
CCA score (test): -0.05595131228002925




params: 0.01, 0.01, 0.5
CCA score (train): 0.9535799965241158
CCA score (test): -0.0676051248861097




params: 0.01, 0.01, 0.75
CCA score (train): 0.9231236447265139
CCA score (test): -0.06893173661697527




params: 0.01, 0.1, 0.25
CCA score (train): 0.7060968786024646
CCA score (test): -0.0478993722447949




params: 0.01, 0.1, 0.5
CCA score (train): 0.4749881280666013
CCA score (test): -0.010703355020857175




params: 0.01, 0.1, 0.75
CCA score (train): 0.3262692638299116
CCA score (test): 0.03590107659726405




params: 0.01, 0.3, 0.25
CCA score (train): 0.3323333658560106
CCA score (test): 0.044465111246786204




params: 0.01, 0.3, 0.5
CCA score (train): 0.14254587409401354
CCA score (test): 0.026098237945965108


  - np.outer(score, score) @ residual / np.dot(score, score).item()


params: 0.01, 0.3, 0.75


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


CCA score (train): nan
CCA score (test): nan




params: 0.1, 0.01, 0.25
CCA score (train): 0.9805351025759279
CCA score (test): -0.06259598099487451




params: 0.1, 0.01, 0.5
CCA score (train): 0.9517870111815794
CCA score (test): -0.05820809067350852




params: 0.1, 0.01, 0.75
CCA score (train): 0.9197465839718006
CCA score (test): -0.03831503422811777




params: 0.1, 0.1, 0.25
CCA score (train): 0.6964213094648095
CCA score (test): -0.03668599990549248




params: 0.1, 0.1, 0.5
CCA score (train): 0.4475927518706688
CCA score (test): 0.050043387969679776




params: 0.1, 0.1, 0.75
CCA score (train): 0.3129268940818952
CCA score (test): 0.005782483175088338




params: 0.1, 0.3, 0.25
CCA score (train): 0.32198205323111395
CCA score (test): 0.04816228029067271




params: 0.1, 0.3, 0.5
CCA score (train): 0.13499650421321974
CCA score (test): 0.030736545779567193


  - np.outer(score, score) @ residual / np.dot(score, score).item()


params: 0.1, 0.3, 0.75


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


CCA score (train): nan
CCA score (test): nan
/Users/marshlab2/Desktop/OCS_bigdata_ML_local/FINAL/replication/data/PartitionsEnCCA/partitions2_rep01fold7.csv
<class 'numpy.ndarray'>
(2634, 1)
(2634, 8)
(2634, 7095)
(292, 8)
(292, 7095)




params: 0.001, 0.01, 0.25
CCA score (train): 0.9814632985255374
CCA score (test): -0.09276133656977759




params: 0.001, 0.01, 0.5
CCA score (train): 0.9535246625516278
CCA score (test): -0.1178886606458428




params: 0.001, 0.01, 0.75
CCA score (train): 0.9216590925528825
CCA score (test): -0.14049073305415627




params: 0.001, 0.1, 0.25
CCA score (train): 0.7068391478873246
CCA score (test): -0.1012030674008042




params: 0.001, 0.1, 0.5
CCA score (train): 0.47066705425277533
CCA score (test): 0.0001704860481179793




params: 0.001, 0.1, 0.75
CCA score (train): 0.3404773821408593
CCA score (test): -0.04291262147165564




params: 0.001, 0.3, 0.25
CCA score (train): 0.3479383183483784
CCA score (test): -0.033249420107354544




params: 0.001, 0.3, 0.5
CCA score (train): 0.14710923539671383
CCA score (test): 0.012498571514693246


  - np.outer(score, score) @ residual / np.dot(score, score).item()


params: 0.001, 0.3, 0.75


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


CCA score (train): nan
CCA score (test): nan




params: 0.01, 0.01, 0.25
CCA score (train): 0.9814214070245342
CCA score (test): -0.09518748492312112




params: 0.01, 0.01, 0.5
CCA score (train): 0.9532541345457226
CCA score (test): -0.12498664493604839




params: 0.01, 0.01, 0.75
CCA score (train): 0.9207873313127757
CCA score (test): -0.15137164932989478




params: 0.01, 0.1, 0.25
CCA score (train): 0.7066536994705703
CCA score (test): -0.10645325131849714




params: 0.01, 0.1, 0.5
CCA score (train): 0.4689877964120408
CCA score (test): -0.01373298838186976




params: 0.01, 0.1, 0.75
CCA score (train): 0.33977234164870485
CCA score (test): -0.047505613937792956




params: 0.01, 0.3, 0.25
CCA score (train): 0.3478189875395472
CCA score (test): -0.03542453199922735




params: 0.01, 0.3, 0.5
CCA score (train): 0.14288700117443787
CCA score (test): 0.017619701454306913


  - np.outer(score, score) @ residual / np.dot(score, score).item()


params: 0.01, 0.3, 0.75


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


CCA score (train): nan
CCA score (test): nan




params: 0.1, 0.01, 0.25
CCA score (train): 0.9809097350945157
CCA score (test): -0.09399976565014145




params: 0.1, 0.01, 0.5
CCA score (train): 0.9500295656461495
CCA score (test): -0.12386518286581882




params: 0.1, 0.01, 0.75
CCA score (train): 0.9140486224798976
CCA score (test): -0.11430617388691044




params: 0.1, 0.1, 0.25
CCA score (train): 0.6984531340175306
CCA score (test): -0.10561878155733939




params: 0.1, 0.1, 0.5
CCA score (train): 0.46196505401503374
CCA score (test): -0.020312896459925844




params: 0.1, 0.1, 0.75
CCA score (train): 0.3142555855869107
CCA score (test): -0.03842956855252544




params: 0.1, 0.3, 0.25
CCA score (train): 0.33597999536991097
CCA score (test): -0.026579411623560256




params: 0.1, 0.3, 0.5
CCA score (train): 0.14113592508244777
CCA score (test): 0.009117439161935081


  - np.outer(score, score) @ residual / np.dot(score, score).item()


params: 0.1, 0.3, 0.75


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


CCA score (train): nan
CCA score (test): nan
/Users/marshlab2/Desktop/OCS_bigdata_ML_local/FINAL/replication/data/PartitionsEnCCA/partitions2_rep01fold8.csv
<class 'numpy.ndarray'>
(2634, 1)
(2634, 8)
(2634, 7095)
(292, 8)
(292, 7095)




params: 0.001, 0.01, 0.25
CCA score (train): 0.9813110795952438
CCA score (test): -0.0451007576625041




params: 0.001, 0.01, 0.5
CCA score (train): 0.9542408266396631
CCA score (test): -0.005686318062536055




params: 0.001, 0.01, 0.75
CCA score (train): 0.9221865984845969
CCA score (test): 0.022123023698671807




params: 0.001, 0.1, 0.25
CCA score (train): 0.6820374245626848
CCA score (test): 0.11712843561986053




params: 0.001, 0.1, 0.5
CCA score (train): 0.472431669071264
CCA score (test): 0.06905340609536914




params: 0.001, 0.1, 0.75
CCA score (train): 0.3437521815971114
CCA score (test): 0.03715801601007174




params: 0.001, 0.3, 0.25
CCA score (train): 0.34544961993745704
CCA score (test): 0.02519816241552908




params: 0.001, 0.3, 0.5
CCA score (train): 0.12092609292475065
CCA score (test): 0.06574858325655919


  - np.outer(score, score) @ residual / np.dot(score, score).item()


params: 0.001, 0.3, 0.75


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


CCA score (train): nan
CCA score (test): nan




params: 0.01, 0.01, 0.25
CCA score (train): 0.9812089374197694
CCA score (test): -0.04884887804436144




params: 0.01, 0.01, 0.5
CCA score (train): 0.9538342088197986
CCA score (test): -0.007615467368865758




params: 0.01, 0.01, 0.75
CCA score (train): 0.9221978461744027
CCA score (test): 0.02839098053062994




params: 0.01, 0.1, 0.25
CCA score (train): 0.6870426113936621
CCA score (test): 0.10795828927561324




params: 0.01, 0.1, 0.5
CCA score (train): 0.47460598481700744
CCA score (test): 0.08576910545709104




params: 0.01, 0.1, 0.75
CCA score (train): 0.3397910557462567
CCA score (test): 0.10544945177459142




params: 0.01, 0.3, 0.25
CCA score (train): 0.34462071624336743
CCA score (test): 0.05287632123097952




params: 0.01, 0.3, 0.5
CCA score (train): 0.12443281507247295
CCA score (test): 0.0768310042207665


  - np.outer(score, score) @ residual / np.dot(score, score).item()


params: 0.01, 0.3, 0.75


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


CCA score (train): nan
CCA score (test): nan




params: 0.1, 0.01, 0.25
CCA score (train): 0.9801321283335582
CCA score (test): -0.06646452592165975




params: 0.1, 0.01, 0.5
CCA score (train): 0.951522964002774
CCA score (test): 0.06880335393786652




params: 0.1, 0.01, 0.75
CCA score (train): 0.9183127335017129
CCA score (test): 0.10074162694219857




params: 0.1, 0.1, 0.25
CCA score (train): 0.684742667889501
CCA score (test): 0.09639298034673582




params: 0.1, 0.1, 0.5
CCA score (train): 0.4647665359196387
CCA score (test): 0.11869237487214157




params: 0.1, 0.1, 0.75
CCA score (train): 0.31648780749214467
CCA score (test): 0.11971725537603883




params: 0.1, 0.3, 0.25
CCA score (train): 0.33202019390187454
CCA score (test): 0.15005953807387584




params: 0.1, 0.3, 0.5
CCA score (train): 0.12494346844564808
CCA score (test): 0.10115364415612493


  - np.outer(score, score) @ residual / np.dot(score, score).item()


params: 0.1, 0.3, 0.75


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


CCA score (train): nan
CCA score (test): nan
/Users/marshlab2/Desktop/OCS_bigdata_ML_local/FINAL/replication/data/PartitionsEnCCA/partitions2_rep01fold9.csv
<class 'numpy.ndarray'>
(2633, 1)
(2633, 8)
(2633, 7095)
(293, 8)
(293, 7095)




params: 0.001, 0.01, 0.25
CCA score (train): 0.9801482051272572
CCA score (test): -0.1175074710395817




params: 0.001, 0.01, 0.5
CCA score (train): 0.9517139287399012
CCA score (test): -0.09011194415945722




params: 0.001, 0.01, 0.75
CCA score (train): 0.9208328160532855
CCA score (test): -0.057104803118216196




params: 0.001, 0.1, 0.25
CCA score (train): 0.6985396060326599
CCA score (test): -0.027960518048945615




params: 0.001, 0.1, 0.5
CCA score (train): 0.48210896863413955
CCA score (test): 0.03370877748667578




params: 0.001, 0.1, 0.75
CCA score (train): 0.35203578942489
CCA score (test): 0.043354175567209996




params: 0.001, 0.3, 0.25
CCA score (train): 0.362305888084286
CCA score (test): 0.05515895534197757




params: 0.001, 0.3, 0.5
CCA score (train): 0.13588910906829654
CCA score (test): 0.011861085130615567


  - np.outer(score, score) @ residual / np.dot(score, score).item()


params: 0.001, 0.3, 0.75


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


CCA score (train): nan
CCA score (test): nan




params: 0.01, 0.01, 0.25
CCA score (train): 0.9799938653683673
CCA score (test): -0.11770544169255648




params: 0.01, 0.01, 0.5
CCA score (train): 0.9511484073993652
CCA score (test): -0.09193569874000751




params: 0.01, 0.01, 0.75
CCA score (train): 0.9191623000855063
CCA score (test): -0.06988187944052493




params: 0.01, 0.1, 0.25
CCA score (train): 0.6973654963285547
CCA score (test): -0.032487257361625055




params: 0.01, 0.1, 0.5
CCA score (train): 0.47658202428671625
CCA score (test): 0.022034981681472443




params: 0.01, 0.1, 0.75
CCA score (train): 0.3471334596020288
CCA score (test): 0.023169149275648504




params: 0.01, 0.3, 0.25
CCA score (train): 0.358715755395981
CCA score (test): 0.04733425857634588




params: 0.01, 0.3, 0.5
CCA score (train): 0.13264247989593958
CCA score (test): 0.032166634526068494


  - np.outer(score, score) @ residual / np.dot(score, score).item()


params: 0.01, 0.3, 0.75


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


CCA score (train): nan
CCA score (test): nan




params: 0.1, 0.01, 0.25
CCA score (train): 0.9788120649578311
CCA score (test): -0.12216520170726475




params: 0.1, 0.01, 0.5
CCA score (train): 0.9473690003470305
CCA score (test): -0.06057161989137705




params: 0.1, 0.01, 0.75
CCA score (train): 0.9110059396036714
CCA score (test): -0.04017356507972947




params: 0.1, 0.1, 0.25
CCA score (train): 0.6900611354181514
CCA score (test): -0.039659921078442295




params: 0.1, 0.1, 0.5
CCA score (train): 0.4625252725279094
CCA score (test): -0.013077217712187394




params: 0.1, 0.1, 0.75
CCA score (train): 0.3223135944650719
CCA score (test): -0.053620191273066053




params: 0.1, 0.3, 0.25
CCA score (train): 0.3371637129400664
CCA score (test): 0.037688871153382264




params: 0.1, 0.3, 0.5
CCA score (train): 0.13584866659314598
CCA score (test): 0.015750359389104407


  - np.outer(score, score) @ residual / np.dot(score, score).item()


params: 0.1, 0.3, 0.75


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


CCA score (train): nan
CCA score (test): nan
/Users/marshlab2/Desktop/OCS_bigdata_ML_local/FINAL/replication/data/PartitionsEnCCA/partitions2_rep01fold10.csv
<class 'numpy.ndarray'>
(2634, 1)
(2634, 8)
(2634, 7095)
(292, 8)
(292, 7095)




params: 0.001, 0.01, 0.25
CCA score (train): 0.9817916347420024
CCA score (test): 0.07041949043439599




params: 0.001, 0.01, 0.5
CCA score (train): 0.9535977694324234
CCA score (test): 0.08464660360076826




params: 0.001, 0.01, 0.75
CCA score (train): 0.9198245520246475
CCA score (test): 0.08308430130050515




params: 0.001, 0.1, 0.25
CCA score (train): 0.6947887565154018
CCA score (test): 0.05756822465281908




params: 0.001, 0.1, 0.5
CCA score (train): 0.4774585142048924
CCA score (test): 0.08664937832855357




params: 0.001, 0.1, 0.75
CCA score (train): 0.3276637719352187
CCA score (test): 0.04005824421552129




params: 0.001, 0.3, 0.25
CCA score (train): 0.33542386598613416
CCA score (test): 0.07945956800696652




params: 0.001, 0.3, 0.5
CCA score (train): 0.14642768981548393
CCA score (test): 0.013660086485335343


  - np.outer(score, score) @ residual / np.dot(score, score).item()


params: 0.001, 0.3, 0.75


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


CCA score (train): nan
CCA score (test): nan




params: 0.01, 0.01, 0.25
CCA score (train): 0.9816748838476244
CCA score (test): 0.07137191664014075




params: 0.01, 0.01, 0.5
CCA score (train): 0.9533334428203619
CCA score (test): 0.08889506109668233




params: 0.01, 0.01, 0.75
CCA score (train): 0.9197242231369032
CCA score (test): 0.08329226773933329




params: 0.01, 0.1, 0.25
CCA score (train): 0.69372576380198
CCA score (test): 0.05523579764964981




params: 0.01, 0.1, 0.5
CCA score (train): 0.47818635628641437
CCA score (test): 0.09402424866681458




params: 0.01, 0.1, 0.75
CCA score (train): 0.321554714565232
CCA score (test): 0.049957429812833354




params: 0.01, 0.3, 0.25
CCA score (train): 0.3328356412162763
CCA score (test): 0.08083213562960379




params: 0.01, 0.3, 0.5
CCA score (train): 0.14506099405644624
CCA score (test): 0.015297211798138033


  - np.outer(score, score) @ residual / np.dot(score, score).item()


params: 0.01, 0.3, 0.75


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


CCA score (train): nan
CCA score (test): nan




params: 0.1, 0.01, 0.25
CCA score (train): 0.9806461001011462
CCA score (test): 0.07016648503970435




params: 0.1, 0.01, 0.5
CCA score (train): 0.95085375029125
CCA score (test): 0.09197478073418597




params: 0.1, 0.01, 0.75
CCA score (train): 0.9139971894144339
CCA score (test): 0.13497750253742113




params: 0.1, 0.1, 0.25
CCA score (train): 0.6845989734063129
CCA score (test): 0.05949816033481903




params: 0.1, 0.1, 0.5
CCA score (train): 0.47222235735411644
CCA score (test): 0.11710950552665977




params: 0.1, 0.1, 0.75
CCA score (train): 0.27158240915685905
CCA score (test): 0.0820297674003021




params: 0.1, 0.3, 0.25
CCA score (train): 0.2954252701584239
CCA score (test): 0.08959664252957777




params: 0.1, 0.3, 0.5
CCA score (train): 0.12172856078029914
CCA score (test): 0.02632777698867983


  - np.outer(score, score) @ residual / np.dot(score, score).item()


params: 0.1, 0.3, 0.75
CCA score (train): nan
CCA score (test): nan


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


In [16]:
#scratch - check that output is working
eCCA_eachfold_score_TEST

array([[0.013357523997242104, 0.024015047696634095, 0.02387254337830269,
        0.06494228628703813, 0.08034762000959406, 0.10798699701271675,
        0.12444584244389345, 0.08597932757408677, nan,
        0.013559102295631398, 0.029059335097993078, 0.02491663518597864,
        0.07845811393921442, 0.10910731790413197, 0.13494631597653717,
        0.12501566422496047, 0.0870423873770807, nan,
        -0.006136543134060868, 0.023226432571726185,
        -0.025336165215354978, 0.05858600877003117, 0.09987000197796037,
        0.07943119655256825, 0.13248368284664602, 0.07239435000105865,
        nan],
       [0.021822279437953007, 0.018427258205131025, 0.03899354407002109,
        -0.00036620678165255605, 0.0718568661598038, 0.04000342232571574,
        0.04201501406959407, 0.007270660414080421, nan,
        0.02283531872582545, 0.014161055912492726, 0.029429752703832524,
        0.004415235917835103, 0.061215073398919184, 0.036540261802811225,
        0.03859629608133286, 0.00354934474

In [17]:
#average TRAIN and TEST correlations over CV folds

#print(eCCA_eachfold_score_TRAIN[0:4,:])
eCCA_average_score_TRAIN = np.nanmean(eCCA_eachfold_score_TRAIN[:,:], axis=0)
print(eCCA_average_score_TRAIN)

print('\n')

eCCA_average_score_TEST = np.nanmean(eCCA_eachfold_score_TEST[:,:], axis=0)
print(eCCA_average_score_TEST)
print(max(eCCA_average_score_TEST))

print('\n')
print(eCCA_eachfold_c1)
print('\n')
print(eCCA_eachfold_c2)
print('\n')
print(eCCA_eachfold_l1_ratio)

#generate visualization/heatmap of correlation by params

ZeroDivisionError: division by zero

In [19]:
# save files
np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/replication/results/elasticnetCCA_scaled/discovery-selected-features/hyperparam_tuning/eCCA_eachfold_c1.csv', eCCA_eachfold_c1, delimiter=',')
np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/replication/results/elasticnetCCA_scaled/discovery-selected-features/hyperparam_tuning/eCCA_eachfold_c2.csv', eCCA_eachfold_c2, delimiter=',')
np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/replication/results/elasticnetCCA_scaled/discovery-selected-features/hyperparam_tuning/eCCA_eachfold_l1_ratio.csv', eCCA_eachfold_l1_ratio, delimiter=',')
np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/replication/results/elasticnetCCA_scaled/discovery-selected-features/hyperparam_tuning/eCCA_eachfold_score_TRAIN.csv', eCCA_eachfold_score_TRAIN, delimiter=',')
np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/replication/results/elasticnetCCA_scaled/discovery-selected-features/hyperparam_tuning/eCCA_eachfold_score_TEST.csv', eCCA_eachfold_score_TEST, delimiter=',')

# END OF HYPERPARAMETER OPTIMIZATION CODE


In [20]:
#Rerun elastic net CCA using full discovery sample and optimized hyperparameters
from cca_zoo.models import ElasticCCA
eCCA_model = ElasticCCA(max_iter=1000000000)

#discovery-selected features
optim_c1 = 0.1
optim_c2 = 0.3
optim_l1_ratio = 0.25
#replication-selected features
#optim_c1 = 0.001
#optim_c2 = 0.01
#optim_l1_ratio = 0.75

#load selected feature indices file
allsubsample_selected_features = np.transpose(stable_features[:,:])
print(allsubsample_selected_features.shape)

allsubsample_rsFC_features = rsFC_features[:, allsubsample_selected_features[:,0] - 1]
#print(alldiscovery_rsFC_features)
print(allsubsample_rsFC_features.shape)

dims = min(np.linalg.matrix_rank(allsubsample_rsFC_features),np.linalg.matrix_rank(clin_features))
print('dims: ' + str(dims))

eCCA_fit = ElasticCCA(c = (optim_c1, optim_c2), l1_ratio = optim_l1_ratio, latent_dims=dims).fit([clin_features, allsubsample_rsFC_features])
print('params: ' + str(optim_c1) + ', ' + str(optim_c2) + ', ' + str(optim_l1_ratio))
allsubsample_scores = eCCA_fit.score((clin_features, allsubsample_rsFC_features))
print('All subsample CCA scores: ' + str(allsubsample_scores))

#calculate transformed canonical variate values
U,V = eCCA_fit.transform([clin_features, allsubsample_rsFC_features])
#U1,V1 = eCCA_fit.fit_transform([clin_features, alldiscovery_rsFC_features])



(7095, 1)
(2926, 7095)
dims: 8




params: 0.1, 0.3, 0.25
All subsample CCA scores: [0.30785829 0.2588202  0.26790421 0.26115445 0.29509618 0.20255988
 0.25854365 0.26562124]


In [21]:
# View clinical loadings
#alldiscovery_loadings = eCCA_fit.get_loadings((clin_features, alldiscovery_rsFC_features))
allsubsample_loadings = eCCA_fit.get_loadings((clin_features, allsubsample_rsFC_features), normalize=True)
allsubsample_loadings_u = allsubsample_loadings[0]
allsubsample_loadings_v = allsubsample_loadings[1]

print(allsubsample_loadings_u.shape)
print(allsubsample_loadings_v.shape)

print('All subsample - clinical loadings CV1: \n' + str(allsubsample_loadings_u[:,0]))
print('All subsample - clinical loadings CV2: \n' + str(allsubsample_loadings_u[:,1]))
print('All subsample - clinical loadings CV3: \n' + str(allsubsample_loadings_u[:,2]))
print('All subsample - clinical loadings CV4: \n' + str(allsubsample_loadings_u[:,3]))
print('All subsample - clinical loadings CV5: \n' + str(allsubsample_loadings_u[:,4]))
print('All subsample - clinical loadings CV6: \n' + str(allsubsample_loadings_u[:,5]))
print('All subsample - clinical loadings CV7: \n' + str(allsubsample_loadings_u[:,6]))
print('All subsample - clinical loadings CV8: \n' + str(allsubsample_loadings_u[:,7]))

print(clin_features@allsubsample_loadings_u[:,0])


(8, 8)
(7095, 8)
All subsample - clinical loadings CV1: 
[-36.23461112 -23.26651035 -17.64034161 -18.27827868 -30.87635264
 -15.65411745 -15.93798782 -34.31117888]
All subsample - clinical loadings CV2: 
[  9.3414226   26.70461106  38.43223616  34.6054645    2.96560336
  -2.70985046 -14.55728883  17.91307486]
All subsample - clinical loadings CV3: 
[12.60788172  7.00976173  4.05623745  4.14789932 16.04268573 50.92832773
 19.75479543  7.94648911]
All subsample - clinical loadings CV4: 
[ -3.19814589  -5.82987734  18.01401483 -34.08248801  -3.37321765
  -1.03016694  -3.00194069  -2.88774666]
All subsample - clinical loadings CV5: 
[ -3.90879916   2.11663587   5.90217808   2.90245317 -40.08579763
  -9.53889899  -6.76321853  22.40986241]
All subsample - clinical loadings CV6: 
[19.14087767 54.74574608 20.0493424  22.71831906 13.43562577 14.94354819
 28.44842045 19.2110289 ]
All subsample - clinical loadings CV7: 
[18.12800489 10.34120401 25.31715538 20.45330288 15.92632801 20.59203329
 51.

In [22]:
# Save clin loadings
np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/replication/results/elasticnetCCA_scaled/allreplication_clinloadings_u_CV1norm.csv', allsubsample_loadings_u[:,0], delimiter=',')
np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/replication/results/elasticnetCCA_scaled/allreplication_clinloadings_u_CV2norm.csv', allsubsample_loadings_u[:,1], delimiter=',')
np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/replication/results/elasticnetCCA_scaled/allreplication_clinloadings_u_CV3norm.csv', allsubsample_loadings_u[:,2], delimiter=',')
np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/replication/results/elasticnetCCA_scaled/allreplication_clinloadings_u_CV4norm.csv', allsubsample_loadings_u[:,3], delimiter=',')
np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/replication/results/elasticnetCCA_scaled/allreplication_clinloadings_u_CV5norm.csv', allsubsample_loadings_u[:,4], delimiter=',')
np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/replication/results/elasticnetCCA_scaled/allreplication_clinloadings_u_CV6norm.csv', allsubsample_loadings_u[:,5], delimiter=',')
np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/replication/results/elasticnetCCA_scaled/allreplication_clinloadings_u_CV7norm.csv', allsubsample_loadings_u[:,6], delimiter=',')
np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/replication/results/elasticnetCCA_scaled/allreplication_clinloadings_u_CV8norm.csv', allsubsample_loadings_u[:,7], delimiter=',')
np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/replication/results/elasticnetCCA_scaled/allreplication_clinloadings_allnorm.csv', allsubsample_loadings_u, delimiter=',')



In [23]:
# View rs-FC loadings
allsubsample_loadings_v_CV1 = np.argsort(abs(allsubsample_loadings_v[:,0]))
#print(alldiscovery_loadings_v_CV1.shape)
print('CV1')
print(allsubsample_loadings_v_CV1[-20:] + 1)
print(allsubsample_loadings_v[allsubsample_loadings_v_CV1[-20:],0])

allsubsample_loadings_v_CV2 = np.argsort(abs(allsubsample_loadings_v[:,1]))
print('CV2')
print(allsubsample_loadings_v_CV2[-20:] + 1)
print(allsubsample_loadings_v[allsubsample_loadings_v_CV2[-20:],1])

allsubsample_loadings_v_CV3 = np.argsort(abs(allsubsample_loadings_v[:,2]))
print('CV3')
print(allsubsample_loadings_v_CV3[-20:] + 1)
print(allsubsample_loadings_v[allsubsample_loadings_v_CV3[-20:],2])

allsubsample_loadings_v_CV4 = np.argsort(abs(allsubsample_loadings_v[:,3]))
print('CV4')
print(allsubsample_loadings_v_CV4[-20:] + 1)
print(allsubsample_loadings_v[allsubsample_loadings_v_CV4[-20:],3])

allsubsample_loadings_v_CV5 = np.argsort(abs(allsubsample_loadings_v[:,4]))
allsubsample_loadings_v_CV6 = np.argsort(abs(allsubsample_loadings_v[:,5]))
allsubsample_loadings_v_CV7 = np.argsort(abs(allsubsample_loadings_v[:,6]))
allsubsample_loadings_v_CV8 = np.argsort(abs(allsubsample_loadings_v[:,7]))

CV1
[ 857  796  847 3465  347  350 2524  998  300 2051 3472 3249 3147 4210
  545  303   22 2135  537 1922]
[ -8.56942051  -8.63677465   8.67595258  -8.72420353  -8.76401239
  -9.29641937  -9.43080467  -9.44001602  -9.56158988  -9.60563173
  -9.68021515  -9.84486576 -10.04585609 -10.47377977 -10.60572908
 -10.82796689 -11.03556594 -11.04612459 -11.86851614 -13.27780614]
CV2
[5303 5269 1734 3456 4227 1870 5268 3467 4208 1717  415  396 6617 1869
  349 5371 1174  811 1876 5279]
[ 8.40205833  8.41959919  8.57862391  8.58194903  8.6342477   8.65066862
  8.72558863  8.73074393  8.87976333  8.89484759  8.90162788  8.90520553
  8.93514099  8.96268139  9.09284772  9.68094063 10.69536119 10.80273622
 11.03233733 11.30948912]
CV3
[5308 2149 2144 4931 2896 1842 1719 5396 4839 1723 4893 1066 4892 4934
 5487 1202 4917 1737 4886 4905]
[ -6.66086047   6.71116324   6.81531304   6.89992707   6.92103118
  -6.92511015  -7.01070296  -7.10965053   7.21819504   7.25606092
  -7.3051886    7.38681285  -7.660663

In [24]:
# Save rsFC loadings
np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/replication/results/elasticnetCCA_scaled/allreplication_rsFCinds_v_CV1.csv', allsubsample_loadings_v_CV1 + 1, delimiter=',')
np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/replication/results/elasticnetCCA_scaled/allreplication_rsFCloadings_v_CV1.csv', allsubsample_loadings_v[allsubsample_loadings_v_CV1,0], delimiter=',')

np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/replication/results/elasticnetCCA_scaled/allreplication_rsFCinds_v_CV2.csv', allsubsample_loadings_v_CV2 + 1, delimiter=',')
np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/replication/results/elasticnetCCA_scaled/allreplication_rsFCloadings_v_CV2.csv', allsubsample_loadings_v[allsubsample_loadings_v_CV2,1], delimiter=',')

np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/replication/results/elasticnetCCA_scaled/allreplication_rsFCinds_v_CV3.csv', allsubsample_loadings_v_CV3 + 1, delimiter=',')
np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/replication/results/elasticnetCCA_scaled/allreplication_rsFCloadings_v_CV3.csv', allsubsample_loadings_v[allsubsample_loadings_v_CV3,2], delimiter=',')

np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/replication/results/elasticnetCCA_scaled/allreplication_rsFCinds_v_CV4.csv', allsubsample_loadings_v_CV4 + 1, delimiter=',')
np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/replication/results/elasticnetCCA_scaled/allreplication_rsFCloadings_v_CV4.csv', allsubsample_loadings_v[allsubsample_loadings_v_CV4,3], delimiter=',')

np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/replication/results/elasticnetCCA_scaled/allreplication_rsFCinds_v_CV5.csv', allsubsample_loadings_v_CV5 + 1, delimiter=',')
np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/replication/results/elasticnetCCA_scaled/allreplication_rsFCloadings_v_CV5.csv', allsubsample_loadings_v[allsubsample_loadings_v_CV5,4], delimiter=',')

np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/replication/results/elasticnetCCA_scaled/allreplication_rsFCinds_v_CV6.csv', allsubsample_loadings_v_CV6 + 1, delimiter=',')
np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/replication/results/elasticnetCCA_scaled/allreplication_rsFCloadings_v_CV6.csv', allsubsample_loadings_v[allsubsample_loadings_v_CV6,5], delimiter=',')

np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/replication/results/elasticnetCCA_scaled/allreplication_rsFCinds_v_CV7.csv', allsubsample_loadings_v_CV7 + 1, delimiter=',')
np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/replication/results/elasticnetCCA_scaled/allreplication_rsFCloadings_v_CV7.csv', allsubsample_loadings_v[allsubsample_loadings_v_CV7,6], delimiter=',')

np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/replication/results/elasticnetCCA_scaled/allreplication_rsFCinds_v_CV8.csv', allsubsample_loadings_v_CV8 + 1, delimiter=',')
np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/replication/results/elasticnetCCA_scaled/allreplication_rsFCloadings_v_CV8.csv', allsubsample_loadings_v[allsubsample_loadings_v_CV8,7], delimiter=',')



In [25]:
# Save files
#discovery
np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/replication/results/elasticnetCCA_scaled/transformed_clin_U.csv', U, delimiter=',')
np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/replication/results/elasticnetCCA_scaled/transformed_rsFC_V.csv', V, delimiter=',')


In [32]:
alldiscovery_scores = eCCA_fit.score((clin_features, alldiscovery_rsFC_features))
print('All discovery CCA scores: ' + str(alldiscovery_scores))



NameError: name 'alldiscovery_rsFC_features' is not defined

In [41]:
np.corrcoef(U[:,1], V[:,1])

array([[1.      , 0.792935],
       [0.792935, 1.      ]])