In [None]:
#setup - one-time tasks (run from terminal, not jupyter)
pip install cca-zoo

In [1]:
#import libraries
import pandas as pd
import numpy as np
import scipy
import os

userhome = os.path.expanduser('~')

In [2]:
# load residualized and scaled clinical features (from R script)
#discovery
clin_file = userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/discovery/data/clin_features_scaled.csv'
#replication
#clin_file = userhome + r'/Desktop/OCS_bigdata_ML_local/data/replication/clin_features_scaled.csv'

clin_features = pd.read_csv(clin_file)
clin_features = clin_features.to_numpy()
print(type(clin_features))
print(clin_features.shape)

<class 'numpy.ndarray'>
(2846, 8)


In [3]:
# load residualized and scaled imaging features (from R script)
#discovery
rsFC_file = userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/discovery/data/rsFC_features_scaled.csv'
#replication
#rsFC_file = userhome + r'/Desktop/OCS_bigdata_ML_local/data/replication/rsFC_features_scaled.csv'

rsFC_feat_chunks = pd.read_csv(rsFC_file, chunksize=1000)
rsFC_features = pd.concat(rsFC_feat_chunks)
#rsFC_features.sample(10)
rsFC_features = rsFC_features.to_numpy()
print(type(rsFC_features))
print(rsFC_features.shape)

<class 'numpy.ndarray'>
(2846, 61776)


In [4]:
# load selected rsFC features
#discovery
stable_features_file = userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/discovery/results/discovery_stable_feats_95_Pearson05.csv'
#replication
#stable_features_file = userhome + r'/Desktop/OCS_bigdata_ML_local/data/replication/replication_stable_feats_95_Pearson05.csv'

stable_features = pd.read_csv(stable_features_file, header=None)
stable_features = stable_features.to_numpy()
stable_features = np.transpose(stable_features)
print(type(stable_features))
print(stable_features.shape)

# if using different features for each CV fold
#CVfold_features_file = userhome + r'/Desktop/OCS_bigdata_ML_local/data/crossValidFeatsPearsonNum.csv'
#CVfold_features = pd.read_csv(CVfold_features_file, header=None)
#CVfold_features = CVfold_features.to_numpy()
#print(type(CVfold_features))
#print(CVfold_features)
#print(CVfold_features.shape)

<class 'numpy.ndarray'>
(1, 7095)


In [7]:
# initialize arrays to store optimal hyperparams and scores from each CV loop
eCCA_eachfold_c1 = np.empty((10,27), dtype=object) 
eCCA_eachfold_c2 = np.empty((10,27), dtype=object) 
eCCA_eachfold_l1_ratio = np.empty((10,27), dtype=object)
eCCA_eachfold_score_TRAIN = np.empty((10,27), dtype=object) 
eCCA_eachfold_score_TEST = np.empty((10,27), dtype=object) 

print(eCCA_eachfold_c1)

[[None None None None None None None None None None None None None None
  None None None None None None None None None None None None None]
 [None None None None None None None None None None None None None None
  None None None None None None None None None None None None None]
 [None None None None None None None None None None None None None None
  None None None None None None None None None None None None None]
 [None None None None None None None None None None None None None None
  None None None None None None None None None None None None None]
 [None None None None None None None None None None None None None None
  None None None None None None None None None None None None None]
 [None None None None None None None None None None None None None None
  None None None None None None None None None None None None None]
 [None None None None None None None None None None None None None None
  None None None None None None None None None None None None None]
 [None None None Non

In [20]:
#hyperparam optimization
from cca_zoo.models import ElasticCCA

eCCA_model = ElasticCCA(max_iter=100000)
eCCA_param_grid = {"c" : [[1e-3,1e-2,1e-1], [1e-2,1e-1,3e-1]], "l1_ratio" : [0.25,0.5,0.75]}
# c is lasso alpha for each view
# l1_ratio is l1 ratio in lasso subproblems

for foldind in range(0,10):
    #foldind = 0 #remember python starts indexing at 0, while R and Matlab start at 1

    # load 10-fold cross validation (CV) splits
    #discovery
    #thisfold_file = userhome + r'/Desktop/OCS_bigdata_ML_local/data/partitions2_rep01fold0' + str(foldind + 1) + r'.csv'
    #replication
    thisfold_file = userhome + r'/Desktop/OCS_bigdata_ML_local/data/replication/partitions2_rep01fold0' + str(foldind + 1) + r'.csv'
    print(thisfold_file)
    thisfold_file = pd.read_csv(thisfold_file)
    thisfold_file = thisfold_file.to_numpy()
    print(type(thisfold_file))
    print(thisfold_file.shape)

    # for each CV fold, extract only the selected participants and features
    # NB: put this in loop later once troubleshooting complete
    # NB: row and col indices were generated in R/Matlab, which index from 1. python indexes from 0
    thisfold_clin_features = clin_features[(thisfold_file - 1), :]
    #print(thisfold_clin_features.shape)
    thisfold_clin_features = np.squeeze(thisfold_clin_features)
    print(thisfold_clin_features.shape)
    #print(thisfold_clin_features)

    thisfold_rsFC_features = rsFC_features[(thisfold_file - 1), stable_features.astype(int)-1]
    print(thisfold_rsFC_features.shape)

    #thisfoldfeats = CVfold_features[foldind, :][~np.isnan(CVfold_features[foldind, :])]
    #thisfold_rsFC_features = rsFC_features[(thisfold_file - 1), thisfoldfeats.astype(int)-1]
    #print(thisfold_rsFC_features.shape)
    #print(thisfold_rsFC_features)


    #test set for this CV fold
    thisfold_clin_features_TEST = np.delete(clin_features,thisfold_file-1, axis=0)
    print(thisfold_clin_features_TEST.shape)
    thisfold_rsFC_features_TEST = np.delete(rsFC_features,thisfold_file-1, axis=0)
    thisfold_rsFC_features_TEST = thisfold_rsFC_features_TEST[:, stable_features.astype(int) - 1]
    thisfold_rsFC_features_TEST = np.squeeze(thisfold_rsFC_features_TEST)
    #thisfold_rsFC_features_TEST = thisfold_rsFC_features_TEST[:, thisfoldfeats.astype(int) - 1]
    print(thisfold_rsFC_features_TEST.shape)
    
    paramind = 0
    for thisc1 in eCCA_param_grid["c"][0]:
        for thisc2 in eCCA_param_grid["c"][1]:
            for thisl1_ratio in eCCA_param_grid["l1_ratio"]:
                eCCA_fit = ElasticCCA(c = (thisc1, thisc2), l1_ratio = thisl1_ratio, latent_dims=1).fit([thisfold_clin_features, thisfold_rsFC_features])
                eCCA_eachfold_c1[foldind,paramind] = thisc1
                eCCA_eachfold_c2[foldind,paramind] = thisc2
                eCCA_eachfold_l1_ratio[foldind,paramind] = thisl1_ratio
                print('params: ' + str(thisc1) + ', ' + str(thisc2) + ', ' + str(thisl1_ratio))
                eCCA_eachfold_score_TRAIN[foldind,paramind] = eCCA_fit.score((thisfold_clin_features, thisfold_rsFC_features))[0]
                print('CCA score (train): ' + str(eCCA_eachfold_score_TRAIN[foldind,paramind]))
                eCCA_eachfold_score_TEST[foldind,paramind] = eCCA_fit.score((thisfold_clin_features_TEST, thisfold_rsFC_features_TEST))[0]
                print('CCA score (test): ' + str(eCCA_eachfold_score_TEST[foldind,paramind]))

                paramind = paramind + 1

/Users/marshlab2/Desktop/OCS_bigdata_ML_local/data/replication/partitions2_rep01fold10.csv
<class 'numpy.ndarray'>
(2634, 1)
(2634, 8)
(2634, 7853)
(292, 8)
(292, 7853)




params: 0.001, 0.01, 0.25
CCA score (train): 0.9818665531670472
CCA score (test): 0.24524451036301165




params: 0.001, 0.01, 0.5
CCA score (train): 0.9558678155816083
CCA score (test): 0.27172839048152775




params: 0.001, 0.01, 0.75
CCA score (train): 0.9292773234373344
CCA score (test): 0.29478251795411436




params: 0.001, 0.1, 0.25
CCA score (train): 0.7753955071439962
CCA score (test): 0.26964243666928955




params: 0.001, 0.1, 0.5
CCA score (train): 0.6340885155719884
CCA score (test): 0.1271415691419011




params: 0.001, 0.1, 0.75
CCA score (train): 0.5027421679509019
CCA score (test): 0.06879497796313272




params: 0.001, 0.3, 0.25
CCA score (train): 0.5129145502769972
CCA score (test): 0.08521070410940101




params: 0.001, 0.3, 0.5
CCA score (train): 0.18115645877155906
CCA score (test): 0.0891738504910875


  - np.outer(score, score) @ residual / np.dot(score, score).item()


params: 0.001, 0.3, 0.75


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


CCA score (train): nan
CCA score (test): nan




params: 0.01, 0.01, 0.25
CCA score (train): 0.9817680052069209
CCA score (test): 0.2481673366396413




params: 0.01, 0.01, 0.5
CCA score (train): 0.9557749700783513
CCA score (test): 0.2723258682663743




params: 0.01, 0.01, 0.75
CCA score (train): 0.9290365857536185
CCA score (test): 0.29382722445488874




params: 0.01, 0.1, 0.25
CCA score (train): 0.7748780868312104
CCA score (test): 0.26998577955477865




params: 0.01, 0.1, 0.5
CCA score (train): 0.6322929483462785
CCA score (test): 0.12655749990456178




params: 0.01, 0.1, 0.75
CCA score (train): 0.5018448847290089
CCA score (test): 0.06178845808820421




params: 0.01, 0.3, 0.25
CCA score (train): 0.5129898263243906
CCA score (test): 0.08354837132307269




params: 0.01, 0.3, 0.5
CCA score (train): 0.17346458943454257
CCA score (test): 0.07945891948756612


  - np.outer(score, score) @ residual / np.dot(score, score).item()


params: 0.01, 0.3, 0.75


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


CCA score (train): nan
CCA score (test): nan




params: 0.1, 0.01, 0.25
CCA score (train): 0.981156214281647
CCA score (test): 0.2625293484627913




params: 0.1, 0.01, 0.5
CCA score (train): 0.9550791922406772
CCA score (test): 0.27170643105267134




params: 0.1, 0.01, 0.75
CCA score (train): 0.9238173882511678
CCA score (test): 0.2894052155173652




params: 0.1, 0.1, 0.25
CCA score (train): 0.7700640505676766
CCA score (test): 0.2668365650220246




params: 0.1, 0.1, 0.5
CCA score (train): 0.6164008557963172
CCA score (test): 0.12770647411941094




params: 0.1, 0.1, 0.75
CCA score (train): 0.4809772475610634
CCA score (test): 0.07453911948425684




params: 0.1, 0.3, 0.25
CCA score (train): 0.5038428435953324
CCA score (test): 0.08975066761465489




params: 0.1, 0.3, 0.5
CCA score (train): 0.12197268506653458
CCA score (test): -0.01439294291276183


  - np.outer(score, score) @ residual / np.dot(score, score).item()


params: 0.1, 0.3, 0.75
CCA score (train): nan
CCA score (test): nan


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


In [21]:
#scratch - check that output is working
eCCA_eachfold_score_TEST

array([[0.30222446088483745, 0.32420034684307675, 0.33429326504795753,
        0.2875740026810609, 0.15480230636800374, 0.10791159923769089,
        0.11292152097447494, 0.01742833588393289, nan,
        0.3013560609582586, 0.32342823189159375, 0.3240042418352318,
        0.2870933835012517, 0.15130460464974593, 0.08876832646959554,
        0.10862727415279005, 0.03470273023402126, nan,
        0.2823496515268751, 0.27662005111159926, 0.25379533174224456,
        0.2830147760703039, 0.17786646561276576, 0.0984419094949418,
        0.09610001901114629, 0.03136961707369146, nan],
       [0.21963502497958798, 0.2734917927859495, 0.312199542353387,
        0.2402829741264676, 0.14088763240907687, 0.12147914819185579,
        0.11262054901760199, 0.017811970674206368, nan,
        0.2241768573621683, 0.27586954023676635, 0.3140603840120937,
        0.24018517992204025, 0.15800529295420906, 0.10976897588709944,
        0.1130936328584089, 0.023489152454849282, nan, 0.258815138954827,
       

In [22]:
#average TRAIN and TEST correlations over CV folds

#print(eCCA_eachfold_score_TRAIN[0:4,:])
eCCA_average_score_TRAIN = np.nanmean(eCCA_eachfold_score_TRAIN[:,:], axis=0)
print(eCCA_average_score_TRAIN)

print('\n')

eCCA_average_score_TEST = np.nanmean(eCCA_eachfold_score_TEST[:,:], axis=0)
print(eCCA_average_score_TEST)
print(max(eCCA_average_score_TEST))

print('\n')
print(eCCA_eachfold_c1)
print('\n')
print(eCCA_eachfold_c2)
print('\n')
print(eCCA_eachfold_l1_ratio)

#generate visualization/heatmap of correlation by params

ZeroDivisionError: division by zero

In [24]:
# save files
np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/results/replication/eCCA_eachfold_c1.csv', eCCA_eachfold_c1, delimiter=',')
np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/results/replication/eCCA_eachfold_c2.csv', eCCA_eachfold_c2, delimiter=',')
np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/results/replication/eCCA_eachfold_l1_ratio.csv', eCCA_eachfold_l1_ratio, delimiter=',')
np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/results/replication/eCCA_eachfold_score_TRAIN.csv', eCCA_eachfold_score_TRAIN, delimiter=',')
np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/results/replication/eCCA_eachfold_score_TEST.csv', eCCA_eachfold_score_TEST, delimiter=',')

# END OF HYPERPARAMETER OPTIMIZATION CODE


In [6]:
#Rerun elastic net CCA using full discovery sample and optimized hyperparameters
from cca_zoo.models import ElasticCCA
eCCA_model = ElasticCCA(max_iter=1000000000)

optim_c1 = 0.01
optim_c2 = 0.01
optim_l1_ratio = 0.75
#optim_c1 = 0.001
#optim_c2 = 0.01
#optim_l1_ratio = 0.75

#load selected feature indices file
allsubsample_selected_features = np.transpose(stable_features[:,:])
print(allsubsample_selected_features.shape)

#load selected feature indices file - alternative code block
#alldiscovery_selected_features_file = userhome + r'/Desktop/OCS_bigdata_ML_local/data/allDiscovery_selectedFeatsPearson.csv'
#alldiscovery_selected_features_file = userhome + r'/Desktop/OCS_bigdata_ML_local/data/allDiscovery_selectedFeatsSpearman.csv'
#alldiscovery_selected_features = pd.read_csv(alldiscovery_selected_features_file, header = None)
#alldiscovery_selected_features = alldiscovery_selected_features.to_numpy()
#print(alldiscovery_selected_features.shape)

allsubsample_rsFC_features = rsFC_features[:, allsubsample_selected_features[:,0] - 1]
#print(alldiscovery_rsFC_features)
print(allsubsample_rsFC_features.shape)

dims = min(np.linalg.matrix_rank(allsubsample_rsFC_features),np.linalg.matrix_rank(clin_features))
print('dims: ' + str(dims))

eCCA_fit = ElasticCCA(c = (optim_c1, optim_c2), l1_ratio = optim_l1_ratio, latent_dims=dims).fit([clin_features, allsubsample_rsFC_features])
print('params: ' + str(optim_c1) + ', ' + str(optim_c2) + ', ' + str(optim_l1_ratio))
allsubsample_scores = eCCA_fit.score((clin_features, allsubsample_rsFC_features))
print('All discovery CCA scores: ' + str(allsubsample_scores))

#calculate transformed canonical variate values
U,V = eCCA_fit.transform([clin_features, allsubsample_rsFC_features])
#U1,V1 = eCCA_fit.fit_transform([clin_features, alldiscovery_rsFC_features])



(7095, 1)
(2846, 7095)
dims: 8




params: 0.01, 0.01, 0.75
All discovery CCA scores: [0.91598883 0.86340471 0.80823536 0.68006699 0.64355504 0.70307455
 0.69152796 0.62821598]


In [8]:
# View clinical loadings
#alldiscovery_loadings = eCCA_fit.get_loadings((clin_features, alldiscovery_rsFC_features))
allsubsample_loadings = eCCA_fit.get_loadings((clin_features, allsubsample_rsFC_features), normalize=True)
allsubsample_loadings_u = allsubsample_loadings[0]
allsubsample_loadings_v = allsubsample_loadings[1]

print(allsubsample_loadings_u.shape)
print(allsubsample_loadings_v.shape)

print('All subsample - clinical loadings CV1: \n' + str(allsubsample_loadings_u[:,0]))
print('All subsample - clinical loadings CV2: \n' + str(allsubsample_loadings_u[:,1]))
print('All subsample - clinical loadings CV3: \n' + str(allsubsample_loadings_u[:,2]))
print('All subsample - clinical loadings CV4: \n' + str(allsubsample_loadings_u[:,3]))
print('All subsample - clinical loadings CV5: \n' + str(allsubsample_loadings_u[:,4]))
print('All subsample - clinical loadings CV6: \n' + str(allsubsample_loadings_u[:,5]))
print('All subsample - clinical loadings CV7: \n' + str(allsubsample_loadings_u[:,6]))
print('All subsample - clinical loadings CV8: \n' + str(allsubsample_loadings_u[:,7]))

print(clin_features@allsubsample_loadings_u[:,0])


(8, 8)
(7095, 8)
All subsample - clinical loadings CV1: 
[-29.87728398 -32.84983081 -17.7899728  -21.83395646 -33.11171109
 -32.24213927 -31.70820739 -30.56246484]
All subsample - clinical loadings CV2: 
[-19.50721211 -20.39291668 -45.32096311 -38.27131303  -5.98648856
   7.85647917  -0.66478871 -30.43564435]
All subsample - clinical loadings CV3: 
[-29.0733797   -0.80026369  -8.08636641  -8.75547724 -53.27694366
 -14.3327187    0.5671636  -25.27907334]
All subsample - clinical loadings CV4: 
[-0.90608016 66.27968297 16.16605231 20.17873163  6.16098722  5.11959725
  4.68256584 14.69063035]
All subsample - clinical loadings CV5: 
[-67.075833   -17.07174243   0.15018403 -16.27749895 -23.37626424
 -18.46033506 -18.69409442 -34.62252024]
All subsample - clinical loadings CV6: 
[18.83955419 19.36591198 14.86992767 39.82479859 19.54860999 53.70089834
  9.1889731  18.99078554]
All subsample - clinical loadings CV7: 
[-10.63135178 -15.92450723   7.74643613 -60.27605197  -6.28378588
  -5.457676

In [10]:
# Save clin loadings
#discovery
np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/discovery/results/elasticnetCCA_scaled/alldiscovery_clinloadings_u_CV1norm.csv', allsubsample_loadings_u[:,0], delimiter=',')
np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/discovery/results/elasticnetCCA_scaled/alldiscovery_clinloadings_u_CV2norm.csv', allsubsample_loadings_u[:,1], delimiter=',')
np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/discovery/results/elasticnetCCA_scaled/alldiscovery_clinloadings_u_CV3norm.csv', allsubsample_loadings_u[:,2], delimiter=',')
np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/discovery/results/elasticnetCCA_scaled/alldiscovery_clinloadings_u_CV4norm.csv', allsubsample_loadings_u[:,3], delimiter=',')
np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/discovery/results/elasticnetCCA_scaled/alldiscovery_clinloadings_u_CV5norm.csv', allsubsample_loadings_u[:,4], delimiter=',')
np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/discovery/results/elasticnetCCA_scaled/alldiscovery_clinloadings_u_CV6norm.csv', allsubsample_loadings_u[:,5], delimiter=',')
np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/discovery/results/elasticnetCCA_scaled/alldiscovery_clinloadings_u_CV7norm.csv', allsubsample_loadings_u[:,6], delimiter=',')
np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/discovery/results/elasticnetCCA_scaled/alldiscovery_clinloadings_u_CV8norm.csv', allsubsample_loadings_u[:,7], delimiter=',')
np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/discovery/results/elasticnetCCA_scaled/alldiscovery_clinloadings_allnorm.csv', allsubsample_loadings_u, delimiter=',')

#replication
#np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/results/replication/allreplication_clinloadings_u_CV1norm.csv', allsubsample_loadings_u[:,0], delimiter=',')
#np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/results/replication/allreplication_clinloadings_u_CV2norm.csv', allsubsample_loadings_u[:,1], delimiter=',')
#np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/results/replication/allreplication_clinloadings_u_CV3norm.csv', allsubsample_loadings_u[:,2], delimiter=',')
#np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/results/replication/allreplication_clinloadings_u_CV4norm.csv', allsubsample_loadings_u[:,3], delimiter=',')
#np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/results/replication/allreplication_clinloadings_u_CV5norm.csv', allsubsample_loadings_u[:,4], delimiter=',')
#np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/results/replication/allreplication_clinloadings_u_CV6norm.csv', allsubsample_loadings_u[:,5], delimiter=',')
#np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/results/replication/allreplication_clinloadings_u_CV7norm.csv', allsubsample_loadings_u[:,6], delimiter=',')
#np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/results/replication/allreplication_clinloadings_u_CV8norm.csv', allsubsample_loadings_u[:,7], delimiter=',')
#np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/results/replication/allreplication_clinloadings_allnorm.csv', allsubsample_loadings_u, delimiter=',')


In [12]:
# View rs-FC loadings
allsubsample_loadings_v_CV1 = np.argsort(abs(allsubsample_loadings_v[:,0]))
#print(alldiscovery_loadings_v_CV1.shape)
print('CV1')
print(allsubsample_loadings_v_CV1[-20:] + 1)
print(allsubsample_loadings_v[allsubsample_loadings_v_CV1[-20:],0])

allsubsample_loadings_v_CV2 = np.argsort(abs(allsubsample_loadings_v[:,1]))
print('CV2')
print(allsubsample_loadings_v_CV2[-20:] + 1)
print(allsubsample_loadings_v[allsubsample_loadings_v_CV2[-20:],1])

allsubsample_loadings_v_CV3 = np.argsort(abs(allsubsample_loadings_v[:,2]))
print('CV3')
print(allsubsample_loadings_v_CV3[-20:] + 1)
print(allsubsample_loadings_v[allsubsample_loadings_v_CV3[-20:],2])

allsubsample_loadings_v_CV4 = np.argsort(abs(allsubsample_loadings_v[:,3]))
print('CV4')
print(allsubsample_loadings_v_CV4[-20:] + 1)
print(allsubsample_loadings_v[allsubsample_loadings_v_CV4[-20:],3])

allsubsample_loadings_v_CV5 = np.argsort(abs(allsubsample_loadings_v[:,4]))
allsubsample_loadings_v_CV6 = np.argsort(abs(allsubsample_loadings_v[:,5]))
allsubsample_loadings_v_CV7 = np.argsort(abs(allsubsample_loadings_v[:,6]))
allsubsample_loadings_v_CV8 = np.argsort(abs(allsubsample_loadings_v[:,7]))

CV1
[4166 1603 1977 4383 3952  548 5346  545  762  243  763 2112 1792 3693
 1644 2314 6720 1628 6858 4419]
[ 4.57232405  4.57554598 -4.57782768 -4.61338109  4.61433741 -4.61516687
 -4.62774579 -4.62936985  4.6442467   4.69393567  4.7235962  -4.73059787
  4.75079366  4.83121524  4.8409002  -4.99066643  4.99685928 -5.02899547
  5.08979611  5.26335618]
CV2
[2673 6872 2590 6747 2678 6333 7061 3430 6334 6874 6330 2278 2285 6853
 6363 6339 6851 2310 6746 6446]
[ 4.51107257 -4.52521575  4.55528421 -4.57023451  4.57416041 -4.58818153
 -4.64289745  4.66471644 -4.67947967 -4.71046835 -4.71195027  4.71728935
  4.73602716 -4.7570617  -4.81742786 -4.85478599 -4.85732828  5.08943003
 -5.10504996 -5.23908342]
CV3
[2297 4932 4612 3461  595 1391 1388 4428 2533  856 3168 4764 6034 1854
 2804 1890 3848 6627 1708  964]
[-4.08465832 -4.0858767   4.09209689  4.09283472  4.1182668  -4.13030042
  4.14215869 -4.14308641 -4.14443257  4.16352929 -4.18789475  4.35178338
 -4.3822232  -4.38667182  4.4262888  -4.462

In [14]:
# Save rsFC loadings
#discovery
np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/discovery/results/elasticnetCCA_scaled/alldiscovery_rsFCinds_v_CV1.csv', allsubsample_loadings_v_CV1 + 1, delimiter=',')
np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/discovery/results/elasticnetCCA_scaled/alldiscovery_rsFCloadings_v_CV1.csv', allsubsample_loadings_v[allsubsample_loadings_v_CV1,0], delimiter=',')

np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/discovery/results/elasticnetCCA_scaled/alldiscovery_rsFCinds_v_CV2.csv', allsubsample_loadings_v_CV2 + 1, delimiter=',')
np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/discovery/results/elasticnetCCA_scaled/alldiscovery_rsFCloadings_v_CV2.csv', allsubsample_loadings_v[allsubsample_loadings_v_CV2,1], delimiter=',')

np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/discovery/results/elasticnetCCA_scaled/alldiscovery_rsFCinds_v_CV3.csv', allsubsample_loadings_v_CV3 + 1, delimiter=',')
np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/discovery/results/elasticnetCCA_scaled/alldiscovery_rsFCloadings_v_CV3.csv', allsubsample_loadings_v[allsubsample_loadings_v_CV3,2], delimiter=',')

np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/discovery/results/elasticnetCCA_scaled/alldiscovery_rsFCinds_v_CV4.csv', allsubsample_loadings_v_CV4 + 1, delimiter=',')
np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/discovery/results/elasticnetCCA_scaled/alldiscovery_rsFCloadings_v_CV4.csv', allsubsample_loadings_v[allsubsample_loadings_v_CV4,3], delimiter=',')

np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/discovery/results/elasticnetCCA_scaled/alldiscovery_rsFCinds_v_CV5.csv', allsubsample_loadings_v_CV5 + 1, delimiter=',')
np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/discovery/results/elasticnetCCA_scaled/alldiscovery_rsFCloadings_v_CV5.csv', allsubsample_loadings_v[allsubsample_loadings_v_CV5,4], delimiter=',')

np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/discovery/results/elasticnetCCA_scaled/alldiscovery_rsFCinds_v_CV6.csv', allsubsample_loadings_v_CV6 + 1, delimiter=',')
np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/discovery/results/elasticnetCCA_scaled/alldiscovery_rsFCloadings_v_CV6.csv', allsubsample_loadings_v[allsubsample_loadings_v_CV6,5], delimiter=',')

np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/discovery/results/elasticnetCCA_scaled/alldiscovery_rsFCinds_v_CV7.csv', allsubsample_loadings_v_CV7 + 1, delimiter=',')
np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/discovery/results/elasticnetCCA_scaled/alldiscovery_rsFCloadings_v_CV7.csv', allsubsample_loadings_v[allsubsample_loadings_v_CV7,6], delimiter=',')

np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/discovery/results/elasticnetCCA_scaled/alldiscovery_rsFCinds_v_CV8.csv', allsubsample_loadings_v_CV8 + 1, delimiter=',')
np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/discovery/results/elasticnetCCA_scaled/alldiscovery_rsFCloadings_v_CV8.csv', allsubsample_loadings_v[allsubsample_loadings_v_CV8,7], delimiter=',')

#replication
#np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/results/replication/allreplication_rsFCinds_v_CV1.csv', allsubsample_loadings_v_CV1 + 1, delimiter=',')
#np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/results/replication/allreplication_rsFCloadings_v_CV1.csv', allsubsample_loadings_v[allsubsample_loadings_v_CV1,0], delimiter=',')

#np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/results/replication/allreplication_rsFCinds_v_CV2.csv', allsubsample_loadings_v_CV2 + 1, delimiter=',')
#np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/results/replication/allreplication_rsFCloadings_v_CV2.csv', allsubsample_loadings_v[allsubsample_loadings_v_CV2,1], delimiter=',')

#np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/results/replication/allreplication_rsFCinds_v_CV3.csv', allsubsample_loadings_v_CV3 + 1, delimiter=',')
#np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/results/replication/allreplication_rsFCloadings_v_CV3.csv', allsubsample_loadings_v[allsubsample_loadings_v_CV3,2], delimiter=',')

#np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/results/replication/allreplication_rsFCinds_v_CV4.csv', allsubsample_loadings_v_CV4 + 1, delimiter=',')
#np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/results/replication/allreplication_rsFCloadings_v_CV4.csv', allsubsample_loadings_v[allsubsample_loadings_v_CV4,3], delimiter=',')

#np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/results/replication/allreplication_rsFCinds_v_CV5.csv', allsubsample_loadings_v_CV5 + 1, delimiter=',')
#np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/results/replication/allreplication_rsFCloadings_v_CV5.csv', allsubsample_loadings_v[allsubsample_loadings_v_CV5,4], delimiter=',')

#np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/results/replication/allreplication_rsFCinds_v_CV6.csv', allsubsample_loadings_v_CV6 + 1, delimiter=',')
#np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/results/replication/allreplication_rsFCloadings_v_CV6.csv', allsubsample_loadings_v[allsubsample_loadings_v_CV6,5], delimiter=',')

#np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/results/replication/allreplication_rsFCinds_v_CV7.csv', allsubsample_loadings_v_CV7 + 1, delimiter=',')
#np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/results/replication/allreplication_rsFCloadings_v_CV7.csv', allsubsample_loadings_v[allsubsample_loadings_v_CV7,6], delimiter=',')

#np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/results/replication/allreplication_rsFCinds_v_CV8.csv', allsubsample_loadings_v_CV8 + 1, delimiter=',')
#np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/results/replication/allreplication_rsFCloadings_v_CV8.csv', allsubsample_loadings_v[allsubsample_loadings_v_CV8,7], delimiter=',')

In [15]:
# Save files
#discovery
np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/discovery/results/elasticnetCCA_scaled/transformed_clin_U.csv', U, delimiter=',')
np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/FINAL/discovery/results/elasticnetCCA_scaled/transformed_rsFC_V.csv', V, delimiter=',')

#replication
#np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/results/replication/transformed_clin_U.csv', U, delimiter=',')
#np.savetxt(userhome + r'/Desktop/OCS_bigdata_ML_local/results/replication/transformed_rsFC_V.csv', V, delimiter=',')


In [32]:
alldiscovery_scores = eCCA_fit.score((clin_features, alldiscovery_rsFC_features))
print('All discovery CCA scores: ' + str(alldiscovery_scores))



NameError: name 'alldiscovery_rsFC_features' is not defined

In [41]:
np.corrcoef(U[:,1], V[:,1])

array([[1.      , 0.792935],
       [0.792935, 1.      ]])