In [1]:
import numpy as np
import scipy as sp
import scipy.stats
import os

import matplotlib.pyplot as plt

import tqdm

from joblib import Parallel, delayed

In [2]:
# Size of the trailing slice taken along each parcel axis in later cells.
cortical_parcels = 102

# Group-level arrays, read from paths relative to the notebook's working
# directory. Later cells index both as (leading axis, parcel, parcel, subject).
group_edge_counts = np.load('../../../gabriele/group_edge_counts.npy')
group_morphed_obs = np.load('../../../gabriele/group_morphed_obs.npy')

In [3]:
# Per-subject edge counts: entry 0 of the leading axis, restricted to the
# trailing `cortical_parcels` rows and columns; the subject axis is kept whole.
subject_coverage = group_edge_counts[0, -cortical_parcels:, -cortical_parcels:, :]

# Remove parcels 0 and 51 from both parcel axes. np.delete returns a new array,
# so the in-place zero-fill below does not write into group_edge_counts.
subject_coverage = np.delete(
    np.delete(subject_coverage, [0, 51], axis=0),
    [0, 51],
    axis=1,
)

# Replace NaN entries with 0.
nan_entries = np.isnan(subject_coverage)
subject_coverage[nan_entries] = 0

# Keep an independent per-subject copy of the edge coverage under its own name.
edge_coverage = subject_coverage.copy()

In [4]:
# 18 evenly spaced float64 values: 110, 130, ..., 450 (step 20).
# Built in integer arithmetic and then cast, so every value is exact.
frequencies = np.arange(110, 451, 20).astype(float)

In [5]:

# Split-half comparison of two subject cohorts, per parcel-block mask and per
# minimum-coverage threshold. The three result arrays are all indexed as
# [mask (lh, rh, lr), split, subj_threshold - 1].

# No matter how many channels one subject contributes to the overall group
# mean: if they have at least one count on an edge, they count as one subject
# sampling that edge.
# Computed from `edge_coverage` (the per-subject copy) instead of reducing
# `subject_coverage` onto itself, so re-running this cell gives the same result.
subject_coverage = np.sum((edge_coverage > 0).astype(int), axis=-1)

corr_splits = np.zeros((3, 100, 4))         # correlation of the cohorts' plv values
struct_corr_splits = np.zeros((3, 100, 4))  # correlation of the cohorts' edge coverage
count_ = np.zeros((3, 100, 4))              # number of edges selected by the mask

# Block masks over the 100 x 100 parcel matrix: within the first 50 parcels
# (lh), within the last 50 (rh), and between the two halves (lr).
# The names suggest left / right hemisphere -- TODO confirm parcel ordering.
lh_mask = np.zeros((100, 100), dtype=bool)
rh_mask = np.zeros((100, 100), dtype=bool)
lr_mask = np.zeros((100, 100), dtype=bool)
lh_mask[:50, :50] = True
rh_mask[-50:, -50:] = True
lr_mask[:50, -50:] = True
lr_mask[-50:, :50] = True
hemi_mask = np.stack((lh_mask, rh_mask, lr_mask))

for subj_threshold in range(1, 5):
    # One row per split; each row holds subject indices.
    splits = np.load('cov_{}.npy'.format(subj_threshold))
    n_splits, n_subjects = splits.shape

    # Edges sampled by at least `subj_threshold` subjects. This does not depend
    # on the split, so it is computed once per threshold.
    edge_mask = subject_coverage >= subj_threshold

    for idx in range(n_splits):
        # The first 34 entries of a split form cohort 1, the rest cohort 2.
        # NOTE(review): `_joblib_wrapper`, which scores the permutations saved
        # as 'cov_<n>.npy', cuts them at a random position in [20, 48) by
        # default and does not store it -- confirm that cutting at 34 here is
        # the intended partition.
        coh1_indices = splits[idx, :34].astype(int)
        coh2_indices = splits[idx, 34:].astype(int)

        # Per-cohort mean over subjects (NaNs ignored), then drop parcels 0 and
        # 51 from both parcel axes -> frequency x parcel x parcel.
        cohort_means = []
        for cohort_indices in (coh1_indices, coh2_indices):
            cohort_obs = group_morphed_obs[-18:, -cortical_parcels:, -cortical_parcels:, cohort_indices]
            cohort_mean = np.nanmean(cohort_obs, axis=-1)
            cohort_mean = np.delete(cohort_mean, [0, 51], axis=1)
            cohort_mean = np.delete(cohort_mean, [0, 51], axis=2)
            cohort_means.append(cohort_mean)
        plv_coh1, plv_coh2 = cohort_means

        # Full adjacency array: cohort x frequency x parcel x parcel.
        plv_grouped = np.stack((plv_coh1, plv_coh2))

        for idx_mask in range(3):
            edge_masks_ = np.logical_and(hemi_mask[idx_mask], edge_mask)

            # Fixed: the split axis is now indexed as well. The previous
            # `count_[idx_mask, subj_threshold-1]` wrote the threshold into the
            # split axis and broadcast over the threshold axis, unlike the two
            # sibling result arrays.
            count_[idx_mask, idx, subj_threshold - 1] = np.sum(edge_masks_)

            # The scalar index and the boolean mask are separated by a slice,
            # so NumPy puts the selected-edge axis first: the indexed array is
            # (n_edges, n_frequencies) and the mean over axis 0 averages over
            # edges, leaving one value per frequency.
            # NOTE(review): if one value per edge (mean over frequencies) was
            # intended, index as plv_grouped[0][:, edge_masks_] instead.
            plv1 = np.nanmean(plv_grouped[0, :, edge_masks_], axis=0)
            plv2 = np.nanmean(plv_grouped[1, :, edge_masks_], axis=0)

            # Compare only entries that are positive in both cohorts; NaNs
            # compare False and are dropped too.
            values_mask = np.logical_and(plv1 > 0, plv2 > 0)

            # Per-subject coverage of the selected edges: (n_edges, cohort size).
            edge_cov_values1 = edge_coverage[..., coh1_indices][edge_masks_]
            edge_cov_values2 = edge_coverage[..., coh2_indices][edge_masks_]

            struct_corr_splits[idx_mask, idx, subj_threshold - 1] = np.corrcoef(
                edge_cov_values1.sum(axis=1),
                edge_cov_values2.sum(axis=1),
            )[0, 1]
            corr_splits[idx_mask, idx, subj_threshold - 1] = np.corrcoef(
                plv1[values_mask],
                plv2[values_mask],
            )[0, 1]



In [5]:
# Zero-filled float array of shape (100000, 68), bound to `splits`.
# NOTE(review): this rebinds the `splits` name that the split-half cell loads
# from disk; nothing in the visible cells reads this placeholder.
splits = np.zeros(shape=(100000, 68), dtype=float)

In [9]:
def _joblib_wrapper(edge_coverage, min_count=1, split_idx=None):
    """Score one random two-cohort partition of the subjects.

    The subject indices (last axis of ``edge_coverage``) are shuffled with the
    global NumPy random state and cut into two cohorts at ``split_idx``.
    Per-subject values below ``min_count`` are zeroed, each cohort is summed
    over its subjects, and the two summed matrices are correlated over the
    strictly upper-triangular entries that are positive in both cohorts.

    Parameters
    ----------
    edge_coverage : ndarray
        Expected to be 3-D, (parcel, parcel, subject): the last axis is indexed
        by subject, axis 2 is summed away, and the result is masked with
        ``np.triu``.
    min_count : int, optional
        Per-subject values smaller than this are treated as 0.
    split_idx : int or None, optional
        Position at which the shuffled indices are cut: the first ``split_idx``
        form cohort x, the rest cohort y. ``None`` (default) keeps the previous
        behaviour and draws the cut uniformly from ``[20, 48)``, a range that
        only makes sense for about 68 subjects.
        NOTE(review): the cut position is not part of the return value, so a
        caller that later re-splits the returned permutation at a fixed
        position (e.g. 34) should pass that position here; otherwise the score
        describes a different partition.

    Returns
    -------
    orig_indices : ndarray of int, shape (n_subjects,)
        The shuffled subject indices.
    coef : float
        Pearson correlation of the two cohorts' summed coverage. NaN when the
        selected entries are empty or constant.
    """
    # Take the subject count from the data instead of hard-coding it.
    n_subjects = edge_coverage.shape[-1]
    orig_indices = np.arange(n_subjects)
    np.random.shuffle(orig_indices)

    # Drawn after the shuffle so the default path consumes the global random
    # state in the same order as before.
    if split_idx is None:
        split_idx = np.random.randint(20, 48)

    # Fancy-index each cohort once and reuse it for both value and threshold.
    cohort_x = edge_coverage[..., orig_indices[:split_idx]]
    cohort_y = edge_coverage[..., orig_indices[split_idx:]]

    cov_x = (cohort_x * (cohort_x >= min_count)).sum(axis=2)
    cov_y = (cohort_y * (cohort_y >= min_count)).sum(axis=2)

    # Each undirected edge once (k=1 excludes the diagonal), and only edges
    # that both cohorts cover.
    mask = np.triu((cov_x > 0) & (cov_y > 0), 1)
    coef = np.corrcoef(cov_x[mask], cov_y[mask])[0, 1]

    return orig_indices, coef

# Random search over subject permutations: score `n_rounds` shuffles in
# parallel and store 100 of them per minimum-count setting.
# NOTE(review): only cnt == 1 is produced here, while the split-half cell loads
# 'cov_1.npy' through 'cov_4.npy' -- the other files must come from elsewhere.
for cnt in range(1, 2):
    n_rounds = 100000

    # One delayed call per round; tnrange only drives the progress bar.
    jobs = (
        delayed(_joblib_wrapper)(edge_coverage, min_count=cnt)
        for _ in tqdm.tnrange(n_rounds)
    )
    results = Parallel(n_jobs=32)(jobs)

    # Unzip the (permutation, coefficient) pairs into two arrays.
    permutations, coefficients = zip(*results)
    all_splits = np.array(permutations, dtype=float)
    all_coeffs = np.array(coefficients, dtype=float)

    # argsort is ascending, so this keeps the 100 smallest coefficients
    # (NaN scores sort last and are not selected unless fewer than 100
    # finite scores exist).
    # NOTE(review): the score is a correlation, so the smallest values are the
    # least similar cohort pairs -- confirm that ascending order is intended.
    ascending_order = np.argsort(all_coeffs)
    best_splits = all_splits[ascending_order[:100]].astype(int)

    # np.save appends the '.npy' extension to the given name.
    np.save('cov_{}'.format(cnt), best_splits)


HBox(children=(IntProgress(value=0, max=100000), HTML(value='')))




In [27]:
def _sim_metric(x, y, cohort_size=34):
    """Count how many first-cohort members differ between two splits.

    Only the first ``cohort_size`` entries of each sequence are compared, as
    unordered sets. Despite the name, the value is a dissimilarity: 0 means
    the two first cohorts contain the same members.

    Parameters
    ----------
    x, y : sequence of hashable
        Two splits (e.g. permutations of subject indices).
    cohort_size : int, optional
        Number of leading entries that make up the first cohort. Defaults to
        34, the value that was previously hard-coded.

    Returns
    -------
    float
        Half the size of the symmetric difference of the two first cohorts.
        For duplicate-free inputs of equal cohort size this is the number of
        members that would have to be swapped to turn one cohort into the
        other.
    """
    first_x = set(x[:cohort_size])
    first_y = set(y[:cohort_size])
    return len(first_x.symmetric_difference(first_y)) / 2