In [1]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [2]:
cd /content/gdrive/MyDrive/Thesis/ASD

/content/gdrive/MyDrive/Thesis/ASD


In [None]:
cd /content/gdrive/MyDrive/Thesis/Thesis/ASD

/content/gdrive/.shortcut-targets-by-id/1eA5gTf1FtgP2bh0p-wWtNYDI9KsYbaiU/Thesis/ASD


In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from functools import reduce
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
import time
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch
import sys
import pickle
import torch.nn as nn
import torch.nn.functional as F
from sklearn.model_selection import KFold, StratifiedKFold
import torch.optim as optim
from sklearn.metrics import confusion_matrix
import functools
import numpy.ma as ma # for masked arrays
import glob
import random
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from tqdm.notebook import tqdm
from itertools import groupby
import sklearn

In [4]:
!pip install captum

Collecting captum
  Downloading captum-0.5.0-py3-none-any.whl (1.4 MB)
[K     |████████████████████████████████| 1.4 MB 4.1 MB/s 
Installing collected packages: captum
Successfully installed captum-0.5.0


In [5]:
from sklearn.model_selection import train_test_split
from captum.attr import IntegratedGradients
from captum.attr import Saliency
from captum.attr import DeepLift
from captum.attr import NoiseTunnel
from captum.attr import visualization as viz
from captum.attr import Saliency
import torchvision

### Helper functions for computing correlations

In [6]:
def get_corr_data(df):
              
    with np.errstate(invalid="ignore"):
        corr = np.nan_to_num(np.corrcoef(df.T))
        mask = np.invert(np.tri(corr.shape[0], k=-1, dtype=bool))
        m = ma.masked_where(mask == 1, mask)
        return ma.masked_where(m, corr).compressed()
        
def get_corr_matrix(filename,data_path):
    # returns correlation matrix
    for file in os.listdir(data_path):
        if file.startswith(filename):
            df = pd.read_csv(os.path.join(data_path, file), sep='\t')
    with np.errstate(invalid="ignore"):
        corr = np.nan_to_num(np.corrcoef(df.T))
        return corr

def confusion(g_turth,predictions):
    tn, fp, fn, tp = confusion_matrix(g_turth,predictions).ravel()
    accuracy = (tp+tn)/(tp+fp+tn+fn)
    sensitivity = (tp)/(tp+fn)
    specificty = (tn)/(tn+fp)
    return accuracy,sensitivity,specificty

## Loading the data 

In [7]:
def get_key(filename):
    f_split = filename.split('_')
    if f_split[3] == 'rois':
        key = '_'.join(f_split[0:3]) 
    else:
        key = '_'.join(f_split[0:2])
    return key

In [8]:
data_df = pd.read_csv('./Phenotypes/Phenotypic_V1_0b_preprocessed949.csv', encoding= 'unicode_escape')
data_df.DX_GROUP = data_df.DX_GROUP.map({1: 1, 2:0})

data_path = './Datasets/CPAC/rois_cc200/'
data_df['FILE_PATH'] = data_df['FILE_ID'].apply(lambda x : os.path.join(data_path,x + '_rois_cc200.1D')) 

print('Length of data frame : ', len(data_df))
print(data_df.head())

Length of data frame :  949
   Unnamed: 0  SUB_ID  X  subject SITE_ID       FILE_ID  DX_GROUP  DSM_IV_TR  \
0           0   50003  2    50003    PITT  Pitt_0050003         1          1   
1           1   50004  3    50004    PITT  Pitt_0050004         1          1   
2           2   50006  5    50006    PITT  Pitt_0050006         1          1   
3           3   50007  6    50007    PITT  Pitt_0050007         1          1   
4           4   50009  8    50009    PITT  Pitt_0050009         1          1   

   AGE_AT_SCAN  SEX  ... qc_anat_rater_2  qc_anat_notes_rater_2  \
0        24.45    1  ...              OK                    NaN   
1        19.09    1  ...              OK                    NaN   
2        13.37    1  ...              OK                    NaN   
3        17.78    1  ...              OK                    NaN   
4        33.86    1  ...              OK                    NaN   

   qc_func_rater_2       qc_func_notes_rater_2  qc_anat_rater_3  \
0               OK   

In [9]:
fpaths = data_df['FILE_PATH'].values
print('Number of Subjects available: ', len(fpaths))

Number of Subjects available:  949


In [None]:
all_corr = {}

for i,path in enumerate(fpaths):

    # Extracting SFC
    fname = path.split('/')[-1]
    key = fname.split('_rois')[0]
    x = np.loadtxt(path)
    x = np.array(x, dtype = 'float32')
    corr = get_corr_data(x)
    # Extracting corresponding labels
    y = data_df[data_df['FILE_ID'] == key]['DX_GROUP'].item()

    all_corr[key] = (corr,y)

print('Length of correlations vector : ', len(all_corr))
pickle.dump(all_corr, open('./data/SFC_CC200.pkl', 'wb'))

Length of correlations vector :  949


In [10]:
all_corr = pickle.load(open('./data/SFC_CC200.pkl', 'rb'))
flist = np.array(list(all_corr.keys()))
labels = np.array([all_corr[f][1] for f in flist], dtype = 'uint8')
print('Length of Input subjects : ', len(flist))
print('Length of Output subjects : ', len(labels))
print('Distribution of Labels : ', np.unique(labels, return_counts = True))

Length of Input subjects :  949
Length of Output subjects :  949
Distribution of Labels :  (array([0, 1], dtype=uint8), array([530, 419]))


# DataLoader

In [11]:
class ASDDataset(Dataset):
    def __init__(self, all_corr, subjects):
        self.corr = all_corr
        self.subjects = subjects
        pass
    def __getitem__(self,idx):
        return torch.tensor(self.corr[self.subjects[idx]][0],dtype=torch.float),torch.tensor(self.corr[self.subjects[idx]][1],dtype=torch.float)
        pass
    def __len__(self):
        return len(self.subjects)
        pass

# Network 

In [12]:
# Auto Encoder and Classifier
class Network(nn.Module):
    def __init__(self, num_inputs = 19900):
        super(Network, self).__init__()
        
        self.num_inputs = num_inputs
        
        self.fc_encoder = nn.Sequential (
                nn.Linear(self.num_inputs,2048),
                nn.Tanh(),
                nn.Linear(2048,512),
                nn.Tanh())
        
        self.fc_decoder = nn.Sequential (
                nn.Linear(512,2048),
                nn.Tanh(),
                nn.Linear(2048,self.num_inputs),
                nn.Tanh())
         
        self.classifier = nn.Sequential (
            nn.Dropout(p=0.25),
            nn.Linear(512, 1),
        )

        self.sigmoid = nn.Sigmoid()           
         
    def forward(self, x, eval_classifier = True):

        x = self.fc_encoder(x)
        if eval_classifier:
            x_logit = self.classifier(x)   #   .squeeze(1)
            # x_logit = self.sigmoid(x_logit)
            return x_logit 

        x = self.fc_decoder(x)        
        return x

# Defining training and testing functions

In [13]:
def train(model, criterion, data_loader, mode='clf'):
    model.train()
    clf_loss = []
    ae_loss = []
    
    if mode == 'clf':
        final_targets = []
        final_predictions = []
    else:
        final_targets = None
        final_predictions = None    
    
    for (inputs, targets) in data_loader :
        if len(inputs) != batch_size:          
            continue

        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()

        inputs.requires_grad_(True)
        inputs.retain_grad()

        if mode == 'ae':        
            reconstructed = model(inputs, False)
            loss_ae = criterion(reconstructed, inputs) / len(inputs)              
            loss_total = loss_ae
            loss_ae_np = loss_ae.detach().cpu().numpy()
            ae_loss.append(loss_ae_np)           

        elif mode == 'clf':
            logits = model(inputs, True)
            logits = np.squeeze(logits, 1)
            loss_clf = criterion(logits, targets)
            logits = torch.sigmoid(logits)
            proba = logits.detach().cpu().numpy()
            predictions = np.round(proba)           
            final_targets.append(targets.detach().cpu().numpy())
            final_predictions.append(predictions)
            
            loss_total = loss_clf
            loss_clf_np = loss_clf.detach().cpu().numpy()           
            clf_loss.append(loss_clf_np)
            
        loss_total.backward()
        optimizer.step()
    
    if (final_targets is not None) and (final_predictions is not None):
        final_targets = np.concatenate(final_targets)
        final_predictions = np.concatenate(final_predictions)
        train_accuracy = np.mean(final_targets == final_predictions)

        return np.mean(clf_loss), train_accuracy
    else:
        return np.mean(ae_loss), None


def validate(model, criterion, data_loader, mode='clf'):
    model.eval()
    clf_loss = []
    ae_loss = []
    
    if mode == 'clf':
        final_targets = []
        final_predictions = []
    else:
        final_targets = None
        final_predictions = None    
    
    for (inputs, targets) in data_loader :
        if len(inputs) != batch_size:           
            continue

        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()

        inputs.requires_grad_(True)
        inputs.retain_grad()

        if mode == 'ae':        
            reconstructed = model(inputs, False)
            loss_ae = criterion(reconstructed, inputs) / len(inputs)              
            loss_total = loss_ae
            loss_ae_np = loss_ae.detach().cpu().numpy()
            ae_loss.append(loss_ae_np)           

        elif mode == 'clf':
            logits = model(inputs, True)
            logits = np.squeeze(logits, 1)
            loss_clf = criterion(logits, targets)
            logits = torch.sigmoid(logits)
            proba = logits.detach().cpu().numpy()
            predictions = np.round(proba)           
            final_targets.append(targets.detach().cpu().numpy())
            final_predictions.append(predictions)
            
            loss_total = loss_clf
            loss_clf_np = loss_clf.detach().cpu().numpy()           
            clf_loss.append(loss_clf_np)
            
        loss_total.backward()
        optimizer.step()
    
    if (final_targets is not None) and (final_predictions is not None):
        final_targets = np.concatenate(final_targets)
        final_predictions = np.concatenate(final_predictions)
        train_accuracy = np.mean(final_targets == final_predictions)

        return np.mean(clf_loss), train_accuracy
    else:
        return np.mean(ae_loss), None

def test(model, criterion, data_loader, mode = 'clf'):

    clf_loss = []
    ae_loss = []
    
    if mode == 'clf':
        final_targets = []
        final_predictions = []
    else:
        final_targets = None
        final_predictions = None    
    
    with torch.no_grad():
        model.eval()
        for (inputs, targets) in data_loader :
            if len(inputs) != batch_size:           
                continue

            inputs, targets = inputs.to(device), targets.to(device)
            if mode == 'ae':        
                reconstructed = model(inputs, False)
                loss_ae = criterion_ae(reconstructed, inputs) / len(inputs)           
                loss_total = loss_ae
                loss_ae_np = loss_ae.detach().cpu().numpy()
                ae_loss.append(loss_ae_np)           

            if mode == 'clf':
                logits = model(inputs, True)
                logits = np.squeeze(logits, 1)
                loss_clf = criterion_clf(logits, targets)
                logits = torch.sigmoid(logits)
                proba = logits.detach().cpu().numpy()
                predictions = np.round(proba)           
                final_targets.append(targets.detach().cpu().numpy())
                final_predictions.append(predictions)
                
                loss_total = loss_clf
                loss_clf_np = loss_clf.detach().cpu().numpy()           
                clf_loss.append(loss_clf_np)
              
    
    if (final_targets is  None) and (final_predictions is None):
        return np.mean(ae_loss), None
    
    final_targets = np.concatenate(final_targets)
    final_predictions = np.concatenate(final_predictions)
    mlp_acc, mlp_sens, mlp_spef = confusion(final_targets, final_predictions)
    metrics_dict = {'accuracy': np.round(mlp_acc, 4), 
                    'senstivity' : np.round(mlp_sens,4), 
                    'specificity' : np.round(mlp_spef,4), 
                    'loss' : np.round(np.mean(clf_loss),4)}               
    return  metrics_dict  

In [14]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [15]:
def attribute_image_features(algorithm, inputs):
    model.zero_grad()
    model.eval()
    tensor_attributions = algorithm.attribute(inputs = inputs, target = 0, return_convergence_delta=True)  
    return tensor_attributions

# ASD 2 Layer Model Training

In [16]:
# Define Parameters
p_fold = 10
batch_size = 16
lr_ae, lr_clf = 0.0001, 0.0001
ae_epochs, clf_epochs = 50, 50     # 50, 50
weight_decay_ae, weight_decay_clf = 0.1, 0.1
n_inputs = 19900  # Automate this according to the atlas 

In [17]:
crossval_acc, crossval_sen, crossval_spec, crossval_loss, attributions = [], [], [], [], [] 
all_folds_splits = {}
kf = StratifiedKFold(n_splits = p_fold, random_state = 1, shuffle = True)

start = time.time()
for fold,(train_index, test_index) in enumerate(kf.split(flist, labels)):

    train_subjects, test_subjects = flist[train_index],flist[test_index]
    train_labels = labels[train_index]   
    train_subjects, val_subjects, train_labels, val_labels = train_test_split(train_subjects, train_labels, 
                                                      test_size = 0.20, random_state = 42, stratify = train_labels)
    
    print('Number of train subjects : ', len(train_subjects))
    print('Number of val subjects : ', len(val_subjects))
    print('Number of test subjects : ', len(test_subjects))

    fold_splits_dict = {} 
    fold_splits_dict['train'] = train_subjects
    fold_splits_dict['val'] = val_subjects
    fold_splits_dict['test'] = test_subjects

    all_folds_splits[fold] = fold_splits_dict
    verbose = (True if (fold == 0) else False)
   
    train_dataset = ASDDataset(all_corr, train_subjects)
    val_dataset = ASDDataset(all_corr, val_subjects)
    test_dataset = ASDDataset(all_corr, test_subjects)
    
    train_dataloader = DataLoader(train_dataset, batch_size = batch_size, shuffle = True)
    val_dataloader = DataLoader(val_dataset, batch_size = batch_size, shuffle = False)
    test_dataloader = DataLoader(test_dataset, batch_size = batch_size, shuffle = False)                           

    model = Network(num_inputs = n_inputs)
    model = model.to(device)

    criterion_ae = nn.MSELoss(reduction='sum')         
    optimizer = optim.Adam(model.parameters(), lr = lr_ae, weight_decay = weight_decay_ae)          
    best_ae_model, count = None, 1
    best_ae_loss = sys.float_info.max
    
    print("Auto Encoder training Started-----------")
    for epoch in range(1, ae_epochs+1):

        print(f'Epoch {epoch}/{ae_epochs}')
        ae_train_loss, _ = train(model, criterion_ae, train_dataloader, mode = 'ae')
        print(f'AE Train loss: {(ae_train_loss):.4f}')

        ae_val_loss, _ = validate(model, criterion_ae, val_dataloader, mode = 'ae')
        print(f'AE Val loss: {(ae_val_loss):.4f}')

        if(ae_val_loss < best_ae_loss):     # Early Stopping 
            best_ae_model = model
            best_ae_loss = ae_val_loss
            count = 1
        else:
            count += 1     
        if(count == 10):  # Criteria
            break
              
    best_clf_model, best_clf_acc, count = None, 0.0, 1
    model = best_ae_model
    criterion_clf = nn.BCEWithLogitsLoss()      
    optimizer = optim.Adam(model.parameters(), lr = lr_clf, weight_decay = weight_decay_clf)

    print("Classifier training Started-----------")
    for epoch in range(1, clf_epochs+1):

        print(f'Epoch {epoch}/{clf_epochs}')
        clf_train_loss, train_acc = train(model, criterion_clf, train_dataloader, mode='clf')
        print(f'CLF Train loss: {(clf_train_loss):.4f}, Train Accuracy: {(train_acc):.4f}')

        clf_val_loss, val_acc = validate(model, criterion_clf, val_dataloader, mode='clf')
        print(f'CLF Val loss: {(clf_val_loss):.4f}, Validation Accuracy: {(val_acc):.4f}')

        if(val_acc > best_clf_acc):    # Early Stopping Criteria
            best_clf_model = model
            best_clf_acc = val_acc
            count = 1
        else:
            count += 1
        if(count == 10):
            break        

    metrics_dict = test(best_clf_model, criterion_clf, test_dataloader, mode = 'clf')

    print(f'Fold {fold+1}/{p_fold}')
    print(f'{metrics_dict}')
    print("--------------------------------------------")
    
    torch.save(best_clf_model.state_dict(), f'./data/Weights/Fold_{fold+1}.pth')    # To save the weights
    print(f'Fold {fold+1} weights are saved')

    crossval_acc.append(metrics_dict['accuracy'])
    crossval_sen.append(metrics_dict['senstivity'])
    crossval_spec.append(metrics_dict['specificity'])
    crossval_loss.append(metrics_dict['loss'])

print(f'Average Value after 10 Folds')
print(f'Accuracy: {np.round(np.mean(crossval_acc),4)}, Senstivity: {np.round(np.mean(crossval_sen),4)}, Specificity: {np.round(np.mean(crossval_spec),4)}, Loss: {np.round(np.mean(crossval_loss),4)}')
pickle.dump(all_folds_splits, open('./data/All_Folds_Subjects.pkl', 'wb'))
print(f'Total time taken : {time.time()-start}')

Number of train subjects :  683
Number of val subjects :  171
Number of test subjects :  95
Auto Encoder training Started-----------
Epoch 1/50
AE Train loss: 823.0101
AE Val loss: 711.7103
Epoch 2/50
AE Train loss: 670.3008
AE Val loss: 651.9384
Epoch 3/50
AE Train loss: 614.5040
AE Val loss: 604.3463
Epoch 4/50
AE Train loss: 571.0931
AE Val loss: 564.4747
Epoch 5/50
AE Train loss: 535.3869
AE Val loss: 531.1636
Epoch 6/50
AE Train loss: 504.8393
AE Val loss: 499.9503
Epoch 7/50
AE Train loss: 478.3432
AE Val loss: 473.2162
Epoch 8/50
AE Train loss: 453.2615
AE Val loss: 449.3344
Epoch 9/50
AE Train loss: 431.1058
AE Val loss: 427.9079
Epoch 10/50
AE Train loss: 410.7231
AE Val loss: 407.6306
Epoch 11/50
AE Train loss: 392.7664
AE Val loss: 388.7137
Epoch 12/50
AE Train loss: 377.8387
AE Val loss: 373.3183
Epoch 13/50
AE Train loss: 362.8664
AE Val loss: 358.2180
Epoch 14/50
AE Train loss: 347.3077
AE Val loss: 344.0820
Epoch 15/50
AE Train loss: 336.8014
AE Val loss: 331.3573
Epoch 

# Backup

In [None]:
crossval_acc, crossval_sen, crossval_spec, crossval_loss, attributions = [], [], [], [], [] 
all_folds_splits = {}
kf = StratifiedKFold(n_splits = p_fold, random_state = 1, shuffle = True)

start = time.time()
for fold,(train_index, test_index) in enumerate(kf.split(flist, labels)):

    train_subjects, test_subjects = flist[train_index],flist[test_index]
    train_labels = labels[train_index]   
    train_subjects, val_subjects, train_labels, val_labels = train_test_split(train_subjects, train_labels, 
                                                      test_size = 0.20, random_state = 42, stratify = train_labels)
    
    print('Number of train subjects : ', len(train_subjects))
    print('Number of val subjects : ', len(val_subjects))
    print('Number of test subjects : ', len(test_subjects))

    fold_splits_dict = {} 
    fold_splits_dict['train'] = train_subjects
    fold_splits_dict['val'] = val_subjects
    fold_splits_dict['test'] = test_subjects

    all_folds_splits[fold] = fold_splits_dict
    verbose = (True if (fold == 0) else False)
   
    train_dataset = ASDDataset(all_corr, train_subjects)
    val_dataset = ASDDataset(all_corr, val_subjects)
    test_dataset = ASDDataset(all_corr, test_subjects)
    
    train_dataloader = DataLoader(train_dataset, batch_size = batch_size, shuffle = True)
    val_dataloader = DataLoader(val_dataset, batch_size = batch_size, shuffle = False)
    test_dataloader = DataLoader(test_dataset, batch_size = batch_size, shuffle = False)                           

    model = Network(num_inputs = n_inputs)
    model = model.to(device)

    criterion_ae = nn.MSELoss(reduction='sum')         
    optimizer = optim.Adam(model.parameters(), lr = lr_ae, weight_decay = weight_decay_ae)          
    best_ae_model, count = None, 1
    best_ae_loss = sys.float_info.max
    
    print("Auto Encoder training Started-----------")
    for epoch in range(1, ae_epochs+1):

        print(f'Epoch {epoch}/{ae_epochs}')
        ae_train_loss, _ = train(model, criterion_ae, train_dataloader, mode = 'ae')
        print(f'AE Train loss: {(ae_train_loss):.4f}')

        ae_val_loss, _ = validate(model, criterion_ae, val_dataloader, mode = 'ae')
        print(f'AE Val loss: {(ae_val_loss):.4f}')

        if(ae_val_loss < best_ae_loss):     # Early Stopping 
            best_ae_model = model
            best_ae_loss = ae_val_loss
            count = 1
        else:
            count += 1     
        if(count == 10):  # Criteria
            break
              
    best_clf_model, best_clf_acc, count = None, 0.0, 1
    model = best_ae_model
    criterion_clf = nn.BCEWithLogitsLoss()      
    optimizer = optim.Adam(model.parameters(), lr = lr_clf, weight_decay = weight_decay_clf)

    print("Classifier training Started-----------")
    for epoch in range(1, clf_epochs+1):

        print(f'Epoch {epoch}/{clf_epochs}')
        clf_train_loss, train_acc = train(model, criterion_clf, train_dataloader, mode='clf')
        print(f'CLF Train loss: {(clf_train_loss):.4f}, Train Accuracy: {(train_acc):.4f}')

        clf_val_loss, val_acc = validate(model, criterion_clf, val_dataloader, mode='clf')
        print(f'CLF Val loss: {(clf_val_loss):.4f}, Validation Accuracy: {(val_acc):.4f}')

        if(val_acc > best_clf_acc):    # Early Stopping Criteria
            best_clf_model = model
            best_clf_acc = val_acc
            count = 1
        else:
            count += 1
        if(count == 10):
            break        

    metrics_dict = test(best_clf_model, criterion_clf, test_dataloader, mode = 'clf')

    print(f'Fold {fold+1}/{p_fold}')
    print(f'{metrics_dict}')
    print("--------------------------------------------")
    
    # torch.save(best_clf_model.state_dict(), f'./data/Weights/Fold_{fold+1}.pth')    # To save the weights
    # print(f'Fold {fold+1} weights are saved')

    crossval_acc.append(metrics_dict['accuracy'])
    crossval_sen.append(metrics_dict['senstivity'])
    crossval_spec.append(metrics_dict['specificity'])
    crossval_loss.append(metrics_dict['loss'])

print(f'Average Value after 10 Folds')
print(f'Accuracy: {np.round(np.mean(crossval_acc),4)}, Senstivity: {np.round(np.mean(crossval_sen),4)}, Specificity: {np.round(np.mean(crossval_spec),4)}, Loss: {np.round(np.mean(crossval_loss),4)}')
# pickle.dump(all_folds_splits, open('./data/All_Folds_Subjects.pkl', 'wb'))
print(f'Total time taken : {time.time()-start}')

Number of train subjects :  683
Number of val subjects :  171
Number of test subjects :  95
Auto Encoder training Started-----------
Epoch 1/50
AE Train loss: 820.2217
AE Val loss: 712.1801
Epoch 2/50
AE Train loss: 673.4880
AE Val loss: 650.5835
Epoch 3/50
AE Train loss: 614.2871
AE Val loss: 602.4144
Epoch 4/50
AE Train loss: 569.9550
AE Val loss: 563.0824
Epoch 5/50
AE Train loss: 534.4946
AE Val loss: 528.7589
Epoch 6/50
AE Train loss: 503.1908
AE Val loss: 498.3627
Epoch 7/50
AE Train loss: 476.1094
AE Val loss: 471.3995
Epoch 8/50
AE Train loss: 450.6628
AE Val loss: 447.4457
Epoch 9/50
AE Train loss: 429.4308
AE Val loss: 424.7357
Epoch 10/50
AE Train loss: 409.2567
AE Val loss: 404.9640
Epoch 11/50
AE Train loss: 391.1585
AE Val loss: 387.2185
Epoch 12/50
AE Train loss: 375.7087
AE Val loss: 371.7448
Epoch 13/50
AE Train loss: 360.6676
AE Val loss: 357.3597
Epoch 14/50
AE Train loss: 347.0031
AE Val loss: 342.6172
Epoch 15/50
AE Train loss: 334.0288
AE Val loss: 329.3523
Epoch 

In [None]:
rep_val_acc, rep_val_sen, rep_val_spec, rep_val_loss = [], [], [], []
for repetition in range(5):
    crossval_acc, crossval_sen, crossval_spec, crossval_loss, attributions = [], [], [], [], [] 
    all_folds_splits = {}
    kf = StratifiedKFold(n_splits = p_fold, random_state = 1, shuffle = True)

    start = time.time()
    for fold,(train_index, test_index) in enumerate(kf.split(flist, labels)):

        train_subjects, test_subjects = flist[train_index],flist[test_index]
        train_labels = labels[train_index]   
        train_subjects, val_subjects, train_labels, val_labels = train_test_split(train_subjects, train_labels, 
                                                          test_size = 0.20, random_state = 42, stratify = train_labels)
        
        print('Number of train subjects : ', len(train_subjects))
        print('Number of val subjects : ', len(val_subjects))
        print('Number of test subjects : ', len(test_subjects))

        fold_splits_dict = {} 
        fold_splits_dict['train'] = train_subjects
        fold_splits_dict['val'] = val_subjects
        fold_splits_dict['test'] = test_subjects

        all_folds_splits[fold] = fold_splits_dict
        verbose = (True if (fold == 0) else False)
      
        train_dataset = ASDDataset(all_corr, train_subjects)
        val_dataset = ASDDataset(all_corr, val_subjects)
        test_dataset = ASDDataset(all_corr, test_subjects)
        
        train_dataloader = DataLoader(train_dataset, batch_size = batch_size, shuffle = True)
        val_dataloader = DataLoader(val_dataset, batch_size = batch_size, shuffle = False)
        test_dataloader = DataLoader(test_dataset, batch_size = batch_size, shuffle = False)                           

        model = Network(num_inputs = n_inputs)
        model = model.to(device)

        criterion_ae = nn.MSELoss(reduction='sum')         
        optimizer = optim.Adam(model.parameters(), lr = lr_ae, weight_decay = weight_decay_ae)          
        best_ae_model, count = None, 1
        best_ae_loss = sys.float_info.max
        
        print("Auto Encoder training Started-----------")
        for epoch in range(1, ae_epochs+1):

            print(f'Epoch {epoch}/{ae_epochs}')
            ae_train_loss, _ = train(model, criterion_ae, train_dataloader, mode = 'ae')
            print(f'AE Train loss: {(ae_train_loss):.4f}')

            ae_val_loss, _ = test(model, criterion_ae, val_dataloader, mode = 'ae')
            print(f'AE Val loss: {(ae_val_loss):.4f}')

            if(ae_val_loss < best_ae_loss):     # Early Stopping 
                best_ae_model = model
                best_ae_loss = ae_val_loss
                count = 1
            else:
                count += 1     
            if(count == 10):  # Criteria
                break
                  
        best_clf_model, best_clf_acc, count = None, 0.0, 1
        model = best_ae_model
        criterion_clf = nn.BCELoss()      
        optimizer = optim.Adam(model.parameters(), lr = lr_clf, weight_decay = weight_decay_clf)

        print("Classifier training Started-----------")
        for epoch in range(1, clf_epochs+1):

            print(f'Epoch {epoch}/{clf_epochs}')
            clf_train_loss, train_acc = train(model, criterion_clf, train_dataloader, mode='clf')
            print(f'CLF Train loss: {(clf_train_loss):.4f}, Train Accuracy: {(train_acc):.4f}')

            val_metrics = test(model, criterion_clf, val_dataloader, mode='clf')
            print(f'CLF Val loss: {(val_metrics["loss"]):.4f}, Validation Accuracy: {(val_metrics["accuracy"]):.4f}')

            if(val_metrics['accuracy'] > best_clf_acc):    # Early Stopping Criteria
                best_clf_model = model
                best_clf_acc = val_metrics['accuracy']
                count = 1
            else:
                count += 1
            if(count == 10):
                break        

        metrics_dict = test(best_clf_model, criterion_clf, test_dataloader, mode = 'clf')

        print(f'Fold {fold+1}/{p_fold}')
        print(f'{metrics_dict}')
        print("--------------------------------------------")
        
        # torch.save(best_clf_model.state_dict(), f'./data/Weights/Fold_{fold+1}.pth')    # To save the weights
        # print(f'Fold {fold+1} weights are saved')

        crossval_acc.append(metrics_dict['accuracy'])
        crossval_sen.append(metrics_dict['senstivity'])
        crossval_spec.append(metrics_dict['specificity'])
        crossval_loss.append(metrics_dict['loss'])

    print(f'Average Value after 10 Folds')
    crossval_acc = np.mean(crossval_acc)
    crossval_sen = np.mean(crossval_sen)
    crossval_spec = np.mean(crossval_spec)
    crossval_loss = np.mean(crossval_loss)
    print(f'Accuracy: {np.round(crossval_acc,4)}, Senstivity: {np.round(crossval_sen,4)}, Specificity: {np.round(crossval_spec,4)}, Loss: {np.round(crossval_loss,4)}')
    # pickle.dump(all_folds_splits, open('./data/All_Folds_Subjects.pkl', 'wb'))
    print(f'Total time taken : {time.time()-start}')
    rep_val_acc.append(crossval_acc)
    rep_val_sen.append(crossval_sen)
    rep_val_spec.append(crossval_spec)
    rep_val_loss.append(crossval_loss)
print('***********************************************************************')
print('Average Metrics after 5 repetitions')
print(f'Accuracy: {np.round(np.mean(rep_val_acc),4)}, Senstivity: {np.round(np.mean(rep_val_sen),4)}, Specificity: {np.round(np.mean(rep_val_spec),4)}, Loss: {np.round(np.mean(rep_val_loss),4)}')

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch 3/50
AE Train loss: 574.5273
AE Val loss: 603.0812
Epoch 4/50
AE Train loss: 516.8508
AE Val loss: 579.8524
Epoch 5/50
AE Train loss: 469.5497
AE Val loss: 562.8291
Epoch 6/50
AE Train loss: 429.8829
AE Val loss: 551.5500
Epoch 7/50
AE Train loss: 395.7996
AE Val loss: 543.0872
Epoch 8/50
AE Train loss: 365.0174
AE Val loss: 533.9638
Epoch 9/50
AE Train loss: 338.5560
AE Val loss: 527.2704
Epoch 10/50
AE Train loss: 314.0179
AE Val loss: 520.1889
Epoch 11/50
AE Train loss: 292.0209
AE Val loss: 516.6202
Epoch 12/50
AE Train loss: 273.8509
AE Val loss: 513.0476
Epoch 13/50
AE Train loss: 257.7824
AE Val loss: 510.1452
Epoch 14/50
AE Train loss: 243.0550
AE Val loss: 507.4470
Epoch 15/50
AE Train loss: 231.7488
AE Val loss: 504.6485
Epoch 16/50
AE Train loss: 221.6187
AE Val loss: 502.1226
Epoch 17/50
AE Train loss: 211.7143
AE Val loss: 501.1657
Epoch 18/50
AE Train loss: 202.6893
AE Val loss: 497.0148
Epoch 19/50
AE

# Visualization

In [None]:
def attribute_image_features(algorithm, inputs, target = 1):
    # model.zero_grad()
    # model.eval()
    tensor_attributions = algorithm.attribute(inputs = inputs, target = target, return_convergence_delta=True)  
    return tensor_attributions

In [None]:
attributions = []
all_folds = pickle.load(open('./data/All_Folds_Subjects.pkl', 'rb'))

for fold in range(10):
    fold_weights = torch.load(f'./data/Weights/Fold_{fold+1}.pth', map_location=torch.device('cpu'))
    best_clf_model = Network(num_inputs = 19900) 
    best_clf_model.load_state_dict(fold_weights)
    best_clf_model = best_clf_model.to('cpu')

    test_subjects = all_folds[fold]['test']
    x_asd, y_asd = [], []
    for sample in test_subjects : 
        if(all_corr[sample][1] == 1):
          x_asd.append(all_corr[sample][0])
          y_asd.append(all_corr[sample][1])
    print('Number of ASD subjects in test set : ', len(x_asd))

    x_asd = torch.tensor(x_asd, dtype=torch.float)
    y_asd = np.array(y_asd)  

    y_asd_pred = best_clf_model(x_asd)
    y_asd_pred = y_asd_pred.detach().cpu().numpy()
    y_asd_pred = np.round(y_asd_pred)
    y_asd_pred = np.squeeze(y_asd_pred, axis = 1)

    right_indices = np.where(y_asd_pred == 1)
    x_asd_ig = x_asd[right_indices]
    print('Number of correctly predicted ASD subjects : ', len(x_asd_ig))

    ig_asd = IntegratedGradients(best_clf_model)        
    grads_asd, delta_asd = attribute_image_features(ig_asd, inputs = x_asd_ig, target = 0)                     
    grads_asd = torch.mean(grads_asd, axis = 0)
    grads_asd = np.array(grads_asd)
    attributions.append(grads_asd)
                            
attributions = np.array(attributions)
# np.save('./data/IG_Attributions.npy', attributions)
print("Attributions shape : ", attributions.shape)

Number of ASD samples in test set :  42




Number of correctly predicted ASD samples :  28
Number of ASD samples in test set :  42
Number of correctly predicted ASD samples :  23
Number of ASD samples in test set :  42
Number of correctly predicted ASD samples :  26
Number of ASD samples in test set :  42
Number of correctly predicted ASD samples :  28
Number of ASD samples in test set :  42
Number of correctly predicted ASD samples :  31
Number of ASD samples in test set :  42
Number of correctly predicted ASD samples :  32
Number of ASD samples in test set :  42
Number of correctly predicted ASD samples :  29
Number of ASD samples in test set :  42
Number of correctly predicted ASD samples :  32
Number of ASD samples in test set :  42
Number of correctly predicted ASD samples :  23
Number of ASD samples in test set :  41
Number of correctly predicted ASD samples :  19
Attributions shape :  (10, 19900)


In [None]:
rois_count = {}    # {roi : number of times it is repeated in all 10 folds}

for grads_asd in attributions :
  
    attr_vals_asd = grads_asd.copy()
    thresh = np.percentile(attr_vals_asd, 99)
    attr_vals_asd = np.where(attr_vals_asd > thresh,  1 , 0) # check1
    corr_matrix_asd = np.zeros((200,200))
    corr_matrix_asd[np.triu_indices(200, 1)] = attr_vals_asd
    print('Number of unique elements in corr_matrix : ', np.unique(corr_matrix_asd, return_counts = True))

    max_sum_rows = np.sum(corr_matrix_asd, axis = 1)      # check 2
    top_indices = np.argsort(max_sum_rows)            # Max value indices
    top_values = max_sum_rows[top_indices]      # Max values

    top20_indices  = top_indices[-20 : ]
    top20_values = top_values[-20 : ]

    print('Most repeated ROIS in ASD (Not index values): ', top20_indices + 1)
    print('Number of times ROIS repeated in ASD : ', top20_values)
    
    for index, roi in enumerate(top_indices): 
        if(roi in rois_count):
            rois_count[roi] += top_values[index]
        else : 
          rois_count[roi] = top_values[index]        

rois_count_sorted = dict(sorted(rois_count.items(), key = lambda x : x[1], reverse = True))
print(rois_count_sorted)

Number of unique elements in corr_matrix :  (array([0., 1.]), array([39801,   199]))
Most repeated ROIS in ASD (Not index values):  [154  30 107 105 106  65  31  10  63  28  71  21  32  59 117  57  64  49
  94  55]
Number of times ROIS repeated in ASD :  [3. 3. 3. 3. 3. 4. 4. 4. 4. 4. 5. 5. 5. 6. 6. 6. 6. 6. 7. 8.]
Number of unique elements in corr_matrix :  (array([0., 1.]), array([39801,   199]))
Most repeated ROIS in ASD (Not index values):  [107  21  33  28  90  93  45  71 154 130  49 106  59  46  32  55  30  17
  77  64]
Number of times ROIS repeated in ASD :  [3. 3. 3. 3. 3. 3. 3. 4. 4. 4. 4. 5. 5. 5. 5. 6. 6. 6. 6. 7.]
Number of unique elements in corr_matrix :  (array([0., 1.]), array([39801,   199]))
Most repeated ROIS in ASD (Not index values):  [128  94 130  93  55   8  66  31  16  36  32  45  49  57  63  64  42  46
  30  59]
Number of times ROIS repeated in ASD :  [3. 3. 3. 3. 3. 3. 3. 3. 3. 4. 4. 4. 4. 4. 4. 4. 4. 5. 5. 6.]
Number of unique elements in corr_matrix :  (arra

In [None]:
print(rois_count_sorted)

{63: 61.0, 58: 56.0, 31: 49.0, 48: 48.0, 54: 46.0, 29: 44.0, 93: 40.0, 45: 37.0, 20: 37.0, 16: 35.0, 27: 35.0, 116: 34.0, 105: 33.0, 14: 29.0, 52: 28.0, 62: 28.0, 70: 26.0, 89: 25.0, 44: 25.0, 7: 24.0, 83: 24.0, 56: 24.0, 35: 23.0, 32: 22.0, 41: 22.0, 88: 22.0, 64: 22.0, 46: 21.0, 73: 21.0, 4: 21.0, 10: 21.0, 15: 20.0, 30: 20.0, 72: 19.0, 92: 19.0, 34: 19.0, 106: 19.0, 9: 19.0, 76: 18.0, 86: 17.0, 12: 17.0, 38: 17.0, 163: 17.0, 65: 17.0, 43: 17.0, 3: 16.0, 165: 16.0, 113: 16.0, 11: 16.0, 118: 16.0, 33: 16.0, 109: 16.0, 129: 16.0, 104: 16.0, 167: 15.0, 90: 15.0, 130: 15.0, 82: 15.0, 127: 15.0, 74: 15.0, 153: 15.0, 161: 14.0, 77: 13.0, 2: 13.0, 128: 12.0, 98: 11.0, 91: 11.0, 17: 11.0, 154: 11.0, 85: 10.0, 169: 10.0, 67: 10.0, 139: 10.0, 125: 10.0, 102: 10.0, 146: 10.0, 66: 9.0, 36: 9.0, 23: 9.0, 99: 9.0, 111: 8.0, 24: 8.0, 94: 8.0, 117: 7.0, 101: 7.0, 189: 7.0, 158: 7.0, 28: 7.0, 164: 7.0, 84: 7.0, 186: 6.0, 187: 6.0, 19: 6.0, 180: 6.0, 182: 6.0, 61: 6.0, 0: 5.0, 134: 5.0, 103: 5.0, 47: 

In [None]:
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', -1)

  after removing the cwd from sys.path.


In [None]:
cc200_labels = pd.read_csv('./data/CC200_ROI_labels.csv')
# cc200_labels.head(5)
print('ASD Associated Regions : ')
rois = [64, 59, 32, 49, 55, 30, 94, 46]
asd_rois = cc200_labels[cc200_labels['ROI number'].isin(rois)]
display(asd_rois)

ASD Associated Regions : 


Unnamed: 0,ROI number,volume,center of mass,Dosenbach,AAL,Eickhoff-Zilles,Talairach-Tournoux,Harvard-Oxford
29,30,194,(2.9;-28.0;-36.0),"[""None"": 1.00]","[""None"": 1.00]","[""None"": 1.00]","[""None"": 1.00]","[""None"": 1.00]"
31,32,170,(43.5;9.9;-36.2),"[""None"": 1.00]","[""Temporal_Pole_Mid_R"": 0.59][""Temporal_Inf_R"": 0.35]","[""Right Medial Temporal Pole"": 0.61][""Right Inferior Temporal Gyrus"": 0.26][""None"": 0.12]","[""Right Middle Temporal Gyrus"": 0.44][""None"": 0.31][""Right Superior Temporal Gyrus"": 0.25]","[""Right Temporal Pole"": 0.96]"
45,46,227,(1.9;-38.2;32.2),"[""None"": 0.84]","[""Cingulum_Mid_R"": 0.33][""Cingulum_Post_L"": 0.23][""Cingulum_Mid_L"": 0.18][""Cingulum_Post_R"": 0.18]","[""Left Posterior Cingulate Cortex"": 0.26][""Right Middle Cingulate Cortex"": 0.24][""Right Posterior Cingulate Cortex"": 0.24][""Left Middle Cingulate Cortex"": 0.14]","[""Right Cingulate Gyrus"": 0.44][""Left Cingulate Gyrus"": 0.26][""Right Posterior Cingulate"": 0.14][""Left Posterior Cingulate"": 0.13]","[""Right Cingulate Gyrus; posterior division"": 0.74][""Left Cingulate Gyrus; posterior division"": 0.25]"
48,49,222,(61.9;-21.1;-15.6),"[""None"": 0.94]","[""Temporal_Mid_R"": 0.64][""Temporal_Inf_R"": 0.34]","[""Right Inferior Temporal Gyrus"": 0.53][""Right Middle Temporal Gyrus"": 0.47]","[""Right Inferior Temporal Gyrus"": 0.41][""Right Middle Temporal Gyrus"": 0.41][""Right Fusiform Gyrus"": 0.14]","[""Right Middle Temporal Gyrus; posterior division"": 0.75][""Right Inferior Temporal Gyrus; posterior division"": 0.23]"
54,55,247,(0.3;16.3;32.3),"[""None"": 0.83]","[""Cingulum_Mid_L"": 0.32][""Cingulum_Mid_R"": 0.22][""Cingulum_Ant_L"": 0.21][""Cingulum_Ant_R"": 0.20]","[""Left Anterior Cingulate Cortex"": 0.32][""Left Middle Cingulate Cortex"": 0.24][""Right Middle Cingulate Cortex"": 0.22][""Right Anterior Cingulate Cortex"": 0.20]","[""Left Cingulate Gyrus"": 0.44][""Right Cingulate Gyrus"": 0.29][""Right Anterior Cingulate"": 0.13][""Left Anterior Cingulate"": 0.13]","[""Right Cingulate Gyrus; anterior division"": 0.52][""Left Cingulate Gyrus; anterior division"": 0.27][""Left Paracingulate Gyrus"": 0.13]"
58,59,240,(36.7;17.2;3.6),"[""None"": 0.92]","[""Insula_R"": 0.61][""Putamen_R"": 0.20][""Frontal_Inf_Tri_R"": 0.11]","[""Right Insula Lobe"": 0.64][""Right Putamen"": 0.15]","[""Right Insula"": 0.46][""Right Inferior Frontal Gyrus"": 0.40][""Right Claustrum"": 0.14]","[""Right Insular Cortex"": 0.46][""Right Frontal Operculum Cortex"": 0.33]"
63,64,230,(28.8;-0.3;56.8),"[""None"": 1.00]","[""Frontal_Sup_R"": 0.46][""Frontal_Mid_R"": 0.35][""Precentral_R"": 0.20]","[""Right Superior Frontal Gyrus"": 0.39][""Right Middle Frontal Gyrus"": 0.37][""RightPrecentral Gyrus"": 0.23]","[""Right Middle Frontal Gyrus"": 0.80][""Right Superior Frontal Gyrus"": 0.12]","[""Right Superior Frontal Gyrus"": 0.38][""Right Precentral Gyrus"": 0.30][""Right Middle Frontal Gyrus"": 0.28]"
93,94,87,(14.2;-0.5;17.5),"[""None"": 1.00]","[""Caudate_R"": 0.84][""Thalamus_R"": 0.14]","[""Right Caudate Nucleus"": 0.77][""Right Thalamus"": 0.16]","[""Right Caudate"": 0.68][""Right Thalamus"": 0.23]","[""Right Caudate"": 0.71][""Right Thalamus"": 0.26]"


In [None]:
# ROIS : 
Right middle frontal gyrus(64), Right insula lobe(59), Right temporal pole(32), Right middle temporal gyrus(49), Right Caudate Nucleus(94) 

# Deep Lift

# New Section

In [None]:
dl_attributions = []
all_folds = pickle.load(open('./data/AllFoldssubjects.pkl', 'rb'))
for fold in range(10):
    fold_weights = torch.load(f'data/Weights/Fold_{fold+1}.pth', map_location=torch.device('cpu'))
    best_clf_model = MTAutoEncoder(tied = False, num_inputs = 19900, num_latent = 512, use_dropout = True) 
    best_clf_model.load_state_dict(fold_weights)
    best_clf_model = best_clf_model.to('cpu')

    test_subjects = all_folds[fold]['test']
    x_asd, y_asd = [], []
    for sample in test_subjects : 
        if(all_corr[sample][1] == 1):
          x_asd.append(all_corr[sample][0])
          y_asd.append(all_corr[sample][1])
    print('Number of ASD subjects in test set : ', len(x_asd))

    x_asd = torch.tensor(x_asd, dtype=torch.float)
    y_asd = np.array(y_asd)  

    y_asd_pred = best_clf_model(x_asd)
    y_asd_pred = y_asd_pred.detach().cpu().numpy()
    y_asd_pred = np.round(y_asd_pred)
    y_asd_pred = np.squeeze(y_asd_pred, axis = 1)

    right_indices = np.where(y_asd_pred == 1)
    x_asd_dl = x_asd[right_indices]
    print('Number of correctly predicted ASD subjects : ', len(x_asd_dl))

    dl_asd = DeepLift(best_clf_model)        
    grads_asd, delta_asd = attribute_image_features(dl_asd, inputs = x_asd_dl, target = 0)                     
    grads_asd = torch.mean(grads_asd, axis = 0)
    grads_asd = grads_asd.detach().cpu().numpy()
    dl_attributions.append(grads_asd)
                            
dl_attributions = np.array(dl_attributions)
# np.save('./data/IG_Attributions.npy', attributions)
print("Attributions shape : ", dl_attributions.shape)

Number of ASD samples in test set :  42
Number of correctly predicted ASD samples :  28


  "required_grads has been set automatically." % index
               activations. The hooks and attributes will be removed
            after the attribution is finished
  after the attribution is finished"""


Number of ASD samples in test set :  42
Number of correctly predicted ASD samples :  22
Number of ASD samples in test set :  42
Number of correctly predicted ASD samples :  25
Number of ASD samples in test set :  42
Number of correctly predicted ASD samples :  26
Number of ASD samples in test set :  42
Number of correctly predicted ASD samples :  31
Number of ASD samples in test set :  42
Number of correctly predicted ASD samples :  32
Number of ASD samples in test set :  42
Number of correctly predicted ASD samples :  30
Number of ASD samples in test set :  42
Number of correctly predicted ASD samples :  32
Number of ASD samples in test set :  42
Number of correctly predicted ASD samples :  23
Number of ASD samples in test set :  41
Number of correctly predicted ASD samples :  20
Attributions shape :  (10, 19900)


In [None]:
rois_count = {}    # {roi : number of times it is repeated in all 10 folds}

for grads_asd in dl_attributions :
  
    attr_vals_asd = grads_asd.copy()
    thresh = np.percentile(attr_vals_asd, 99)
    attr_vals_asd = np.where(attr_vals_asd > thresh,  1 , 0) # check1
    corr_matrix_asd = np.zeros((200,200))
    corr_matrix_asd[np.triu_indices(200, 1)] = attr_vals_asd
    print('Number of unique elements in corr_matrix : ', np.unique(corr_matrix_asd, return_counts = True))

    max_sum_rows = np.sum(corr_matrix_asd, axis = 1)      # check 2
    top_indices = np.argsort(max_sum_rows)            # Max value indices
    top_values = max_sum_rows[top_indices]      # Max values

    top20_indices  = top_indices[-20 : ]
    top20_values = top_values[-20 : ]

    print('Most repeated ROIS in ASD (Not index values): ', top20_indices + 1)
    print('Number of times ROIS repeated in ASD : ', top20_values)
    
    for index, roi in enumerate(top_indices): 
        if(roi in rois_count):
            rois_count[roi] += top_values[index]
        else : 
          rois_count[roi] = top_values[index]        

rois_count_sorted = dict(sorted(rois_count.items(), key = lambda x : x[1], reverse = True))
print(rois_count_sorted)

Number of unique elements in corr_matrix :  (array([0., 1.]), array([39801,   199]))
Most repeated ROIS in ASD (Not index values):  [ 89 106 154 130  31  30  43  28  65  21  10  71  32  57  49  59  64 117
  94  55]
Number of times ROIS repeated in ASD :  [3. 3. 3. 4. 4. 4. 4. 4. 4. 4. 5. 5. 5. 5. 6. 6. 6. 6. 7. 8.]
Number of unique elements in corr_matrix :  (array([0., 1.]), array([39801,   199]))
Most repeated ROIS in ASD (Not index values):  [ 78  93  84  33  94  28  71 130  46  49 154  77 106  32  21  59  55  17
  30  64]
Number of times ROIS repeated in ASD :  [3. 3. 3. 3. 3. 3. 4. 4. 4. 4. 4. 5. 5. 5. 5. 5. 6. 6. 6. 7.]
Number of unique elements in corr_matrix :  (array([0., 1.]), array([39801,   199]))
Most repeated ROIS in ASD (Not index values):  [ 66  28 128  15  31  93  94  39  42 105  63  36  17  57  32  49  64  30
  46  59]
Number of times ROIS repeated in ASD :  [3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 4. 4. 4. 4. 4. 4. 5. 5. 6.]
Number of unique elements in corr_matrix :  (arra

In [None]:
delta_asd

tensor([ 0.0448, -0.0280, -0.0142,  0.0200, -0.0104, -0.0270,  0.0045, -0.0501,
         0.0089, -0.0137, -0.0063,  0.0045, -0.0020, -0.0252,  0.0453,  0.0029,
        -0.0170,  0.0187,  0.0781,  0.0047])

In [None]:
rois = [64, 59, 49, 32, 30, 55, 94, 46]
asd_rois = cc200_labels[cc200_labels['ROI number'].isin(rois)]
display(asd_rois)

In [None]:
# print(rois_count_sorted).  # Without thresholding and taking the sum. 

{63: 0.28833801775652723, 34: 0.16755825132131255, 14: 0.15259633067985268, 70: 0.13047253297211048, 58: 0.12235610972428701, 41: 0.12204838184740958, 54: 0.11999169962167079, 29: 0.119460891259341, 48: 0.11912324991494355, 52: 0.11543538350486209, 127: 0.11251435212226324, 128: 0.10693960790720322, 88: 0.10345871149475779, 130: 0.10229100258155885, 129: 0.10204925380761753, 90: 0.09465215779146104, 20: 0.09419945080018129, 30: 0.08784245042409317, 83: 0.08576062744973681, 16: 0.08506434713670755, 72: 0.08309108378294525, 105: 0.07935774375355967, 89: 0.07755795605543256, 91: 0.07673209027984447, 82: 0.07601929354820683, 46: 0.07480984718746976, 109: 0.07152370382524624, 19: 0.06990669182987254, 45: 0.06989307003257972, 111: 0.06933357619506557, 118: 0.06875127717805271, 93: 0.06501561961407462, 65: 0.06432255460224846, 163: 0.060248144174904025, 66: 0.05892478769611261, 161: 0.05559441087351655, 110: 0.05502748499986784, 43: 0.05473448984499586, 117: 0.05314646348940971, 116: 0.053124