# Goals:
## 1. Design and implement an analysis pipeline to highlight the important findings of each experiment
## 3. Visualize the results so that they are ready for publication
## 3. Compare different experiments based on a given factor to find the optimum pipeline for a given cohort

### The idea is to create a draft for an analysis script to be added to the src folder

## McNemar’s Test to Compare Two Machine Learning Classifiers

In [2]:
import pandas as pd
import scipy.stats as stats
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import dill
import nibabel.freesurfer.io as fio
import sys
import json
sns.set()

In [3]:
# This directory is created by running
#   python main.py -i ../experiments/whatevername -o ../output/whateverfolder
# It is assumed that "main_exp_directory" contains several sub-directories, each
# holding the results of one experiment, and that the experiments differ by a
# SINGLE factor only (e.g. behavioral test, data representation, normalization
# method, ...).
main_exp_directory = "../output/Agebetween10t13_severTD_alltests_minmax_percentile"

In [4]:
# Keep only the entries of the main experiment directory that are directories;
# each one is treated as a sub-experiment.
sub_exps = [
    entry
    for entry in os.listdir(main_exp_directory)
    if os.path.isdir(os.path.join(main_exp_directory, entry))
]

In [5]:
sub_exps, len(sub_exps)

(['comm_median_std_sever_td',
  'awa_median_std_sever_td',
  'man_median_std_sever_td',
  'cog_median_std_sever_td',
  'tot_median_std_sever_td',
  'mot_median_std_sever_td'],
 6)

In [6]:
# Main analysis loop: announce every sub-experiment and build its full path
for exp in sub_exps:
    # Short label = the text before the first underscore of the folder name
    exp_descr = exp.partition('_')[0]
    full_path = os.path.join(main_exp_directory, exp)
    print(f"Results of {exp_descr}")


Results of comm
Results of awa
Results of man
Results of cog
Results of tot
Results of mot


In [7]:
# Analysis of RFE
def analyzeRFE(fldr):
    """Report the best RFE method of one experiment and return its features.

    Three files are read from ``fldr``:

    * ``selected_feats.json`` -- a mapping that is indexed with the name of
      the best RFE method.
    * ``group_df_afterFixation.csv`` -- only its number of columns is printed.
    * ``pseudo_metrics.csv`` -- loaded with its first two columns as the
      index; it must provide an ``acc`` column.

    Args:
        fldr: Path of the experiment output directory.

    Returns:
        The ``selected_feats.json`` entry of the RFE method whose row has the
        highest ``acc`` (the first such row when several rows tie).
    """
    with open(os.path.join(fldr, 'selected_feats.json'), 'r') as f:
        feats_dict = json.load(f)

    # Only the header is needed to count the columns, so skip the data rows.
    header = pd.read_csv(os.path.join(fldr, "group_df_afterFixation.csv"), nrows=0)
    print(f"Total # of features: {len(header.columns)}")

    df_pseudo = pd.read_csv(os.path.join(fldr, "pseudo_metrics.csv"), index_col=[0, 1])
    # idxmax() yields the index label itself (a tuple for the two-level index),
    # so the result does not depend on argmax() returning a position.
    best_rfe = df_pseudo['acc'].idxmax()[0]
    print(f'Best RFE based on pseudo metrics is: {best_rfe}')

    sfeats = feats_dict[best_rfe]
    print(f'# of selected features using {best_rfe} is {len(sfeats)}')
    return sfeats
        

In [12]:
# Analysis of ML
def analyzeML(fldr):
    """Load and return the object stored in ``ML_obj.p`` inside ``fldr``.

    The file is opened in binary mode and deserialized with ``dill``.
    NOTE: ``dill.load`` can execute arbitrary code while deserializing, so it
    should only be pointed at files produced by a trusted run.

    Args:
        fldr: Path of the experiment output directory.

    Returns:
        Whatever object ``dill.load`` reconstructs from the file.
    """
    ml_path = os.path.join(fldr, 'ML_obj.p')
    with open(ml_path, 'rb') as handle:
        return dill.load(handle)

In [8]:
# Run the RFE analysis once per sub-experiment, in the fixed order
# comm, man, cog, mot, tot, awa, and bind each result to its own name.
comm_feats, man_feats, cog_feats, mot_feats, tot_feats, awa_feats = (
    analyzeRFE(os.path.join(main_exp_directory, f"{prefix}_median_std_sever_td"))
    for prefix in ("comm", "man", "cog", "mot", "tot", "awa")
)


Total # of features: 1095
Best RFE based on pseudo metrics is: lsvm
# of selected features using lsvm is 385
Total # of features: 1095
Best RFE based on pseudo metrics is: lsvm
# of selected features using lsvm is 101
Total # of features: 1095
Best RFE based on pseudo metrics is: lsvm
# of selected features using lsvm is 18
Total # of features: 1095
Best RFE based on pseudo metrics is: lsvm
# of selected features using lsvm is 37
Total # of features: 1095
Best RFE based on pseudo metrics is: lsvm
# of selected features using lsvm is 233
Total # of features: 1095
Best RFE based on pseudo metrics is: lsvm
# of selected features using lsvm is 275


# What are the most common features among all reports?

In [9]:
# Pool the selected features of every report into one flat list
# (duplicates are kept on purpose: they are counted afterwards).
all_selected_feats = [
    *comm_feats,
    *man_feats,
    *cog_feats,
    *mot_feats,
    *tot_feats,
    *awa_feats,
]


In [12]:
from collections import Counter

In [16]:
# (feature, count) pairs, highest count first; ties are ordered by feature
# name in descending order.
sored_sfeats_occur = sorted(
    Counter(all_selected_feats).items(),
    key=lambda pair: pair[::-1],
    reverse=True,
)

In [22]:
sored_sfeats_occur[:200]

[('volume_rtemporalpole_PERC80', 6),
 ('area_ltransversetemporal_PERC60', 6),
 ('volume_rinferiortemporal_PERC80', 5),
 ('thickness_rtransversetemporal_PERC80', 5),
 ('thickness_rinsula_PERC80', 5),
 ('curv_rfusiform_PERC80', 5),
 ('curv_lposteriorcingulate_PERC20', 5),
 ('curv_lpostcentral_PERC20', 5),
 ('area_rlateraloccipital_PERC80', 5),
 ('area_rinferiorparietal_PERC80', 5),
 ('area_ltransversetemporal_PERC80', 5),
 ('area_listhmuscingulate_PERC80', 5),
 ('area_linsula_PERC80', 5),
 ('volume_rtransversetemporal_PERC60', 4),
 ('volume_rparsorbitalis_PERC40', 4),
 ('volume_rparsopercularis_PERC60', 4),
 ('volume_rcaudalmiddlefrontal_PERC60', 4),
 ('volume_lsuperiorparietal_PERC80', 4),
 ('volume_lparsopercularis_PERC80', 4),
 ('thickness_rparacentral_PERC80', 4),
 ('thickness_rbankssts_PERC80', 4),
 ('thickness_ltransversetemporal_PERC40', 4),
 ('thickness_lsupramarginal_PERC60', 4),
 ('thickness_lrostralanteriorcingulate_PERC80', 4),
 ('thickness_lcaudalmiddlefrontal_PERC80', 4),
 

In [20]:
 # Number of features selected in exactly 4 reports, computed from the tally
 # itself rather than from a hand-pasted copy of the displayed tuples (which
 # silently goes stale whenever the experiment directory changes).
 sum(1 for _, n_reports in sored_sfeats_occur if n_reports == 4)

45

In [23]:
 # Number of features selected in exactly 3 reports, computed from the tally
 # itself rather than from a hand-pasted copy of the displayed tuples (which
 # silently goes stale whenever the experiment directory changes).
 sum(1 for _, n_reports in sored_sfeats_occur if n_reports == 3)

94

# Findings
### There are 2 features found in all 6 reports
### There are 11 features found in 5 reports
### There are 45 features found in 4 reports
### There are 94 features found in 3 reports


# What are the most common morphological features?

In [29]:
len(all_selected_feats)

1049

In [30]:
# First underscore-separated token of every selected feature name
morph_feats = [feat.split('_')[0] for feat in all_selected_feats]

In [33]:
# Tally of the first name tokens, highest count first (ties: name descending)
sorted(
    Counter(morph_feats).items(),
    key=lambda pair: pair[::-1],
    reverse=True,
)

[('curv', 369), ('area', 258), ('volume', 213), ('thickness', 209)]

# What are the most common brain regions?

In [35]:
# Second underscore-separated token of every selected feature name
breg_feats = [feat.split('_')[1] for feat in all_selected_feats]

In [36]:
# Tally of the second name tokens, highest count first (ties: name descending)
sorted(
    Counter(breg_feats).items(),
    key=lambda pair: pair[::-1],
    reverse=True,
)

[('ltransversetemporal', 34),
 ('rcuneus', 32),
 ('rtransversetemporal', 30),
 ('rparsopercularis', 26),
 ('lcaudalanteriorcingulate', 26),
 ('rinferiorparietal', 25),
 ('rcaudalmiddlefrontal', 25),
 ('rparsorbitalis', 24),
 ('lparstriangularis', 24),
 ('rparacentral', 23),
 ('lparsopercularis', 23),
 ('rentorhinal', 22),
 ('lsuperiorparietal', 22),
 ('rparstriangularis', 21),
 ('lpostcentral', 21),
 ('lparahippocampal', 21),
 ('listhmuscingulate', 21),
 ('lposteriorcingulate', 19),
 ('rmedialorbitofrontal', 18),
 ('rlateraloccipital', 18),
 ('risthmuscingulate', 18),
 ('lpericalcarine', 18),
 ('rsuperiortemporal', 17),
 ('rbankssts', 17),
 ('lbankssts', 17),
 ('rtemporalpole', 16),
 ('rsupramarginal', 16),
 ('rpostcentral', 16),
 ('lfusiform', 16),
 ('lfrontalpole', 16),
 ('rprecuneus', 15),
 ('rlateralorbitofrontal', 15),
 ('rinferiortemporal', 15),
 ('lrostralanteriorcingulate', 15),
 ('llingual', 15),
 ('linsula', 15),
 ('lcaudalmiddlefrontal', 15),
 ('rparahippocampal', 14),
 ('rm

# Which hemisphere is most involved?

In [37]:
# Leading character of every region token
hemi = [region[0] for region in breg_feats]

In [38]:
# Tally of the leading characters, highest count first (ties: name descending)
sorted(
    Counter(hemi).items(),
    key=lambda pair: pair[::-1],
    reverse=True,
)

[('r', 559), ('l', 490)]

In [39]:
# Fraction of selected features whose region token starts with 'r', computed
# from the data instead of re-typing the counts shown by the previous cell.
hemi.count('r') / len(hemi)

0.5328884652049571