# Run Dataset

```
# Submit the dataset for processing.
# -I is the S3 prefix that also holds demographics.tsv, and -o is the S3 prefix
# that the "Copy Data Locally" step downloads results from.
# NOTE(review): the meaning of the remaining flags is not visible here — confirm against the submit_subjects help text.
submit_subjects \
  --upload_metadata \
  --save_details \
  --stagger \
  -q reTHINQ-c5-spot \
  -t 1.0.0-rc.11 \
  -I s3://cmet-scratch/maclaren-cmeds/ \
  -o s3://cmet-scratch/20200609-mclaren-1.0.0-rc.11-42-g8d976b0--take4/
```

# Copy Data Locally

```
# Create the local working directory and switch into it
mkdir -p /home/paul/cmet/data/20200609-mclaren-1.0.0-rc.11-42-g8d976b0--take4
cd /home/paul/cmet/data/20200609-mclaren-1.0.0-rc.11-42-g8d976b0--take4
# Fetch the demographics table stored alongside the input dataset
aws s3 cp s3://cmet-scratch/maclaren-cmeds/demographics.tsv .
# Fetch only files matching *subject_info.json and *.pdf from the results prefix
# (everything is excluded first, then the two patterns are re-included)
aws s3 cp \
  --recursive \
  --exclude "*" \
  --include "*subject_info.json" \
  --include "*.pdf" \
  s3://cmet-scratch/20200609-mclaren-1.0.0-rc.11-42-g8d976b0--take4/maclaren-cmeds/ .
# Remove every directory named 'cache'. -prune stops find from trying to descend
# into a directory that rm has just deleted, which otherwise produces spurious
# "No such file or directory" errors.
find . -type d -name 'cache' -prune -exec rm -rf {} \;
```

In [1]:
import json
import os
import fnmatch
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
# imports find_json_files(); load_json_file(); load_dataset();
from cmeds import *
# imports calc_cvs(); session_permute(); monte_carlo_perm_test
from test_retest import *

In [2]:
# Local Vars
# Directory holding the results downloaded in the "Copy Data Locally" step
maclaren_dir = '/home/paul/cmet/data/20200609-mclaren-1.0.0-rc.11-42-g8d976b0--take4/'
# demographics.tsv sits in that same directory; derive its path from maclaren_dir
# so the two locations cannot drift apart when the run name changes
maclaren_tsv = os.path.join(maclaren_dir, 'demographics.tsv')

In [3]:
# Load MacLaren data into dataframes
# load_dataset() is provided by the cmeds star-import. It is given the results
# directory and the demographics TSV, and its return value is unpacked into two
# objects: the volume table used by the analyses below and a second table.
# NOTE(review): presumably maclaren_norm_df is a normalized version of the
# volumes — confirm what normalization load_dataset applies.
# Subjects that cannot be loaded are reported as "Ignoring Subject ..." in the output.
maclaren_vol_df, maclaren_norm_df = load_dataset(maclaren_dir, maclaren_tsv)

Ignoring Subject (did it error out?) sub-01_run-39
Ignoring Subject (did it error out?) sub-01_run-02
Ignoring Subject (did it error out?) sub-01_run-09
Ignoring Subject (did it error out?) sub-01_run-08
Ignoring Subject (did it error out?) sub-01_run-24
Ignoring Subject (did it error out?) sub-01_run-33
Ignoring Subject (did it error out?) sub-01_run-13
Ignoring Subject (did it error out?) sub-01_run-16
Ignoring Subject (did it error out?) sub-01_run-14
Ignoring Subject (did it error out?) sub-01_run-32
Ignoring Subject (did it error out?) sub-01_run-01
Ignoring Subject (did it error out?) sub-01_run-36
Ignoring Subject (did it error out?) sub-01_run-06
Ignoring Subject (did it error out?) sub-01_run-26
Ignoring Subject (did it error out?) sub-01_run-27
Ignoring Subject (did it error out?) sub-01_run-40
Ignoring Subject (did it error out?) sub-01_run-03
Ignoring Subject (did it error out?) sub-01_run-19
Ignoring Subject (did it error out?) sub-01_run-18
Ignoring Subject (did it error 

In [7]:
# Full list of structure names to analyse, for both hemispheres.
# The names are generated from prefix/region combinations; the resulting order
# is: unprefixed brain-segmentation volume, Left-*, Right-*, the two remaining
# unprefixed totals, then lh_* and rh_* volumes.
structs_of_interest = [
    'BrainSegVolNotVentSurf',
    # 'Left-<region>' entries followed by the matching 'Right-<region>' entries
    *(
        f'{side}-{region}'
        for side in ('Left', 'Right')
        for region in (
            'Amygdala',
            'Caudate',
            'Cerebellum',
            'Hippocampus',
            'Lateral-Ventricle',
            'Putamen',
            'Thalamus',
            'White-Matter',
        )
    ),
    'TotalGrayVol',
    'White-Matter',
    # 'lh_<lobe>_volume' entries followed by the matching 'rh_<lobe>_volume' entries
    *(
        f'{hemi}_{lobe}_volume'
        for hemi in ('lh', 'rh')
        for lobe in ('cortex', 'frontal', 'occipital', 'parietal', 'temporal')
    ),
]

In [5]:
# Shorter list of structure names: unprefixed totals plus left-hemisphere entries only.
# NOTE(review): this cell is labelled In [5] but sits below the In [7] cell that
# assigns a longer list to the same name, so on a top-to-bottom run this shorter
# list is the one later cells see. The saved permutation-test table contains
# Right-*/rh_* columns, which only the longer list provides — confirm which
# list is intended and remove or rename the other.
structs_of_interest = (
    # Entries without a hemisphere prefix
    ['BrainSegVolNotVentSurf', 'TotalGrayVol', 'White-Matter']
    # 'lh_<lobe>_volume' entries
    + [
        'lh_' + lobe + '_volume'
        for lobe in ('cortex', 'frontal', 'parietal', 'occipital', 'temporal')
    ]
    # 'Left-<region>' entries
    + [
        'Left-' + region
        for region in (
            'White-Matter',
            'Lateral-Ventricle',
            'Hippocampus',
            'Amygdala',
            'Caudate',
            'Putamen',
            'Thalamus',
            'Cerebellum',
        )
    ]
)

In [6]:
# Name of the demographics.tsv column that holds the session
session_col = 'session'
# Name of the demographics.tsv column that holds the subject
subject_col = 'subject_num'

# Sessions 1 through 20, and subjects 2 and 3
session_list = list(range(1, 21))
subject_list = [2, 3]

# The MacLaren dataset can be processed with either the 'maclaren' method or the
# generalized 'gluer' method. Both are computed here because, as a sanity check,
# the two methods should give the same results for this dataset.
cvs_macmethod = calc_cvs(
    maclaren_vol_df,
    subject_list,
    session_list,
    subject_col,
    session_col,
    structs_of_interest,
    method='maclaren',
)
cvs_gluemethod = calc_cvs(
    maclaren_vol_df,
    subject_list,
    session_list,
    subject_col,
    session_col,
    structs_of_interest,
    method='gluer',
)

## This should be comparable to Table 1 in [1]

In [9]:
# Number of iterations passed to the permutation test
n = 1000
# NOTE(review): no random seed is set in the visible cells, so the p-values may
# differ between runs — confirm how monte_carlo_perm_test draws its permutations.
monte_carlo_perm_test(
    maclaren_vol_df,
    subject_list,
    session_list,
    subject_col,
    session_col,
    structs_of_interest,
    n_itrs=n,
    method='gluer',
)

Unnamed: 0,BrainSegVolNotVentSurf,Left-Amygdala,Left-Caudate,Left-Cerebellum,Left-Hippocampus,Left-Lateral-Ventricle,Left-Putamen,Left-Thalamus,Left-White-Matter,Right-Amygdala,...,lh_cortex_volume,lh_frontal_volume,lh_occipital_volume,lh_parietal_volume,lh_temporal_volume,rh_cortex_volume,rh_frontal_volume,rh_occipital_volume,rh_parietal_volume,rh_temporal_volume
mean-vol,73.162649,0.102252,0.224333,4.423637,0.283202,0.558095,0.370266,0.436063,15.682639,0.114649,...,16.573086,6.310468,1.588643,3.603883,3.866487,16.613011,6.055136,1.659518,3.862087,3.869639
total-cov,0.281098,1.983079,1.348791,0.737636,1.091552,1.738994,2.180141,0.921,0.617627,1.462906,...,1.139878,1.21709,1.588815,1.875929,2.098227,0.923001,1.389838,1.266195,1.236199,1.095878
session-cov,0.191779,1.917091,1.12088,0.413556,1.303782,1.096777,1.831705,0.843307,0.528821,1.444954,...,0.870651,0.915012,1.115156,1.309606,1.312375,0.790312,1.155843,1.150174,1.203751,1.013302
abs-diff-cov,0.089318,0.065988,0.227911,0.32408,0.21223,0.642217,0.348435,0.077693,0.088805,0.017952,...,0.269228,0.302078,0.473659,0.566323,0.785852,0.132689,0.233996,0.116021,0.032448,0.082576
p-vals,0.0,0.687,0.04,0.0,0.017,0.0,0.036,0.304,0.087,0.888,...,0.005,0.003,0.001,0.0,0.0,0.074,0.039,0.238,0.745,0.348
