# Run Dataset

```
# Submit the dataset for processing: -I is the S3 input prefix and -o the S3 output prefix.
# The copy step below reads demographics.tsv from the -I prefix and the results from
# <-o prefix>/maclaren-cmeds/.
# NOTE(review): -q looks like a queue name and -t a release tag -- confirm against
# the submit_subjects help text.
# NOTE(review): the output prefix spells "mclaren" while the input prefix spells
# "maclaren"; the local paths used later in this notebook follow the "mclaren" spelling.
submit_subjects \
  --upload_metadata \
  --save_details \
  --stagger \
  -q reTHINQ-c5-spot \
  -t 1.0.0-rc.11 \
  -I s3://cmet-scratch/maclaren-cmeds/ \
  -o s3://cmet-scratch/20200609-mclaren-1.0.0-rc.11-42-g8d976b0--take4/
```

# Copy Data Locally

```
# Create the local directory for this run and work from inside it.
mkdir -p /home/paul/cmet/data/20200609-mclaren-1.0.0-rc.11-42-g8d976b0--take4
cd /home/paul/cmet/data/20200609-mclaren-1.0.0-rc.11-42-g8d976b0--take4
# Demographics table stored under the input prefix.
aws s3 cp s3://cmet-scratch/maclaren-cmeds/demographics.tsv .
# Download only files matching *subject_info.json and *.pdf: everything is excluded
# first, then the two patterns are re-included.
aws s3 cp \
  --recursive \
  --exclude "*" \
  --include "*subject_info.json" \
  --include "*.pdf" \
  s3://cmet-scratch/20200609-mclaren-1.0.0-rc.11-42-g8d976b0--take4/maclaren-cmeds/ .
# Remove every directory named 'cache' below the current directory.
# -prune keeps find from descending into a directory it is about to delete; without it
# find reports "No such file or directory" for each removed directory and exits non-zero.
find . -type d -name 'cache' -prune -exec rm -rf {} +
```

In [1]:
import json
import os
import fnmatch
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
# imports find_json_files(); load_json_file(); load_dataset();
from cmeds import *
# imports calc_cvs(); session_permute(); monte_carlo_perm_test
from test_retest import *

In [2]:
# Local paths: the directory the run was copied into (trailing slash included) and
# the demographics table that sits directly inside it.
maclaren_dir = '/home/paul/cmet/data/20200609-mclaren-1.0.0-rc.11-42-g8d976b0--take4/'
maclaren_tsv = maclaren_dir + 'demographics.tsv'

In [4]:
# Load the MacLaren data into dataframes. Volumetric values can be loaded either in
# mm^3 ('volume') or as a percentage of ICV ('volume_percent_icv'). Each load returns
# two frames: the measurement values and the normative percentile estimates.

# Volumes in mm^3.
maclaren_vol_df, maclaren_vol_norm_df = load_dataset(
    maclaren_dir,
    maclaren_tsv,
    drop_subjects=[],
    vol_data_src='volume',
)

# Volumes as %ICV.
maclaren_picv_df, maclaren_picv_norm_df = load_dataset(
    maclaren_dir,
    maclaren_tsv,
    drop_subjects=[],
    vol_data_src='volume_percent_icv',
)

Ignoring Subject (did it error out?) sub-01_run-39
Ignoring Subject (did it error out?) sub-01_run-02
Ignoring Subject (did it error out?) sub-01_run-09
Ignoring Subject (did it error out?) sub-01_run-08
Ignoring Subject (did it error out?) sub-01_run-24
Ignoring Subject (did it error out?) sub-01_run-33
Ignoring Subject (did it error out?) sub-01_run-13
Ignoring Subject (did it error out?) sub-01_run-16
Ignoring Subject (did it error out?) sub-01_run-14
Ignoring Subject (did it error out?) sub-01_run-32
Ignoring Subject (did it error out?) sub-01_run-01
Ignoring Subject (did it error out?) sub-01_run-36
Ignoring Subject (did it error out?) sub-01_run-06
Ignoring Subject (did it error out?) sub-01_run-26
Ignoring Subject (did it error out?) sub-01_run-27
Ignoring Subject (did it error out?) sub-01_run-40
Ignoring Subject (did it error out?) sub-01_run-03
Ignoring Subject (did it error out?) sub-01_run-19
Ignoring Subject (did it error out?) sub-01_run-18
Ignoring Subject (did it error 

In [4]:
# Full bilateral structure list: whole-brain measures, the Left-/Right- regions and
# the lh_/rh_ cortical volumes, in the same order as the original explicit list.
# NOTE(review): a later cell assigns structs_of_interest again with a shorter,
# left-hemisphere-only list, so when cells run top to bottom this value is replaced.
structs_of_interest = (
    ['BrainSegVolNotVentSurf']
    + [
        side + '-' + region
        for side in ('Left', 'Right')
        for region in (
            'Amygdala',
            'Caudate',
            'Cerebellum',
            'Hippocampus',
            'Lateral-Ventricle',
            'Putamen',
            'Thalamus',
            'White-Matter',
        )
    ]
    + ['TotalGrayVol', 'White-Matter']
    + [
        hemi + '_' + lobe + '_volume'
        for hemi in ('lh', 'rh')
        for lobe in ('cortex', 'frontal', 'occipital', 'parietal', 'temporal')
    ]
)

In [6]:
# Reduced structure list: whole-brain measures followed by left-hemisphere cortical
# volumes and left-side regions only. The order matches the original explicit list
# and the column order of the permutation-test table shown further down.
# This assignment replaces any structs_of_interest value set by an earlier cell.
structs_of_interest = (
    ['BrainSegVolNotVentSurf', 'TotalGrayVol', 'White-Matter']
    + [
        'lh_' + lobe + '_volume'
        for lobe in ('cortex', 'frontal', 'parietal', 'occipital', 'temporal')
    ]
    + [
        'Left-' + region
        for region in (
            'White-Matter',
            'Lateral-Ventricle',
            'Hippocampus',
            'Amygdala',
            'Caudate',
            'Putamen',
            'Thalamus',
            'Cerebellum',
        )
    ]
)

In [8]:
# Names of the demographics.tsv columns that hold the subject and session info.
subject_col = 'subject_num'
session_col = 'session'

# Subjects and sessions passed to the analysis: subjects 2 and 3, sessions 1 through 20.
subject_list = [2, 3]
session_list = list(range(1, 21))

# The MacLaren dataset can be processed with either the 'maclaren' method or the
# generalized 'gluer' method. As a sanity check, both methods should give the same
# results for this dataset.

# Input: volumes in mm^3.
cvs_macmethod = calc_cvs(
    maclaren_vol_df,
    subject_list,
    session_list,
    subject_col,
    session_col,
    structs_of_interest,
    method='maclaren',
)
cvs_gluemethod = calc_cvs(
    maclaren_vol_df,
    subject_list,
    session_list,
    subject_col,
    session_col,
    structs_of_interest,
    method='gluer',
)

# Input: the same runs on percent-ICV data instead of mm^3.
cvs_macmethod_icv = calc_cvs(
    maclaren_picv_df,
    subject_list,
    session_list,
    subject_col,
    session_col,
    structs_of_interest,
    method='maclaren',
)
cvs_gluemethod_icv = calc_cvs(
    maclaren_picv_df,
    subject_list,
    session_list,
    subject_col,
    session_col,
    structs_of_interest,
    method='gluer',
)

## This should be comparable to Table 1 in [1]

In [13]:
# Iteration count handed to the permutation test as n_itrs.
n = 1000

# The call is the cell's last expression, so its return value is what the cell displays.
# NOTE(review): no random seed is set in the visible cells; if the test draws random
# permutations, the p-vals will differ from run to run -- confirm and seed if needed.
monte_carlo_perm_test(
    maclaren_vol_df,
    subject_list,
    session_list,
    subject_col,
    session_col,
    structs_of_interest,
    n_itrs=n,
    method='gluer',
)

Unnamed: 0,BrainSegVolNotVentSurf,TotalGrayVol,White-Matter,lh_cortex_volume,lh_frontal_volume,lh_parietal_volume,lh_occipital_volume,lh_temporal_volume,Left-White-Matter,Left-Lateral-Ventricle,Left-Hippocampus,Left-Amygdala,Left-Caudate,Left-Putamen,Left-Thalamus,Left-Cerebellum
mean-vol,1112280.0,604897.9,481112.5875,252287.775,95962.275,54824.475,24258.2625,58952.5625,238154.65,8567.565,4289.37375,1552.05,3417.185,5642.37,6592.445,67584.10375
total-cov,0.4137697,0.677517,0.486445,1.152907,1.303041,1.878735,1.607167,2.02355,0.610462,1.577902,1.099042,1.994225,1.27964,2.168283,0.944958,0.706172
session-cov,0.3274156,0.670641,0.44774,0.940043,0.979732,1.389655,1.192399,1.331097,0.508252,0.967618,1.247299,1.922322,1.12534,1.765608,0.861241,0.319214
abs-diff-cov,0.08635402,0.006876,0.038705,0.212864,0.323309,0.48908,0.414768,0.692453,0.10221,0.610283,0.148257,0.071902,0.1543,0.402675,0.083717,0.386958
p-vals,0.021,0.911,0.361,0.03,0.001,0.001,0.002,0.0,0.028,0.0,0.082,0.689,0.15,0.022,0.265,0.0
