# Run Dataset

```
# Submit the subjects found under the -I S3 prefix for processing; results are
# written under the -o S3 prefix, which the "Copy Data Locally" step reads from.
# NOTE(review): the meaning of the remaining flags (-q looks like a queue name,
# -t like a version tag) is inferred from their values only -- confirm against
# the submit_subjects help text before changing them.
submit_subjects \
  --upload_metadata \
  --save_details \
  --stagger \
  -q reTHINQ-c5-spot \
  -t 1.0.0-rc.11 \
  -I s3://cmet-scratch/maclaren-cmeds/ \
  -o s3://cmet-scratch/20200609-mclaren-1.0.0-rc.11-42-g8d976b0--take4/
```

# Copy Data Locally

```
# Create the local working directory (named after the run's S3 output prefix)
# and work from inside it; every later command uses '.' as its target.
mkdir -p /home/paul/cmet/data/20200609-mclaren-1.0.0-rc.11-42-g8d976b0--take4
cd /home/paul/cmet/data/20200609-mclaren-1.0.0-rc.11-42-g8d976b0--take4
# Fetch the demographics table that the notebook loads via maclaren_tsv.
aws s3 cp s3://cmet-scratch/maclaren-cmeds/demographics.tsv .
# Copy only files matching *subject_info.json and *.pdf: --exclude "*" drops
# everything first, then the later --include filters re-add the wanted files.
aws s3 cp \
  --recursive \
  --exclude "*" \
  --include "*subject_info.json" \
  --include "*.pdf" \
  s3://cmet-scratch/20200609-mclaren-1.0.0-rc.11-42-g8d976b0--take4/maclaren-cmeds/ .
# Delete every directory named 'cache' under the current directory.
# -prune keeps find from descending into a directory that rm has just removed
# (without it find reports "No such file or directory" and exits non-zero);
# '{} +' passes all matches to a single rm invocation.
find . -type d -name 'cache' -prune -exec rm -rf {} +
```

In [1]:
import json
import os
import fnmatch
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
# imports find_json_files(); load_json_file(); load_dataset(), add_regions();
from cmeds import *
# imports calc_cvs(); session_permute(); monte_carlo_perm_test
from test_retest import *

In [2]:
# Local Vars
# Directory holding the locally copied run outputs (see "Copy Data Locally").
maclaren_dir = '/home/paul/cmet/data/20200609-mclaren-1.0.0-rc.11-42-g8d976b0--take4/'
# The demographics table sits inside maclaren_dir; derive its path from the
# directory so the two values cannot drift apart when the dataset changes.
maclaren_tsv = os.path.join(maclaren_dir, 'demographics.tsv')

In [3]:
# Read the MacLaren results into dataframes twice: once with volumetric values
# taken from the 'volume' source (mm^3) and once from 'volume_percent_icv' (%icv).
# Every load_dataset call yields a pair: the measurement values and the
# normative percentile estimates.
# NOTE(review): the saved output of this cell is a FileNotFoundError for
# demographics.tsv while later cells show results -- re-run after copying the
# data so the saved notebook is consistent.
maclaren_vol_df, maclaren_vol_norm_df = load_dataset(
    maclaren_dir,
    maclaren_tsv,
    drop_subjects=[],
    vol_data_src='volume',
)
maclaren_picv_df, maclaren_picv_norm_df = load_dataset(
    maclaren_dir,
    maclaren_tsv,
    drop_subjects=[],
    vol_data_src='volume_percent_icv',
)

FileNotFoundError: [Errno 2] File /home/paul/cmet/data/20200609-mclaren-1.0.0-rc.11-42-g8d976b0--take4/demographics.tsv does not exist: '/home/paul/cmet/data/20200609-mclaren-1.0.0-rc.11-42-g8d976b0--take4/demographics.tsv'

In [4]:
# Left/right structures to be summed into one bilateral region, so the results
# can be compared directly with table 1 in [1].
# Each entry of `regions` is [[left_name, right_name], combined_name].
regions = [
    [['Left-' + base, 'Right-' + base], combined]
    for base, combined in (
        ('Hippocampus', 'Hippocampus'),
        ('Lateral-Ventricle', 'Lateral-Ventricles'),
        ('Amygdala', 'Amygdala'),
        ('Putamen', 'Putamen'),
        ('Caudate', 'Caudate'),
        ('Thalamus', 'Thalamus'),
    )
]
# Apply the same region combinations to all four dataframes, rebinding each
# name to the value returned by add_regions.
# NOTE(review): every name is overwritten with its own transformed value, so
# re-running this cell feeds already-combined frames back into add_regions --
# confirm that is harmless.
# NOTE(review): the *_norm_df frames hold normative percentile estimates;
# confirm that combining left/right percentiles this way is meaningful.
maclaren_vol_df = add_regions(maclaren_vol_df,regions)
maclaren_vol_norm_df = add_regions(maclaren_vol_norm_df,regions)
maclaren_picv_df = add_regions(maclaren_picv_df,regions)
maclaren_picv_norm_df = add_regions(maclaren_picv_norm_df,regions)

In [11]:
# Full per-hemisphere structure list: whole-brain measures, the left and right
# subcortical structures, then the left and right cortical lobe volumes.
# NOTE(review): later cells rebind structs_of_interest, so on a top-to-bottom
# run this list is replaced before it is used.
structs_of_interest = (
    ['BrainSegVolNotVentSurf']
    + [
        side + name
        for side in ('Left-', 'Right-')
        for name in (
            'Amygdala',
            'Caudate',
            'Cerebellum',
            'Hippocampus',
            'Lateral-Ventricle',
            'Putamen',
            'Thalamus',
            'White-Matter',
        )
    ]
    + ['TotalGrayVol', 'White-Matter']
    + [
        hemi + lobe + '_volume'
        for hemi in ('lh_', 'rh_')
        for lobe in ('cortex', 'frontal', 'occipital', 'parietal', 'temporal')
    ]
)

In [29]:
# Left-hemisphere-only structure list: whole-brain measures first, then the
# left cortical lobe volumes, then the left subcortical structures.
# NOTE(review): structs_of_interest is rebound again by a later cell, so on a
# top-to-bottom run this list is replaced before it is used.
structs_of_interest = (
    ['BrainSegVolNotVentSurf', 'TotalGrayVol', 'White-Matter']
    + [
        'lh_' + lobe + '_volume'
        for lobe in ('cortex', 'frontal', 'parietal', 'occipital', 'temporal')
    ]
    + [
        'Left-' + name
        for name in (
            'White-Matter',
            'Lateral-Ventricle',
            'Hippocampus',
            'Amygdala',
            'Caudate',
            'Putamen',
            'Thalamus',
            'Cerebellum',
        )
    ]
)

In [7]:
# Combined bilateral regions only; the names match the combined-region names
# listed in `regions`.
structs_of_interest = [
    'Hippocampus', 'Lateral-Ventricles', 'Amygdala',
    'Putamen', 'Caudate', 'Thalamus',
]

In [8]:
# Names of the demographics.tsv columns that hold the session and the subject.
session_col, subject_col = 'session', 'subject_num'

# Sessions 1 through 20, and subjects 2 and 3, are included in the analysis.
session_list = list(range(1, 21))
subject_list = [2, 3]

# The MacLaren dataset can be processed with either the 'maclaren' method or
# the generalized 'gluer' method; as a sanity check, both should give the same
# results for this dataset.
cvs_macmethod = calc_cvs(
    maclaren_vol_df, subject_list, session_list, subject_col, session_col,
    structs_of_interest, method='maclaren',
)
cvs_gluemethod = calc_cvs(
    maclaren_vol_df, subject_list, session_list, subject_col, session_col,
    structs_of_interest, method='gluer',
)

# Same two computations, using the percent-ICV data as input instead of mm^3.
cvs_macmethod_icv = calc_cvs(
    maclaren_picv_df, subject_list, session_list, subject_col, session_col,
    structs_of_interest, method='maclaren',
)
cvs_gluemethod_icv = calc_cvs(
    maclaren_picv_df, subject_list, session_list, subject_col, session_col,
    structs_of_interest, method='gluer',
)

## These results should be comparable to Table 1 in [1]

In [9]:
# Number of iterations handed to the permutation test as n_itrs.
# NOTE(review): no random seed is set in the visible cells; if the test draws
# random permutations, the p-vals will differ from run to run -- confirm.
n = 1000
monte_carlo_perm_test(
    maclaren_vol_df,
    subject_list,
    session_list,
    subject_col,
    session_col,
    structs_of_interest,
    n_itrs=n,
    method='gluer',
)

Unnamed: 0,Hippocampus,Lateral-Ventricles,Amygdala,Putamen,Caudate,Thalamus
mean-vol,8660.34375,15643.9925,3290.56625,11229.76125,7051.12625,13035.01375
total-cov,0.732712,1.428306,1.213812,1.103016,0.928835,0.625535
session-cov,0.754715,0.950413,1.216737,0.979911,0.789903,0.542555
abs-diff-cov,0.022003,0.477893,0.002925,0.123105,0.138931,0.08298
p-vals,0.711,0.0,0.986,0.171,0.05,0.079


In [12]:
# Same permutation test, run on the normative-percentile dataframe.
# NOTE(review): the saved output of this cell lists per-hemisphere structures,
# so it appears to have been produced with an earlier structs_of_interest than
# the one defined last in file order -- re-run to refresh the output.
n = 1000
monte_carlo_perm_test(
    maclaren_vol_norm_df,
    subject_list,
    session_list,
    subject_col,
    session_col,
    structs_of_interest,
    n_itrs=n,
    method='gluer',
)

Unnamed: 0,BrainSegVolNotVentSurf,Left-Amygdala,Left-Caudate,Left-Cerebellum,Left-Hippocampus,Left-Lateral-Ventricle,Left-Putamen,Left-Thalamus,Left-White-Matter,Right-Amygdala,...,lh_cortex_volume,lh_frontal_volume,lh_occipital_volume,lh_parietal_volume,lh_temporal_volume,rh_cortex_volume,rh_frontal_volume,rh_occipital_volume,rh_parietal_volume,rh_temporal_volume
mean-vol,55.011868,37.421335,41.405132,55.869449,44.693023,47.220954,61.083857,58.992008,23.996521,50.761324,...,74.533588,92.773924,62.698019,33.92627,61.182705,81.33172,78.547911,62.180212,65.054309,85.936847
total-cov,7.308646,26.006592,12.772852,11.324928,16.788254,5.110548,21.915473,20.446066,13.538717,16.491264,...,10.809711,3.993229,9.235421,29.101389,24.089442,7.136662,9.597228,9.123576,11.204808,5.44443
session-cov,5.078402,26.380076,11.134683,5.28008,21.163843,3.47318,15.392453,16.347158,10.57611,15.917936,...,8.917491,3.020233,7.469712,19.67608,17.710075,6.21479,7.945281,8.171012,11.545097,4.877856
abs-diff-cov,2.230244,0.373485,1.638169,6.044849,4.37559,1.637368,6.52302,4.098907,2.962607,0.573329,...,1.89222,0.972996,1.765709,9.425309,6.379367,0.921872,1.651947,0.952564,0.340289,0.566574
p-vals,0.001,0.878,0.145,0.0,0.004,0.0,0.002,0.076,0.008,0.727,...,0.098,0.014,0.061,0.0,0.013,0.16,0.022,0.328,0.734,0.194
