# Analysis - zBrains outputs at 3T and 7T in epilepsy
1. zBrain/wBrain (surface)  
    a. Histograms of vertex-wise scores  
        i. sub-comparisons with different smoothing kernels  
    b. Quantifying extreme vertex groups  
        i. number of identified abnormal areas  
        ii. size of each abnormal area (number of adjacent extreme vertices)  
2. Brainstats (surface)  
    a. t-scores for 3T and 7T  
    b. Cohen's d map between 3T and 7T images  


## 0. Initialize

In [None]:
import os, sys
import pandas as pd
import datetime as dt
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio

import vrtx
import plots

sys.path.append('/host/verges/tank/data/daniel/')  # Replace with the path to Utils
from Utils import gen


In [None]:
# Timestamp for tagging output files, formatted as day, abbreviated month,
# year, underscore, hour, minute (pattern %d%b%Y_%H%M).
date = f"{dt.datetime.now():%d%b%Y_%H%M}"

In [None]:
# Reload the project modules so that edits made to their source files are
# picked up without re-importing from a fresh session.
import importlib

importlib.reload(vrtx)
importlib.reload(gen)
importlib.reload(plots)

In [None]:
# --- Directories ------------------------------------------------------------
# Root of the outputs and the name of the values sub-folder.
output_dir = "/host/verges/tank/data/daniel/3T7T/z/outputs"
values_dir = "values"
# Destination for figures and summary tables; sits directly under output_dir.
processed_output_dir = f"{output_dir}/fig_stats"


# --- 3T-7T ID correspondence ------------------------------------------------
# "path" is the correspondence CSV; every other entry maps a key to an
# identical string (presumably the matching column name in that CSV -- TODO
# confirm).
# NOTE(review): the variable name is spelled "correps" (not "corresp"); it is
# kept as-is because other cells may reference it.
correps_IDs = {
    "path": "/host/verges/tank/data/daniel/3T7T/z/data/pt/IDs_ses_analyses_12Mar.csv",
    **{key: key for key in ("3T_ID", "7T_ID", "3T_SES", "7T_SES")},
}

# --- Studies ----------------------------------------------------------------
MICs = dict(name="MICs")
PNI = dict(name="PNI")

# --- zBrain analysis regions ------------------------------------------------
# One dict per region; "features" is the list of features to extract and
# "smoothing" lists the smoothing values analysed for that region.
cortex = dict(
    region="cortex",
    surfaces=["midthickness", "white"],
    resolution="32k",
    features=["ADC", "T1map", "volume"],
    smoothing=[2, 5, 10],  # single-value alternative used previously: [10]
)

hippocampus = dict(
    region="hippocampus",
    surfaces=["midthickness"],
    resolution="0p5mm",
    features=["ADC", "T1map", "volume"],
    smoothing=[1, 2, 5],  # single-value alternative used previously: [5]
)

# The subcortex entry carries no surface or resolution keys.
subcortex = dict(
    region="subcortex",
    features=["ADC", "T1map", "volume"],
    smoothing=[2, 5, 10],
)

regions = [cortex, hippocampus, subcortex]

In [None]:
# get list of corresponding 3T, 7T aggregate files
# plots.corresp_paths receives the region configs, both study dicts and the
# output/values directories. Its return layout is not visible here; the
# printout below labels its shape as (num files, num studies).
files_lst = plots.corresp_paths(regions, MICs, PNI, output_dir, values_dir)
#print(files_lst)
shape = gen.lstOlst_shape(files_lst,print=False)
print(f"raw shape of files_lst (num files, num studies): {shape}")

# get missing files
missing = plots.get_missingPths(files_lst)

# remove missing files from list
# list.remove deletes the first equal element in place and raises ValueError
# when there is none, so every item of `missing` must be an element of
# files_lst -- NOTE(review): confirm what plots.get_missingPths returns.
for m in missing:
    files_lst.remove(m)

# report the shape again now that the missing entries are gone
shape = gen.lstOlst_shape(files_lst,print=False)
print(f"shape of files_lst (num files, num studies): {shape}")

In [None]:
# Print the current file list for visual inspection.
print(files_lst)

In [None]:
# Reload vrtx so that edits to its source take effect in this session.
importlib.reload(vrtx)

# Summary statistics & preparation for group histograms
- All analysed PX vs all PNE for each file type


In [None]:
# Summary statistics and clamped copies for every aggregate file.
#
# For each file in files_lst this cell:
#   1. computes summary statistics (vrtx.summaryStats) and tags them with the
#      region, taken from the name of the folder that contains the file;
#   2. writes the clamped values (vrtx.clamp) to
#      <output_dir>/<values_dir>/<study>/<region>/clamp/<name>_clamp.csv.
#
# Results:
#   df_summary      -- all per-file summary tables concatenated
#   clamp_files_lst -- clamped-file paths, grouped like files_lst
#
# Raises ValueError when a file's region folder or study label is not one of
# the known names, instead of continuing with an unset or stale value.

known_studies = ("MICs", "PNI")
# Region names come from the config dicts. The previous version compared the
# "region" column against the dicts themselves rather than their names, so
# `region` was never assigned correctly.
known_regions = tuple(cfg["region"] for cfg in (cortex, hippocampus, subcortex))

summary_frames = []
clamp_files_lst = []
for lst in files_lst:
    clamp_lst = []
    for file in lst:
        print(os.path.basename(file))

        # The region is the name of the folder that holds the file.
        region = os.path.basename(os.path.dirname(file))
        if region not in known_regions:
            raise ValueError(
                f"Unrecognised region folder '{region}' for file {file}; "
                f"expected one of {known_regions}"
            )

        df = vrtx.summaryStats(file)
        df.insert(df.columns.get_loc("study") + 1, "region", region)
        # Collected here and concatenated once after the loop.
        summary_frames.append(df)

        # A file belongs to a study only if every row carries that label.
        study = next(
            (name for name in known_studies if (df["study"] == name).all()),
            None,
        )
        if study is None:
            raise ValueError(
                f"Could not determine a single study for file {file}; "
                f"expected all rows to be one of {known_studies}"
            )

        # clamp values
        df_clamped = vrtx.clamp(file)

        clamp_dir = os.path.join(output_dir, values_dir, study, region, "clamp")
        # to_csv does not create missing folders.
        os.makedirs(clamp_dir, exist_ok=True)
        clamp_name = os.path.basename(file).replace('.csv', '_clamp.csv')
        clamp_pth = os.path.join(clamp_dir, clamp_name)

        df_clamped.to_csv(clamp_pth, index=False)
        print(f"Clamped values saved to {clamp_pth}")
        clamp_lst.append(clamp_pth)
    clamp_files_lst.append(clamp_lst)

# pd.concat rejects an empty list, so fall back to an empty frame.
df_summary = pd.concat(summary_frames) if summary_frames else pd.DataFrame()

print(clamp_files_lst)

In [None]:
# Write the accumulated summary table to a timestamped CSV in the
# processed-output folder. The folder is created first because to_csv does
# not create missing directories.
os.makedirs(processed_output_dir, exist_ok=True)
out_pth = os.path.join(processed_output_dir, f"sumStats_{date}.csv")
df_summary.to_csv(out_pth, index=False)
print(f"Summary stats saved to {out_pth}")

In [None]:
# NOTE(review): this cell is an exact duplicate of the cell directly above it.
# With an unchanged `date` it rewrites the same sumStats CSV; consider
# deleting one of the two.
out_pth = os.path.join(processed_output_dir, f"sumStats_{date}.csv")
df_summary.to_csv(out_pth, index=False)
print  (f"Summary stats saved to {out_pth}")

In [None]:
#df_summary

In [None]:
# Reload plots so that edits to its source take effect in this session.
importlib.reload(plots)

In [None]:
# Group histograms: one figure per pair of clamped files.

# Folder that receives every group-histogram figure.
save_path = "/host/verges/tank/data/daniel/3T7T/z/outputs/fig_stats/hist_grp"

for files in clamp_files_lst:
    # Split both base names on '_'. Everything after the first token must be
    # identical, i.e. the two names may differ only in their leading
    # (study) token.
    mics_tokens = os.path.basename(files[0]).split('_')
    pni_tokens = os.path.basename(files[1]).split('_')

    if mics_tokens[1:] != pni_tokens[1:]:
        print("Error: file names do not match. Skipping: \n\t%s\n\t%s" %(mics_tokens, pni_tokens))
        continue

    # Tokens 1, 3, 4 and 5 are "key-value" pairs; keep only the value part.
    hemi, lbl, feat, smth = (mics_tokens[pos].split('-')[1] for pos in (1, 3, 4, 5))

    title = f"{feat}, smoothing: {smth} ({hemi}, {lbl})"

    # Build the output file name and draw the histogram for this pair.
    save_name = f"grpHist_{feat}_smth-{smth}_{hemi}_{lbl}_{date}.png"
    save = os.path.join(save_path, save_name)
    fig = plots.group_hist(files, labels=[title, "MICs","PNI"], save_path=save)


In [None]:
# Ridge plots (one line per participant) -- work in progress.
#
# Loads the MICs and PNI tables of the first file pair whose files both exist.
# The plotting itself is not implemented yet; see the TODO notes below.
for file in files_lst:
    # Each entry is unpacked as a (MICs file, PNI file) pair. The previous
    # version used mics_file / pni_file without ever defining them.
    # NOTE(review): the [MICs, PNI] order is assumed from the group-histogram
    # cell, which treats element 0 as MICs and element 1 as PNI -- confirm
    # against plots.corresp_paths.
    mics_file, pni_file = file

    # check that both files exist; if not, continue to the next pair
    if not (os.path.isfile(mics_file) and os.path.isfile(pni_file)):
        print(f"Skipping pair with missing file: \n\t{mics_file}\n\t{pni_file}")
        continue

    # read in data
    df_mics = pd.read_csv(mics_file, index_col=False)
    df_pni = pd.read_csv(pni_file, index_col=False)

    # TODO: remove participants with NaN in either table
    #       (df_mics.dropna() / df_pni.dropna()).
    # TODO: remap column names according to the 3T-7T ID correspondence, then
    #       keep only the columns present in both tables
    #       (df_mics.columns.intersection(df_pni.columns)).
    # TODO: build one histogram per participant and draw the ridge plot,
    #       e.g. fig = plots.ridge(df_mics, matrix_df=df_mics); fig.show()

    # Stop after the first loaded pair while the plotting step is unfinished.
    break

In [None]:

        # NOTE(review): this cell looks like an orphaned loop body. It is
        # indented but no enclosing statement is visible, and `path` is not
        # assigned in any visible cell. Restore the intended loop header (or
        # dedent and define `path`) before running.
        print(path)
        
        df = pd.read_csv(path)
        # presumably builds a stacked histogram figure from the table --
        # plots.histStack is not visible here, confirm
        fig = plots.histStack(df)
        # display plot
        fig.show()