## Read SCR files and compute changes and correlation with brain activity/connectivity

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import glob
import scipy

In [None]:
# Collect the per-subject SCR result files of each session (1-4) as sorted path lists.
ses1_files = sorted(glob.glob('/media/Data/Lab_Projects/KPE_PTSD_Project/behavioral/results/sub-*_ses-1.csv'))
ses2_files = sorted(glob.glob('/media/Data/Lab_Projects/KPE_PTSD_Project/behavioral/results/sub-*_ses-2.csv'))
ses3_files = sorted(glob.glob('/media/Data/Lab_Projects/KPE_PTSD_Project/behavioral/results/sub-*_ses-3.csv'))
ses4_files = sorted(glob.glob('/media/Data/Lab_Projects/KPE_PTSD_Project/behavioral/results/sub-*_ses-4.csv'))


In [None]:
def generateSCR_data(files):
    """Summarise SCR result files into one data-frame row per file.

    Each path is read as a tab-separated table (the original note says these
    files come from neurokit2.ipynb). Rows are split by the ``Condition``
    column into 'trauma' and 'relax' trials.

    Parameters
    ----------
    files : iterable of str
        Paths containing ``sub-<id>_ses``; ``<id>`` is prefixed with 'KPE' to
        form the subject identifier.

    Returns
    -------
    pandas.DataFrame
        Columns, in order: ``scr_id``; ``peakTrauma1`` / ``peakRelax`` (first
        trauma / relax ``EDA_Peak_Amplitude``); ``T_R1``..``T_R3`` (trauma minus
        relax amplitude for the first three trials); ``trauma1_scrMean`` (first
        trauma ``SCR_Peak_Amplitude``); ``trauma1_timeSCR`` (first trauma
        ``SCR_Peak_Amplitude_Time``).

    Raises
    ------
    IndexError
        If a file has fewer than three trauma-minus-relax values.
    """
    # Column name -> accumulated values; insertion order fixes the column order.
    columns = {
        'scr_id': [], 'peakTrauma1': [], 'peakRelax': [], 'T_R1': [],
        'T_R2': [], 'T_R3': [], 'trauma1_scrMean': [], 'trauma1_timeSCR': [],
    }
    for path in files:
        subject_number = path.split('sub-')[1].split('_ses')[0]
        table = pd.read_csv(path, sep='\t')

        is_trauma = table.Condition == 'trauma'
        is_relax = table.Condition == 'relax'
        trauma_eda = table.loc[is_trauma, 'EDA_Peak_Amplitude'].values
        relax_eda = table.loc[is_relax, 'EDA_Peak_Amplitude'].values
        trauma_scr = table.loc[is_trauma, 'SCR_Peak_Amplitude'].values
        trauma_scr_time = table.loc[is_trauma, 'SCR_Peak_Amplitude_Time'].values

        # trial-by-trial contrast trauma - relax
        contrast = trauma_eda - relax_eda

        columns['scr_id'].append('KPE' + subject_number)
        columns['T_R1'].append(contrast[0])
        columns['T_R2'].append(contrast[1])
        columns['T_R3'].append(contrast[2])
        columns['peakTrauma1'].append(trauma_eda[0])
        columns['peakRelax'].append(relax_eda[0])
        columns['trauma1_scrMean'].append(trauma_scr[0])
        columns['trauma1_timeSCR'].append(trauma_scr_time[0])
    return pd.DataFrame(columns)

In [None]:
# One summary frame per session (one row per result file).
data1, data2, data3, data4 = map(
    generateSCR_data, (ses1_files, ses2_files, ses3_files, ses4_files)
)

In [None]:
# Give the first-trauma peak of sessions 2-4 a session-specific column name so
# the sessions can sit side by side in one frame; session 1 keeps 'peakTrauma1'.
data2 = data2.rename({'peakTrauma1': 'peakTrauma1_ses2'}, axis='columns')
data3 = data3.rename({'peakTrauma1': 'peakTrauma1_ses3'}, axis='columns')
data4 = data4.rename({'peakTrauma1': 'peakTrauma1_ses4'}, axis='columns')

In [None]:
# Display the session-1 summary frame.
data1

In [None]:
# Combine the first-trauma peaks of all four sessions into one frame.
# Outer merges keep every subject; sessions without a file are left as NaN.
dataSessions = (
    data1[['scr_id', 'peakTrauma1']]
    .merge(data2[['scr_id', 'peakTrauma1_ses2']], how='outer')
    .merge(data3[['scr_id', 'peakTrauma1_ses3']], how='outer')
    .merge(data4[['scr_id', 'peakTrauma1_ses4']], how='outer')
)
dataSessions

In [None]:
# Per-subject medication assignment table.
medication_cond = pd.read_csv('/home/or/kpe_task_analysis/task_based_analysis/kpe_sub_condition.csv')
subject_list = np.array(medication_cond['scr_id'])
condition_label = np.array(medication_cond['med_cond'])

group_label = [int(code) for code in condition_label]
# Keep the numeric code as 'groupIdx' before 'med_cond' is replaced by drug names.
medication_cond['groupIdx'] = medication_cond['med_cond']
medication_cond = medication_cond.replace({'med_cond': {0.0: 'midazolam', 1.0: 'ketamine'}})

In [None]:

# Attach the medication table to the per-session trauma peaks.
datAll = medication_cond.merge(dataSessions)
# Change of each session's first-trauma peak relative to session 1
# ('trauma1_1' is the session-1 peak minus itself).
for delta_col, peak_col in [('trauma1_1', 'peakTrauma1'),
                            ('trauma2_1', 'peakTrauma1_ses2'),
                            ('trauma3_1', 'peakTrauma1_ses3'),
                            ('trauma4_1', 'peakTrauma1_ses4')]:
    datAll[delta_col] = datAll[peak_col] - datAll['peakTrauma1']
datAll

In [None]:
# Session-4 minus session-1 trauma peak, split by medication group.
sns.boxplot(y = 'trauma4_1', x= 'med_cond', data=datAll)

## Now let's run a Bayesian model of the three data points

In [None]:
# First, melt the data frame to long format: one row per subject and time
# point, with the change-from-session-1 value in 'SCRDiff'.
dfAll_melt = datAll.melt(id_vars=['scr_id','groupIdx'],value_vars=['trauma1_1','trauma2_1','trauma3_1','trauma4_1'],
                         var_name='time', value_name='SCRDiff')
# x and y are passed by keyword: recent seaborn versions reject positional
# data arguments, and every other seaborn call in this notebook uses keywords.
sns.boxplot(x='time', y='SCRDiff', hue='groupIdx', data=dfAll_melt)

In [None]:
# Examine each subject's change over time using plotly:
# one panel per group, one line per subject.
import plotly.express as px
px.line(dfAll_melt, x='time', y='SCRDiff', color='scr_id', facet_col='groupIdx')

In [None]:
# Fit a linear model of SCRDiff on group, time and their interaction through
# pymc3's formula interface, then draw posterior samples.
import pymc3 as pm
from pymc3.glm import GLM

with pm.Model() as model_glm:
    GLM.from_formula('SCRDiff ~ groupIdx* time', dfAll_melt)
    # 4000 kept draws after 2000 tuning steps; no random_seed is passed here.
    trace = pm.sample(draws=4000, tune=2000)

In [None]:
# Posterior summary table for the GLM trace, requesting a 95% interval.
pm.summary(trace, credible_interval=.95)

In [None]:
# Encode the melted 'time' labels as integer category codes for indexing
# per-time-point parameters in the models below.
dfAll_melt['time'] = dfAll_melt['time'].astype('category')
timeIDX = dfAll_melt['time'].cat.codes.values
print(timeIDX)

In [None]:
# Run a slightly more complicated model (similar to the one run in the Symptoms calculation).
# Mean structure: global intercept + per-time-point term + medication term.
with pm.Model() as hierarchical_model:
   
    # Global intercept.
    a = pm.Normal('a', mu=0, sigma=10)
    # One coefficient per time point (shape=4), each with its own Normal(0, 10) prior.
    b = pm.Normal('b', mu=0, sigma=10, shape=4) # for each time point
    
    # Coefficient for the group (i.e. medication condition, 'groupIdx').
    bMed = pm.Normal('bMed', mu=0, sigma=5)
    # Model error
    eps = pm.HalfCauchy('eps', 10)

    # NOTE(review): b[timeIDX] is multiplied by the integer code timeIDX, so the
    # coefficient for code 0 drops out of the mean and the others are scaled by
    # their code. Confirm this is intended rather than a plain b[timeIDX].
    y_hat = a + b[timeIDX]*timeIDX + bMed*dfAll_melt.groupIdx 

    # Data likelihood
    likelihood = pm.Normal('likelihood', mu=y_hat,
                           sigma=eps, observed=dfAll_melt.SCRDiff)
    # 4000 draws after 3000 tuning steps, seeded for reproducibility.
    hierarchical_trace = pm.sample(4000, tune=3000, random_seed=1313)

In [None]:
# Posterior summary table for the time-point model, requesting a 95% interval.
pm.summary(hierarchical_trace, credible_interval=.95)

In [None]:
# Encode subjects as integer category codes and count the distinct subjects,
# for indexing per-subject parameters in the model below.
dfAll_melt['scr_id'] = pd.Categorical(dfAll_melt['scr_id'])
sub_idx = dfAll_melt['scr_id'].cat.codes.values
n_subs = np.unique(dfAll_melt['scr_id']).size
print(n_subs)

In [None]:
# Run with subjects as a random variable (reparameterized: the per-subject
# offset is a standard-normal z scaled by sigma_a).
with pm.Model() as random_model:
   
    
    # Global intercept.
    a = pm.Normal('a', mu=0, sigma=10)
    # One coefficient per time point (shape=4), each with its own Normal(0, 10) prior.
    b = pm.Normal('b', mu=0, sigma=10, shape=4) # for each time point
    
    # Scale of the per-subject offsets, and one standard-normal offset per subject.
    sigma_a = pm.Exponential('sigma_a', 1)
    z = pm.Normal('z',mu=0, sigma=1, shape = n_subs)
    # Coefficient for the group (i.e. medication condition, 'groupIdx').
    bMed = pm.Normal('bMed', mu=0, sigma=5)
    # Model error
    eps = pm.HalfCauchy('eps', 10)

    # NOTE(review): as in the previous model, b[timeIDX] is multiplied by the
    # integer code timeIDX, so the coefficient for code 0 drops out of the mean.
    # Confirm this is intended rather than a plain b[timeIDX].
    y_hat = a + z[sub_idx]*sigma_a + b[timeIDX]*timeIDX + bMed*dfAll_melt.groupIdx 

    # Data likelihood
    likelihood = pm.Normal('likelihood', mu=y_hat,
                           sigma=eps, observed=dfAll_melt.SCRDiff)
    # 4000 draws after 2000 tuning steps, seeded for reproducibility.
    random_trace = pm.sample(4000, tune=2000, random_seed=123)

In [None]:
# Posterior summary table for the per-subject model, requesting a 95% interval.
pm.summary(random_trace, credible_interval=.95)

In [None]:
# Compare the three fitted models (time-point model, GLM, per-subject model)
# with the 'loo' information criterion to see which fits best.
# 'trace' must still be the GLM trace sampled above when this runs.
pm.compare({'hierarchical': hierarchical_trace, 'glm': trace, 'random_trace': random_trace}, ic='loo')

In [None]:
# Add the session-1 relax/contrast columns and the session-2 columns to datAll.
#
# datAll was built from dataSessions, which carries only the trauma peaks, while
# the cells below also read 'peakRelax' and 'T_R1'. Pull the session-1 columns
# from data1 first; a left merge keeps every row already in datAll, and columns
# already present are skipped so nothing is duplicated.
baseline_cols = [col for col in ("peakRelax", "T_R1", "T_R2", "T_R3")
                 if col not in datAll.columns]
if baseline_cols:
    datAll = pd.merge(datAll, data1[["scr_id"] + baseline_cols], on="scr_id", how="left")

# data2's trauma peak was renamed to 'peakTrauma1_ses2' in an earlier cell, so
# selecting 'peakTrauma1' raised KeyError. Rename from whichever name is present
# (rename ignores mapping keys that are not columns).
data2 = data2.rename(columns={"peakTrauma1_ses2": "peakTrauma2", "peakTrauma1": "peakTrauma2",
                              "peakRelax": "peakRelax2",
                              "T_R1": "T_R1_2",
                              "T_R2": "T_R2_2", "T_R3": "T_R3_2"})
data2 = data2[["scr_id", "peakTrauma2", "peakRelax2", "T_R1_2", "T_R2_2", "T_R3_2"]] # pick just those values
# Inner merge: only subjects with a session-2 file remain.
datAll = pd.merge(datAll, data2)
datAll

In [None]:
# Session-2 first-trauma peak by medication group, followed by an
# independent-samples t-test between the groups coded 0 and 1 in 'groupIdx'.
sns.boxplot(x='med_cond', y='peakTrauma2', data=datAll)
scipy.stats.ttest_ind(
    datAll.loc[datAll['groupIdx'] == 0, 'peakTrauma2'],
    datAll.loc[datAll['groupIdx'] == 1, 'peakTrauma2'],
)

In [None]:
# Session 2 minus session 1, for the first trauma-relax contrast, the first
# trauma peak and the first relax peak.
datAll['TR1_2vs1'] = datAll.T_R1_2 - datAll.T_R1
datAll['Trauma_2vs1'] = datAll.peakTrauma2 - datAll.peakTrauma1
# The relax change is taken against the session-1 relax peak; this previously
# subtracted the session-1 trauma peak, unlike the session-3 computation.
datAll['Relax_2vs1'] = datAll.peakRelax2 - datAll.peakRelax

In [None]:
# Change in the trauma-relax contrast (session 2 vs 1) by medication group,
# followed by an independent-samples t-test, midazolam vs ketamine.
sns.barplot(x='med_cond', y='TR1_2vs1', data=datAll, ci=68)
scipy.stats.ttest_ind(
    datAll.loc[datAll['med_cond'] == 'midazolam', 'TR1_2vs1'],
    datAll.loc[datAll['med_cond'] == 'ketamine', 'TR1_2vs1'],
)

In [None]:
# Change in the trauma peak (session 2 vs 1) by medication group,
# followed by an independent-samples t-test, midazolam vs ketamine.
sns.stripplot(x='med_cond', y='Trauma_2vs1', data=datAll)
scipy.stats.ttest_ind(
    datAll['Trauma_2vs1'][datAll['med_cond'] == 'midazolam'],
    datAll['Trauma_2vs1'][datAll['med_cond'] == 'ketamine'],
)

In [None]:
# Display the combined frame with the session 2 vs 1 deltas.
datAll

In [None]:
# Fit a linear model of the trauma-peak change (session 2 vs 1) on group.
# This rebinds 'model_glm' and 'trace', replacing the GLM fitted earlier.
import pymc3 as pm
from pymc3.glm import GLM

with pm.Model() as model_glm:
    GLM.from_formula('Trauma_2vs1 ~ groupIdx', datAll)
    # 2000 kept draws after 3000 tuning steps; no random_seed is passed here.
    trace = pm.sample(draws=2000, tune=3000)

In [None]:
# Posterior summary for the session 2 vs 1 GLM, rounded to two decimals.
pm.summary(trace, credible_interval=.95).round(2)

In [None]:
# Two-panel figure: left, per-subject change in SCR by group (strip + box plot);
# right, the posterior samples of the 'groupIdx' coefficient with its 95% interval.
# set variables
y = 'Trauma_2vs1'
x = 'med_cond'
dfPlot = datAll
# 2.5% and 97.5% quantiles of the posterior samples of the group coefficient.
ci = np.quantile(trace.groupIdx, [.025,.975])
# Wide left axis for the data, narrow right axis for the posterior.
fig, (ax1, ax2) = plt.subplots(1,2, figsize=(3, 5),gridspec_kw={'width_ratios': [1, .2],
                                                        'wspace':.1})
g1 = sns.stripplot(y= y, x=x, data=dfPlot, size = 8, ax=ax1)
# Semi-transparent boxes drawn on the same axis as the individual points.
sns.boxplot(y= y, x=x, data=dfPlot,  ax=ax1,
            boxprops=dict(alpha=.3))
# Posterior distribution drawn vertically so it shares the y direction of the data.
g2 = sns.distplot(trace['groupIdx'], ax = ax2, vertical=True)
# Vertical bar spanning the quantile interval computed above.
ax2.vlines(x=0.1,ymin=ci[0], ymax=ci[1], color='black', 
           linewidth = 2, linestyle = "-")

# Limits are read after drawing so they reflect the plotted data.
ylow = g1.get_ylim()[0] -.5 # set lower than actual lim
yhigh = g1.get_ylim()[1]
ax2.set_ylim(ylow, yhigh) # use the first graph's limits so both panels share a scale
ax2.yaxis.tick_right()
ax2.set_xticks([])
ax2.set_ylabel("Difference between groups", fontsize=14) 
ax2.yaxis.set_label_position("right")
ax1.set_ylim(ylow, yhigh)
ax1.set_ylabel("Change in SCR before/after treatment", fontsize=12)
ax1.set_xlabel("Group", fontsize=14)
# Written to the current working directory.
fig.savefig('SCRBeforeAfter.png', dpi=300, bbox_inches='tight')

In [None]:
# Display the lower y-limit of the left panel.
g1.get_ylim()[0]

In [None]:
# Posterior of the group coefficient, and the fraction of samples above zero.
sns.distplot(trace['groupIdx'])
(trace['groupIdx'] > 0).sum(axis=0) / len(trace['groupIdx'])

## Add session 3 (30 days follow-up)

In [None]:
# Add the session-3 (30 days follow-up) columns to a copy of datAll.
#
# data3's trauma peak was renamed to 'peakTrauma1_ses3' in an earlier cell, so
# selecting 'peakTrauma1' raised KeyError. Rename from whichever name is present
# (rename ignores mapping keys that are not columns).
data3 = data3.rename(columns={"peakTrauma1_ses3": "peakTrauma3", "peakTrauma1": "peakTrauma3",
                              "peakRelax": "peakRelax3", "T_R1": "T_R1_3",
                              "T_R2": "T_R2_3", "T_R3": "T_R3_3"})
data3 = data3[["scr_id", "peakTrauma3", "peakRelax3", "T_R1_3", "T_R2_3", "T_R3_3"]] # pick just those values
# Inner merge: only subjects with a session-3 file remain.
datAll_3= pd.merge(datAll, data3)
datAll_3.head()

In [None]:
# Session 3 minus session 1, for the first trauma-relax contrast, the first
# trauma peak and the first relax peak.
datAll_3['TR1_3vs1'] = datAll_3['T_R1_3'] - datAll_3['T_R1']
datAll_3['Trauma_3vs1'] = datAll_3['peakTrauma3'] - datAll_3['peakTrauma1']
datAll_3['Relax_3vs1'] = datAll_3['peakRelax3'] - datAll_3['peakRelax']

In [None]:
# Change in the trauma-relax contrast (session 3 vs 1) by medication group,
# followed by an independent-samples t-test, midazolam vs ketamine.
sns.boxplot(x='med_cond', y='TR1_3vs1', data=datAll_3)
scipy.stats.ttest_ind(
    datAll_3.loc[datAll_3['med_cond'] == 'midazolam', 'TR1_3vs1'],
    datAll_3.loc[datAll_3['med_cond'] == 'ketamine', 'TR1_3vs1'],
)

In [None]:
# Change in the relax peak (session 3 vs 1) by medication group,
# followed by an independent-samples t-test, midazolam vs ketamine.
sns.boxplot(x='med_cond', y='Relax_3vs1', data=datAll_3)
scipy.stats.ttest_ind(
    datAll_3['Relax_3vs1'][datAll_3['med_cond'] == 'midazolam'],
    datAll_3['Relax_3vs1'][datAll_3['med_cond'] == 'ketamine'],
)

In [None]:
# Estimation plot of the mean difference in 'Trauma_2vs1' between the two
# medication groups, listed in the order midazolam, ketamine.
import dabest

datAllDabest = dabest.load(data=datAll, x='med_cond', y='Trauma_2vs1', idx=('midazolam','ketamine'), ci=95)
# NOTE(review): this figure handle is not passed to the plot call below;
# confirm whether it is needed.
fig = plt.figure()
datAllDabest.mean_diff.plot()#.savefig('Figure1.pdf', dpi=300)


In [None]:
# Save the combined frame (without the index) to the current working directory.
datAll.to_csv('scr_deltas.csv', index=False)

In [None]:
# Display the frame that was written to 'scr_deltas.csv'.
datAll

In [None]:
# Long format of the first-trauma peaks for sessions 1-3:
# one row per subject and session, keyed by subject and medication group.
dat3 = datAll_3.loc[:, ["scr_id", "med_cond", "peakTrauma1", "peakTrauma2", "peakTrauma3"]]
dflong = dat3.melt(id_vars=['scr_id', "med_cond"],
                   value_vars=["peakTrauma1", "peakTrauma2", "peakTrauma3"])
dflong