In [None]:
import pandas as pd
import os
import json
import numpy as np
from itertools import groupby
import matplotlib.pyplot as plt
from scipy import stats,signal
import matplotlib as mpl
from sklearn.linear_model import LogisticRegression
import random
import re
import csv
from IPython.display import HTML, display, Image
import tabulate
import math as m
import warnings
warnings.filterwarnings('ignore')
import statsmodels.api as sm
import statsmodels.formula.api as smf
import itertools
from statsmodels.stats.anova import AnovaRM

In [None]:
# Walk three directory levels up from the notebook's working directory to
# reach the project root; each intermediate level keeps its own name.
current_path = os.path.abspath(os.getcwd())
parent_path = os.path.dirname(current_path)
grand_parent_path = os.path.dirname(parent_path)
main_path = os.path.dirname(grand_parent_path)

# Folder holding the gabor-task result files read by the cells below.
path_results = f'{main_path}/results/gabor/'

In [None]:
import sys
# insert at 1, 0 is the script path (or '' in REPL)
# Puts <main_path>/scr on the module search path so that the project-local
# helper module `my_functions` can be imported below.
# NOTE(review): '/scr' may be a typo for '/src' -- confirm against the
# repository layout before changing it.
sys.path.insert(1, main_path+'/scr')
import my_functions as myf

In [None]:
# Global matplotlib styling applied to every figure drawn in this notebook.
mpl.rcParams.update({
    # line and marker weight
    'lines.linewidth': 3,
    'lines.markersize': 10,
    # font sizes
    'axes.titlesize': 18,
    'axes.labelsize': 18,
    'xtick.labelsize': 20,
    'ytick.labelsize': 20,
    # frame and tick widths
    'axes.linewidth': 1,
    #'xtick.major.size': 20,
    'xtick.major.width': 1,
    #'xtick.minor.size': 10,
    'xtick.minor.width': 1,
    'ytick.major.width': 1,
    'ytick.minor.width': 1,
    # hide the right and top spines
    'axes.spines.right': False,
    'axes.spines.top': False,
})

# Experiment layout: ten days, two sessions per day.
fday = list(range(1, 11))
fsession = [1, 2]
unique_signals = [1, 2, 3]

# Pre-computed tables written to the results folder.
adf = pd.read_csv(f'{path_results}preanalyzed.csv')
df_diff = pd.read_csv(f'{path_results}per_difficulty.csv')

# Sorted unique participant and session identifiers found in `adf`.
userids = sorted(adf['userID_x'].unique())
nsub = len(userids)
sessionids = sorted(adf['sessionID_x'].unique())

# Column-name groups kept for later reference.
key_PV = [
    'RT_no_0', 'RT_no_3',
    'Doptout_0', 'Doptout_3',
    'Soptout_0', 'Soptout_3',
]
key_SR = ['mood', 'food', 'real_stress', 'sleep']

In [None]:
# Per-participant averages across sessions: for every user, stack the
# per-session 'RT_no' and 'Doptout' vectors and take the NaN-aware mean
# along the session axis.
mas = {}
ind = 0
for ind, part in enumerate(userids, start=1):
    RT, DO = [], []
    for Day, Ses in itertools.product(fday, fsession):
        # Sessions are numbered consecutively over days (two per day).
        sessionid = 2 * (Day - 1) + Ses
        user_sessionID = f'{part}_{sessionid}'
        # These three user/session combinations are skipped.
        if user_sessionID in ('1011_11', '1011_18', '1014_12'):
            continue
        filename = f'{path_results}day{Day}/session{Ses}/diff_Sub{part}_Day{Day}_Sess{Ses}.json'
        with open(filename) as f:
            data = json.load(f)
        RT.append(data['RT_no'])
        DO.append(data['Doptout'])
    mas[part] = {
        'RTas': np.nanmean(RT, axis=0),
        'DOas': np.nanmean(DO, axis=0),
    }

In [None]:
# Long-format table with one row per (participant, session, difficulty).
#   RT / DO       : per-difficulty values read from the session's JSON file
#   RTeff / DOeff : the same values minus the participant's across-session
#                   mean for that difficulty (taken from `mas`)
#   subject       : 1-based position of the participant in `userids`
#
# The per-session frames are collected in a list and concatenated once.
# Growing `df` with pd.concat inside the loop is quadratic, and seeding it
# with an empty object-dtype frame relies on pandas' deprecated special
# handling of empty frames when resolving the result dtypes.
columns = ['RT','RTeff','DO','DOeff','difficulty','session','subject']
frames = []
ind = 0
for ind, part in enumerate(userids, start=1):
    for Day in fday:
        for Ses in fsession:
            sessionid = 2*Day-2+Ses
            user_sessionID = str(part)+'_'+str(sessionid)
            # These three user/session combinations are skipped.
            if user_sessionID in ('1011_11', '1011_18', '1014_12'):
                continue
            filename = path_results+'day'+str(Day)+'/session'+str(Ses)+'/diff_Sub'+str(part)+'_Day'+str(Day)+'_Sess'+str(Ses)+'.json'
            with open(filename) as f:
                data = json.load(f)
            # dtype=float turns JSON nulls into NaN so the subtraction is well defined.
            rt = np.asarray(data['RT_no'], dtype=float)
            do = np.asarray(data['Doptout'], dtype=float)
            frames.append(pd.DataFrame({'RT': rt, 'RTeff': rt - mas[part]['RTas'],
                                        'DO': do, 'DOeff': do - mas[part]['DOas'],
                                        'difficulty': [1,2,3], 'session': [sessionid]*3, 'subject': [ind]*3}))

if frames:
    df = pd.concat(frames, ignore_index=True)[columns]
else:
    # No session file was read: keep the expected (empty) schema.
    df = pd.DataFrame(columns=columns)

In [None]:
# Preview the first rows of the per-session, per-difficulty table.
df.head()

In [None]:
# Raw user ID stored at position 17 of the sorted ID list, i.e. the
# participant whose 1-based 'subject' index in `df` is 18.
userids[17]

In [None]:
# Show the rows whose RT value exceeds 2.
df.loc[df['RT'].gt(2)]

In [None]:
# Display `df` without the rows that contain a missing value.
# NOTE(review): the result is not assigned, so `df` itself still holds those
# rows in every later cell -- confirm this is only meant as a preview.
df.dropna()

In [None]:
# Mean and standard deviation of every remaining column for each
# (subject, difficulty) pair, flattened back to ordinary columns.
group_keys = ['subject', 'difficulty']
dfMAS = df.groupby(group_keys).mean().reset_index()
dfSTD = df.groupby(group_keys).std().reset_index()

In [None]:
# Preview the per-subject, per-difficulty means.
dfMAS.head()

In [None]:
# Per-participant linear regression of DOeff on RTeff across all of that
# participant's (session, difficulty) rows. r_ collects the correlation
# coefficients and p_ the matching p-values, in ascending subject order.
#
# Subjects are taken from `df` itself rather than from a hard-coded 1..23
# range, which disagreed with the 1..27 range used by the ANOVA cells and
# left out any participant beyond the 23rd.
r_, p_ = [],[]
for part in sorted(df['subject'].unique()):
    subset = df[df['subject']==part]
    RTeff = subset['RTeff'].astype(float)
    DOeff = subset['DOeff'].astype(float)
    # Use only the rows where both measures are present.
    mask = RTeff.notna() & DOeff.notna()
    slope, intercept, r, p, se = stats.linregress(RTeff[mask], DOeff[mask])
    r_.append(r)
    p_.append(p)

In [None]:
# Partition the per-participant coefficients at p = 0.05, then test whether
# the coefficients differ from zero on average (one-sample t-test).
sig_ = np.nonzero(np.asarray(p_) < 0.05)
nos_ = np.nonzero(np.asarray(p_) >= 0.05)
r_sig = [r_[idx] for idx in sig_[0]]
r_nos = [r_[idx] for idx in nos_[0]]
p_value = stats.ttest_1samp(r_, 0).pvalue

In [None]:
# Stacked histogram of the per-participant correlation coefficients
# (significant vs. non-significant), annotated with the mean r and the
# t-test p-value.
#
# The bin edges come from np.linspace so that the last edge is exactly 1:
# np.arange(-1, 1, 0.1) stops at 0.9, which silently drops every
# coefficient in (0.9, 1] from the plot.
plt.hist([r_sig,r_nos],bins=np.linspace(-1,1,21),alpha=0.7,histtype='bar', stacked=True,
                        color=[[0.8,0.2,0.6],[1,0.9,0.95]], edgecolor=[0.8,0.2,0.6], linewidth=2)
plt.text(-0.95,2.5,'mean r: '+str(myf.roundP(np.mean(r_))), ha='left', wrap=True,fontsize=18)
plt.text(-0.95,1.5,'ttest p: '+str(myf.roundP(p_value)), ha='left', wrap=True,fontsize=18)
plt.xlim(-1,1)
plt.ylabel('counts')
plt.xlabel('Pearson corr. coeff.')
plt.savefig('RT&DO_corr_per_diff.png')
plt.show()

In [None]:
# Mean RT against mean DO per difficulty for subjects 1-9, one line per
# subject. Error bars are the per-difficulty standard deviation divided by
# sqrt(20).
fig = plt.figure(figsize=(18, 10))
for part in range(1, 10):
    subMAS = dfMAS.loc[dfMAS['subject'] == part]
    subSTD = dfSTD.loc[dfSTD['subject'] == part]
    plt.errorbar(
        x=subMAS['DO'],
        y=subMAS['RT'],
        xerr=subSTD['DO'] / np.sqrt(20),
        yerr=subSTD['RT'] / np.sqrt(20),
    )
plt.ylabel('RT')
plt.xlabel('DO')
plt.savefig('RTvsDO.png')

In [None]:
# One repeated-measures ANOVA per participant: RT as the dependent variable,
# difficulty as the within factor, and each session treated as the repeated
# unit. Prints the subject index followed by the fitted ANOVA table.
#
# Subjects are read from `df` instead of a hard-coded 1..27 range, which did
# not match the 1..23 range used by the regression cells.
for part in sorted(df['subject'].unique()):
    subset = df[df['subject']==part]
    print(part)
    print(AnovaRM(data=subset, depvar='RT', subject='session', within=['difficulty']).fit())

In [None]:
# One repeated-measures ANOVA per participant: DO as the dependent variable,
# difficulty as the within factor, and each session treated as the repeated
# unit. Prints the subject index followed by the fitted ANOVA table.
#
# Subjects are read from `df` instead of a hard-coded 1..27 range, which did
# not match the 1..23 range used by the regression cells.
for part in sorted(df['subject'].unique()):
    subset = df[df['subject']==part]
    print(part)
    print(AnovaRM(data=subset, depvar='DO', subject='session', within=['difficulty']).fit())

In [None]:
# Mean RT against mean DOeff per difficulty for subjects 1-9, one line per
# subject. Error bars are the per-difficulty standard deviation divided by
# sqrt(20).
fig = plt.figure(figsize=(18, 10))
for part in range(1, 10):
    subMAS = dfMAS.loc[dfMAS['subject'] == part]
    subSTD = dfSTD.loc[dfSTD['subject'] == part]
    plt.errorbar(
        x=subMAS['DOeff'],
        y=subMAS['RT'],
        xerr=subSTD['DOeff'] / np.sqrt(20),
        yerr=subSTD['RT'] / np.sqrt(20),
    )
plt.ylabel('RT')
plt.xlabel('DOeff')
plt.savefig('RTvsDOeff.png')

In [None]:
# Per-participant linear regression of mean DOeff on mean RT, using the
# per-difficulty means in `dfMAS`. r_ and p_ are rebuilt here, replacing
# the values computed by the earlier regression cell.
#
# Subjects are taken from `dfMAS` itself rather than from a hard-coded 1..23
# range, which disagreed with the 1..27 range used by the ANOVA cells.
r_, p_ = [],[]
for part in sorted(dfMAS['subject'].unique()):
    subset = dfMAS[dfMAS['subject']==part]
    rt = subset['RT'].astype(float)
    DOeff = subset['DOeff'].astype(float)
    # Use only the difficulty levels where both means are present.
    mask = rt.notna() & DOeff.notna()
    slope, intercept, r, p, se = stats.linregress(rt[mask], DOeff[mask])
    r_.append(r)
    p_.append(p)

In [None]:
# Partition the per-participant coefficients at p = 0.05.
sig_ = np.nonzero(np.asarray(p_) < 0.05)
nos_ = np.nonzero(np.asarray(p_) >= 0.05)
r_sig = [r_[idx] for idx in sig_[0]]
r_nos = [r_[idx] for idx in nos_[0]]

In [None]:
# Stacked histogram (significant vs. non-significant) of the per-participant
# coefficients from the mean-RT / mean-DOeff regression, zoomed to [0.7, 1].
#
# Two fixes relative to the earlier version of this cell:
#  * bin edges come from np.linspace, so the last edge is exactly 1;
#    np.arange excludes its stop value and, with a fractional step, the
#    number of edges depends on floating-point rounding, so coefficients
#    close to 1 could fall outside the last bin;
#  * the figure is saved under its own file name; it used to be written to
#    'RT&DO_corr_per_diff.png', overwriting the histogram saved by the
#    RTeff/DOeff correlation cell.
plt.hist([r_sig,r_nos],bins=np.linspace(0.7,1,31),alpha=0.7,histtype='bar', stacked=True,
                        color=['gray',[0.95,0.95,0.95]], edgecolor='gray', linewidth=2)
plt.xlim(0.7,1)
plt.ylabel('counts')
plt.xlabel('Pearson corr. coeff.')
plt.savefig('RT&DOeff_corr_across_diff.png')
plt.show()

In [None]:
# Keep only the rows recorded at difficulty level 1.
df1 = df.loc[df['difficulty'].eq(1)]

In [None]:
# Remove rows with missing values before the model in the next cell is fit.
# The bare `df1.dropna()` only displayed a cleaned copy and left `df1`
# itself unchanged; the result is now kept. Re-running this cell is
# harmless.
df1 = df1.dropna()
df1.head()

In [None]:
# Mixed linear model on the difficulty-1 rows: RT explained by DO, with
# observations grouped by subject. Prints the summary table, the fitted
# parameters and their p-values.
mm_RT_DO = smf.mixedlm(formula="RT ~ DO", data=df1, groups=df1["subject"])
mdf_RT_DO = mm_RT_DO.fit()
print(mdf_RT_DO.summary(), mdf_RT_DO.params, mdf_RT_DO.pvalues, sep='\n')

In [None]:
# Mixed linear model on all rows: RT explained by the DO-by-difficulty
# interaction, grouped by subject, with a random-effects formula of '1' and
# a variance component built from `session`.
vc = dict(session='0 + session')
mm_RT_DO = smf.mixedlm(
    formula="RT ~ DO:difficulty",
    data=df,
    vc_formula=vc,
    re_formula='1',
    groups=df["subject"],
)
mdf_RT_DO = mm_RT_DO.fit()
print(mdf_RT_DO.summary(), mdf_RT_DO.params, mdf_RT_DO.pvalues, sep='\n')

In [None]:
# Mixed linear model on all rows: RT explained by DO, grouped by subject,
# with a random-effects formula of '1' and variance components built from
# `session` and `difficulty`.
vc = dict(session='0 + session', difficulty='0 + difficulty')
mm_RT_DO = smf.mixedlm(
    formula="RT ~ DO ",
    data=df,
    vc_formula=vc,
    re_formula='1',
    groups=df["subject"],
)
mdf_RT_DO = mm_RT_DO.fit()
print(mdf_RT_DO.summary(), mdf_RT_DO.params, mdf_RT_DO.pvalues, sep='\n')

In [None]:
# Mixed linear model on all rows: RT explained by difficulty plus the
# DO-by-difficulty interaction, grouped by subject, with a random-effects
# formula of '1' and a variance component built from `session`.
vc = dict(session='0 + session')
mm_RT_DO = smf.mixedlm(
    formula="RT ~ DO:difficulty + difficulty",
    data=df,
    vc_formula=vc,
    re_formula='1',
    groups=df["subject"],
)
mdf_RT_DO = mm_RT_DO.fit()
print(mdf_RT_DO.summary(), mdf_RT_DO.params, mdf_RT_DO.pvalues, sep='\n')

In [None]:
# Mixed linear model on all rows: RT explained by DO, grouped by subject,
# with a random-effects formula of '1' and a variance component built from
# `session`. Also prints the fit's `aic` attribute.
vc = dict(session='0 + session')
mm_RT_DO = smf.mixedlm(
    formula="RT ~ DO",
    data=df,
    vc_formula=vc,
    re_formula='1',
    groups=df["subject"],
)
mdf_RT_DO = mm_RT_DO.fit()
print(mdf_RT_DO.summary(), mdf_RT_DO.params, mdf_RT_DO.pvalues, mdf_RT_DO.aic, sep='\n')

In [None]:
# Mixed linear model on all rows: RT explained by the DO-by-difficulty
# interaction, grouped by subject, with no extra variance components.
mm_RT_DO = smf.mixedlm(formula="RT ~ DO:difficulty", data=df, groups=df["subject"])
mdf_RT_DO = mm_RT_DO.fit()
print(mdf_RT_DO.summary(), mdf_RT_DO.pvalues, sep='\n')

In [None]:
# Mixed linear model on all rows: RT explained by DO and difficulty, grouped
# by subject, with a random-effects formula of '1' and a variance component
# built from `session` wrapped in C(), i.e. treated as categorical.
vc = dict(session='0 + C(session)')
mm_RT_DO = smf.mixedlm(
    formula="RT ~ DO + difficulty",
    data=df,
    vc_formula=vc,
    re_formula='1',
    groups=df["subject"],
)
mdf_RT_DO = mm_RT_DO.fit()
print(mdf_RT_DO.summary(), mdf_RT_DO.pvalues, sep='\n')

In [None]:
# Session-level columns used by the mixed model at the end of the notebook.
# .copy() makes Mdf an independent frame, so the 'subject' column added in a
# later cell is not written onto a slice of `adf`.
# NOTE(review): 'OKubj_RT_no' looks like a mangled column name, and this
# cell reads 'userID' while the loading cell reads 'userID_x' -- confirm
# both against the columns of preanalyzed.csv.
Mdf = adf[['OKubj_RT_no','Dsubj_optout_oo','sessionID_x','userID']].copy()

In [None]:
# Distinct user IDs in order of first appearance in Mdf.
ids = [user_id for user_id in Mdf['userID'].unique()]

In [None]:
# Map every user ID to its 1-based position in `ids`.
dict_ids = {k: position for position, k in enumerate(ids, start=1)}

In [None]:
# Translate each raw userID into its 1-based subject index. assign() rebinds
# Mdf to a new frame instead of writing a column into what pandas may treat
# as a slice of `adf`; that write raises a SettingWithCopyWarning, which the
# notebook's global warnings filter hides.
Mdf = Mdf.assign(subject=Mdf['userID'].map(dict_ids))

In [None]:
# Remove rows with missing values before the model in the next cell is fit.
# The bare `Mdf.dropna()` only displayed a cleaned copy and left `Mdf`
# itself unchanged; the result is now kept. Re-running this cell is
# harmless.
Mdf = Mdf.dropna()
Mdf.head()

In [None]:
# Mixed linear model on the session-level table: 'OKubj_RT_no' explained by
# 'Dsubj_optout_oo', grouped by subject. Prints the summary table, the
# fitted parameters and their p-values.
mm_RT_DO = smf.mixedlm(
    formula="OKubj_RT_no ~ Dsubj_optout_oo",
    data=Mdf,
    groups=Mdf["subject"],
)
mdf_RT_DO = mm_RT_DO.fit()
print(mdf_RT_DO.summary(), mdf_RT_DO.params, mdf_RT_DO.pvalues, sep='\n')