# Analysis per difficulty

## Across subjects & sessions

In [None]:
import pandas as pd
import os
import json
import numpy as np
from itertools import groupby
import matplotlib.pyplot as plt
from scipy import stats
import matplotlib as mpl
from sklearn.linear_model import LogisticRegression
import random
import re
import csv
from IPython.display import HTML, display, Image
import tabulate
import math as m
import warnings
warnings.filterwarnings('ignore')
import statsmodels.api as sm
import statsmodels.formula.api as smf

# Global matplotlib styling applied to every figure drawn afterwards:
# thicker lines/axes/ticks and larger fonts.
mpl.rcParams.update({
    'lines.linewidth': 3,
    'axes.titlesize': 18,
    'axes.labelsize': 18,
    'lines.markersize': 10,
    'xtick.labelsize': 20,
    'ytick.labelsize': 20,
    'axes.linewidth': 3,
    # 'xtick.major.size': 20,   (left disabled, as before)
    'xtick.major.width': 4,
    # 'xtick.minor.size': 10,   (left disabled, as before)
    'xtick.minor.width': 2,
    'ytick.major.width': 4,
    'ytick.minor.width': 2,
})

# Values iterated over by the loading loops and used as the x-axis of the plots.
fday = list(range(1, 11))           # day indices 1..10
fsession = [1, 2]                   # session indices within a day
unique_signals = list(range(1, 5))  # the four difficulty levels plotted on the x-axis

In [None]:
def _one_level_up(path):
    """Return the absolute path of the directory containing *path*."""
    return os.path.abspath(os.path.join(path, os.pardir))


# Walk three directory levels up from the working directory to reach the
# project root, then point at the results folder used by this notebook.
current_path = os.path.abspath(os.getcwd())
parent_path = _one_level_up(current_path)
grand_parent_path = _one_level_up(parent_path)
main_path = _one_level_up(grand_parent_path)

path_results = main_path + '/results/dots/'

In [None]:
# Put <main_path>/src on the module search path (at position 1) so that the
# project-local helper module `my_functions` can be imported below.
import sys
sys.path.insert(1, main_path+'/src')
import my_functions as myf

In [None]:
# Load the pre-analysed table and list the participants it contains.
adf = pd.read_csv(f'{path_results}preanalyzed.csv')
userids = pd.unique(adf['userID'])

# Column-name lists: performance variables (each measure at levels 0 and 3)
# and self-report variables.
key_PV = [f'{measure}_{level}'
          for measure in ('RT_no', 'Doptout', 'Soptout')
          for level in (0, 3)]
key_SR = ['mood', 'food', 'real_stress', 'sleep']

In [None]:
# mas[part] holds, for each participant, the NaN-ignoring mean across all
# day/session files of the RT_no, Doptout and Soptout vectors (axis=0, i.e.
# one mean per position in the vector).
# NOTE(review): every day/session file is included here, also the sessions
# that a later cell leaves out of the output tables - confirm that is intended.
mas = {}
ind = 0
for part in userids:
    ind += 1
    RT, DO, SO = [], [], []
    for Day in fday:
        for Ses in fsession:
            sessionid = 2 * Day - 2 + Ses
            filename = (f'{path_results}day{Day}/session{Ses}/'
                        f'diff_Sub{part}_Day{Day}_Sess{Ses}.json')
            with open(filename) as f:
                data = json.load(f)
            RT.append(data['RT_no'])
            DO.append(data['Doptout'])
            SO.append(data['Soptout'])
    mas[part] = {
        'RTas': np.nanmean(RT, axis=0),
        'DOas': np.nanmean(DO, axis=0),
        'SOas': np.nanmean(SO, axis=0),
    }

In [None]:
# Metrics copied verbatim from each per-session JSON file; each entry is a
# list with one value per difficulty level.
col_names = ['RT_no','Doptout','Dperf_oo','Sperf_oo','perf_no','DRT_oo','SRT_oo','DRT_OKoo','SRT_OKoo','RT_noNOK',
             'Soptout']

# Column layout of the three output tables built by this cell.
df_columns = col_names+['RTeff','DOeff','SOeff','difficulty','sessionID','userID']
dfANOVA_columns = ['perf','type','difficulty','sessionID','userID']
df_LR_columns = ['slope_RT','slope_DO','slope_SO','intercept_RT','intercept_DO','intercept_SO',
                 'sessionID','userID','user_sessionID']

# "<userID>_<sessionID>" identifiers that are left out of all three tables.
excluded_sessions = {'3062_1', '3062_2', '3062_4'}

# Difficulty levels (loop-invariant, so defined once).
diff_vals = np.array([1, 2, 3, 4])
diff_list = [1, 2, 3, 4]
num_diff = len(diff_list)


def _fit_vs_log_difficulty(values):
    """Return (slope, intercept) of a linear fit of *values* on log(difficulty).

    NaN entries of *values* (and the matching difficulty levels) are dropped
    before fitting.
    """
    values = np.array(values)
    valid = ~np.isnan(values)
    slope, intercept, *_ = stats.linregress(np.log(diff_vals[valid]), values[valid])
    return slope, intercept


def _stack(parts, columns):
    """Concatenate *parts* once and order the columns; empty table if no parts."""
    if not parts:
        return pd.DataFrame(columns=columns)
    return pd.concat(parts, ignore_index=True).reindex(columns=columns)


# Collect one small frame per session and concatenate once at the end instead
# of growing the tables with pd.concat on every iteration.
df_parts, dfANOVA_parts, df_LR_parts = [], [], []
for part in userids:
    for Day in fday:
        for Ses in fsession:
            # Running session index: (Day 1, Ses 1) -> 1, (Day 1, Ses 2) -> 2, ...
            sessionid = 2*Day-2+Ses
            user_sessionID = str(part)+'_'+str(sessionid)
            if user_sessionID in excluded_sessions:
                continue

            filename = path_results+'day'+str(Day)+'/session'+str(Ses)+'/diff_Sub'+str(part)+'_Day'+str(Day)+'_Sess'+str(Ses)+'.json'
            with open(filename) as f:
                data = json.load(f)

            # Per-difficulty rows: raw metrics plus each metric's deviation
            # from the participant's across-session mean (mas).
            dict_ = {key: data[key] for key in col_names}
            dict_.update({'RTeff':np.array(data['RT_no'])-mas[part]['RTas'],
                          'DOeff':np.array(data['Doptout'])-mas[part]['DOas'],
                          'SOeff':np.array(data['Soptout'])-mas[part]['SOas'],
                          'difficulty':diff_list,'sessionID':[sessionid]*num_diff,'userID':[part]*num_diff})
            df_parts.append(pd.DataFrame(dict_))

            # Long format for the ANOVA: performance stacked by trial type.
            dANOVA = {'perf':data['perf_no']+data['Dperf_oo']+data['Sperf_oo'],
                      'type':['NO']*num_diff+['DO']*num_diff+['SO']*num_diff,
                      'difficulty':diff_list*3,'sessionID':[sessionid]*num_diff*3,'userID':[part]*num_diff*3}
            dfANOVA_parts.append(pd.DataFrame(dANOVA))

            slope_RT, intercept_RT = _fit_vs_log_difficulty(data['RT_no'])
            slope_DO, intercept_DO = _fit_vs_log_difficulty(data['Doptout'])
            slope_SO, intercept_SO = _fit_vs_log_difficulty(data['Soptout'])

            # Column names must match df_LR_columns: the previous 'session' /
            # 'subject' keys were dropped by reindex, leaving sessionID and
            # userID entirely NaN in df_LR.
            df_LR_parts.append(pd.DataFrame({
                'slope_RT':[slope_RT],'slope_DO':[slope_DO],'slope_SO':[slope_SO],
                'intercept_RT':[intercept_RT],'intercept_DO':[intercept_DO],'intercept_SO':[intercept_SO],
                'sessionID':[sessionid],'userID':[part],'user_sessionID':[user_sessionID]}))

df = _stack(df_parts, df_columns)
dfANOVA = _stack(dfANOVA_parts, dfANOVA_columns)
df_LR = _stack(df_LR_parts, df_LR_columns)

In [None]:
df.head()

In [None]:
dfLR = df_LR[['slope_RT', 'slope_DO', 'slope_SO', 'intercept_RT', 'intercept_DO',
       'intercept_SO', 'user_sessionID']]

In [None]:
len(dfLR['user_sessionID'].unique())

In [None]:
# DO NOT RUN AGAIN: writes dfLR to linearRegr_diff.csv in path_results,
# replacing any file already stored under that name.

dfLR.to_csv(path_results+'linearRegr_diff.csv',index=False)

In [None]:
# Distinct session and participant identifiers present in the assembled table
# (userids is re-derived here from df rather than from adf).
sessionids = pd.unique(df['sessionID'])
userids = pd.unique(df['userID'])

In [None]:
# Mean and standard error of every column, pooled over participants and
# sessions, for each difficulty level.
_by_difficulty = df.groupby(['difficulty'])
dfMAS = _by_difficulty.mean().reset_index()
dfSEM = _by_difficulty.sem().reset_index()

In [None]:
# Three side-by-side panels of mean +/- SEM against difficulty:
# performance, reaction time and opt-out.
fig, ax = plt.subplots(1, 3, figsize=(18, 5))
plt.subplots_adjust(wspace=0.3)

# For every panel: the (column, errorbar keyword arguments) pairs in drawing
# order, the y-axis label, and the legend labels / location. Legend labels
# are matched to the curves by drawing order.
panels = [
    ([('perf_no', {'c': 'g'}),
      ('Dperf_oo', {'c': 'r'}),
      ('Sperf_oo', {'c': 'b'})],
     'performance',
     ("non-optout", "Doptout", "Soptout"),
     'upper right'),
    ([('RT_no', {'c': 'g'}),
      ('RT_noNOK', {'c': 'm'}),
      ('DRT_OKoo', {'c': 'r'}),
      ('SRT_OKoo', {'c': 'b'}),
      ('DRT_oo', {'c': 'r', 'ls': '--'}),
      ('SRT_oo', {'c': 'b', 'ls': '--'})],
     'Reaction Time',
     ("correct non-optout", "incorrect non-optout", "correct Doptout", "correct Soptout",
      "Doptout", "Soptout"),
     'upper right'),
    ([('Doptout', {'c': 'r'}),
      ('Soptout', {'c': 'b'})],
     'Optout',
     ("Doptout", "Soptout"),
     'upper left'),
]

for axis, (curves, ylabel, legend_labels, legend_loc) in zip(ax, panels):
    for column, line_style in curves:
        axis.errorbar(unique_signals, dfMAS[column], dfSEM[column], **line_style)
    axis.set_ylabel(ylabel)
    axis.set_xlabel('Difficulty')
    axis.legend(legend_labels, loc=legend_loc, shadow=True)

plt.show()

In [None]:
# DO NOT RUN AGAIN: writes df to per_difficulty.csv in path_results,
# replacing any file already stored under that name.

df.to_csv(path_results+'per_difficulty.csv',index=False)

In [None]:
# DO NOT RUN AGAIN: writes dfANOVA to forANOVA.csv in path_results,
# replacing any file already stored under that name.

dfANOVA.to_csv(path_results+'forANOVA.csv',index=False)

In [None]:
Rdf = adf[['sessionID_x','userID','user_sessionID','mood','real_stress','food','sleep']]

In [None]:
df_LR = pd.merge(Rdf,df_LR,on='user_sessionID') 

In [None]:
df_LR.head()

In [None]:
df_LR_mor = df_LR[df_LR['sessionID_x']%2==1]

In [None]:
# Names of the regression-parameter columns (slope / intercept for RT, DO, SO)
# and of the self-report columns they are correlated with.
key_PV_slope = [f'slope_{tag}' for tag in ('RT', 'DO', 'SO')]
key_PV_intercept = [f'intercept_{tag}' for tag in ('RT', 'DO', 'SO')]
key_SR = ['mood', 'real_stress', 'food', 'sleep']

In [None]:
# (self-report, slope) pairs, grouped in one row per slope variable ...
pair_SR_PV_slope2plot = [[(sr, pv) for sr in key_SR] for pv in key_PV_slope]
# ... and the same pairs flattened, in the same order.
pair_SR_PV_slope = [pair for row in pair_SR_PV_slope2plot for pair in row]

In [None]:
# (self-report, intercept) pairs, grouped in one row per intercept variable ...
pair_SR_PV_intercept2plot = [[(sr, pv) for sr in key_SR] for pv in key_PV_intercept]
# ... and the same pairs flattened, in the same order.
pair_SR_PV_intercept = [pair for row in pair_SR_PV_intercept2plot for pair in row]

In [None]:
# For every (self-report, slope) pair: run myf.Linear_Regr per participant,
# keep the r values, t-test them against 0 across participants, and split
# them by whether the participant's own regression had p < 0.05.
corr_SR_PV, p_SR_PV, SIG_SR_PV, NOS_SR_PV = {}, {}, {}, {}
for pair in pair_SR_PV_slope:
    sr_key, pv_key = pair
    # Pairs involving 'sleep' use only the odd-session subset (df_LR_mor).
    source = df_LR_mor if 'sleep' in pair else df_LR

    LR = []
    for part in userids:
        rows = source[source['userID_x'] == part]
        LR.append(myf.Linear_Regr(np.array(rows[sr_key]), np.array(rows[pv_key])))

    r_values = [fit.r_value for fit in LR]
    corr_SR_PV[pair] = r_values
    p_SR_PV[pair] = stats.ttest_1samp(r_values, 0)[1]

    LR_p_value = np.array([fit.p_value for fit in LR])
    # A NaN p-value satisfies neither comparison, so it lands in neither list.
    SIG_SR_PV[pair] = [r for r, p in zip(r_values, LR_p_value) if p < 0.05]
    NOS_SR_PV[pair] = [r for r, p in zip(r_values, LR_p_value) if p >= 0.05]

In [None]:
# Table of rounded group-level p-values: one row per slope variable,
# one column per self-report variable.
round_p_PV = np.array(
    [myf.roundP(p_SR_PV[pair]) for pair in pair_SR_PV_slope]
).reshape((3, 4))

p_value = [['p_value'] + key_SR]
ind = -1
for ind, key in enumerate(key_PV_slope):
    p_value.append([key] + list(round_p_PV[ind]))

display(HTML(tabulate.tabulate(p_value, tablefmt='html')))

In [None]:
# Same analysis as for the slopes, now for every (self-report, intercept)
# pair. The result dictionaries are re-created, so the slope results stored
# under the same names are discarded here.
corr_SR_PV, p_SR_PV, SIG_SR_PV, NOS_SR_PV = {}, {}, {}, {}
for pair in pair_SR_PV_intercept:
    sr_key, pv_key = pair
    # Pairs involving 'sleep' use only the odd-session subset (df_LR_mor).
    source = df_LR_mor if 'sleep' in pair else df_LR

    LR = []
    for part in userids:
        rows = source[source['userID_x'] == part]
        LR.append(myf.Linear_Regr(np.array(rows[sr_key]), np.array(rows[pv_key])))

    r_values = [fit.r_value for fit in LR]
    corr_SR_PV[pair] = r_values
    p_SR_PV[pair] = stats.ttest_1samp(r_values, 0)[1]

    LR_p_value = np.array([fit.p_value for fit in LR])
    # A NaN p-value satisfies neither comparison, so it lands in neither list.
    SIG_SR_PV[pair] = [r for r, p in zip(r_values, LR_p_value) if p < 0.05]
    NOS_SR_PV[pair] = [r for r, p in zip(r_values, LR_p_value) if p >= 0.05]

In [None]:
# Table of rounded group-level p-values: one row per intercept variable,
# one column per self-report variable.
round_p_PV = np.array(
    [myf.roundP(p_SR_PV[pair]) for pair in pair_SR_PV_intercept]
).reshape((3, 4))

p_value = [['p_value'] + key_SR]
ind = -1
for ind, key in enumerate(key_PV_intercept):
    p_value.append([key] + list(round_p_PV[ind]))

display(HTML(tabulate.tabulate(p_value, tablefmt='html')))

In [None]:
dfANOVA_SO.head()

In [None]:
dfANOVA_DO = dfANOVA[dfANOVA['type']!='SO']
dfANOVA_SO = dfANOVA[dfANOVA['type']!='DO']
dfANOVA_NO = dfANOVA[dfANOVA['type']!='NO']

In [None]:
import statsmodels.api as sm
from statsmodels.formula.api import ols

# Two-way ANOVA (main effects of trial type and difficulty, no interaction
# term) on dfANOVA_DO, i.e. the rows whose type is not 'SO'.
anova_formula = 'perf ~ C(type) + C(difficulty)'
model = ols(anova_formula, data=dfANOVA_DO).fit()
sm.stats.anova_lm(model, typ=2)

In [None]:
# Two-way ANOVA (main effects of trial type and difficulty, no interaction
# term) on dfANOVA_SO, i.e. the rows whose type is not 'DO'.
anova_formula = 'perf ~ C(type) + C(difficulty)'
model = ols(anova_formula, data=dfANOVA_SO).fit()
sm.stats.anova_lm(model, typ=2)

In [None]:
# Two-way ANOVA (main effects of trial type and difficulty, no interaction
# term) on dfANOVA_NO, i.e. the rows whose type is not 'NO'.
anova_formula = 'perf ~ C(type) + C(difficulty)'
model = ols(anova_formula, data=dfANOVA_NO).fit()
sm.stats.anova_lm(model, typ=2)

In [None]:
import statsmodels.stats.multicomp as mc

# Label every row with its type x difficulty cell, e.g. "typeNO & difficulty1",
# and run Tukey's HSD on performance across those cells.
type_label = "type" + dfANOVA['type'].astype(str)
difficulty_label = "difficulty" + dfANOVA['difficulty'].astype(str)
interaction_groups = type_label + " & " + difficulty_label

comp = mc.MultiComparison(dfANOVA["perf"], interaction_groups)
post_hoc_res = comp.tukeyhsd()
post_hoc_res.summary()