# Analysis per difficulty

## Across subjects & sessions

In [None]:
import pandas as pd
import os
import json
import numpy as np
from itertools import groupby
import matplotlib.pyplot as plt
from scipy import stats
import matplotlib as mpl
from sklearn.linear_model import LogisticRegression
import random
import re
import csv
from IPython.display import HTML, display, Image
import tabulate
import math as m
import warnings
warnings.filterwarnings('ignore')
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [None]:
# Resolve the project root, which sits three directory levels above the
# notebook's working directory, keeping every intermediate level by name.
current_path = os.path.abspath(os.getcwd())
parent_path, grand_parent_path, main_path = (
    os.path.abspath(os.path.join(current_path, *([os.pardir] * depth)))
    for depth in (1, 2, 3)
)

# Folder that the per-session result files are read from and written to.
path_results = main_path + '/results/gabor/'

In [None]:
import sys
# Make the project's helper module importable. Position 1 is used because
# entry 0 is the script path (or '' in a REPL).
# NOTE(review): the folder is spelled 'scr' here - confirm it is not meant to be 'src'.
sys.path.insert(1, '{}/scr'.format(main_path))
import my_functions as myf

In [None]:
# Global matplotlib styling applied to every figure in the notebook.
mpl.rcParams.update({
    'lines.linewidth': 3,
    'lines.markersize': 10,
    'axes.titlesize': 18,
    'axes.labelsize': 18,
    'axes.linewidth': 3,
    'xtick.labelsize': 20,
    'ytick.labelsize': 20,
    'xtick.major.width': 4,
    'xtick.minor.width': 2,
    'ytick.major.width': 4,
    'ytick.minor.width': 2,
})

# Experiment layout: days 1-10, two sessions per day, three difficulty levels.
fday = list(range(1, 11))
fsession = [1, 2]
unique_signals = [1, 2, 3]

# Participant-sessions left out of the analysis, written '<userID>_<sessionID>'.
# NOTE(review): the list names suggest exclusion for misses / for timing - confirm.
excluded_miss = ['1011_11', '1011_18', '1014_12']
excluded_time = ['1008_2', '1009_9']
excluded = excluded_miss + excluded_time

# The pre-analysed table supplies the list of participant ids.
adf = pd.read_csv(path_results + 'preanalyzed.csv')
userids = adf['userID'].unique()

In [None]:
# Keep a handle on the column index of the pre-analysed table.
columnas=adf.columns

In [None]:
# Per-participant baselines: for each participant, average the per-difficulty
# vectors 'RT_no', 'Doptout' and 'Soptout' element-wise over all retained
# sessions, ignoring NaN entries.
mas = {}
for ind, part in enumerate(userids, start=1):
    rt_rows, do_rows, so_rows = [], [], []
    for day in fday:
        for ses in fsession:
            # Sessions are numbered consecutively: two per day.
            session_number = 2 * day - 2 + ses
            if '_'.join((str(part), str(session_number))) in excluded:
                continue
            json_path = (path_results + 'day' + str(day) + '/session' + str(ses)
                         + '/diff_Sub' + str(part) + '_Day' + str(day)
                         + '_Sess' + str(ses) + '.json')
            with open(json_path) as handle:
                session_data = json.load(handle)
            rt_rows.append(session_data['RT_no'])
            do_rows.append(session_data['Doptout'])
            so_rows.append(session_data['Soptout'])
    mas[part] = {
        'RTas': np.nanmean(rt_rows, axis=0),
        'DOas': np.nanmean(do_rows, axis=0),
        'SOas': np.nanmean(so_rows, axis=0),
    }

In [None]:
# Variables read from every per-session JSON file; each holds one value per
# difficulty level.
col_names = ['RT_no', 'Doptout', 'Dperf_oo', 'Sperf_oo', 'perf_no', 'DRT_oo', 'SRT_oo',
             'DRT_OKoo', 'SRT_OKoo', 'RT_noNOK', 'Soptout']

# Empty templates that fix the column set and order of the three tables.
df = pd.DataFrame(columns=col_names + ['RTeff', 'DOeff', 'SOeff', 'difficulty',
                                       'sessionID_x', 'userID'])
dfANOVA = pd.DataFrame(columns=['perf', 'type', 'difficulty', 'sessionID_x', 'userID'])
df_LR = pd.DataFrame(columns=['slope_RT', 'slope_DO', 'slope_SO',
                              'intercept_RT', 'intercept_DO', 'intercept_SO',
                              'sessionID_x', 'userID', 'user_sessionID_x'])

# Difficulty levels are the same for every session, so define them once.
diff_list = [1, 2, 3]
diff_vals = np.array(diff_list)
num_diff = len(diff_list)


def _fit_against_log_difficulty(levels, values):
    """Return (slope, intercept) of ``values`` regressed on ``log(levels)``.

    NaN entries of ``values`` are dropped, together with the matching levels,
    before the fit.
    """
    values = np.array(values)
    keep = ~np.isnan(values)
    fit = stats.linregress(np.log(levels[keep]), values[keep])
    return fit[0], fit[1]


# Collect one small frame per retained session and concatenate once at the
# end instead of growing the tables inside the loop.
long_frames, anova_frames, lr_frames = [], [], []
for part in userids:
    for Day in fday:
        for Ses in fsession:
            sessionid = 2*Day - 2 + Ses
            user_sessionID = str(part) + '_' + str(sessionid)
            if user_sessionID in excluded:
                continue

            filename = (path_results + 'day' + str(Day) + '/session' + str(Ses)
                        + '/diff_Sub' + str(part) + '_Day' + str(Day)
                        + '_Sess' + str(Ses) + '.json')
            with open(filename) as f:
                data = json.load(f)

            # Long table: raw per-difficulty values plus their deviation from
            # the participant's across-session mean (`mas`).
            dict_ = {key: data[key] for key in col_names}
            dict_.update({
                'RTeff': np.array(data['RT_no']) - mas[part]['RTas'],
                'DOeff': np.array(data['Doptout']) - mas[part]['DOas'],
                'SOeff': np.array(data['Soptout']) - mas[part]['SOas'],
                'difficulty': diff_list,
                'sessionID_x': [sessionid]*num_diff,
                'userID': [part]*num_diff,
            })
            long_frames.append(pd.DataFrame(dict_))

            # ANOVA table: the three performance vectors stacked, labelled by type.
            dANOVA = {
                'perf': data['perf_no'] + data['Dperf_oo'] + data['Sperf_oo'],
                'type': ['NO']*num_diff + ['DO']*num_diff + ['SO']*num_diff,
                'difficulty': diff_list*3,
                'sessionID_x': [sessionid]*num_diff*3,
                'userID': [part]*num_diff*3,
            }
            anova_frames.append(pd.DataFrame(dANOVA))

            # Regression table: one linear fit per variable against log(difficulty).
            slope_RT, intercept_RT = _fit_against_log_difficulty(diff_vals, data['RT_no'])
            slope_DO, intercept_DO = _fit_against_log_difficulty(diff_vals, data['Doptout'])
            slope_SO, intercept_SO = _fit_against_log_difficulty(diff_vals, data['Soptout'])

            # The identifier keys must match the df_LR template; otherwise the
            # reindex below drops them and leaves those columns all-NaN.
            lr_frames.append(pd.DataFrame({
                'slope_RT': [slope_RT], 'slope_DO': [slope_DO], 'slope_SO': [slope_SO],
                'intercept_RT': [intercept_RT], 'intercept_DO': [intercept_DO],
                'intercept_SO': [intercept_SO],
                'sessionID_x': [sessionid], 'userID': [part],
                'user_sessionID_x': [user_sessionID],
            }))

# The template goes first so the result is defined even when no session was
# retained; reindex pins the declared column order.
df = pd.concat([df] + long_frames, ignore_index=True).reindex(columns=df.columns)
dfANOVA = pd.concat([dfANOVA] + anova_frames, ignore_index=True).reindex(columns=dfANOVA.columns)
df_LR = pd.concat([df_LR] + lr_frames, ignore_index=True).reindex(columns=df_LR.columns)

In [None]:
# Keep only the fit parameters and the participant-session key. An explicit
# copy makes dfLR an independent frame, so modifying it in place later does not
# trip pandas' chained-assignment check against df_LR.
dfLR = df_LR[['slope_RT', 'slope_DO', 'slope_SO', 'intercept_RT', 'intercept_DO',
              'intercept_SO', 'user_sessionID_x']].copy()

In [None]:
# Rename the key column by rebinding dfLR to the renamed frame rather than
# mutating in place a frame that was derived from df_LR by column selection.
dfLR = dfLR.rename(columns={"user_sessionID_x": "user_sessionID"})

In [None]:
# Display the column names to check the rename.
dfLR.columns

In [None]:
# DO NOT RUN AGAIN: this overwrites linearRegr_diff.csv in the results folder
# with the per-session slopes/intercepts and their user_sessionID key.

dfLR.to_csv(path_results+'linearRegr_diff.csv',index=False)

In [None]:
# DO NOT RUN AGAIN: this overwrites per_difficulty.csv with the long table
# (one row per participant-session and difficulty level).
# NOTE(review): a later cell in this notebook writes a different, wide-format
# `df` to this same file name, so whichever cell runs last wins.

df.to_csv(path_results+'per_difficulty.csv',index=False)

In [None]:
# DO NOT RUN AGAIN: this overwrites forANOVA.csv with the stacked performance
# table (columns perf, type, difficulty, sessionID_x, userID).

dfANOVA.to_csv(path_results+'forANOVA.csv',index=False)

In [None]:
# Build the wide table: one row per 'diff*' JSON file found on disk, with one
# column '<variable>_<i>' per element of every variable in the file.

# File whose keys define `data_names` (the variables summarised and plotted later).
reference_file = 'diff_Sub1014_Day10_Sess2.json'
subject_pattern = re.compile(r'diff_Sub(.*)_Day')

wide_rows = []
# Initialised once, so the value taken from the reference file survives no
# matter in which position that file is processed.
data_names = []
for Day in fday:
    for Ses in fsession:
        sessionid = 2*Day-2+Ses
        path = path_results+'day'+str(Day)+'/session'+str(Ses)+'/'

        # Visit the files in increasing order of the subject id in their name.
        diff_files = [name for name in os.listdir(path) if name.startswith('diff')]
        files_by_subject = sorted(
            ((int(subject_pattern.search(name).group(1)), name) for name in diff_files),
            key=lambda pair: pair[0],
        )

        for partid, part in files_by_subject:
            part_sessid = str(partid)+'_'+str(sessionid)
            with open(path+part) as handle:
                data = json.load(handle)

            wide_row = {'user_sessionID': part_sessid, 'userID': partid,
                        'sessionID': sessionid}
            for k, v in data.items():
                # NOTE(review): every JSON key is also published as a global
                # variable. Kept for backward compatibility with cells that may
                # rely on it, but a key that matches an existing name silently
                # overwrites it - consider removing.
                globals()[k] = v
                for i, item in enumerate(v):
                    wide_row[k+'_'+str(i)] = item

            if part == reference_file:
                # Keep the keys that do not contain the substrings 'se' or 'sd'.
                data_names = [k for k in data if ('se' not in k) and ('sd' not in k)]

            wide_rows.append(wide_row)

# DataFrame.append was removed in pandas 2.0; build the frame in one step.
# Columns appear in first-seen order (identifier columns first).
df = pd.DataFrame(wide_rows)

In [None]:
# Preview the first rows of the wide table.
df.head()

In [None]:
# Distinct session and participant identifiers present in the wide table.
sessionids, userids = (df[column].unique() for column in ('sessionID', 'userID'))

In [None]:
# From here on only these three participant-sessions ('<userID>_<sessionID>')
# are treated as excluded. This rebinds `excluded`, so the two `excluded_time`
# entries that were part of it earlier in the notebook no longer apply.
excluded = ['1011_11', '1011_18', '1014_12']

In [None]:
# Drop every row that belongs to one of the excluded participant-sessions.
df = df[~df['user_sessionID'].isin(excluded)]

In [None]:
# Re-insert each excluded participant-session as a placeholder row: the
# identifier columns keep their values, every measurement column is NaN.
df_column_names = list(df.columns)
# measurement columns = everything except the identifier columns
column_names = [x for x in df_column_names
                if x not in ['sessionID', 'userID', 'user_sessionID']]
# NaN value for every measurement column
nan_dict = dict.fromkeys(column_names, np.nan)
# one placeholder dictionary per excluded '<userID>_<sessionID>' key
replace = [
    dict({'sessionID': int(elem.split('_')[1]),
          'userID': int(elem.split('_')[0]),
          'user_sessionID': elem}, **nan_dict)
    for elem in excluded
]

# DataFrame.append was removed in pandas 2.0; add all placeholders with one
# concat. Passing the existing column list keeps the column order unchanged.
if replace:
    df = pd.concat([df, pd.DataFrame(replace, columns=df_column_names)],
                   ignore_index=True)

In [None]:
# DO NOT RUN AGAIN: this overwrites per_difficulty.csv with the wide table
# (one row per participant-session, NaN placeholder rows for excluded ones).
# NOTE(review): an earlier cell writes the long-format table to this same file
# name, so whichever cell runs last wins.

df.to_csv(path_results+'per_difficulty.csv',index=False)

In [None]:
# For every variable in data_names and every difficulty level, summarise the
# matching wide-table column over all rows:
#   ap -> NaN-ignoring mean, se -> myf.sem of the same column.
ap = {
    name: [np.nanmean(np.array(df[name + '_' + str(level - 1)])) for level in unique_signals]
    for name in data_names
}
se = {
    name: [myf.sem(np.array(df[name + '_' + str(level - 1)])) for level in unique_signals]
    for name in data_names
}

In [None]:
## DO NOT RUN AGAIN

# Store the `ap` and `se` dictionaries as JSON files in the results folder.
filename_ap, filename_se = (path_results + name for name in ('ap_diff.json', 'se_diff.json'))

# Serialise both dictionaries before any file is opened.
json_object_ap, json_object_se = json.dumps(ap), json.dumps(se)

for target, payload in ((filename_ap, json_object_ap), (filename_se, json_object_se)):
    with open(target, "w") as outfile:
        outfile.write(payload)

In [None]:
# Show which variables were picked up for summarising and plotting.
print(data_names)

In [None]:
# Figure 1: three panels (performance, reaction time, optout) vs. difficulty,
# drawn from the across-subject summaries `ap` (values) and `se` (error bars).
fig, ax = plt.subplots(1, 3, figsize=(18, 5))
plt.subplots_adjust(wspace=0.3)

solid, dashed = {}, {'ls': '--'}
# Per panel: y-label, legend position, and the curves in drawing order as
# (variable, colour, extra line options, legend text).
panels = (
    ('performance', 'upper right', (
        ('perf_no', 'g', solid, "non-optout"),
        ('Dperf_oo', 'r', solid, "Doptout"),
        ('Sperf_oo', 'b', solid, "Soptout"),
    )),
    ('Reaction Time', 'upper right', (
        ('RT_no', 'g', solid, "correct non-optout"),
        ('RT_noNOK', 'm', solid, "incorrect non-optout"),
        ('DRT_OKoo', 'r', solid, "correct Doptout"),
        ('SRT_OKoo', 'b', solid, "correct Soptout"),
        ('DRT_oo', 'r', dashed, "Doptout"),
        ('SRT_oo', 'b', dashed, "Soptout"),
    )),
    ('Optout', 'upper left', (
        ('Doptout', 'r', solid, "Doptout"),
        ('Soptout', 'b', solid, "Soptout"),
    )),
)

for panel, (ylabel, legend_loc, curves) in zip(ax, panels):
    for key, colour, line_options, _ in curves:
        panel.errorbar(unique_signals, ap[key], se[key], c=colour, **line_options)
    panel.set_ylabel(ylabel)
    panel.set_xlabel('Difficulty')
    # Legend entries are matched to the curves by drawing order.
    panel.legend(tuple(label for *_, label in curves), loc=legend_loc, shadow=True)

# Only the reaction-time panel uses a fixed y-range.
ax[1].set_ylim(0.9, 1.6)

plt.show()

**Figure 1**: Mean and standard error across subjects and sessions of psychometric variables vs. difficulty. Left panel: performance. Middle panel: normalized reaction time. Right panel: optout election. Green: non-optout trials. Red: DO optout trials. Blue: SO optout trials. In the middle panel, solid lines show correct trials, magenta shows incorrect non-optout trials, and dashed lines show all optout trials.

In [None]:
# Store the `ap` and `se` dictionaries as JSON files in the results folder.
filename_ap, filename_se = (
    path_results + name
    for name in ('mean_per_difficulty.json', 'se_per_difficulty.json')
)

# Serialise both dictionaries before any file is opened.
json_object_ap, json_object_se = json.dumps(ap), json.dumps(se)

for target, payload in ((filename_ap, json_object_ap), (filename_se, json_object_se)):
    with open(target, "w") as outfile:
        outfile.write(payload)

## Across sessions

### Performance

In [None]:
# Variables whose name contains 'perf' (the performance measures).
perf_names = [name for name in data_names if 'perf' in name]

In [None]:
# Figure 2: one panel per participant (6 rows x 4 columns, filled column by
# column) showing performance vs. difficulty summarised over that
# participant's rows of the wide table.
fig, ax = plt.subplots(6, 4, figsize=(18, 25))
plt.subplots_adjust(wspace=0.4, hspace=0.4)

perf_curves = (('perf_no', 'g'), ('Dperf_oo', 'r'), ('Sperf_oo', 'b'))
for ind, part in enumerate(userids):
    subset = df[df['userID'] == part]
    # ap: NaN-ignoring mean, se: myf.sem, per variable and difficulty level.
    ap = {
        key: [np.nanmean(np.array(subset[key + '_' + str(level - 1)])) for level in unique_signals]
        for key in perf_names
    }
    se = {
        key: [myf.sem(np.array(subset[key + '_' + str(level - 1)])) for level in unique_signals]
        for key in perf_names
    }

    # Column index = ind // 6, row index = ind % 6.
    ind2, ind1 = divmod(ind, 6)
    panel = ax[ind1, ind2]
    for key, colour in perf_curves:
        panel.errorbar(unique_signals, ap[key], se[key], c=colour)
    panel.set_title('participant ' + str(int(part)))
    panel.set_ylim(25, 103)
    panel.set_xticks(np.arange(1, 4))
    # Axis labels only on the first column and the bottom row.
    ax[ind1, 0].set_ylabel('Performance')
    ax[5, ind2].set_xlabel('Difficulty')

# The last grid cell is hidden.
ax[5, 3].axis('off')

plt.show()

**Figure 2**: Mean and standard error across sessions of performance vs. difficulty for every subject. Green: non-optout trials. Red: DO optout trials. Blue: SO optout trials.

### Optout

In [None]:
# Variables whose name contains 'optout' (the optout measures).
oo_names = [name for name in data_names if 'optout' in name]

In [None]:
# Figure 3: one panel per participant (6 rows x 4 columns, filled column by
# column) showing optout election vs. difficulty summarised over that
# participant's rows of the wide table.
fig, ax = plt.subplots(6, 4, figsize=(18, 25))
plt.subplots_adjust(wspace=0.4, hspace=0.4)

for ind, part in enumerate(userids):
    subset = df[df['userID'] == part]
    # ap: NaN-ignoring mean, se: myf.sem, per variable and difficulty level.
    ap, se = {}, {}
    for (var, op) in [(ap, np.nanmean), (se, myf.sem)]:
        for key in oo_names:
            var[key] = [op(np.array(subset[key + '_' + str(diff - 1)])) for diff in unique_signals]

    # Column index = ind // 6, row index = ind % 6.
    ind2, ind1 = divmod(ind, 6)
    panel = ax[ind1, ind2]
    panel.errorbar(unique_signals, ap['Doptout'], se['Doptout'], c='r')
    panel.errorbar(unique_signals, ap['Soptout'], se['Soptout'], c='b')
    panel.set_title('participant ' + str(int(part)))
    panel.set_xticks(np.arange(1, 4))
    # Single y-range for the optout panels. An earlier (25, 103) call was
    # immediately overridden by this one and has been dropped.
    panel.set_ylim(0, 103)
    # Axis labels only on the first column and the bottom row.
    ax[ind1, 0].set_ylabel('Optout')
    ax[5, ind2].set_xlabel('Difficulty')

# The last grid cell is hidden.
ax[5, 3].axis('off')

plt.show()

**Figure 3**: Mean and standard error across sessions of optout election vs. difficulty for every subject. Red: DO optout trials. Blue: SO optout trials.

### Reaction Time

In [None]:
# Variables whose name contains 'RT' (the reaction-time measures).
RT_names = [name for name in data_names if 'RT' in name]

In [None]:
# Figure 4: one panel per participant (6 rows x 4 columns, filled column by
# column) showing reaction time vs. difficulty summarised over that
# participant's rows of the wide table.
fig, ax = plt.subplots(6, 4, figsize=(18, 25))
plt.subplots_adjust(wspace=0.4, hspace=0.4)

rt_curves = (('RT_no', 'g'), ('RT_noNOK', 'm'), ('DRT_OKoo', 'r'), ('SRT_OKoo', 'b'))
for ind, part in enumerate(userids):
    subset = df[df['userID'] == part]
    # ap: NaN-ignoring mean, se: myf.sem, per variable and difficulty level.
    ap = {
        key: [np.nanmean(np.array(subset[key + '_' + str(level - 1)])) for level in unique_signals]
        for key in RT_names
    }
    se = {
        key: [myf.sem(np.array(subset[key + '_' + str(level - 1)])) for level in unique_signals]
        for key in RT_names
    }

    # Column index = ind // 6, row index = ind % 6.
    ind2, ind1 = divmod(ind, 6)
    panel = ax[ind1, ind2]
    for key, colour in rt_curves:
        panel.errorbar(unique_signals, ap[key], se[key], c=colour)
    panel.set_title('participant ' + str(int(part)))
    panel.set_xticks(np.arange(1, 4))
    # Axis labels only on the first column and the bottom row.
    ax[ind1, 0].set_ylabel('Reaction Time')
    ax[5, ind2].set_xlabel('Difficulty')

# The last grid cell is hidden.
ax[5, 3].axis('off')

plt.show()

**Figure 4**: Mean and standard error across sessions of normalized reaction time vs. difficulty for every subject. Green: non-optout correct trials. Magenta: non-optout incorrect trials. Red: DO optout correct trials. Blue: SO optout correct trials.