####### code that calculates the schema learning on day 1

In [None]:
import pandas as pd
import os
import numpy as np
import ast
import fnmatch
import glob
import matplotlib.pyplot as plt
import seaborn
import random

In [None]:
def convert_line(line):
    '''Parse a stringified key list and return its last element.

    e.g.
        "['2', '1']" -> '1'
        "['2']"      -> '2'
        "None"       -> 0
        "'NaN'"      -> 0
        "[]"         -> 0

    Parameters
    ----------
    line : str
        Text holding a Python literal, as accepted by ``ast.literal_eval``.

    Returns
    -------
    The last element of the parsed sequence, returned unchanged (so a list
    of strings yields a string), or the integer 0 when the parsed value is
    None, the string 'NaN', or empty.

    Raises
    ------
    ValueError, SyntaxError
        Propagated from ``ast.literal_eval`` when `line` is not a literal.
    '''
    line_list = ast.literal_eval(line)
    # The previous test `line_list is None or 'NaN'` was always true because
    # the non-empty string 'NaN' is truthy, so every input produced 0.
    if line_list is None or line_list == 'NaN' or not line_list:
        return 0
    return line_list[-1]
    
    
def simplify_condition_string(condition_string):
    '''Collapse a condsFile name into a two-letter code.

    The first letter is 'S' or 'N', chosen by whether 'schemaS' or 'schemaN'
    occurs in `condition_string`; the second is 'A' or 'B', chosen by
    'pathA' or 'pathB'. In each pair the first alternative wins when both
    occur.

    Raises
    ------
    ValueError
        If neither schema marker, or neither path marker, is present.
    '''
    def first_present(prefix, letters, message):
        # Return the first letter whose marker (prefix + letter) occurs.
        for letter in letters:
            if prefix + letter in condition_string:
                return letter
        raise ValueError(message)

    schema_letter = first_present(
        'schema', 'SN', 'Neither S nor N is in the condition name')
    path_letter = first_present(
        'path', 'AB', 'Neither A nor B is in the condition name')
    return schema_letter + path_letter

def check_answer(schema_path, pred2, pred3):
    '''Score two predictions against the answer key for `schema_path`.

    Parameters
    ----------
    schema_path : two-letter code ('SA', 'SB', 'NA' or 'NB')
    pred2, pred3 : the responses to compare with the expected keys
        (the key is stored as the strings '1' and '2')

    Returns
    -------
    numpy bool array of length 2: [pred2 correct, pred3 correct].
    Both entries stay False for an unrecognised `schema_path`.
    '''
    # (code, expected pred2, expected pred3)
    answer_key = (
        ('SA', '2', '1'),
        ('SB', '1', '2'),
        ('NA', '1', '1'),
        ('NB', '2', '2'),
    )
    correct = np.zeros(2, dtype=bool)
    for code, expected2, expected3 in answer_key:
        if schema_path == code:
            if pred2 == expected2:
                correct[0] = True
            if pred3 == expected3:
                correct[1] = True
            break
    return correct

In [None]:
# Score the predictions made at the start and at the end of each Day-1 block.
#
# For every block the per-subject CSV files are read, the rows labelled
# FIRST_ROW_LABEL and LAST_ROW_LABEL are pulled out of each file, and both
# predictions in those rows are marked 1 (correct) or 0 (incorrect).
# The per-block results are written to CSV, and one row per block start and
# per block end is added to final_correct_Pred2 / final_correct_Pred3
# (one column per file, in sorted filename order).

# The empty 'A' column is kept so the accumulated tables (and the CSVs later
# written from them) keep the same layout as before.
final_correct_Pred2 = pd.DataFrame({'A' : []})
final_correct_Pred3 = pd.DataFrame({'A' : []})

# Row labels selected from every file.
# NOTE(review): presumably the first and the last wedding of a block - confirm
# against the layout of the experiment output files.
FIRST_ROW_LABEL = 3
LAST_ROW_LABEL = 18

# NOTE(review): the pattern below is appended directly after 'sub*', giving
# 'sub***/...'. glob only treats '**' as recursive when it is a whole path
# component, so this presumably matches files directly inside each sub*
# folder - confirm that is the intent.
path = '/Volumes/norman/scollin/schema/data/behav/sub*'


def _last_key(raw_keys):
    '''Return the last key from a stringified key list such as "['2', '1']".

    Text of a single character is returned as parsed rather than indexed.
    '''
    parsed = ast.literal_eval(raw_keys)
    return parsed[-1] if len(raw_keys) > 1 else parsed


def _score_rows(rows):
    '''Score one row per file.

    Missing responses (the text 'None' or NaN) are replaced by "['0']",
    which matches no answer key and is therefore scored as incorrect.

    Returns a DataFrame with columns SchemaPath, Prediction2, Prediction3,
    where the prediction columns hold 1 for correct and 0 for incorrect.
    '''
    cleaned = rows.replace(to_replace=['None'], value=np.nan).fillna("['0']")
    scored = []
    for _, row in cleaned.iterrows():
        schema_path = simplify_condition_string(row['condsFile'])
        correct = check_answer(schema_path,
                               _last_key(row['key_resp_2.keys']),
                               _last_key(row['key_resp_3.keys']))
        scored.append([schema_path, int(correct[0]), int(correct[1])])
    return pd.DataFrame(scored,
                        columns=['SchemaPath', 'Prediction2', 'Prediction3'])


def _append_row(table, series):
    '''Return `table` with `series` added as one new row.

    Stands in for DataFrame.append, which was removed in pandas 2.0: the
    series name becomes the row label and its index becomes the columns.
    '''
    return pd.concat([table, series.to_frame().T.infer_objects()])


for currBlock in [1,2,3,4]:

    filename_list = sorted(
        glob.glob(path + "**/*_Block%i.csv" % currBlock, recursive=True))
    if not filename_list:
        raise FileNotFoundError(
            'No data files found for block %i under %s' % (currBlock, path))

    # Read every file once and concatenate in a single call. The original
    # row labels are kept on purpose: .loc[[label]] below then returns the
    # row with that label from every file.
    all_data = pd.concat([
        pd.read_csv(filename,
                    usecols=['condsFile', 'key_resp_2.keys', 'key_resp_3.keys'])
        for filename in filename_list
    ])

    BlockStarts = all_data.loc[[FIRST_ROW_LABEL]]
    BlockEnds = all_data.loc[[LAST_ROW_LABEL]]

    # first wedding of each block
    dfSt_correct = _score_rows(BlockStarts)

    final_correct_Pred2 = _append_row(final_correct_Pred2, dfSt_correct.Prediction2)
    final_correct_Pred3 = _append_row(final_correct_Pred3, dfSt_correct.Prediction3)

    dfSt_correct.to_csv(r'/Volumes/norman/scollin/schema/data/behav/results/predictionsResults_blockStarts_Day1_block%i.csv' % (currBlock))

    # final wedding of each block
    # (previously the scoring was nested inside the per-row loop, so it was
    # redone for every row and dfEnd_correct was left undefined when there
    # were no rows; it is now computed exactly once)
    dfEnd_correct = _score_rows(BlockEnds)

    final_correct_Pred2 = _append_row(final_correct_Pred2, dfEnd_correct.Prediction2)
    final_correct_Pred3 = _append_row(final_correct_Pred3, dfEnd_correct.Prediction3)

    dfEnd_correct.to_csv(r'/Volumes/norman/scollin/schema/data/behav/results/predictionsResults_blockEnds_Day1_block%i.csv' % (currBlock))

In [None]:
# Per-row mean and standard error of the mean, taken across the columns of
# each accumulated table.
MeanPred2, MeanPred3 = (
    final_correct_Pred2.mean(axis=1),
    final_correct_Pred3.mean(axis=1),
)
SEMPred2, SEMPred3 = (
    final_correct_Pred2.sem(axis=1),
    final_correct_Pred3.sem(axis=1),
)

In [None]:
# Store each row's mean next to the individual scores, in a new column.
final_correct_Pred2 = final_correct_Pred2.assign(PredictionL2=MeanPred2)
final_correct_Pred3 = final_correct_Pred3.assign(PredictionL3=MeanPred3)

In [None]:
# One-column frames holding the per-row SEM, relabelled 1..8.
# The row labels are given as an ordered list: a set literal (used before)
# has no defined order, so it should not be relied on to label rows.
# pandas raises a ValueError here if a frame does not have exactly 8 rows.
SEML2 = pd.DataFrame(SEMPred2).rename(columns={0: "PredictionL2"})
SEML2.index = list(range(1, 9))
SEML3 = pd.DataFrame(SEMPred3).rename(columns={0: "PredictionL3"})
SEML3.index = list(range(1, 9))

In [None]:
# One-column frames holding the per-row mean, relabelled 1..8.
# The row labels are given as an ordered list: a set literal (used before)
# has no defined order, so it should not be relied on to label rows.
# pandas raises a ValueError here if a frame does not have exactly 8 rows.
L2 = pd.DataFrame(final_correct_Pred2.PredictionL2)
L2.index = list(range(1, 9))
L3 = pd.DataFrame(final_correct_Pred3.PredictionL3)
L3.index = list(range(1, 9))

In [None]:
# Put the two prediction columns side by side: means in RESULTS,
# standard errors in errors.
RESULTS = pd.concat(objs=[L2, L3], axis=1, sort=False)
errors = pd.concat(objs=[SEML2, SEML3], axis=1)

In [None]:
# Save the results for every question and every participant: one file for the
# coin/torch predictions and another file for the egg/painting predictions.

In [None]:
# Write the accumulated Prediction2 table (including its PredictionL2 column) to disk.
final_correct_Pred2.to_csv(
    path_or_buf=r'/Volumes/norman/scollin/schema/data/behav/results/SchemaPrediction_coin_or_torch.csv'
)


In [None]:
# Write the accumulated Prediction3 table (including its PredictionL3 column) to disk.
final_correct_Pred3.to_csv(
    path_or_buf=r'/Volumes/norman/scollin/schema/data/behav/results/SchemaPrediction_egg_or_painting.csv'
)


In [None]:
# Add a 'Mean' column to each table: the row-wise average of its existing
# columns (the two prediction means in RESULTS, the two SEMs in errors).
for summary_table in (RESULTS, errors):
    summary_table['Mean'] = summary_table.mean(axis=1)

In [None]:
# Plot the overall mean accuracy at the 8 sampled positions, with a shaded
# band of +/- the averaged SEM, and save the figure as EPS.
positions = np.arange(8)
mean_accuracy = RESULTS.Mean
mean_error = errors.Mean
text_size = 18

fig = plt.figure(figsize=(10, 10))
plt.errorbar(x=positions, y=mean_accuracy, color='black')
plt.box(on=None)
plt.title('predictions day1', fontsize=text_size)
plt.ylabel('correct (%)', fontsize=text_size)
plt.xticks(
    [0, 1, 2, 3, 4, 5, 6, 7],
    ["1", "6", "7", "12", "13", "18", "19", "24"],
    fontsize=text_size,
)
plt.xlabel('wedding (#)', fontsize=text_size)
plt.ylim((0, 1))
plt.fill_between(
    positions,
    mean_accuracy - mean_error,
    mean_accuracy + mean_error,
    alpha=0.5,
    color='black',
)
plt.yticks(
    [0.2, 0.4, 0.6, 0.8, 1],
    ["0.2", "0.4", "0.6", "0.8", "1"],
    fontsize=text_size,
)

# Reference lines: a vertical line between tick positions 3 and 4 (labelled
# "12" and "13"), a horizontal line at y=0 and a vertical line at x=-1.
plt.axvline(x=3.5, color='black')
plt.axhline(y=0, color='k')
plt.axvline(x=-1, color='k')

fname = '/Volumes/norman/scollin/schema/data/behav/results/Predictions_Day1_mean.eps'
plt.savefig(fname)

In [None]:
# Plot the two prediction types separately at the 8 sampled positions, each
# with a shaded band of +/- its SEM, and save the figure as PNG.
positions = np.arange(8)
text_size = 18
# (column in RESULTS / errors, line colour, legend label)
curves = (
    ('PredictionL2', 'blue', 'coin/torch'),
    ('PredictionL3', 'orange', 'egg/painting'),
)

fig = plt.figure(figsize=(10, 10))
for column, line_colour, legend_label in curves:
    plt.errorbar(x=positions, y=RESULTS[column], color=line_colour, label=legend_label)
plt.box(on=None)
plt.title('predictions day1', fontsize=text_size)
plt.ylabel('correct (%)', fontsize=text_size)
plt.xticks(
    [0, 1, 2, 3, 4, 5, 6, 7],
    ["1", "6", "7", "12", "13", "18", "19", "24"],
    fontsize=text_size,
)
plt.xlabel('wedding (#)', fontsize=text_size)
plt.ylim((0, 1))
# The bands are drawn without an explicit colour, in the same order as the
# lines above.
for column, _, _ in curves:
    plt.fill_between(
        positions,
        RESULTS[column] - errors[column],
        RESULTS[column] + errors[column],
        alpha=0.5,
    )
plt.legend(prop={'size': 20})
# Reference lines: a vertical line between tick positions 3 and 4 (labelled
# "12" and "13"), a horizontal line at y=0 and a vertical line at x=-1.
plt.axvline(x=3.5, color='black')
plt.axhline(y=0, color='k')
plt.axvline(x=-1, color='k')
plt.yticks(
    [0.2, 0.4, 0.6, 0.8, 1],
    ["0.2", "0.4", "0.6", "0.8", "1"],
    fontsize=text_size,
)

fname = '/Volumes/norman/scollin/schema/data/behav/results/Predictions_Day1_L2vsL3.png'
plt.savefig(fname)