# Analysis of Behavioral Results

This notebook is copied from the repository provided by the original authors, with some edits made. 

In [2]:
import sys, os, glob, scipy, sqlite3, json, matplotlib#, pymer4
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as ss
import matplotlib.ticker as mtick
import json
import glob
# import FigureTools

In [3]:
# Global plotting style
import matplotlib.style as style
# style.use('seaborn-poster')  # sets the size of the charts
# style.use('seaborn-white')
sns.set_palette('tab10')
# Set both font-related rcParams in a single call
matplotlib.rcParams.update({
    'pdf.fonttype': 42,
    'font.family': "Helvetica",
})

# Load data

In [5]:
# Filepaths: start two directory levels above the working directory, then
# descend into the project folder and its pilot-data folder.
class_dir = os.path.abspath('../../')
proj_dir = os.path.join(class_dir, 'vanbaar2022')
data_dir = os.path.join(proj_dir, 'data/pilotB')

# Echo each resolved path so a wrong working directory is easy to spot
for resolved_dir in (class_dir, proj_dir, data_dir):
    print(resolved_dir)

# sys.path.append('/'.join(os.path.realpath('..').split('/')[:4]) + '/Python')
# import FigureTools

/Users/Nora/Documents/Github/courses/psych251
/Users/Nora/Documents/Github/courses/psych251/vanbaar2022
/Users/Nora/Documents/Github/courses/psych251/vanbaar2022/data/pilotB


In [6]:
# Get a list of all CSV files in the folder. glob returns matches in an
# arbitrary, filesystem-dependent order, so sort them to keep the row order of
# the concatenated DataFrame reproducible across machines and runs.
all_files = sorted(glob.glob(os.path.join(data_dir, "*.csv")))
print(f'The csv files are {all_files}')

# Fail early with a readable message: pd.concat on an empty list raises an
# opaque "No objects to concatenate" ValueError.
if not all_files:
    raise FileNotFoundError(f'No CSV files found in {data_dir}')

# Get the number of CSV files in the folder (this count equals the number of
# participants only if there is one file per participant; the subjectID count
# printed below is the direct check)
num_participants = len(all_files)
print(f'There are {num_participants} participants')

# Read each CSV into a DataFrame and store them in a list
list_of_dfs = [pd.read_csv(f) for f in all_files]

# Concatenate all DataFrames in the list into a single DataFrame
df = pd.concat(list_of_dfs, ignore_index=True)

# Examine df
print(f'The columns are {df.columns}')

# Check how many participants
print('%i participants found'%len(df['subjectID'].unique()))

# Preview the first rows. A bare expression is only rendered when it is the
# last statement of the cell, so it is placed at the end.
df.head(3)

The csv files are ['/Users/Nora/Documents/Github/courses/psych251/vanbaar2022/data/pilotB/ys5qocn3rv_trials.csv']
There are 1 participants
The columns are Index(['view_history', 'rt', 'trial_type', 'trial_index', 'plugin_version',
       'time_elapsed', 'subjectID', 'prolificID', 'studyID', 'sessionID',
       'overallBonusPoints', 'task', 'response', 'question_order', 'success',
       'Matrix', 'S', 'T', 'R', 'P', 'GameType', 'choice', 'GivenAns',
       'Player', 'PlayerType', 'CorrAns', 'confidence', 'ScoreNum',
       'time_on_trial', 'stimulus'],
      dtype='object')
1 participants found


# Get Survey Data

In [None]:
# Keep only the rows whose task is one of the survey tasks
survey_tasks = ['demographics', 'technical']
surveyDat = df[df['task'].isin(survey_tasks)]

In [None]:
# Remove unnecessary columns: keep the identifiers, the task name and the response
cols = [
    'subjectID',
    'studyID',
    'sessionID',
    'task',
    'response',
]
surveyDat = surveyDat[cols]

In [None]:
# Examine df
surveyDat.head(5)

# Get Task Data

In [None]:
# Keep only the rows recorded during the social prediction game
is_game_trial = df['task'] == 'socialPredictionGame'
taskDat = df[is_game_trial]

In [None]:
# Remove unnecessary columns: keep timing, identifiers, game description,
# the participant's answer and the scoring columns
cols = [
    'rt', 'time_elapsed',
    'subjectID', 'studyID', 'sessionID', 'task',
    'Matrix', 'S', 'T', 'R', 'P', 'GameType',
    'choice', 'GivenAns',
    'Player', 'PlayerType',
    'CorrAns', 'confidence', 'ScoreNum',
    'stimulus',
]
taskDat = taskDat[cols]

In [None]:
# Examine df
taskDat.head(3)

In [None]:
# Rename columns to correspond with those used in paper.
# rename() returns a new DataFrame, so the result is reassigned rather than
# using inplace=True: taskDat was produced by slicing df, and mutating such a
# slice in place can trigger pandas' SettingWithCopyWarning. The reassigned
# frame is an independent object that later cells can safely add columns to.
taskDat = taskDat.rename(columns={
    'subjectID': 'subID',
    'PlayerType': 'Type_Total',
    'confidence': 'Confidence',
    'ScoreNum': 'Score'
})



# ['Type_Total', 'Type', 'Variant', 'Confidence', 'Score']

In [None]:
# Split 'Type_Total' on the underscore; the pieces become the new 'Type' and
# 'Variant' columns (in that order)
type_parts = taskDat['Type_Total'].str.split('_', expand=True)
taskDat[['Type', 'Variant']] = type_parts

In [None]:
taskDat

In [None]:
# firstType = gameDat.loc[(gameDat['Trial']==0) & (gameDat['Block']==0), ['subID','Type_Total']].reset_index(drop=True)
# firstType.columns = ['subID','FirstType']
# thirdType = gameDat.loc[(gameDat['Trial']==0) & (gameDat['Block']==2), ['subID','Type_Total']].reset_index(drop=True)
# thirdType.columns = ['subID','ThirdType']
# gameDat = gameDat.merge(firstType,on='subID').merge(thirdType,on='subID')
# gameDat.head()

In [None]:
# gtOrder = ['HG','SG','SH','PD']
# roundOrder = range(4)
# ptOrder = ['opt_nat','pess_nat','opt_inv','pess_inv']

##### Best score: subject 2133. What did they discover?

In [None]:
# gameDat.loc[gameDat['subID']==2133,['Type_Total','SelfReport']].drop_duplicates()

## Overall performance by player type

In [None]:
sns.set_context('poster')

# Mean confidence and score per participant and strategy variant.
# (Changed from the original analysis code, which was not working.)
blockDat = (
    taskDat
    .groupby(['subID', 'Variant'], as_index=False)[['Confidence', 'Score']]
    .mean()
)
# run this line instead (no grouping by subID if running code for only one participant)
# blockDat = (taskDat.groupby('Variant', as_index=False)[['Confidence', 'Score']].mean())

variant_order = ['nat', 'inv']
fig, ax = plt.subplots(1, 1, figsize=[6, 5])
# Fully transparent bars (alpha=0) leave only the error bars visible;
# the swarm plot overlays the individual data points.
sns.barplot(
    data=blockDat, x='Variant', y='Score', ax=ax,
    errwidth=3, capsize=.1, order=variant_order, alpha=0,
)
sns.swarmplot(
    data=blockDat, x='Variant', y='Score', ax=ax,
    order=variant_order, alpha=.3, color='k',
)
# Dashed horizontal reference line at 0.5
ax.plot([-5, 5], [.5, .5], 'k--', lw=2)
ax.set(
    ylim=[0, 1.1],
    xlim=[-.5, 1.5],
    xlabel=None,
    yticks=[0, .25, .5, .75, 1],
    title='Performance by strategy type',
    xticklabels=['Human\nStrategies', 'Artificial\nStrategies'],
    ylabel='Accuracy     ',
);

# Paired t-test between the two variants ('inv' first, then 'nat')
is_nat = blockDat['Variant'] == 'nat'
is_inv = blockDat['Variant'] == 'inv'
dat1 = blockDat.loc[is_nat, 'Score'].values
dat2 = blockDat.loc[is_inv, 'Score'].values
stats = scipy.stats.ttest_rel(dat2, dat1)
# FigureTools.add_sig_markers(ax, relationships=[[0,1,stats[1]]])

sns.despine(top=True, right=True)
ax.spines['left'].set_bounds(0, 1)
# Final y-limit; overrides the 1.1 set above to leave headroom over the axis
ax.set_ylim([0, 1.4])
ax.yaxis.set_major_formatter(mtick.PercentFormatter(xmax=1))
# plt.savefig(baseDir+'/Figures/plot1b.pdf',transparent=True, bbox_inches='tight');

In [None]:
sns.set_context('poster')
# Mean confidence and score per participant, pooled over all strategy variants.
# (Changed from the original analysis code, which was not working.)
overallDat = (taskDat.groupby(['subID'], as_index=False)[['Confidence', 'Score']].mean())
fig, ax = plt.subplots(1,1,figsize=[6,5])
# Plot overallDat (one row per participant), the same frame the t-test below
# uses. The cell previously plotted blockDat, which holds one row per
# participant per variant and so did not match the title or the statistic.
# Fully transparent bars (alpha=0) leave only the error bar visible.
sns.barplot(data=overallDat,y='Score', ax=ax, errwidth = 3, capsize=.1,
            alpha=0)
sns.swarmplot(data=overallDat,y='Score', ax=ax,
            alpha=.3, color = 'k')
# Dashed horizontal reference line at 0.5
ax.plot([-5,5],[.5,.5], 'k--', lw=2)
ax.set(ylim = [0,1.1], xlabel = None, yticks = [0,.25,.5,.75,1],
       title = 'Overall task performance',
       ylabel = 'Accuracy        ');
# One-sample t-test of the per-participant mean score against 0.5
stats = scipy.stats.ttest_1samp(overallDat['Score'].values, 0.5)
# FigureTools.add_sig_markers(ax, relationships=[[0,0,stats[1]]])
sns.despine(top=True,right=True)
ax.spines['left'].set_bounds(0,1)
# Final y-limit; overrides the 1.1 set above
ax.set_ylim([0,1.4])
ax.yaxis.set_major_formatter(mtick.PercentFormatter(xmax=1))
# plt.savefig(baseDir+'/Figures/plot1d.pdf',transparent=True, bbox_inches='tight');

In [None]:
def ttest_1samp(dat, popmean = .5, verbose = True):
    """Run a one-sample t-test of `dat` against `popmean` and compute Cohen's d.

    Cohen's d is (mean(dat) - popmean) divided by np.std(dat) with numpy's
    default ddof=0.

    Parameters
    ----------
    dat : array-like of numbers
    popmean : float, value the sample mean is tested against (default .5)
    verbose : bool, when True print the SD, the test result and Cohen's d

    Returns
    -------
    (stats, coh_d) : the scipy.stats.ttest_1samp result and the effect size
    """
    # Print when verbose, otherwise swallow the message
    report = print if verbose else (lambda *args: None)

    report('SD: ', np.std(dat))
    report('Stats:')
    stats = scipy.stats.ttest_1samp(dat, popmean = popmean)
    report(stats)
    report('Cohen d:')
    coh_d = (np.mean(dat) - popmean) / np.std(dat)
    report(coh_d)
    return stats, coh_d

In [None]:
def ttest_2samp(dat1, dat2):
    """Run an independent two-sample t-test and compute Cohen's d.

    Prints the SD of each sample (np.std with its default ddof=0), the
    scipy.stats.ttest_ind result and the effect size.

    Cohen's d is the difference in means (dat1 - dat2) divided by the pooled
    standard deviation, built from the ddof=1 variances of both samples and
    weighted by their degrees of freedom.

    Parameters
    ----------
    dat1, dat2 : array-like of numbers (may differ in length)

    Returns
    -------
    (stats, coh_d) : the scipy.stats.ttest_ind result and the effect size
    """
    print('SD dat 1: ',np.std(dat1))
    print('SD dat 2: ',np.std(dat2))
    print('Stats:')
    stats = scipy.stats.ttest_ind(dat1, dat2)
    print(stats)
    print('Cohen d:')
    nx = len(dat1)
    ny = len(dat2)
    dof = nx + ny - 2
    # Pooled variance. The second term must use dat2; it previously referenced
    # an undefined name `y`, so every call raised NameError.
    pooled_var = ((nx-1)*np.std(dat1, ddof=1) ** 2 + (ny-1)*np.std(dat2, ddof=1) ** 2) / dof
    coh_d = (np.mean(dat1) - np.mean(dat2)) / np.sqrt(pooled_var)
    print(coh_d)
    return(stats, coh_d)

In [None]:
# One-sample t-test of the per-participant mean score against the helper's
# default popmean of .5; the helper also prints the SD and Cohen's d.
# The trailing semicolon suppresses the returned tuple in the cell output.
ttest_1samp(overallDat['Score'].values);

In [None]:
# Wilcoxon signed-rank test on the per-participant mean scores shifted by 0.5,
# a non-parametric counterpart to the one-sample t-test against 0.5.
scipy.stats.wilcoxon(overallDat['Score'].values-0.5)

##### Plot model reproduction of this effect

In [None]:
# Load the model simulations (first CSV column is used as the index).
# NOTE(review): `baseDir` is not assigned in any visible cell of this notebook
# (it only appears in commented-out savefig lines), so this cell raises
# NameError unless it is defined elsewhere. Confirm where the simulation file
# lives for this project before running.
sim_path = baseDir + '/Data/Cleaned/Model_simulations_%s_%s.csv'%('CoGrRiNa','best')
sim_dat = pd.read_csv(sim_path, index_col=0)
sim_dat.head()

In [None]:
sns.set_context('poster')
# Mean confidence and model score per participant and strategy variant.
# The numeric columns are selected after grouping (the same pattern this
# notebook uses for blockDat): averaging the whole frame would also try to
# average the text column 'Type_Total', which raises on pandas >= 2.0.
sim_block_dat = (sim_dat.groupby(['subID', 'Variant'], as_index=False)[['Confidence', 'model_score']].mean())
fig, ax = plt.subplots(1,1,figsize=[6,5])
# Fully transparent bars (alpha=0) leave only the error bars visible;
# the swarm plot overlays the individual data points.
sns.barplot(data=sim_block_dat,x='Variant',y='model_score', ax=ax, errwidth = 3, capsize=.1,
            order=['nat','inv'],alpha=0)
sns.swarmplot(data=sim_block_dat,x='Variant',y='model_score', ax=ax,
            order=['nat','inv'], alpha=.3, color = 'k')
# Dashed horizontal reference line at 0.5
ax.plot([-5,5],[.5,.5], 'k--', lw=2)
ax.set(ylim = [0,1.1], xlim = [-.5,1.5], xlabel = None, yticks = [0,.25,.5,.75,1],
       title = 'Model prediction',
       xticklabels = ['Human\nStrategies', 'Artificial\nStrategies'], ylabel = 'Accuracy     ');
# Paired t-test between the two variants ('inv' first, then 'nat')
dat1 = sim_block_dat.loc[sim_block_dat['Variant']=='nat','model_score'].values
dat2 = sim_block_dat.loc[sim_block_dat['Variant']=='inv','model_score'].values
stats = scipy.stats.ttest_rel(dat2,dat1)
# FigureTools is not imported in this notebook (its import is commented out),
# so the significance marker is disabled here, as in the other plotting cells.
# FigureTools.add_sig_markers(ax, relationships=[[0,1,stats[1]]])
sns.despine(top=True,right=True)
ax.spines['left'].set_bounds(0,1)
# Final y-limit; overrides the 1.1 set above
ax.set_ylim([0,1.4])
ax.yaxis.set_major_formatter(mtick.PercentFormatter(xmax=1))
# plt.savefig(baseDir+'/Figures/plot1c.pdf',transparent=True, bbox_inches='tight');

##### Mean performance, compare distribution of mean per subject against 50%

In [None]:
taskDat['Score'].describe()

In [None]:
# Mean score per participant. 'Score' is selected before aggregating: calling
# .mean() on the whole grouped frame also tries to average the text columns,
# which raises "agg function failed [how->mean,dtype->object]" on pandas >= 2.0.
meanPerSub = taskDat.groupby('subID')['Score'].mean().values
print(len(meanPerSub))
# One-sample t-test of the per-participant means against 0.5
scipy.stats.ttest_1samp(meanPerSub, .5)

##### Compare human vs artificial strategy performance using within subjects t-test

In [None]:
# NOTE(review): `stats` holds whatever an earlier cell last assigned. The
# by-strategy plot, the overall-performance plot and the model-prediction plot
# cells all write to this name, so the value shown here depends on which of
# them ran last. The paired human-vs-artificial test is computed explicitly
# from meanPerSubCondition further down.
stats

In [None]:
# Mean score per participant and variant, reshaped to one row per participant
# with one column per variant.
# (modified bc of error: agg function failed [how->mean,dtype->object])
score_by_sub_variant = taskDat.groupby(['subID', 'Variant'], as_index=False)['Score'].mean()
meanPerSubCondition = score_by_sub_variant.pivot(
    index='subID',
    columns='Variant',
    values='Score',
)
meanPerSubCondition.head()

Within natural

In [None]:
meanPerSubCondition['nat'].describe()

In [None]:
# One-sample t-test: per-participant mean score on the 'nat' variant vs. 0.5
scipy.stats.ttest_1samp(meanPerSubCondition['nat'], .5)

Within artificial

In [None]:
meanPerSubCondition['inv'].describe()

In [None]:
# One-sample t-test: per-participant mean score on the 'inv' variant vs. 0.5
scipy.stats.ttest_1samp(meanPerSubCondition['inv'], .5)

Between natural and artificial

In [None]:
# Paired t-test across participants, 'inv' first and 'nat' second, so the sign
# of the statistic reflects inv minus nat.
scipy.stats.ttest_rel(meanPerSubCondition['inv'],meanPerSubCondition['nat'])


In [None]:
# Same paired comparison expressed as a one-sample test of the per-participant
# difference (nat minus inv) against 0, using the notebook's ttest_1samp helper,
# which also prints the SD of the differences and Cohen's d.
ttest_1samp((meanPerSubCondition['nat']-meanPerSubCondition['inv']), popmean = 0)