# Overview

This script generates the figures found in the paper.

In [2]:
import piplite
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
await piplite.install("seaborn")
await piplite.install("openpyxl")
import seaborn as sns
# Global seaborn theme. A single set_theme call configures both the whitegrid
# style and the default figure size; sns.set is only a legacy alias of
# set_theme, so calling both back to back initialised the theme twice.
sns.set_theme(style="whitegrid", rc={'figure.figsize': (16, 8)})

# Matplotlib overrides applied on top of the seaborn theme: bold, enlarged
# text for titles, axis labels, tick labels and legends.
newRCparams = {
    'font.weight': 'bold',
    'axes.titlesize': 'xx-large',
    'axes.titleweight': 'bold',
    'axes.labelsize': 'xx-large',
    'axes.labelweight': 'bold',
    'axes.labelpad': 30,
    'xtick.labelsize': 'large',
    'ytick.labelsize': 'large',
    'legend.title_fontsize': 'large',
    'legend.fontsize': 'large',
    'legend.markerscale': 2,
    'mathtext.default': 'bf',
}
plt.rcParams.update(newRCparams)

# Six colours from the active seaborn palette, shared by the figure cells.
palette = sns.color_palette(n_colors=6)

# Load in excel data: sheet_name=None reads every sheet into a dict of
# DataFrames keyed by sheet name.
dfs = pd.read_excel('data/soe_ale_small.xlsx', sheet_name=None)

NameError: name 'piplite' is not defined

In [None]:
'''
Generate Figure 1: resampled Cronbach's alpha, with reliability
extrapolated from the ICC values via the Spearman-Brown formula.
'''
def sbp(n, r):
    """Apply the Spearman-Brown prophecy formula.

    Computes ``n * r / (1 + (n - 1) * r)``.

    Args:
        n: Multiplier applied to ``r`` in the formula (the figure code
            passes the values 1..99 here).
        r: Reliability value being extrapolated (the figure code passes
            ICC values here).

    Returns:
        The value of the formula for the given ``n`` and ``r``.
    """
    numerator = n * r
    denominator = 1 + (n - 1) * r
    return numerator / denominator
# Predicted reliability for 1..99 on the x axis: one Spearman-Brown curve
# per ICC value (rows 0-5 of the 'icc2_values' sheet).
x = np.arange(1, 100)
preds = [[sbp(n, dfs['icc2_values'].icc2[k]) for n in x] for k in range(6)]

plt.figure()
# Scatter of the mean Cronbach's alpha for each (numP, numSubj) pair,
# coloured by numP.
ax1 = sns.scatterplot(x='numSubj', y='crona', hue='numP', palette=palette,
                      data=dfs['figure_1_cronbach'].groupby(['numP', 'numSubj']).mean())

# Overlay each predicted curve in the palette colour with the same index.
for pred, color in zip(preds, palette):
    sns.lineplot(x=x, y=pred, color=color)

# The legend is created once (the original issued this call twice).
plt.legend(title='Number of Presentations')
plt.xlabel('Number of Participants')
plt.ylabel("Cronbach's Alpha")
plt.title('Reliability as a function of Stimulus Presentations and Participants')
plt.show()
#plt.savefig('figure_1.tiff', dpi=300, bbox_inches='tight')

In [None]:
# Create the second figure showing the relationship between reliability and
# correlation to different models.
tmp = dfs['reliability_and_correlation']
models = tmp.modelName.unique()

# Pearson r between 'reliability' and 'correlation' for each model, in the
# same order as `models`. Series.corr is used on the two numeric columns
# because DataFrame.corr() raises on the string modelName column in
# pandas >= 2.0, where numeric_only defaults to False.
corrs = [
    tmp.loc[tmp.modelName == m, 'reliability'].corr(
        tmp.loc[tmp.modelName == m, 'correlation'])
    for m in models
]
# Name-keyed view of the same values, so the annotations below do not depend
# on the order in which model names happen to appear in the sheet.
corr_by_model = dict(zip(models, corrs))

# Main Figure
fig = plt.figure()
ax = fig.add_subplot(111)
sns.regplot(x='reliability', y='correlation',
            data=tmp[tmp.modelName.isin(['Exp65'])], ci=None)
sns.regplot(x='reliability', y='correlation',
            data=tmp[tmp.modelName.isin(['w2v'])], ci=None)
# The first two collections on the axes are used as the legend handles for
# the two regplot calls above.
plt.legend(ax.collections[0:2:], ['Experiential', 'Word2Vec'], title='Model')
# TODO: 'Hello' looks like a placeholder title - replace before publication.
plt.title('Hello')
# R^2 labels, coloured to match the first and second regplot respectively.
plt.annotate(f'$R^2 = {corr_by_model["Exp65"]**2:.3f}$', [.32, .42],
             fontsize='x-large', color=palette[0])
plt.annotate(f'$R^2 = {corr_by_model["w2v"]**2:.3f}$', [.62, .37],
             fontsize='x-large', color=palette[1])
plt.show()
#plt.savefig()

In [None]:
# Supplemental figure: one reliability-vs-correlation panel per model on a
# 2x3 grid, skipping the first entry of `models`.
plt.figure()
for panel, (name, r) in enumerate(zip(models[1:], corrs[1:]), start=1):
    plt.subplot(2, 3, panel)
    sns.regplot(
        x='reliability',
        y='correlation',
        data=tmp[tmp.modelName.isin([name])],
        ci=None,
    )
    # Same axis limits on every panel.
    plt.xlim([.1, .8])
    plt.ylim([.1, .6])
    plt.title(name)
    plt.xlabel(None)
    plt.ylabel(None)
    plt.annotate(f'$R^2 = {r**2:.3f}$', [.2, .5], fontsize='x-large', color=palette[0])
plt.show()

In [None]:
# Create figure that shows repetition suppression effect
# NOTE: the sheet key is spelled 'repetition_suppresion'; presumably this
# matches the workbook, so it is left untouched.
tmp = dfs['repetition_suppresion']

# Main fig
# Plain assignment: the original `tmp2 = data=tmp[...]` was a chained
# assignment that also created an unused global named `data`.
tmp2 = tmp[['subj', 'pres', 'ses', 'exp48']]
fig = plt.figure()
ax = fig.add_subplot(111)
# Translucent bars per session and presentation, with the individual
# observations drawn on top as dodged points.
sns.barplot(x='ses', y='exp48', hue='pres', data=tmp2, alpha=.4)
sns.stripplot(x='ses', y='exp48', hue='pres', data=tmp2,
              dodge=True, edgecolor='black', linewidth=.5)
# Build the legend by hand from the axes' collections.
plt.legend(ax.collections[:-2:], ['1', '2'], title='Presentation')
plt.xlabel('Session')
plt.ylabel('Correlation with Experiential Model')
plt.title('Presentation order and Correlation with Model')
plt.show()
#plt.savefig('figure_3.tiff',dpi=300,bbox_inches='tight')

In [None]:
# Supplemental figure: the bar + strip plot repeated for each model column
# on a 2x3 grid, with a shared y range and no per-panel legend.
plt.figure()
names = ['exp48', 'sm8', 'glove', 'w2v', 'wordnet', 'cat10']
for idx, name in enumerate(names):
    # Columns needed for this panel.
    panel_data = tmp[['subj', 'pres', 'ses', name]]
    plt.subplot(2, 3, idx + 1)
    sns.barplot(x='ses', y=name, hue='pres', data=panel_data, alpha=.4)
    sns.stripplot(
        x='ses', y=name, hue='pres', data=panel_data,
        dodge=True, edgecolor='black', linewidth=.5,
    )
    plt.legend().remove()
    plt.title(name)
    plt.ylabel(None)
    plt.ylim([-.01, .07])
plt.show()


In [None]:
# This figure shows different presentation combinations.
# `order` fixes the left-to-right order of the 'comb' categories and
# `labels` holds the tick text shown for each of them, position by position.
order = ['icc_246', 'icc_123', 'icc_1234', 'icc_135', 'icc_1235', 'icc_123456']
labels = ['2-4-6', '1-2-3', '1-2-3-4', '1-3-5', '1-2-3-5', '1-2-3-4-5-6']

# Main figure
plt.figure()
tmp = dfs['model_presentation_combinations']
sns.barplot(
    x='comb', y='crea65', color=palette[0],
    data=tmp, alpha=.4, order=order,
)
sns.stripplot(
    x='comb', y='crea65', color=palette[0],
    data=tmp, order=order,
)
plt.xticks(ticks=list(range(6)), labels=labels)
plt.xlabel('Presentation Combination')
plt.ylabel('Correlation with Experiential Model')
plt.show()
#plt.savefig('figure_4.tiff',dpi=300,bbox_inches='tight')

In [None]:
# Supplemental figure: Cronbach's alpha for each presentation combination,
# reusing the `order` and `labels` lists of the main figure.
plt.figure()
tmp = dfs['cron_fig_combination']
sns.barplot(
    x='comb', y='cron', color=palette[0],
    data=tmp, alpha=.4, order=order,
)
plt.xticks(ticks=list(range(6)), labels=labels)
plt.xlabel('Presentation Combination')
plt.ylabel("Cronbach's Alpha")
plt.show()
#plt.savefig('figure_4.tiff',dpi=300,bbox_inches='tight')

In [None]:
# Generates noise ceiling figure
tmp = dfs['noise_ceiling']
tmp2 = dfs['noise_ceil_estimate']

plt.figure()
# Points: 'value' per 'numSubj', coloured by 'measure'.
sns.stripplot(x='numSubj', y='value', hue='measure', data=tmp)
# Line: the 'eln' column plotted against its row position 0..len-1.
row_positions = np.arange(len(tmp2))
sns.lineplot(x=row_positions, y='eln', data=tmp2)
# Horizontal reference line at the 'asymptote' value stored under label 0.
plt.axhline(tmp2.loc[0, 'asymptote'], color='black')
plt.show()
#plt.savefig()