## Make tables of results for LaTeX

In [1]:
import warnings
import sys 
# Silence all warnings unless the interpreter was started with explicit
# warning options (sys.warnoptions is non-empty in that case).
if len(sys.warnoptions) == 0:
    warnings.simplefilter("ignore")
import numpy as np 

from scipy import stats
from sklearn import preprocessing
import matplotlib.pyplot as plt 
import seaborn as sns 
import scipy.io
from scipy.io import loadmat
import pandas as pd

import matplotlib.style as style 
from matplotlib.offsetbox import AnchoredText

from scipy.signal import correlate, correlation_lags

from tqdm import tqdm
from numpy.random import RandomState

import imp
import time
from timeit import default_timer as timer
import matplotlib.patches as patches
import pickle as pickle
import os
import argparse

#get_ipython().run_line_magic('matplotlib', 'inline')
#get_ipython().run_line_magic('autosave', '5')
# Seaborn theme: white grid, poster-sized context, thicker default lines.
sns.set(style='whitegrid', context='poster', rc={'lines.linewidth': 2.5})
import matplotlib as mpl
# Font type 42 makes the PDF/PS backends embed TrueType fonts, which keeps
# figure text editable in vector-graphics tools.
mpl.rcParams.update({'pdf.fonttype': 42, 'ps.fonttype': 42})
import matplotlib.patches as patches
#mpl.use('Agg')


#%matplotlib inline 
#%autosave 5

from context_helper import folders

#%matplotlib inline 
#%autosave 5

# Run configuration. `model` and `save_plot` are plain flags; the directories
# are derived from the project root looked up in `folders`.
# NOTE(review): 'tiger' is presumably a machine/cluster key - confirm in context_helper.
model = 'GPT2'
save_plot = 'Y'

home_dir = folders['tiger']
results_dir = home_dir + 'results/revision1/'
log_dir = home_dir + 'logs/'


In [2]:
# Drift values to tabulate.
drift_val = [3, 5, 10]

# Analysis whose permutation-regression outputs are tabulated.
# Alternatives that were used for other tables:
#   'EntropyRatio_Average_Reynolds'
#   'GPT2Embed-cosineRatio_Average_Reynolds'
#   'SurpriseRatio_Average_Reynolds'
#   'GPT2Embed-cosine'
analyses = ['KLDivergenceRatio_Average_Reynolds']

# Regression window sizes.
nfeatures = [20, 50, 100, 150, 200]

# Empty result tables: the overall one plus one per story.
df, df_monkey, df_pieman, df_tunnel = (pd.DataFrame() for _ in range(4))


In [3]:
# Store the regression window sizes as the 'Regression Window' column of
# every result table (overall and per story).
for _table in (df, df_monkey, df_pieman, df_tunnel):
    _table['Regression Window'] = nfeatures

## Transient

In [5]:
# Build the transient-analysis tables. For each drift value, every regression
# window contributes one row with the observed correlation and the percentile
# of that correlation within the permutation distribution, both averaged over
# stories (df) and per story (df_monkey / df_pieman / df_tunnel).
# NOTE: the column names carry no analysis label, so with more than one entry
# in `analyses` each analysis overwrites the columns written by the previous one.
for analysis in analyses:
    for drift in drift_val:
        correl_name = 'Correlation_drift%i' % drift
        percentile_name = 'Percentile_drift%i' % drift

        # One row per regression window; the 3 columns of the *_story_all arrays
        # are the stories. The "pval" arrays hold percentile scores, not p-values.
        pval_all = np.zeros(len(nfeatures))
        pval_story_all = np.empty([len(nfeatures), 3])
        corr_score_all = np.zeros(len(nfeatures))
        corr_score_story_all = np.empty([len(nfeatures), 3])

        for counter, feat in enumerate(nfeatures):
            # The newer analyses were saved with the '_noZscore' suffix; older
            # outputs used the same file name without it.
            fname = results_dir + 'perm_regression_outputs_%s_drift%i_%i_Lasso_noZscore.npz' % (analysis, drift, feat)

            # np.load returns an NpzFile that keeps the archive open; read what is
            # needed inside a `with` so the handle is released on every iteration.
            with np.load(fname) as betas_perm_file:
                # The betas are not used for the tables; they are still read so the
                # names stay defined for any later cell that inspects them.
                betas_perm_all = betas_perm_file['betas_perm_all']
                betas = betas_perm_file['betas']
                corr_perm_all = betas_perm_file['corr_perm_all']
                corr_score = betas_perm_file['corr_score']

            # Story-averaged result: percentile of the mean observed correlation
            # within the per-permutation mean correlations.
            pval_all[counter] = np.round(
                stats.percentileofscore(np.mean(corr_perm_all, axis=1), np.mean(corr_score)), 1)
            corr_score_all[counter] = np.round(np.mean(corr_score), 2)
            corr_score_story_all[counter] = corr_score

            # Same percentile, computed separately for each story.
            for story in range(3):
                pval_story_all[counter, story] = np.round(
                    stats.percentileofscore(corr_perm_all[:, story], corr_score[story]), 1)

        df[percentile_name] = pval_all
        df[correl_name] = corr_score_all

        df_monkey[percentile_name] = pval_story_all[:, 0]
        df_pieman[percentile_name] = pval_story_all[:, 1]
        df_tunnel[percentile_name] = pval_story_all[:, 2]

        df_monkey[correl_name] = np.round(corr_score_story_all[:, 0], 2)
        df_pieman[correl_name] = np.round(corr_score_story_all[:, 1], 2)
        df_tunnel[correl_name] = np.round(corr_score_story_all[:, 2], 2)

# Keep handles on the transient tables under dedicated names. These are plain
# references (no copy) to the frames filled above.
df_transient = df
df_transient_monkey = df_monkey
df_transient_pieman = df_pieman
df_transient_tunnel = df_tunnel
            

In [6]:
# Inspect the raw (unrounded) per-story correlations left over from the loop
# above. The array is re-allocated for every drift value, so it only holds the
# values of the last drift that was processed.
corr_score_story_all

array([[0.04971239, 0.06683587, 0.01535882],
       [0.0722452 , 0.07909474, 0.03002769],
       [0.08921811, 0.19142437, 0.0269756 ],
       [0.08659967, 0.22277218, 0.05048875],
       [0.08960816, 0.19856754, 0.0846642 ]])

In [7]:
# Presentation order for the transient tables: window size first, then a
# (correlation, percentile) column pair for each drift value.
column_order = ['Regression Window'] + [
    '%s_drift%i' % (measure, drift_value)
    for drift_value in (3, 5, 10)
    for measure in ('Correlation', 'Percentile')
]

In [8]:
# LaTeX tabular for the per-story table held in df_monkey, followed by the
# analysis label so the output can be matched to its analysis.
print(df_monkey.loc[:, column_order].to_latex(index=False), analyses, sep='\n')

\begin{tabular}{rrrrrrr}
\toprule
 Regression Window &  Correlation\_drift3 &  Percentile\_drift3 &  Correlation\_drift5 &  Percentile\_drift5 &  Correlation\_drift10 &  Percentile\_drift10 \\
\midrule
                20 &                0.07 &               97.9 &                0.07 &               98.0 &                 0.05 &                95.4 \\
                50 &                0.07 &               98.0 &                0.07 &               98.0 &                 0.07 &                97.9 \\
               100 &                0.10 &              100.0 &                0.10 &               99.9 &                 0.09 &                99.8 \\
               150 &                0.08 &               98.7 &                0.08 &               98.9 &                 0.09 &                99.2 \\
               200 &                0.08 &               98.9 &                0.09 &               99.2 &                 0.09 &                99.2 \\
\bottomrule
\end{tabular}

['KLDi

In [9]:
# LaTeX tabular for the per-story table held in df_pieman, followed by the
# analysis label so the output can be matched to its analysis.
print(df_pieman.loc[:, column_order].to_latex(index=False), analyses, sep='\n')

\begin{tabular}{rrrrrrr}
\toprule
 Regression Window &  Correlation\_drift3 &  Percentile\_drift3 &  Correlation\_drift5 &  Percentile\_drift5 &  Correlation\_drift10 &  Percentile\_drift10 \\
\midrule
                20 &                0.05 &               78.6 &                0.05 &               78.6 &                 0.07 &                85.0 \\
                50 &                0.09 &               89.8 &                0.08 &               88.0 &                 0.08 &                87.0 \\
               100 &                0.20 &              100.0 &                0.20 &              100.0 &                 0.19 &               100.0 \\
               150 &                0.22 &               99.8 &                0.22 &              100.0 &                 0.22 &               100.0 \\
               200 &                0.19 &               98.8 &                0.19 &               99.2 &                 0.20 &                99.8 \\
\bottomrule
\end{tabular}

['KLDi

In [10]:
# LaTeX tabular for the per-story table held in df_tunnel, followed by the
# analysis label so the output can be matched to its analysis.
print(df_tunnel.loc[:, column_order].to_latex(index=False), analyses, sep='\n')

\begin{tabular}{rrrrrrr}
\toprule
 Regression Window &  Correlation\_drift3 &  Percentile\_drift3 &  Correlation\_drift5 &  Percentile\_drift5 &  Correlation\_drift10 &  Percentile\_drift10 \\
\midrule
                20 &                0.04 &               89.9 &                0.03 &               87.8 &                 0.02 &                72.0 \\
                50 &                0.04 &               86.3 &                0.03 &               85.3 &                 0.03 &                84.9 \\
               100 &                0.04 &               87.0 &                0.03 &               81.4 &                 0.03 &                76.6 \\
               150 &                0.05 &               88.4 &                0.05 &               87.4 &                 0.05 &                84.7 \\
               200 &                0.08 &               95.2 &                0.09 &               95.8 &                 0.08 &                95.6 \\
\bottomrule
\end{tabular}

['KLDi

In [11]:
# Display the story-averaged transient table with its columns in presentation order.
df[column_order]

Unnamed: 0,Regression Window,Correlation_drift3,Percentile_drift3,Correlation_drift5,Percentile_drift5,Correlation_drift10,Percentile_drift10
0,20,0.05,94.7,0.05,94.2,0.04,91.2
1,50,0.07,97.6,0.06,96.8,0.06,96.4
2,100,0.11,100.0,0.11,99.8,0.1,99.2
3,150,0.12,99.9,0.12,99.9,0.12,99.9
4,200,0.12,99.2,0.12,99.4,0.12,99.6


In [7]:
# LaTeX tabular for the story-averaged table, followed by the analysis label.
# NOTE(review): the saved output of this cell is labelled ['Entr... and its
# execution count is out of sequence, so it appears to be left over from a run
# with a different `analyses` value - re-run before using the numbers.
print(df.loc[:, column_order].to_latex(index=False), analyses, sep='\n')

\begin{tabular}{rrrrrrr}
\toprule
 Regression Window &  Correlation\_drift3 &  Percentile\_drift3 &  Correlation\_drift5 &  Percentile\_drift5 &  Correlation\_drift10 &  Percentile\_drift10 \\
\midrule
                20 &               -0.02 &               27.8 &               -0.01 &               37.0 &                 0.00 &                49.0 \\
                50 &               -0.06 &                5.3 &               -0.05 &                9.2 &                -0.04 &                14.2 \\
               100 &                0.00 &               57.2 &                0.01 &               62.7 &                 0.02 &                68.6 \\
               150 &                0.01 &               56.6 &                0.01 &               61.7 &                 0.02 &                68.4 \\
               200 &               -0.02 &               37.8 &               -0.01 &               47.5 &                 0.02 &                68.8 \\
\bottomrule
\end{tabular}

['Entr

In [13]:
# LaTeX tabular for the story-averaged table, followed by the analysis label.
# NOTE(review): the saved output of this cell is labelled ['GPT2... and its
# execution count is out of sequence, so it appears to be left over from a run
# with a different `analyses` value - re-run before using the numbers.
print(df.loc[:, column_order].to_latex(index=False), analyses, sep='\n')

\begin{tabular}{rrrrrrr}
\toprule
 Regression Window &  Correlation\_drift3 &  Percentile\_drift3 &  Correlation\_drift5 &  Percentile\_drift5 &  Correlation\_drift10 &  Percentile\_drift10 \\
\midrule
                20 &                0.02 &               70.5 &                0.02 &               76.7 &                 0.03 &                83.2 \\
                50 &                0.05 &               91.3 &                0.04 &               86.9 &                 0.03 &                82.6 \\
               100 &                0.06 &               92.0 &                0.05 &               91.1 &                 0.04 &                85.2 \\
               150 &                0.00 &               48.4 &               -0.00 &               45.0 &                -0.01 &                37.0 \\
               200 &                0.02 &               73.7 &                0.03 &               77.2 &                 0.03 &                76.4 \\
\bottomrule
\end{tabular}

['GPT2

## Non-Transient

In [12]:
# Analysis to tabulate for the non-transient case. Alternatives that were used
# for other tables:
#   ['KLDivergence', 'GPT2Embed-cosine', 'Entropy']
#   ['Surprise']
#   ['GPT2Embed-cosine']
analyses = ['KLDivergence']

# Regression window sizes.
nfeatures = [20, 50, 100, 150, 200]

# Start from fresh tables so nothing from the transient section carries over.
df = pd.DataFrame()
df_monkey = pd.DataFrame()
df_pieman = pd.DataFrame()
df_tunnel = pd.DataFrame()

# Every table gets the window sizes, matching the transient setup. Previously
# only `df` received this column, so the per-story tables could not be selected
# with a column order that includes 'Regression Window'.
df['Regression Window'] = nfeatures
df_monkey['Regression Window'] = nfeatures
df_pieman['Regression Window'] = nfeatures
df_tunnel['Regression Window'] = nfeatures

In [13]:
# Build the non-transient tables: every regression window contributes one row
# with the observed correlation and the percentile of that correlation within
# the permutation distribution, both averaged over stories (df) and per story
# (df_monkey / df_pieman / df_tunnel).
# NOTE: the column names carry no analysis label, so with more than one entry
# in `analyses` each analysis overwrites the columns written by the previous one.
for analysis in analyses:

    correl_name = 'Correlation'
    percentile_name = 'Percentile'

    # One row per regression window; the 3 columns of the *_story_all arrays
    # are the stories. The "pval" arrays hold percentile scores, not p-values.
    pval_all = np.zeros(len(nfeatures))
    pval_story_all = np.empty([len(nfeatures), 3])
    corr_score_all = np.zeros(len(nfeatures))
    corr_score_story_all = np.empty([len(nfeatures), 3])

    for counter, feat in enumerate(nfeatures):
        # The non-transient analyses were saved with the '_yesZscore' suffix;
        # older outputs used the same file name without it.
        fname = results_dir + 'perm_regression_outputs_%s_%i_Lasso_yesZscore.npz' % (analysis, feat)

        # np.load returns an NpzFile that keeps the archive open; read what is
        # needed inside a `with` so the handle is released on every iteration.
        with np.load(fname) as betas_perm_file:
            # The betas are not used for the tables; they are still read so the
            # names stay defined for any later cell that inspects them.
            betas_perm_all = betas_perm_file['betas_perm_all']
            betas = betas_perm_file['betas']
            corr_perm_all = betas_perm_file['corr_perm_all']
            corr_score = betas_perm_file['corr_score']

        # Story-averaged result: percentile of the mean observed correlation
        # within the per-permutation mean correlations.
        pval_all[counter] = np.round(
            stats.percentileofscore(np.mean(corr_perm_all, axis=1), np.mean(corr_score)), 1)
        corr_score_all[counter] = np.round(np.mean(corr_score), 2)
        corr_score_story_all[counter] = corr_score

        # Same percentile, computed separately for each story.
        for story in range(3):
            pval_story_all[counter, story] = np.round(
                stats.percentileofscore(corr_perm_all[:, story], corr_score[story]), 1)

    df[percentile_name] = pval_all
    df[correl_name] = corr_score_all

    df_monkey[percentile_name] = pval_story_all[:, 0]
    df_pieman[percentile_name] = pval_story_all[:, 1]
    df_tunnel[percentile_name] = pval_story_all[:, 2]

    df_monkey[correl_name] = np.round(corr_score_story_all[:, 0], 2)
    df_pieman[correl_name] = np.round(corr_score_story_all[:, 1], 2)
    df_tunnel[correl_name] = np.round(corr_score_story_all[:, 2], 2)



In [14]:
# Presentation order for the non-transient table.
column_order = [
    'Regression Window',
    'Correlation',
    'Percentile',
]

In [15]:
# LaTeX tabular for the story-averaged non-transient table, followed by the
# analysis label so the output can be matched to its analysis.
print(df.loc[:, column_order].to_latex(index=False), analyses, sep='\n')

\begin{tabular}{rrr}
\toprule
 Regression Window &  Correlation &  Percentile \\
\midrule
                20 &         0.04 &        77.8 \\
                50 &         0.07 &        90.1 \\
               100 &         0.11 &        96.8 \\
               150 &         0.11 &        97.8 \\
               200 &         0.12 &        98.4 \\
\bottomrule
\end{tabular}

['KLDivergence']


# Combine transient and non-transient tables into one

In [16]:
# Columns taken from each table when combining them: the non-transient table
# contributes only its two measures, the transient table also supplies the
# window sizes plus one measure pair per drift value.
column_order = ['Correlation', 'Percentile']
column_order_transient = ['Regression Window'] + [
    '%s_drift%i' % (measure, drift_value)
    for drift_value in (3, 5, 10)
    for measure in ('Correlation', 'Percentile')
]

In [17]:
# Place the selected transient columns and the non-transient columns side by
# side; rows are matched on the frames' index.
df_combined_corr = pd.concat(
    (df_transient.loc[:, column_order_transient], df.loc[:, column_order]),
    axis='columns',
)

In [18]:
# Display the combined table: transient drift columns followed by the
# non-transient 'Correlation' / 'Percentile' columns.
df_combined_corr

Unnamed: 0,Regression Window,Correlation_drift3,Percentile_drift3,Correlation_drift5,Percentile_drift5,Correlation_drift10,Percentile_drift10,Correlation,Percentile
0,20,0.05,94.7,0.05,94.2,0.04,91.2,0.04,77.8
1,50,0.07,97.6,0.06,96.8,0.06,96.4,0.07,90.1
2,100,0.11,100.0,0.11,99.8,0.1,99.2,0.11,96.8
3,150,0.12,99.9,0.12,99.9,0.12,99.9,0.11,97.8
4,200,0.12,99.2,0.12,99.4,0.12,99.6,0.12,98.4


In [22]:
# LaTeX tabular for the combined table, followed by the analysis label.
print(df_combined_corr.to_latex(index=False), analyses, sep='\n')

\begin{tabular}{rrrrrrrrr}
\toprule
 Regression Window &  Correlation\_drift3 &  Percentile\_drift3 &  Correlation\_drift5 &  Percentile\_drift5 &  Correlation\_drift10 &  Percentile\_drift10 &  Correlation &  Percentile \\
\midrule
                20 &                0.05 &               94.7 &                0.05 &               94.2 &                 0.04 &                91.2 &         0.04 &        77.8 \\
                50 &                0.07 &               97.6 &                0.06 &               96.8 &                 0.06 &                96.4 &         0.07 &        90.1 \\
               100 &                0.11 &              100.0 &                0.11 &               99.8 &                 0.10 &                99.2 &         0.11 &        96.8 \\
               150 &                0.12 &               99.9 &                0.12 &               99.9 &                 0.12 &                99.9 &         0.11 &        97.8 \\
               200 &                0.1

In [13]:
# LaTeX tabular for the combined table, followed by the analysis label.
# NOTE(review): the saved output of this cell does not match the
# df_combined_corr display earlier in the notebook and its execution count is
# out of order; presumably it was kept from a run with another analysis -
# confirm and re-run before using the numbers.
print(df_combined_corr.to_latex(index=False), analyses, sep='\n')

\begin{tabular}{rrrrrrrrr}
\toprule
 Regression Window &  Correlation\_drift3 &  Percentile\_drift3 &  Correlation\_drift5 &  Percentile\_drift5 &  Correlation\_drift10 &  Percentile\_drift10 &  Correlation &  Percentile \\
\midrule
                20 &                0.01 &               59.8 &                0.01 &               65.2 &                 0.02 &                70.3 &        -0.06 &        13.9 \\
                50 &                0.03 &               84.5 &                0.03 &               77.9 &                 0.02 &                72.0 &        -0.01 &        40.4 \\
               100 &                0.04 &               90.6 &                0.04 &               88.2 &                 0.03 &                78.6 &        -0.01 &        40.6 \\
               150 &               -0.01 &               42.7 &               -0.01 &               40.9 &                -0.02 &                30.6 &        -0.04 &        21.2 \\
               200 &                0.0

In [40]:
# LaTeX tabular for the combined table, followed by the analysis label.
# NOTE(review): the saved output of this cell does not match the
# df_combined_corr display earlier in the notebook and its execution count is
# out of order; presumably it was kept from a run with another analysis -
# confirm and re-run before using the numbers.
print(df_combined_corr.to_latex(index=False), analyses, sep='\n')

\begin{tabular}{rrrrrrrrr}
\toprule
 Regression Window &  Correlation\_drift3 &  Percentile\_drift3 &  Correlation\_drift5 &  Percentile\_drift5 &  Correlation\_drift10 &  Percentile\_drift10 &  Correlation &  Percentile \\
\midrule
                20 &               -0.03 &               17.4 &               -0.02 &               21.5 &                -0.02 &                22.5 &        -0.06 &         6.5 \\
                50 &               -0.08 &                1.0 &               -0.07 &                1.0 &                -0.06 &                 2.0 &        -0.08 &         2.8 \\
               100 &               -0.04 &               12.9 &               -0.04 &               12.5 &                -0.04 &                10.6 &        -0.06 &         6.0 \\
               150 &               -0.00 &               41.5 &               -0.01 &               37.6 &                -0.02 &                25.6 &        -0.01 &        34.0 \\
               200 &                0.0

In [51]:
# LaTeX tabular for the combined table, followed by the analysis label.
# NOTE(review): the saved output of this cell does not match the
# df_combined_corr display earlier in the notebook and its execution count is
# out of order; presumably it was kept from a run with another analysis -
# confirm and re-run before using the numbers.
print(df_combined_corr.to_latex(index=False), analyses, sep='\n')

\begin{tabular}{rrrrrrrrr}
\toprule
 Regression Window &  Correlation\_drift3 &  Percentile\_drift3 &  Correlation\_drift5 &  Percentile\_drift5 &  Correlation\_drift10 &  Percentile\_drift10 &  Correlation &  Percentile \\
\midrule
                20 &               -0.02 &               27.3 &               -0.00 &               42.2 &                 0.01 &                56.4 &        -0.04 &        18.4 \\
                50 &               -0.04 &               12.2 &               -0.02 &               22.7 &                -0.01 &                33.6 &        -0.07 &         6.8 \\
               100 &                0.03 &               81.4 &                0.03 &               82.4 &                 0.03 &                84.6 &        -0.01 &        43.0 \\
               150 &                0.04 &               82.3 &                0.04 &               82.7 &                 0.04 &                82.7 &         0.00 &        55.3 \\
               200 &               -0.0