In [1]:
import os
import csv
import pandas as pd
import numpy as np
import scipy.stats as stats

In [2]:
# FUNCTIONS
def compute_CI(conf_level,values):
    """
    Compute 95% confidence interval for the 
    mean subj bias in a given experiment
    """
    X = np.mean(values)
    SD = np.std(values)
    n = len(values)
    t = stats.t.ppf(conf_level,n-1)
    MoE = t * (SD / np.sqrt(n))
    left = X - MoE
    right = X + MoE
    return [X,SD,MoE,(left,right)]

def compute_verb_subj_bias(df):
    """
    Compute the mean subj bias for female and male referents
    """
    verbs = df['verb'].values
    verb_subj_bias = (df['subj_ref_female_subj'].values + df['subj_ref_male_subj'].values)/2 
    data = pd.DataFrame(data={'verb':verbs,'subj_bias':verb_subj_bias})
    return data  

def compute_CI_for_experiments(dirpath,conf_level):
    """
    Compute confidence intervals for experiments
    associated with a specific model
    """
    model_exp_results = []
    for file in sorted(os.listdir(dirpath)):
        filepath = os.path.join(dirpath,file)
        # create dataframe
        df = pd.read_csv(filepath)
        # compute overall subj bias for each verb
        verb_bias = compute_verb_subj_bias(df)
        vals = verb_bias['subj_bias'].values
        # compute 95% confidence interval
        results = compute_CI(0.95,vals)
        # experiment results
        exp_type = os.path.splitext(file)[0]
        exp_results = {exp_type : results}
        model_exp_results.append(exp_results)
    return model_exp_results

def write_results_to_file(filepath,model_results):
    with open(filepath,'w') as csvfile:
        writer = csv.writer(csvfile,delimiter=',')
        writer.writerow(["verb_exp_type","mean","std_dev","margin_error","conf_interval"])  
        for verb_exp in model_results:
            for verb_type, results in verb_exp.items():
                row = [verb_type] + results
                writer.writerow(row)

###  Statistics for GPT-2 Experiments

In [3]:
gpt2_exp_path = "results/gpt2/summary"
gpt2_exp_stats = "results/gpt2_exp_stats.csv"

In [4]:
gpt2_exp_CI_results = compute_CI_for_experiments(gpt2_exp_path,0.95)
write_results_to_file(gpt2_exp_stats,gpt2_exp_CI_results)

In [5]:
gpt2_stats = pd.read_csv(gpt2_exp_stats)
gpt2_stats

Unnamed: 0,verb_exp_type,mean,std_dev,margin_error,conf_interval
0,IC1-connective1,0.630797,0.051071,0.019746,"(0.6110504685718969, 0.650543277449493)"
1,IC1-connective2,0.306063,0.059007,0.022815,"(0.2832488164382517, 0.3288781155444841)"
2,IC1-noprompt,0.59484,0.051457,0.019896,"(0.5749445053188852, 0.6147361512234956)"
3,IC2-connective1,0.419262,0.121214,0.046867,"(0.3723950305918708, 0.46612880149584474)"
4,IC2-connective2,0.470103,0.174534,0.067483,"(0.40262050105412545, 0.5375860979951448)"
5,IC2-noprompt,0.660898,0.046357,0.017924,"(0.6429740410800016, 0.6788213417673872)"
6,motion-connective1,0.649254,0.036205,0.014845,"(0.6344087607571335, 0.6640987105358015)"
7,motion-connective2,0.461234,0.043116,0.017679,"(0.44355489008936005, 0.47891252219973396)"
8,motion-noprompt,0.630973,0.02295,0.00941,"(0.6215626922598222, 0.6403827600053434)"
9,transofposs_imperfective-connective1,0.561594,0.060944,0.024245,"(0.5373488745514946, 0.5858383438654452)"


### Statistics for Transformer-XL Experiments

In [6]:
transfoxl_exp_path = "results/transfoxl/summary"
transfoxl_exp_stats = "results/transfoxl_exp_stats.csv"

In [7]:
transfoxl_exp_CI_results = compute_CI_for_experiments(transfoxl_exp_path,0.95)
write_results_to_file(transfoxl_exp_stats,transfoxl_exp_CI_results)

In [8]:
transfoxl_stats = pd.read_csv('results/transfoxl_exp_stats.csv')
transfoxl_stats

Unnamed: 0,verb_exp_type,mean,std_dev,margin_error,conf_interval
0,IC1-connective1,0.606976,0.073155,0.028285,"(0.578691167051509, 0.6352611693777989)"
1,IC1-connective2,0.429707,0.053115,0.020537,"(0.4091707024877731, 0.45024407855242843)"
2,IC1-noprompt,0.505754,0.02595,0.010033,"(0.49572033317631276, 0.5157869107506348)"
3,IC2-connective1,0.454251,0.042123,0.016287,"(0.4379645146669356, 0.4705375663696321)"
4,IC2-connective2,0.495217,0.136698,0.052854,"(0.442363502105384, 0.5480712468146074)"
5,IC2-noprompt,0.511348,0.04687,0.018122,"(0.49322600914974335, 0.5294703494831738)"
6,motion-connective1,0.613776,0.0377,0.015458,"(0.5983177392891775, 0.6292335401968759)"
7,motion-connective2,0.536081,0.038396,0.015743,"(0.5203373040316047, 0.5518240487132043)"
8,motion-noprompt,0.569162,0.029634,0.012151,"(0.5570116536869492, 0.5813128646216463)"
9,transofposs_imperfective-connective1,0.560968,0.067763,0.026957,"(0.5340103838476119, 0.5879253062683295)"
