In [2]:
from utils.load_results import *
from utils.plot_helpers import *
from utils.analysis_from_interaction import *
from utils.concept_reps import *
import torch
import pandas as pd
import seaborn as sns
import os
from matplotlib import pyplot as plt
plt.style.use('default')

# Gather dataframes for analysis of pragmatic mechanisms
This notebook is used to gather data from the different conditions and save them as csv files that can be used for a Bayesian analysis in R.

In [3]:
datasets = ['(3,4)', '(3,8)', '(3,16)', '(4,4)', '(4,8)', '(5,4)']
n_values = [4, 8, 16, 4, 8, 4]
n_attributes = [3, 3, 3, 4, 4, 5]
vocab_sizes = [5, 9, 17, 5, 9, 5]
n_epochs = 300
n_runs = 5
n_datasets = len(datasets)
paths = ['results/' + d + '_game_size_10_vsf_0' for d in datasets]

In [4]:
length_cost = True # whether length_cost was applied
early_stopping = True
sampled_context = False # actually true, but this would navigate to a different folder
hierarchical = False
shared_context = True
test_interactions = False # whether scores should be calculated on test interactions
test_mode = 'test'

## Experiment 1

In [5]:
setting_1 = 'length_cost/context_unaware/shared_context'
setting_2 = 'length_cost/context_aware/shared_context'
settings = [setting_1, setting_2]
condition_1 = 'context_unaware'
condition_2 = 'context_aware'
conditions = [condition_1, condition_2]

In [6]:
# get n_epochs if early stopping
n_epochs_all_data = {}
for i, setting in enumerate(settings):
    n_epochs_all_data[conditions[i]] = []
    for d in range(len(datasets)):
        
        epochs = []
        
        for run in range(n_runs):
    
            path_to_run = paths[d] + '/' + setting +'/' + str(run) + '/' 
            with open(os.path.join(path_to_run, 'loss_and_metrics.pkl'), 'rb') as input_file:
                data = pickle.load(input_file)
                final_epoch = max(data['loss_train'].keys())
                epochs.append(final_epoch)
                
        n_epochs_all_data[conditions[i]].append(epochs)

### Training epochs

In [93]:
# gather epochs
epochs_training = {"condition": [], "dataset": [], "run": [], "epoch": []}
for i, setting in enumerate(settings):
    for d, dataset in enumerate(datasets):
        for run in range(n_runs):
            epochs_training['condition'].append(conditions[i])
            epochs_training['dataset'].append(d)
            epochs_training['run'].append(run)
            epochs_training['epoch'].append(n_epochs_all_data[conditions[i]][d][run])
df = pd.DataFrame(epochs_training)
df.to_csv("BayesianAnalysis/exp1/epochs_training.csv", index=False)
df

Unnamed: 0,condition,dataset,run,epoch
0,context_unaware,0,0,69
1,context_unaware,0,1,63
2,context_unaware,0,2,60
3,context_unaware,0,3,62
4,context_unaware,0,4,52
5,context_unaware,1,0,56
6,context_unaware,1,1,62
7,context_unaware,1,2,69
8,context_unaware,1,3,63
9,context_unaware,1,4,74


### Accuracies

In [52]:
test_interactions = False

In [91]:
# gather final accuracies
accuracies = {"condition": [], "acc_type": [], "dataset": [], "run": [], "accuracy": []}
for i, setting in enumerate(settings):
    if conditions[i] == 'context_unaware':
        context_unaware = True
    else:
        context_unaware = False
    all_accuracies = load_accuracies(paths, n_runs=n_runs, n_epochs=300, val_steps=1, zero_shot=False, context_unaware=context_unaware, length_cost=length_cost, early_stopping=early_stopping, sampled_context=sampled_context, hierarchical=hierarchical, shared_context=shared_context)
    for d, dataset in enumerate(datasets):
        for run in range(n_runs):
            for acc_type in ['train', 'val', 'test']:
                accuracies['condition'].append(conditions[i])
                accuracies['dataset'].append(dataset)
                accuracies['run'].append(run)
                accuracies['acc_type'].append(acc_type)
                if conditions[i] == 'context_unaware':
                    if acc_type == 'train':
                        accuracies['accuracy'].append(all_accuracies['cu_train_acc'][d][run][-1])
                    elif acc_type == 'val':
                        accuracies['accuracy'].append(all_accuracies['cu_val_acc'][d][run][-1])
                    else:
                        accuracies['accuracy'].append(all_accuracies['cu_test_acc'][d][run])
                else:
                    if acc_type == 'train':
                        accuracies['accuracy'].append(all_accuracies['train_acc'][d][run][-1])
                    elif acc_type == 'val':
                        accuracies['accuracy'].append(all_accuracies['val_acc'][d][run][-1])
                    else:
                        accuracies['accuracy'].append(all_accuracies['test_acc'][d][run])
df = pd.DataFrame(accuracies)
df.to_csv("BayesianAnalysis/exp1/accuracies.csv", index=False)
df

  result_dict[key] = np.array(result_dict[key])


Unnamed: 0,condition,acc_type,dataset,run,accuracy
0,context_unaware,train,"(3,4)",0,0.960583
1,context_unaware,val,"(3,4)",0,0.898021
2,context_unaware,test,"(3,4)",0,0.808327
3,context_unaware,train,"(3,4)",1,0.990464
4,context_unaware,val,"(3,4)",1,0.935794
...,...,...,...,...,...
175,context_aware,val,"(5,4)",3,0.990262
176,context_aware,test,"(5,4)",3,0.989809
177,context_aware,train,"(5,4)",4,0.977849
178,context_aware,val,"(5,4)",4,0.965690


### Message length

In [92]:
# gather message lengths
ml = {"condition": [], "dataset": [], "run": [], "fixed": [], "ml": []}
for i, setting in enumerate(settings):
    for d, dataset in enumerate(datasets):
        for run in range(n_runs):
            ml_hierarchical = pickle.load(open(paths[d] + '/' + setting + '/' + str(run) + 
                                           '/message_length_hierarchical.pkl', 'rb'))
            for level in range(5):
                ml['condition'].append(conditions[i])
                ml['dataset'].append(dataset)
                ml['run'].append(run)
                ml['fixed'].append(level)
                try: 
                    ml['ml'].append(ml_hierarchical[level])
                except: 
                    ml['ml'].append(np.NaN)
df = pd.DataFrame(ml)
df.to_csv("BayesianAnalysis/exp1/message_length.csv", index=False)
df

Unnamed: 0,condition,dataset,run,fixed,ml
0,context_unaware,"(3,4)",0,0,4.443
1,context_unaware,"(3,4)",0,1,4.236
2,context_unaware,"(3,4)",0,2,3.828
3,context_unaware,"(3,4)",0,3,
4,context_unaware,"(3,4)",0,4,
...,...,...,...,...,...
295,context_aware,"(5,4)",4,0,4.434
296,context_aware,"(5,4)",4,1,4.314
297,context_aware,"(5,4)",4,2,4.304
298,context_aware,"(5,4)",4,3,4.316


### Entropy scores

In [98]:
# gather entropy scores (means)
entropies = {"condition": [], "score": [], "dataset": [], "run": [], "entropy": []}
for i, setting in enumerate(settings):
    if conditions[i] == 'context_unaware':
        context_unaware = True
    else:
        context_unaware = False
    entropy_scores = load_entropies(paths, context_unaware=context_unaware, length_cost=length_cost, sampled_context=sampled_context, test_interactions=test_interactions, test_mode=test_mode, hierarchical=hierarchical, shared_context=shared_context, verbose=False)
    for d, dataset in enumerate(datasets):
        for run in range(n_runs):
            for score in ['NMI', 'effectiveness', 'consistency']:
                entropies['condition'].append(conditions[i])
                entropies['dataset'].append(dataset)
                entropies['run'].append(run)
                entropies['score'].append(score)
                entropies['entropy'].append(entropy_scores[score][d][run])
df = pd.DataFrame(entropies)
df.to_csv("BayesianAnalysis/exp1/entropies.csv", index=False)
df

  result_dict[key] = np.array(result_dict[key])


Unnamed: 0,condition,score,dataset,run,entropy
0,context_unaware,NMI,"(3,4)",0,0.790290
1,context_unaware,effectiveness,"(3,4)",0,0.814826
2,context_unaware,consistency,"(3,4)",0,0.767188
3,context_unaware,NMI,"(3,4)",1,0.831046
4,context_unaware,effectiveness,"(3,4)",1,0.963844
...,...,...,...,...,...
175,context_aware,effectiveness,"(5,4)",3,0.405036
176,context_aware,consistency,"(5,4)",3,0.686200
177,context_aware,NMI,"(5,4)",4,0.525295
178,context_aware,effectiveness,"(5,4)",4,0.423973


In [113]:
# gather entropy scores (hierarchical)
entropies = {"condition": [], "score": [], "dataset": [], "run": [], "fixed": [], "entropy": []}
for i, setting in enumerate(settings):
    if conditions[i] == 'context_unaware':
        context_unaware = True
    else:
        context_unaware = False
    entropy_scores = load_entropies(paths, context_unaware=context_unaware, length_cost=length_cost, sampled_context=sampled_context, test_interactions=test_interactions, test_mode=test_mode, hierarchical=hierarchical, shared_context=shared_context, verbose=False)
    for d, dataset in enumerate(datasets):
        for run in range(n_runs):
            for score in ['NMI_hierarchical', 'effectiveness_hierarchical', 'consistency_hierarchical']:
                for level in range(5):
                    entropies['condition'].append(conditions[i])
                    entropies['dataset'].append(dataset)
                    entropies['run'].append(run)
                    entropies['score'].append(score)
                    entropies['fixed'].append(level)
                    try:
                        entropies['entropy'].append(entropy_scores[score][d][run][level])
                    except:
                        entropies['entropy'].append(np.NaN)
df = pd.DataFrame(entropies)
df.to_csv("BayesianAnalysis/exp1/entropies_hierarchical.csv", index=False)
df

  result_dict[key] = np.array(result_dict[key])


Unnamed: 0,condition,score,dataset,run,fixed,entropy
0,context_unaware,NMI_hierarchical,"(3,4)",0,0,0.779790
1,context_unaware,NMI_hierarchical,"(3,4)",0,1,0.798455
2,context_unaware,NMI_hierarchical,"(3,4)",0,2,0.831628
3,context_unaware,NMI_hierarchical,"(3,4)",0,3,
4,context_unaware,NMI_hierarchical,"(3,4)",0,4,
...,...,...,...,...,...,...
895,context_aware,consistency_hierarchical,"(5,4)",4,0,0.764021
896,context_aware,consistency_hierarchical,"(5,4)",4,1,0.725995
897,context_aware,consistency_hierarchical,"(5,4)",4,2,0.699520
898,context_aware,consistency_hierarchical,"(5,4)",4,3,0.682845


## Experiment 2

In [7]:
test_interactions = True # whether scores should be calculated on test interactions
test_mode = 'test'
rsa = True
rsa_test = 'testtrainmixed'
rsa_test_int = 'testtrainmixed'
condition_1 = 'context_unaware'
condition_2 = 'context_unaware_RSA'
condition_3 = 'context_aware'
condition_4 = 'context_aware_RSA'
conditions = [condition_1, condition_2, condition_3, condition_4]

### Accuracies

In [28]:
# gather final accuracies
accuracies = {"condition": [], "rsa": [], "dataset": [], "run": [], "accuracy": []}
for i, setting in enumerate(settings):
    if 'context_unaware' in setting:
        context_unaware = True
    else:
        context_unaware = False
    all_accuracies = load_accuracies(paths, n_runs=n_runs, n_epochs=0, val_steps=1, zero_shot=False, context_unaware=context_unaware, length_cost=length_cost, early_stopping=early_stopping, hierarchical=hierarchical, shared_context=shared_context, rsa=rsa, rsa_test='test')
    for d, dataset in enumerate(datasets):
        for run in range(n_runs):
            for acc_type in ['test', 'rsa']:
                accuracies['condition'].append(conditions[::2][i])
                accuracies['dataset'].append(dataset)
                accuracies['run'].append(run)
                accuracies['rsa'].append(acc_type)
                if acc_type == 'test':
                    accuracies['accuracy'].append(all_accuracies['final_test_acc'][d][run])
                else:
                    accuracies['accuracy'].append(all_accuracies['rsa_test_acc'][d][run])
df = pd.DataFrame(accuracies)
df.to_csv("BayesianAnalysis/exp2/accuracies.csv", index=False)
df

Unnamed: 0,condition,rsa,dataset,run,accuracy
0,context_unaware,test,"(3,4)",0,0.801331
1,context_unaware,rsa,"(3,4)",0,0.822188
2,context_unaware,test,"(3,4)",1,0.818013
3,context_unaware,rsa,"(3,4)",1,0.938505
4,context_unaware,test,"(3,4)",2,0.765759
...,...,...,...,...,...
115,context_aware,rsa,"(5,4)",2,0.982950
116,context_aware,test,"(5,4)",3,0.999955
117,context_aware,rsa,"(5,4)",3,0.996160
118,context_aware,test,"(5,4)",4,0.973550


### Message length

In [23]:
# gather message lengths
ml = {"condition": [], "rsa": [], "dataset": [], "run": [], "fixed": [], "ml": []}
for i, setting in enumerate(settings):
    if 'context_unaware' in setting:
        context_unaware = True
    else:
        context_unaware = False
    for d, dataset in enumerate(datasets):
        for run in range(n_runs):
            for rsa in ['test', 'rsa']:
                if rsa == 'test':
                    ml_hierarchical = pickle.load(open(paths[d] + '/' + setting + '/' + str(run) + 
                                           '/message_length_hierarchical_' + test_mode + '.pkl', 'rb'))
                else:
                    ml_hierarchical = pickle.load(open(paths[d] + '/' + setting + '/' + str(run) + 
                                               '/message_length_hierarchical' + '_rsa_' + rsa_test_int + '.pkl', 'rb'))
                for level in range(5):
                    ml['condition'].append(conditions[::2][i])
                    ml['dataset'].append(dataset)
                    ml['run'].append(run)
                    ml['rsa'].append(rsa)
                    ml['fixed'].append(level)
                    try:
                        ml['ml'].append(ml_hierarchical[level])
                    except:
                        ml['ml'].append(np.NaN)
df = pd.DataFrame(ml)
df.to_csv("BayesianAnalysis/exp2/message_length.csv", index=False)
df

Unnamed: 0,condition,rsa,dataset,run,fixed,ml
0,context_unaware,test,"(3,4)",0,0,3.650
1,context_unaware,test,"(3,4)",0,1,4.100
2,context_unaware,test,"(3,4)",0,2,5.688
3,context_unaware,test,"(3,4)",0,3,
4,context_unaware,test,"(3,4)",0,4,
...,...,...,...,...,...,...
595,context_aware,rsa,"(5,4)",4,0,
596,context_aware,rsa,"(5,4)",4,1,
597,context_aware,rsa,"(5,4)",4,2,3.225
598,context_aware,rsa,"(5,4)",4,3,2.900


### Lexicon size and informativeness

In [24]:
# gather lexicon sizes
distance = 'manhattan'
lex = {"condition": [], "rsa": [], "dataset": [], "run": [], "lexsize": [], "lexinfo": []}
for i, setting in enumerate(settings):
    if 'context_unaware' in setting:
        context_unaware = True
    else:
        context_unaware = False
    for d, dataset in enumerate(datasets):
        for run in range(n_runs):
            for rsa in ['test', 'rsa']:
                if rsa == 'test':
                    lexprops = pickle.load(open(paths[d] + '/' + setting + '/' + str(run) + 
                                           '/lexicon_properties_' + distance + '_' + test_mode + '.pkl', 'rb'))
                else:
                    lexprops = pickle.load(open(paths[d] + '/' + setting + '/' + str(run) + 
                                           '/lexicon_properties_' + distance + '_rsa_' + rsa_test_int + '.pkl', 'rb'))
                lex['condition'].append(conditions[::2][i])
                lex['dataset'].append(dataset)
                lex['run'].append(run)
                lex['rsa'].append(rsa)
                lex['lexsize'].append(lexprops['lexicon size'])
                lex['lexinfo'].append(lexprops['lexicon informativeness'])
df = pd.DataFrame(lex)
df.to_csv("BayesianAnalysis/exp2/lexicon_props.csv", index=False)
df

Unnamed: 0,condition,rsa,dataset,run,lexsize,lexinfo
0,context_unaware,test,"(3,4)",0,97,6.323225
1,context_unaware,rsa,"(3,4)",0,58,3.783062
2,context_unaware,test,"(3,4)",1,104,3.409091
3,context_unaware,rsa,"(3,4)",1,52,4.388824
4,context_unaware,test,"(3,4)",2,102,6.770833
...,...,...,...,...,...,...
115,context_aware,rsa,"(5,4)",2,50,4.044269
116,context_aware,test,"(5,4)",3,87,3.210536
117,context_aware,rsa,"(5,4)",3,56,3.326627
118,context_aware,test,"(5,4)",4,81,4.373396
