In [1]:
import pandas as pd
from grid_search_utils import get_grid_search_results
import itertools

In [2]:
# Name of the grid-search run to analyse; the results CSV read further down is
# named after it.
directory = "02-04-2024"
# NOTE(review): helper from grid_search_utils -- the exact meaning of `n_lines`
# and `header_in_line` is defined there; confirm before changing them.
get_grid_search_results(
    directory,
    n_lines=128,
    header_in_line=1,
)

    attributes  values  context_unaware  game_size  vocab_size_factor  \
0          3.0     4.0              0.0       10.0                3.0   
1          3.0     4.0              1.0       10.0                3.0   
2          3.0     4.0              1.0       10.0                3.0   
3          3.0     4.0              1.0       10.0                3.0   
4          3.0     4.0              1.0       10.0                3.0   
..         ...     ...              ...        ...                ...   
85         3.0     4.0              1.0       10.0                3.0   
86         3.0    16.0              0.0       10.0                3.0   
87         3.0    16.0              0.0       10.0                3.0   
88         3.0     4.0              1.0       10.0                3.0   
89         3.0     4.0              1.0       10.0                3.0   

    n_epochs  batch_size  learning_rate  hidden_size  temp_update  \
0       60.0        32.0          0.001        128.0  

In [3]:
# Load the results table for the selected run ("results_<directory>.csv").
results_csv = f"results_{directory}.csv"
data = pd.read_csv(results_csv)

## New grid search results

In [4]:
# Parameters that were tested: every column except the trailing six
# (train/test loss, accuracy and length) is a grid-search setting.
# Print the distinct values that occur for each of them.
n_metric_columns = 6
for param_name in data.columns[:-n_metric_columns]:
    print(param_name, data[param_name].unique())

attributes [3 5]
values [ 4 16]
context_unaware [0. 1.]
game_size [10]
vocab_size_factor [3.]
n_epochs [60.]
batch_size [32]
learning_rate [0.001]
hidden_size [128]
temp_update [0.99]
temperature [2.]
length_cost [0.0005 0.005  0.001 ]
max_mess_len [30. 20.  3. 10.  5.]


In [5]:
# Keep only the runs of the datasets D(3,4), D(3,16) and D(5,4).
is_d_3_4 = (data['attributes'] == 3) & (data['values'] == 4)
is_d_3_16 = (data['attributes'] == 3) & (data['values'] == 16)
is_d_5_4 = (data['attributes'] == 5) & (data['values'] == 4)
data = data[is_d_3_4 | is_d_3_16 | is_d_5_4]

In [6]:
# For every dataset D(attributes, values) and both context conditions, print the
# run with the highest test accuracy and the run with the lowest train loss.
attributes = [3, 5]
values = [4, 16]
# The first three combinations are (3, 4), (3, 16) and (5, 4); (5, 16) is left out.
datasets = list(itertools.product(attributes, values))[:3]
# No longer needed for the lookups below; kept only in case other cells still
# reference `index`.
index = data.index
# The loop variables deliberately differ from the `attributes` / `values` lists
# above so that the lists are not overwritten while iterating.
for n_attributes, n_values in datasets:
    for context_unaware in range(2):
        print("dataset:", n_attributes, n_values, "context_unaware:", context_unaware)
        per_dataset = data[(data["attributes"] == n_attributes)
                           & (data["values"] == n_values)
                           & (data["context_unaware"] == context_unaware)]
        if per_dataset.empty:
            # idxmax() / idxmin() raise on an empty selection.
            print("no runs found for this configuration")
            continue
        # idxmax() / idxmin() return index *labels*, so they go straight into .loc.
        # Using them as positions into data.index only works while labels and
        # positions coincide, which stops being true once rows are filtered out.
        max_test_acc = per_dataset["test_accuracy"].idxmax()
        print(per_dataset.loc[max_test_acc])
        min_train_loss = per_dataset["train_loss"].idxmin()
        print(per_dataset.loc[min_train_loss])

dataset: 3 4 context_unaware: 0
attributes             3.000000
values                 4.000000
context_unaware        0.000000
game_size             10.000000
vocab_size_factor      3.000000
n_epochs              60.000000
batch_size            32.000000
learning_rate          0.001000
hidden_size          128.000000
temp_update            0.990000
temperature            2.000000
length_cost            0.000500
max_mess_len          10.000000
train_loss             0.011059
train_accuracy         0.999994
train_length          10.998214
test_loss              0.023236
test_accuracy          0.996354
test_length           11.000000
Name: 6, dtype: float64
attributes             3.000000
values                 4.000000
context_unaware        0.000000
game_size             10.000000
vocab_size_factor      3.000000
n_epochs              60.000000
batch_size            32.000000
learning_rate          0.001000
hidden_size          128.000000
temp_update            0.990000
temperature     

### Length cost parameter & maximum message length
The lowest losses are usually found with small length-cost values, where the cost is too weak to noticeably influence training. We therefore choose, for each dataset, a cost for which the message length observed in the last epoch (60) shows that the cost actually had an effect.
This is the case for:
- 0.005 for D(3,4)
- 0.001 for D(3,16)
- 0.005 for D(5,4)

In [7]:
# Mean test accuracy, test loss and message length for every combination of
# dataset, context condition, length cost and maximum message length.
group_keys = ['attributes', 'values', 'context_unaware', 'length_cost', 'max_mess_len']
metrics = ['test_accuracy', 'test_loss', 'test_length']
# The metric columns are selected with a list: passing several column names as a
# bare tuple of keys to a groupby is deprecated and raises in pandas 2.x.
lc_sumstat = data.groupby(group_keys)[metrics].describe()
# Keep only the mean of each metric and join the three pieces on the group index.
lc_acc = lc_sumstat.loc[:, [('test_accuracy', 'mean')]]
lc_loss = lc_sumstat.loc[:, [('test_loss', 'mean')]]
lc_length = lc_sumstat.loc[:, [('test_length', 'mean')]]
lc_merged = lc_acc.merge(lc_loss, left_index=True, right_index=True)
lc_merged = lc_merged.merge(lc_length, left_index=True, right_index=True)
lc_merged

  lc_sumstat = data.groupby(['attributes', 'values', 'context_unaware', 'length_cost', 'max_mess_len'])['test_accuracy', 'test_loss', 'test_length'].describe()


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,test_accuracy,test_loss,test_length
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,mean,mean,mean
attributes,values,context_unaware,length_cost,max_mess_len,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
3,4,0.0,0.0005,3.0,0.989227,0.041587,4.000000
3,4,0.0,0.0005,5.0,0.994358,0.028957,6.000000
3,4,0.0,0.0005,10.0,0.996354,0.023236,11.000000
3,4,0.0,0.0005,20.0,0.986211,0.051317,8.805555
3,4,0.0,0.0005,30.0,0.986192,0.049808,11.590278
...,...,...,...,...,...,...,...
5,4,1.0,0.0050,3.0,0.896727,0.325546,3.711757
5,4,1.0,0.0050,5.0,0.941478,0.204935,3.679388
5,4,1.0,0.0050,10.0,0.995088,0.054159,5.826122
5,4,1.0,0.0050,20.0,0.968116,0.142014,4.623525


### Length cost

In [8]:
# Mean and standard deviation of test accuracy, test loss and message length
# per dataset, context condition and length cost.
group_keys = ['attributes', 'values', 'context_unaware', 'length_cost']
metrics = ['test_accuracy', 'test_loss', 'test_length']
# The metric columns are selected with a list: passing several column names as a
# bare tuple of keys to a groupby is deprecated and raises in pandas 2.x.
lc_sumstat = data.groupby(group_keys)[metrics].describe()
# Keep mean and std of each metric and join the three pieces on the group index.
lc_acc = lc_sumstat.loc[:, [('test_accuracy', 'mean'), ('test_accuracy', 'std')]]
lc_loss = lc_sumstat.loc[:, [('test_loss', 'mean'), ('test_loss', 'std')]]
lc_length = lc_sumstat.loc[:, [('test_length', 'mean'), ('test_length', 'std')]]
lc_merged = lc_acc.merge(lc_loss, left_index=True, right_index=True)
lc_merged = lc_merged.merge(lc_length, left_index=True, right_index=True)
lc_merged

  lc_sumstat = data.groupby(['attributes', 'values', 'context_unaware', 'length_cost'])['test_accuracy', 'test_loss', 'test_length'].describe()


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,test_accuracy,test_accuracy,test_loss,test_loss,test_length,test_length
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,mean,std,mean,std,mean,std
attributes,values,context_unaware,length_cost,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
3,4,0.0,0.0005,0.990468,0.004683,0.038981,0.012496,8.279167,3.246126
3,4,0.0,0.001,0.983805,0.002454,0.061285,0.008787,6.448611,1.877062
3,4,0.0,0.005,0.980172,0.006586,0.08677,0.016116,4.104514,0.279776
3,4,1.0,0.0005,0.988861,0.004997,0.048247,0.018218,7.825,2.813684
3,4,1.0,0.001,0.991952,0.00492,0.042618,0.022503,6.605903,1.633601
3,4,1.0,0.005,0.979001,0.007181,0.089251,0.019386,4.057986,0.107236
3,16,0.0,0.0005,0.971928,0.038506,0.096388,0.108649,9.744303,6.964102
3,16,0.0,0.001,0.984078,0.007849,0.073424,0.027594,4.987907,0.877896
3,16,0.0,0.005,0.79934,0.235369,0.346195,0.308162,2.341946,1.252004
3,16,1.0,0.0005,0.98359,0.0176,0.063665,0.048786,9.639876,6.903151


### Maximum message length
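There are no large differences between `max_mess_len` = 10, 20 or 30 for any of the datasets. Open question: go with the lowest or the highest value, and with the best value per dataset or a single value that works best for all?
`max_mess_len` = 20 seems to work well for all datasets.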

In [9]:
# Mean and standard deviation of test accuracy, test loss and message length
# per dataset, context condition and maximum message length.
group_keys = ['attributes', 'values', 'context_unaware', 'max_mess_len']
metrics = ['test_accuracy', 'test_loss', 'test_length']
# The metric columns are selected with a list: passing several column names as a
# bare tuple of keys to a groupby is deprecated and raises in pandas 2.x.
lc_sumstat = data.groupby(group_keys)[metrics].describe()
# Keep mean and std of each metric and join the three pieces on the group index.
lc_acc = lc_sumstat.loc[:, [('test_accuracy', 'mean'), ('test_accuracy', 'std')]]
lc_loss = lc_sumstat.loc[:, [('test_loss', 'mean'), ('test_loss', 'std')]]
lc_length = lc_sumstat.loc[:, [('test_length', 'mean'), ('test_length', 'std')]]
lc_merged = lc_acc.merge(lc_loss, left_index=True, right_index=True)
lc_merged = lc_merged.merge(lc_length, left_index=True, right_index=True)
lc_merged

  lc_sumstat = data.groupby(['attributes', 'values', 'context_unaware', 'max_mess_len'])['test_accuracy', 'test_loss', 'test_length'].describe()


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,test_accuracy,test_accuracy,test_loss,test_loss,test_length,test_length
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,mean,std,mean,std,mean,std
attributes,values,context_unaware,max_mess_len,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
3,4,0.0,3.0,0.980592,0.010208,0.071679,0.030649,4.0,0.0
3,4,0.0,5.0,0.988068,0.005886,0.05663,0.030366,5.473958,0.911131
3,4,0.0,10.0,0.985352,0.009542,0.064605,0.039317,7.962963,3.435419
3,4,0.0,20.0,0.984591,0.001654,0.058824,0.011689,6.20081,2.3662
3,4,0.0,30.0,0.985472,0.001616,0.05999,0.009321,7.749421,3.958846
3,4,1.0,3.0,0.984838,0.00254,0.07066,0.008574,4.0,0.0
3,4,1.0,5.0,0.984042,0.010427,0.071246,0.035114,5.373843,1.084537
3,4,1.0,10.0,0.983671,0.010884,0.064441,0.036643,7.685764,3.385513
3,4,1.0,20.0,0.990169,0.009919,0.045113,0.038741,6.702546,2.338359
3,4,1.0,30.0,0.990302,0.005587,0.048733,0.024604,7.052662,2.916794
