In [5]:
from utils.load_results import *
from utils.plot_helpers import *

import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
plt.style.use('default')
import torch
from utils.analysis_from_interaction import *
from language_analysis_local import TopographicSimilarityConceptLevel, encode_target_concepts_for_topsim
import os
# Create the output directory for analysis artefacts; exist_ok avoids the
# check-then-create race and makes re-running this cell a no-op.
os.makedirs('analysis', exist_ok=True)
#import plotly.express as px
from collections import Counter

### Utilities

In [46]:
def objects_to_concepts(sender_input, n_vals=None):
    """Reconstruct concepts from the objects stored in an interaction.

    Args:
        sender_input: 2-D sender input; the first half of its columns
            (axis 1) is taken as the target objects.
        n_vals: value forwarded as second argument to ``k_hot_to_attributes``
            (presumably the number of values per attribute -- confirm against
            the helper). When omitted, the notebook globals ``n_values[i]``
            are used, which only works inside a loop whose index is the
            module-level variable ``i``.

    Returns:
        A list of ``(objects, fixed)`` pairs, one per entry returned by
        ``retrieve_concepts_sampling``.
    """
    if n_vals is None:
        # Backward-compatible fallback on hidden notebook state: the current
        # dataset index `i` and the `n_values` configuration list.
        n_vals = n_values[i]
    n_targets = sender_input.shape[1] // 2
    # get target objects and fixed vectors to re-construct concepts
    target_objects = k_hot_to_attributes(sender_input[:, :n_targets], n_vals)
    # concepts are defined by a list of target objects (here one sampled target object) and a fixed vector
    objects, fixed = retrieve_concepts_sampling(target_objects, all_targets=True)
    return list(zip(objects, fixed))

In [48]:
def retrieve_messages(interaction):
    """Extract the messages of an interaction as plain Python lists.

    The argmax over the last dimension of ``interaction.message`` is taken,
    and every entry along the first dimension is converted with ``tolist()``.
    """
    symbol_ids = interaction.message.argmax(dim=-1)
    return [row.tolist() for row in symbol_ids]

In [49]:
def count_symbols(messages):
    """Tally how often each symbol occurs across all messages.

    Returns a ``Counter`` mapping symbol -> number of occurrences.
    """
    tally = Counter()
    for message in messages:
        for symbol in message:
            tally[symbol] += 1
    return tally

In [54]:
def get_unique_message_set(messages):
    """Collapse messages into a set of tuples so set operations can be applied."""
    unique_messages = {tuple(symbols) for symbols in messages}
    return unique_messages

In [98]:
def get_unique_concept_set(concepts):
    """Return the set of unique concepts.

    Args:
        concepts: iterable of ``(objects, fixed)`` pairs, where ``objects`` is
            an iterable of objects (each itself iterable) and ``fixed`` is an
            iterable.

    Returns:
        A set of hashable ``(tuple_of_object_tuples, fixed_tuple)`` entries,
        so duplicates collapse and set operations can be applied.
    """
    # Nested sequences are converted to tuples because lists/arrays are not hashable.
    return {
        (tuple(tuple(obj) for obj in objects), tuple(fixed))
        for objects, fixed in concepts
    }

### Configurations

In [110]:
# Dataset labels; n_attributes and n_values below are index-aligned with this
# list and match the two numbers in each label.
datasets = ['(3,4)', '(3,8)', '(3,16)', '(4,4)', '(4,8)', '(5,4)']
n_attributes = [3, 3, 3, 4, 4, 5]
n_values = [4, 8, 16, 4, 8, 4]
n_datasets = len(datasets)
# Epoch whose train/validation interactions are analysed.
n_epochs = 300
# One results directory per dataset.
paths = [f'results/{label}_game_size_10_vsf_3' for label in datasets]

In [138]:
# Which simulations to evaluate.
context_unaware = False  # True: context_unaware simulations, False: original ones
zero_shot = True  # evaluate zero-shot simulations
zero_shot_test = 'generic'  # 'generic' or 'specific'
test_interactions = True  # calculate scores on test interactions (only with zero shot)

# Sub-directory of each run that matches the flags above.
setting = 'context_unaware' if context_unaware else 'standard'
if zero_shot:
    setting += '/zero_shot/' + zero_shot_test

### Determine vocab size and message reuse

In [139]:
# go through all datasets
# NOTE: the outer loop variable has to stay named `i` -- objects_to_concepts()
# looks up the module-level `n_values[i]` to decode the current dataset.
for i, d in enumerate(datasets):
    print(d)
    for run in range(5):
        path_to_run = paths[i] + '/' + str(setting) +'/' + str(run) + '/'
        path_to_interaction_train = (path_to_run + 'interactions/train/epoch_' + str(n_epochs) + '/interaction_gpu0')
        path_to_interaction_val = (path_to_run + 'interactions/validation/epoch_' + str(n_epochs) + '/interaction_gpu0')
        # test interactions are read from epoch_0
        path_to_interaction_test = (path_to_run + 'interactions/test/epoch_0/interaction_gpu0')
        # torch.load unpickles the stored objects: only load interaction files from trusted runs
        interaction_train = torch.load(path_to_interaction_train)
        interaction_val = torch.load(path_to_interaction_val)
        interaction_test = torch.load(path_to_interaction_test)

        concepts_train = objects_to_concepts(interaction_train.sender_input)
        concepts_val = objects_to_concepts(interaction_val.sender_input)
        concepts_test = objects_to_concepts(interaction_test.sender_input)

        messages_train = retrieve_messages(interaction_train)
        messages_val = retrieve_messages(interaction_val)
        messages_test = retrieve_messages(interaction_test)

        # symbol usage per split, stored as [train, validation, test]
        symbol_counts_train = count_symbols(messages_train)
        symbol_counts_val = count_symbols(messages_val)
        symbol_counts_test = count_symbols(messages_test)
        symbol_counts = [symbol_counts_train, symbol_counts_val, symbol_counts_test]
        # context manager closes the file handle (the bare open() leaked one per run)
        with open(path_to_run + 'symbol_counts.pkl', 'wb') as f:
            pickle.dump(symbol_counts, f)

        # consider train and validation messages together
        messages_train_val = messages_train +  messages_val
        # consider only unique messages
        messages_train_val_unique = get_unique_message_set(messages_train_val)
        messages_test_unique = get_unique_message_set(messages_test)
        # total messages
        messages_total = messages_train_val +  messages_test
        messages_total_unique = get_unique_message_set(messages_total)

        # concepts: number of unique concepts stored as [train, validation, test, total]
        concepts_train_unique = get_unique_concept_set(concepts_train)
        concepts_val_unique = get_unique_concept_set(concepts_val)
        concepts_test_unique = get_unique_concept_set(concepts_test)
        concepts_total = concepts_train + concepts_val + concepts_test
        concepts_total_unique = get_unique_concept_set(concepts_total)
        num_of_concepts = [len(concepts_train_unique), len(concepts_val_unique), len(concepts_test_unique), len(concepts_total_unique)]
        with open(path_to_run + 'num_of_concepts.pkl', 'wb') as f:
            pickle.dump(num_of_concepts, f)

        # messages reused in testing:
        intersection = messages_train_val_unique & messages_test_unique

        # messages only used in training:
        difference_train = messages_train_val_unique - messages_test_unique

        # messages only used in testing:
        difference_test = messages_test_unique - messages_train_val_unique
        print(len(difference_test), "messages used for the", len(concepts_test_unique), "novel concepts")

        # ratio of novel messages to novel concepts; NaN instead of a
        # ZeroDivisionError when a run has no test concepts
        n_test_concepts = len(concepts_test_unique)
        novel_ratio = len(difference_test) / n_test_concepts if n_test_concepts else float('nan')

        message_reuse = [len(intersection), len(difference_train), len(difference_test), n_test_concepts, novel_ratio]
        with open(path_to_run + 'message_reuse.pkl', 'wb') as f:
            pickle.dump(message_reuse, f)

(3,4)
14 messages used for the 120 novel concepts
14 messages used for the 120 novel concepts
25 messages used for the 120 novel concepts
5 messages used for the 120 novel concepts
11 messages used for the 120 novel concepts
(3,8)
25 messages used for the 240 novel concepts
28 messages used for the 240 novel concepts
32 messages used for the 240 novel concepts
19 messages used for the 240 novel concepts
49 messages used for the 240 novel concepts
(3,16)
82 messages used for the 480 novel concepts
63 messages used for the 480 novel concepts
56 messages used for the 480 novel concepts
43 messages used for the 480 novel concepts
60 messages used for the 480 novel concepts
(4,4)
7 messages used for the 160 novel concepts
33 messages used for the 160 novel concepts
21 messages used for the 160 novel concepts
15 messages used for the 160 novel concepts
21 messages used for the 160 novel concepts
(4,8)
5 messages used for the 320 novel concepts
22 messages used for the 320 novel concepts
10 m

In [140]:
# Collect the per-run message-reuse statistics of every dataset.
# Each value of message_reuse_dict becomes a list (one entry per dataset) of
# lists (one entry per run).
message_reuse_dict = {'intersection': [], 'difference train': [], 'difference test': [], 'concepts test unique': [], 'test ratio': []}
for i, d in enumerate(datasets):
    intersection, train_difference, test_difference, test_concepts, test_ratio = [], [], [], [], []
    for run in range(5):
        path_to_run = paths[i] + '/' + str(setting) +'/' + str(run) + '/'
        # message_reuse.pkl is written by the analysis loop of this notebook;
        # pickle.load must not be used on files from untrusted sources.
        # The context manager closes the handle (the bare open() leaked one per run).
        with open(path_to_run + 'message_reuse.pkl', 'rb') as f:
            message_reuse = pickle.load(f)
        # stored order: [intersection, difference train, difference test,
        #                unique test concepts, novel messages / test concepts]
        intersection.append(message_reuse[0])
        train_difference.append(message_reuse[1])
        test_difference.append(message_reuse[2])
        test_concepts.append(message_reuse[3])
        test_ratio.append(message_reuse[4])
    message_reuse_dict['intersection'].append(intersection)
    message_reuse_dict['difference train'].append(train_difference)
    message_reuse_dict['difference test'].append(test_difference)
    message_reuse_dict['concepts test unique'].append(test_concepts)
    message_reuse_dict['test ratio'].append(test_ratio)

In [142]:
# Statistics reported in the table, stacked as (statistic, dataset, run)
message_reuse = [
    message_reuse_dict[key]
    for key in ('difference test', 'concepts test unique', 'test ratio')
]
mess_reuse_array = np.array(message_reuse)

# Mean and standard deviation over the runs (last axis)
means = mess_reuse_array.mean(axis=-1)
std_devs = mess_reuse_array.std(axis=-1)

# Table labels
row_names = ["D(3,4)", "D(3,8)", "D(3,16)", "D(4,4)", "D(4,8)", "D(5,4)"]
col_names = ["novel messages", "novel concepts", "ratio"]

# One formatter per column: counts with one decimal, the number of novel
# concepts as a plain integer, ratios with two decimals.
cell_formatters = [
    lambda mean, std: f"{mean:.1f} $\\pm$ {std:.2f}",
    lambda mean, std: f"{int(mean)}",
    lambda mean, std: f"{mean:.2f} $\\pm$ {std:.2f}",
]

# One table row per dataset, one cell per statistic
data = [
    [
        fmt(means[stat_idx, dataset_idx], std_devs[stat_idx, dataset_idx])
        for stat_idx, fmt in enumerate(cell_formatters)
    ]
    for dataset_idx in range(means.shape[1])
]

# Assemble the table; escape=False keeps the LaTeX "$\pm$" markup intact
df = pd.DataFrame(data, index=row_names, columns=col_names)
latex_table = df.to_latex(index=True, escape=False)

print(latex_table)

\begin{tabular}{llll}
\toprule
{} &    novel messages & novel\n concepts &            ratio \\
\midrule
D(3,4)  &   13.8 $\pm$ 6.49 &              120 &  0.11 $\pm$ 0.05 \\
D(3,8)  &  30.6 $\pm$ 10.13 &              240 &  0.13 $\pm$ 0.04 \\
D(3,16) &  60.8 $\pm$ 12.61 &              480 &  0.13 $\pm$ 0.03 \\
D(4,4)  &   19.4 $\pm$ 8.52 &              160 &  0.12 $\pm$ 0.05 \\
D(4,8)  &   12.4 $\pm$ 9.65 &              320 &  0.04 $\pm$ 0.03 \\
D(5,4)  &    2.6 $\pm$ 3.32 &              200 &  0.01 $\pm$ 0.02 \\
\bottomrule
\end{tabular}


  latex_table = df.to_latex(index=True, escape=False)
