In [1]:
import os
import torch
import numpy as np
from collections import defaultdict
from utils.load_results import *
from utils.analysis_from_interaction import *

In [2]:
# Experiment configuration. `datasets`, `n_attributes` and `n_values` are
# index-aligned: the label at position k reads "(n_attributes[k],n_values[k])".
datasets = (
    '(3,4)',
    '(3,8)',
    '(3,16)',
    '(4,4)',
    '(4,8)',
    '(5,4)',
)
n_attributes = (3, 3, 3, 4, 4, 5)
n_values = (4, 8, 16, 4, 8, 4)

# Epoch whose saved interaction files are evaluated below.
n_epochs = 300

# One results directory per dataset; every entry ends with a trailing '/'.
paths = [f'results/{dataset_label}_game_size_10_vsf_3/' for dataset_label in datasets]

In [3]:
# Toggle between the original simulations (False) and the context_unaware ones (True).
context_unaware = False
# `setting` is used as the sub-directory name when building result paths.
setting = 'context_unaware' if context_unaware else 'standard'

# Determine the number of unique messages

In [4]:
from collections import Counter

##### Number of unique messages when symbol order matters:

In [5]:
num_runs = 5 # number of runs per ds

# Count, per dataset and run, how many distinct messages occur in the saved
# training interaction when symbol order is taken into account.
for i, d in enumerate(datasets):
    print(f"Dataset: {d}")
    unique_messages_all_runs = []

    for run in range(num_runs):
        # os.path.join avoids the doubled separator that plain string
        # concatenation produces: every entry of `paths` already ends in '/'.
        path_to_run = os.path.join(paths[i], str(setting), str(run))
        path_to_interaction_train = os.path.join(
            path_to_run, 'interactions', 'train', f'epoch_{n_epochs}', 'interaction_gpu0'
        )
        interaction = torch.load(path_to_interaction_train)

        # Collapse the last dimension to the index of its largest entry
        # (presumably the symbol id at each message position -- TODO confirm).
        messages = interaction.message.argmax(dim=-1)
        # Tuples are hashable and keep the symbol order, so two messages are
        # equal only if they carry the same symbols in the same positions.
        messages = [tuple(msg.tolist()) for msg in messages]
        # The set drops duplicate messages (the set itself has no defined order).
        total_messages = set(messages)
        number_of_unique_messages = len(total_messages)
        unique_messages_all_runs.append(number_of_unique_messages)

    # Average the per-run counts for this dataset
    mean_unique_messages = np.mean(unique_messages_all_runs)

    print(f'Number of unique messages per run: {unique_messages_all_runs}')
    print(f'Mean unique messages: {mean_unique_messages:.2f}')
    print()

Dataset: (3,4)
Number of unique messages per run: [175, 187, 191, 170, 157]
Mean unique messages: 176.00

Dataset: (3,8)
Number of unique messages per run: [803, 900, 1263, 756, 786]
Mean unique messages: 901.60

Dataset: (3,16)
Number of unique messages per run: [4032, 2860, 3685, 2654, 4130]
Mean unique messages: 3472.20

Dataset: (4,4)
Number of unique messages per run: [1312, 1487, 1793, 1462, 1744]
Mean unique messages: 1559.60

Dataset: (4,8)
Number of unique messages per run: [12925, 6479, 10282, 8675, 10237]
Mean unique messages: 9719.60

Dataset: (5,4)
Number of unique messages per run: [6325, 10662, 5120, 10036, 10715]
Mean unique messages: 8571.60



##### Number of unique messages when messages containing the same symbols in a different order are treated as the same message:

In [6]:
# Per-dataset lists of per-run counts of order-insensitive unique messages.
unique_sorted_messages_all_datasets = []

for i, d in enumerate(datasets):
    print(f"Dataset: {d}")
    unique_sorted_messages_all_runs = []

    for run in range(num_runs):
        # os.path.join avoids the doubled separator that plain string
        # concatenation produces: every entry of `paths` already ends in '/'.
        path_to_run = os.path.join(paths[i], str(setting), str(run))
        path_to_interaction_train = os.path.join(
            path_to_run, 'interactions', 'train', f'epoch_{n_epochs}', 'interaction_gpu0'
        )
        interaction = torch.load(path_to_interaction_train)

        # Collapse the last dimension to the index of its largest entry
        # (presumably the symbol id at each message position -- TODO confirm).
        messages = interaction.message.argmax(dim=-1)
        # Sorting each message's symbols in ascending order gives messages
        # that differ only in symbol order the same tuple.
        sorted_messages = [tuple(sorted(msg.tolist())) for msg in messages]
        # The set removes duplicates.
        unique_sorted_messages = set(sorted_messages)
        number_of_unique_sorted_messages = len(unique_sorted_messages)
        unique_sorted_messages_all_runs.append(number_of_unique_sorted_messages)

    unique_sorted_messages_all_datasets.append(unique_sorted_messages_all_runs)
    mean_unique_sorted_messages = np.mean(unique_sorted_messages_all_runs)
    print(f'Number of unique messages per run: {unique_sorted_messages_all_runs}')
    print(f'Mean unique sorted messages: {mean_unique_sorted_messages:.2f}')
    print()

Dataset: (3,4)
Number of unique messages per run: [131, 128, 144, 119, 126]
Mean unique sorted messages: 129.60

Dataset: (3,8)
Number of unique messages per run: [649, 720, 853, 571, 616]
Mean unique sorted messages: 681.80

Dataset: (3,16)
Number of unique messages per run: [3214, 2353, 3131, 2240, 3109]
Mean unique sorted messages: 2809.40

Dataset: (4,4)
Number of unique messages per run: [908, 903, 1121, 991, 1027]
Mean unique sorted messages: 990.00

Dataset: (4,8)
Number of unique messages per run: [7476, 4191, 4746, 5573, 7447]
Mean unique sorted messages: 5886.60

Dataset: (5,4)
Number of unique messages per run: [3556, 3110, 2340, 3612, 3938]
Mean unique sorted messages: 3311.20



##### Number of unique messages when messages containing the same symbols in a different order are treated as the same message, compared with the number of unique concepts:

In [7]:
# Per-dataset lists of per-run counts: order-insensitive unique messages and
# unique concepts found in the same training interaction.
unique_sorted_messages_all_datasets = []
unique_concepts_all_datasets = []

for i, d in enumerate(datasets):
    print(f"Dataset: {d}")
    unique_sorted_messages_all_runs = []
    unique_concepts_all_runs = []

    for run in range(num_runs):
        # os.path.join avoids the doubled separator that plain string
        # concatenation produces: every entry of `paths` already ends in '/'.
        path_to_run = os.path.join(paths[i], str(setting), str(run))
        path_to_interaction_train = os.path.join(
            path_to_run, 'interactions', 'train', f'epoch_{n_epochs}', 'interaction_gpu0'
        )
        interaction = torch.load(path_to_interaction_train)

        # Calculate unique sorted messages: sorting each message's symbols
        # makes messages that differ only in symbol order compare equal.
        messages = interaction.message.argmax(dim=-1)
        sorted_messages = [tuple(sorted(msg.tolist())) for msg in messages]
        unique_sorted_messages = set(sorted_messages)
        number_of_unique_sorted_messages = len(unique_sorted_messages)
        unique_sorted_messages_all_runs.append(number_of_unique_sorted_messages)

        # Calculate unique concepts
        sender_input = interaction.sender_input
        # Only the first half of dimension 1 is used as targets (the second
        # half presumably holds the distractors/context -- TODO confirm).
        n_targets = sender_input.shape[1] // 2
        target_objects = sender_input[:, :n_targets]
        target_objects = k_hot_to_attributes(target_objects, n_values[i])
        (objects, fixed) = retrieve_concepts_sampling(target_objects, all_targets=True)
        # Stringify every (objects, fixed) pair so that it is hashable and
        # duplicates can be removed with a set.
        concepts_strings = [(str(obj), str(fixed_vec)) for obj, fixed_vec in zip(objects, fixed)]
        unique_concepts = set(concepts_strings)
        number_of_unique_concepts = len(unique_concepts)
        unique_concepts_all_runs.append(number_of_unique_concepts)

    unique_sorted_messages_all_datasets.append(unique_sorted_messages_all_runs)
    unique_concepts_all_datasets.append(unique_concepts_all_runs)

    print(f'Number of unique messages per run: {unique_sorted_messages_all_runs}')
    print(f'Number of unique concepts per run: {unique_concepts_all_runs}')
    print()

# Calculate the ratio for each run in each dataset
for d, message_counts, concept_counts in zip(
    datasets, unique_sorted_messages_all_datasets, unique_concepts_all_datasets
):
    print(f"Dataset: {d}")
    # Run-wise ratio: unique sorted messages divided by unique concepts.
    ratios = [
        num_unique_messages / num_unique_concepts
        for num_unique_messages, num_unique_concepts in zip(message_counts, concept_counts)
    ]

    print(f'Ratios of unique messages to unique concepts per run: {ratios}')
    mean_ratio = np.mean(ratios)
    print(f'Mean ratio: {mean_ratio:.2f}')
    print()

Dataset: (3,4)
Number of unique messages per run: [131, 128, 144, 119, 126]
Number of unique concepts per run: [660, 655, 709, 743, 693]

Dataset: (3,8)
Number of unique messages per run: [649, 720, 853, 571, 616]
Number of unique concepts per run: [2773, 2911, 2828, 2738, 2822]

Dataset: (3,16)
Number of unique messages per run: [3214, 2353, 3131, 2240, 3109]
Number of unique concepts per run: [12561, 12561, 12561, 12561, 12561]

Dataset: (4,4)
Number of unique messages per run: [908, 903, 1121, 991, 1027]
Number of unique concepts per run: [5955, 6117, 6006, 6332, 6055]

Dataset: (4,8)
Number of unique messages per run: [7476, 4191, 4746, 5573, 7447]
Number of unique concepts per run: [44194, 44194, 44194, 44194, 44194]

Dataset: (5,4)
Number of unique messages per run: [3556, 3110, 2340, 3612, 3938]
Number of unique concepts per run: [45705, 45193, 44560, 45304, 45458]

Dataset: (3,4)
Ratios of unique messages to unique concepts per run: [0.1984848484848485, 0.19541984732824427, 0.2