In [1]:
from utils.load_results import *
from utils.plot_helpers import *

import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
plt.style.use('default')
import torch
from utils.analysis_from_interaction import *
from language_analysis_local import TopographicSimilarityConceptLevel, encode_target_concepts_for_topsim
import os
# Create the output directory for analysis artefacts. `exist_ok=True` makes the
# call idempotent and avoids the check-then-create race of a separate
# `os.path.exists` test.
os.makedirs('analysis', exist_ok=True)
#import plotly.express as px

In [6]:
# Each dataset is identified by a pair (n_attributes, n_values); the dataset
# labels and the two per-dataset lists are all derived from this one list so
# they cannot drift apart.
dataset_dims = [(3, 4), (3, 8), (3, 16), (4, 4), (4, 8), (5, 4)]
datasets = [f'({n_attr},{n_val})' for n_attr, n_val in dataset_dims]
n_values = [n_val for _, n_val in dataset_dims]
n_attributes = [n_attr for n_attr, _ in dataset_dims]
# epoch whose saved interactions are analysed
n_epochs = 300
n_datasets = len(datasets)
# one results directory per dataset
paths = [f'results/vague_ds_results/{name}_game_size_10_vsf_3' for name in datasets]

In [7]:
# Toggle between the original ('standard') and the context-unaware simulations.
context_unaware = False
# name of the sub-directory that holds the runs of the selected setting
setting = 'context_unaware' if context_unaware else 'standard'

### Determine vocab size

In [8]:
from collections import Counter

In [9]:
# Vocabulary usage per dataset: load the saved training interaction of run 0
# at epoch `n_epochs` and count how often each symbol occurs in the messages.
for i, d in enumerate(datasets):
    # directory of the first run for the selected setting
    path_to_run = f'{paths[i]}/{setting}/0/'
    path_to_interaction_train = f'{path_to_run}interactions/train/epoch_{n_epochs}/interaction_gpu0'
    path_to_interaction_val = f'{path_to_run}interactions/validation/epoch_{n_epochs}/interaction_gpu0'
    interaction = torch.load(path_to_interaction_train)
    print(path_to_interaction_train)

    # index of the maximum along the last message dimension, as nested Python lists
    messages = [msg.tolist() for msg in interaction.message.argmax(dim=-1)]
    # flatten all messages into one stream of symbols and count them
    symbol_counts = Counter(symbol for message in messages for symbol in message)
    print(symbol_counts)
    # number of distinct symbols that actually occur
    print(len(symbol_counts))

results/vague_ds_results/(3,4)_game_size_10_vsf_3/standard/0/interactions/train/epoch_300/interaction_gpu0
Counter({0: 1763, 7: 1216, 8: 879, 1: 788, 6: 651, 4: 517, 15: 474, 11: 302, 10: 191, 9: 174, 3: 78, 2: 13, 13: 1, 5: 1})
14
results/vague_ds_results/(3,8)_game_size_10_vsf_3/standard/0/interactions/train/epoch_300/interaction_gpu0
Counter({0: 11624, 5: 7914, 2: 4291, 11: 2809, 19: 2441, 25: 2090, 12: 1984, 20: 1835, 16: 1607, 8: 1167, 18: 1059, 10: 1029, 4: 1013, 9: 905, 1: 820, 15: 782, 24: 767, 13: 678, 14: 658, 27: 503, 23: 335, 6: 185})
22
results/vague_ds_results/(3,16)_game_size_10_vsf_3/standard/0/interactions/train/epoch_300/interaction_gpu0
Counter({0: 83227, 2: 42917, 30: 15831, 25: 14389, 32: 12819, 43: 12635, 9: 11623, 10: 10468, 1: 9094, 50: 8752, 15: 8729, 3: 8714, 31: 8488, 42: 6607, 26: 6013, 14: 5581, 38: 3914, 23: 3818, 36: 3596, 22: 3580, 46: 3322, 7: 3133, 27: 3039, 16: 2664, 18: 2528, 49: 2406, 33: 2233, 5: 2159, 41: 2154, 8: 2086, 48: 2026, 40: 2018, 47: 201

### See which messages are produced for which concepts

In [17]:
# Restrict the analysis to a single dataset, given as (n_attributes, n_values).
# NOTE: this overrides the dataset configuration defined further up.
dataset_dims = [(3, 16)]
datasets = [f'({n_attr},{n_val})' for n_attr, n_val in dataset_dims]
n_values = [n_val for _, n_val in dataset_dims]
n_attributes = [n_attr for n_attr, _ in dataset_dims]
# epoch whose saved interactions are analysed
n_epochs = 300
n_datasets = len(datasets)
# one results directory per dataset
paths = [f'results/vague_ds_results/{name}_game_size_10_vsf_3' for name in datasets]

In [18]:
# Toggle between the original ('standard') and the context-unaware simulations.
context_unaware = False
# name of the sub-directory that holds the runs of the selected setting
setting = 'context_unaware' if context_unaware else 'standard'

In [19]:
# For every dataset, collect all training games (first run, epoch `n_epochs`)
# whose concept fixes exactly the attributes in `fixed_indices` to the
# corresponding `fixed_values`, and save one row per matching target object
# (game number, object, fixed vector, context condition, message) as CSV.
output_dir = 'analysis_float_encoded_ds'
# The CSV files are written to `output_dir`, which no earlier cell creates;
# make sure it exists so `to_csv` does not fail on a fresh checkout.
os.makedirs(output_dir, exist_ok=True)

# go through all datasets
for i, d in enumerate(datasets):
    # select first run
    path_to_run = paths[i] + '/' + str(setting) + '/' + str(0) + '/'
    path_to_interaction_train = path_to_run + 'interactions/train/epoch_' + str(n_epochs) + '/interaction_gpu0'
    interaction = torch.load(path_to_interaction_train)
    print(path_to_interaction_train)

    # one symbol sequence per game: index of the maximum along the last message dimension
    messages = interaction.message.argmax(dim=-1)
    messages = [msg.tolist() for msg in messages]
    sender_input = interaction.sender_input
    print(sender_input.shape)
    # dimension 1 of the sender input is split in half: targets first, distractors second
    n_targets = int(sender_input.shape[1]/2)
    # get target objects and fixed vectors to re-construct concepts
    target_objects = sender_input[:, :n_targets]
    target_objects = k_hot_to_attributes(target_objects, n_values[i])
    # concepts are defined by a list of target objects and a fixed vector
    (objects, fixed) = retrieve_concepts_sampling(target_objects, all_targets=True)
    concepts = list(zip(objects, fixed))

    # get distractor objects to re-construct context conditions
    distractor_objects = sender_input[:, n_targets:]
    distractor_objects = k_hot_to_attributes(distractor_objects, n_values[i])
    context_conds = retrieve_context_condition(objects, fixed, distractor_objects)

    # Concept to inspect: how many attributes are fixed, which ones, and to
    # which values. (Instead of hard-coding them, these could be drawn at
    # random to obtain random qualitative samples.)
    n_fixed = 1
    fixed_indices = [0]
    # only the first len(fixed_indices) entries are compared against the objects
    fixed_values = [10, 7, 3]
    if len(fixed_values) < len(fixed_indices):
        raise ValueError("fixed_values must provide one value per entry of fixed_indices")
    print(n_fixed, fixed_indices, fixed_values)
    # pair every fixed attribute index with the value it must take
    required = list(zip(fixed_indices, fixed_values))

    # TODO: adapt this loop such that multiple indices can be fixed
    all_for_this_concept = []
    # fixed vector of the most recent match; kept under its own name so the
    # `fixed` array returned by retrieve_concepts_sampling is not shadowed
    matched_fixed = None
    for idx, (t_objects, t_fixed) in enumerate(concepts):
        # the concept must fix exactly n_fixed attributes, including all requested indices
        if sum(t_fixed) == n_fixed and all(t_fixed[fixed_index] == 1 for fixed_index in fixed_indices):
            for t_object in t_objects:
                # keep only target objects that carry the requested values
                if all(t_object[fixed_index] == fixed_value for fixed_index, fixed_value in required):
                    all_for_this_concept.append((idx, t_object, t_fixed, context_conds[idx], messages[idx]))
                    matched_fixed = t_fixed
    if len(all_for_this_concept) > 0:
        # all matches are saved (no random sub-sampling)
        sample = all_for_this_concept
        column_names = ['game_nr', 'object', 'fixed indices', 'context condition', 'message']
        df = pd.DataFrame(sample, columns=column_names)
        # file name: dataset, setting, first matching object and the fixed vector
        csv_path = (output_dir + '/quali_' + str(d) + '_' + str(setting) + '_'
                    + str(sample[0][1]) + ',' + str(matched_fixed) + 'all.csv')
        df.to_csv(csv_path, index=False)
        print('saved ' + csv_path)
    else:
        raise ValueError("sample for dataset " + str(d) + " could not be generated")

results/vague_ds_results/(3,16)_game_size_10_vsf_3/standard/0/interactions/train/epoch_300/interaction_gpu0
torch.Size([83227, 20, 48])
1 [0] [10, 7, 3]
saved analysis_float_encoded_ds/quali_(3,16)_standard_[10.  8. 11.],[1. 0. 0.]all.csv


### Specific Concepts

In [16]:
#add this line if you want to view all rows: pd.set_option('display.max_rows', None) 
# Load the saved games for this concept and display them. The path uses a
# forward slash like the other cells: a backslash is Windows-only and '\q' is
# an invalid escape sequence in a normal string literal.
df = pd.read_csv('analysis_float_encoded_ds/quali_(3,4)_standard_[0. 2. 1.],[1. 1. 1.]all.csv')
df

Unnamed: 0,game_nr,object,fixed indices,context condition,message
0,19,[0. 2. 1.],[1. 1. 1.],1,"[7, 8, 7, 0]"
1,19,[0. 2. 1.],[1. 1. 1.],1,"[7, 8, 7, 0]"
2,19,[0. 2. 1.],[1. 1. 1.],1,"[7, 8, 7, 0]"
3,19,[0. 2. 1.],[1. 1. 1.],1,"[7, 8, 7, 0]"
4,19,[0. 2. 1.],[1. 1. 1.],1,"[7, 8, 7, 0]"
...,...,...,...,...,...
265,1637,[0. 2. 1.],[1. 1. 1.],1,"[7, 8, 7, 0]"
266,1637,[0. 2. 1.],[1. 1. 1.],1,"[7, 8, 7, 0]"
267,1637,[0. 2. 1.],[1. 1. 1.],1,"[7, 8, 7, 0]"
268,1637,[0. 2. 1.],[1. 1. 1.],1,"[7, 8, 7, 0]"


In [4]:
# Forward slash instead of a backslash: portable across operating systems and
# free of the invalid '\q' escape sequence.
df = pd.read_csv('analysis_float_encoded_ds/quali_(3,4)_standard_[0. 2. 1.],[1. 1. 1.]all.csv')

# Get the context conditions (in order of first appearance)
context_conditions = df['context condition'].unique()

# For each context condition, list the distinct messages recorded under it
for context in context_conditions:
    print(f"Context Condition: {context}")
    print("Unique Messages:")

    # Rows that belong to the current context condition
    in_context = df['context condition'] == context

    # Print each unique message once
    for message in df.loc[in_context, 'message'].unique():
        print(message)

    print()

Context Condition: 1
Unique Messages:
[7, 8, 7, 0]
[7, 8, 8, 0]

Context Condition: 2
Unique Messages:
[7, 8, 7, 0]

Context Condition: 0
Unique Messages:
[7, 8, 7, 0]



In [12]:
# Load the saved games for this concept and display the table.
df = pd.read_csv(
    'analysis_float_encoded_ds/quali_(4,8)_standard_[7. 3. 1. 0.],[1. 1. 1. 1.]all.csv'
)
df

Unnamed: 0,game_nr,object,fixed indices,context condition,message
0,1584,[7. 3. 1. 0.],[1. 1. 1. 1.],3,"[14, 3, 22, 14, 0]"
1,1584,[7. 3. 1. 0.],[1. 1. 1. 1.],3,"[14, 3, 22, 14, 0]"
2,1584,[7. 3. 1. 0.],[1. 1. 1. 1.],3,"[14, 3, 22, 14, 0]"
3,1584,[7. 3. 1. 0.],[1. 1. 1. 1.],3,"[14, 3, 22, 14, 0]"
4,1584,[7. 3. 1. 0.],[1. 1. 1. 1.],3,"[14, 3, 22, 14, 0]"
...,...,...,...,...,...
265,122937,[7. 3. 1. 0.],[1. 1. 1. 1.],1,"[14, 22, 1, 22, 0]"
266,122937,[7. 3. 1. 0.],[1. 1. 1. 1.],1,"[14, 22, 1, 22, 0]"
267,122937,[7. 3. 1. 0.],[1. 1. 1. 1.],1,"[14, 22, 1, 22, 0]"
268,122937,[7. 3. 1. 0.],[1. 1. 1. 1.],1,"[14, 22, 1, 22, 0]"


In [13]:
df = pd.read_csv(
    'analysis_float_encoded_ds/quali_(4,8)_standard_[7. 3. 1. 0.],[1. 1. 1. 1.]all.csv'
)

# Walk through the context conditions in order of first appearance and list
# the distinct messages recorded under each of them.
column = 'context condition'
for context in df[column].unique():
    print(f"Context Condition: {context}")
    print("Unique Messages:")

    # rows that belong to the current context condition
    in_context = df[column] == context
    for message in df.loc[in_context, 'message'].unique():
        print(message)

    print()

Context Condition: 3
Unique Messages:
[14, 3, 22, 14, 0]
[14, 3, 22, 3, 0]
[14, 3, 22, 18, 0]
[14, 3, 3, 3, 0]

Context Condition: 1
Unique Messages:
[14, 22, 22, 4, 0]
[14, 22, 1, 22, 0]
[14, 27, 22, 1, 0]

Context Condition: 2
Unique Messages:
[14, 22, 1, 22, 0]
[14, 3, 3, 18, 0]
[14, 27, 22, 14, 0]
[14, 3, 22, 14, 0]
[14, 27, 22, 3, 0]
[14, 22, 22, 1, 0]

Context Condition: 0
Unique Messages:
[14, 27, 22, 1, 0]



In [17]:
# Load the saved games for this concept and display the table.
df = pd.read_csv(
    'analysis_float_encoded_ds/quali_(5,4)_standard_[3. 2. 2. 0. 2.],[1. 1. 1. 1. 1.]all.csv'
)
df



Unnamed: 0,game_nr,object,fixed indices,context condition,message
0,1802,[3. 2. 2. 0. 2.],[1. 1. 1. 1. 1.],1,"[7, 7, 12, 11, 7, 0]"
1,1802,[3. 2. 2. 0. 2.],[1. 1. 1. 1. 1.],1,"[7, 7, 12, 11, 7, 0]"
2,1802,[3. 2. 2. 0. 2.],[1. 1. 1. 1. 1.],1,"[7, 7, 12, 11, 7, 0]"
3,1802,[3. 2. 2. 0. 2.],[1. 1. 1. 1. 1.],1,"[7, 7, 12, 11, 7, 0]"
4,1802,[3. 2. 2. 0. 2.],[1. 1. 1. 1. 1.],1,"[7, 7, 12, 11, 7, 0]"
...,...,...,...,...,...
355,70097,[3. 2. 2. 0. 2.],[1. 1. 1. 1. 1.],0,"[12, 12, 12, 11, 10, 0]"
356,70097,[3. 2. 2. 0. 2.],[1. 1. 1. 1. 1.],0,"[12, 12, 12, 11, 10, 0]"
357,70097,[3. 2. 2. 0. 2.],[1. 1. 1. 1. 1.],0,"[12, 12, 12, 11, 10, 0]"
358,70097,[3. 2. 2. 0. 2.],[1. 1. 1. 1. 1.],0,"[12, 12, 12, 11, 10, 0]"


In [18]:
df = pd.read_csv(
    'analysis_float_encoded_ds/quali_(5,4)_standard_[3. 2. 2. 0. 2.],[1. 1. 1. 1. 1.]all.csv'
)

# Walk through the context conditions in order of first appearance and list
# the distinct messages recorded under each of them.
column = 'context condition'
for context in df[column].unique():
    print(f"Context Condition: {context}")
    print("Unique Messages:")

    # rows that belong to the current context condition
    in_context = df[column] == context
    for message in df.loc[in_context, 'message'].unique():
        print(message)

    print()

Context Condition: 1
Unique Messages:
[7, 7, 12, 11, 7, 0]
[7, 11, 12, 11, 10, 0]
[7, 7, 12, 11, 10, 0]
[12, 12, 12, 11, 10, 0]
[12, 11, 12, 11, 10, 0]

Context Condition: 3
Unique Messages:
[7, 7, 2, 7, 10, 0]
[7, 7, 2, 11, 10, 0]
[7, 7, 12, 7, 10, 0]
[7, 11, 12, 7, 10, 0]
[7, 10, 2, 11, 10, 0]

Context Condition: 4
Unique Messages:
[7, 10, 2, 7, 10, 0]
[7, 10, 2, 11, 10, 0]
[7, 7, 2, 7, 10, 0]

Context Condition: 0
Unique Messages:
[12, 11, 12, 11, 10, 0]
[7, 11, 12, 11, 10, 0]
[12, 12, 12, 11, 10, 0]

Context Condition: 2
Unique Messages:
[7, 7, 2, 7, 10, 0]
[7, 7, 2, 11, 10, 0]
[12, 7, 12, 7, 10, 0]
[7, 11, 12, 11, 10, 0]



### Generic Concepts

In [5]:
# Load the saved games for this concept and display the table.
df = pd.read_csv(
    'analysis_float_encoded_ds/quali_(3,4)_standard_[0. 2. 3.],[1. 0. 0.]all.csv'
)
df

Unnamed: 0,game_nr,object,fixed indices,context condition,message
0,116,[0. 2. 3.],[1. 0. 0.],0,"[7, 15, 15, 0]"
1,116,[0. 3. 2.],[1. 0. 0.],0,"[7, 15, 15, 0]"
2,116,[0. 2. 2.],[1. 0. 0.],0,"[7, 15, 15, 0]"
3,116,[0. 1. 0.],[1. 0. 0.],0,"[7, 15, 15, 0]"
4,116,[0. 0. 2.],[1. 0. 0.],0,"[7, 15, 15, 0]"
...,...,...,...,...,...
65,1691,[0. 3. 3.],[1. 0. 0.],0,"[7, 15, 15, 0]"
66,1691,[0. 1. 1.],[1. 0. 0.],0,"[7, 15, 15, 0]"
67,1691,[0. 2. 2.],[1. 0. 0.],0,"[7, 15, 15, 0]"
68,1691,[0. 3. 2.],[1. 0. 0.],0,"[7, 15, 15, 0]"


In [6]:
df = pd.read_csv(
    'analysis_float_encoded_ds/quali_(3,4)_standard_[0. 2. 3.],[1. 0. 0.]all.csv'
)

# Walk through the context conditions in order of first appearance and list
# the distinct messages recorded under each of them.
column = 'context condition'
for context in df[column].unique():
    print(f"Context Condition: {context}")
    print("Unique Messages:")

    # rows that belong to the current context condition
    in_context = df[column] == context
    for message in df.loc[in_context, 'message'].unique():
        print(message)

    print()

Context Condition: 0
Unique Messages:
[7, 15, 15, 0]



In [10]:
# Load the saved games for this concept and display the table.
df = pd.read_csv(
    'analysis_float_encoded_ds/quali_(4,8)_standard_[7. 2. 0. 0.],[1. 0. 0. 0.]all.csv'
)
df

Unnamed: 0,game_nr,object,fixed indices,context condition,message
0,36064,[7. 2. 0. 0.],[1. 0. 0. 0.],0,"[14, 21, 10, 12, 0]"
1,36064,[7. 7. 7. 3.],[1. 0. 0. 0.],0,"[14, 21, 10, 12, 0]"
2,36064,[7. 0. 3. 1.],[1. 0. 0. 0.],0,"[14, 21, 10, 12, 0]"
3,36064,[7. 7. 7. 6.],[1. 0. 0. 0.],0,"[14, 21, 10, 12, 0]"
4,36064,[7. 3. 0. 2.],[1. 0. 0. 0.],0,"[14, 21, 10, 12, 0]"
...,...,...,...,...,...
95,114604,[7. 6. 2. 4.],[1. 0. 0. 0.],0,"[14, 21, 21, 12, 0]"
96,114604,[7. 6. 5. 3.],[1. 0. 0. 0.],0,"[14, 21, 21, 12, 0]"
97,114604,[7. 5. 0. 4.],[1. 0. 0. 0.],0,"[14, 21, 21, 12, 0]"
98,114604,[7. 0. 5. 6.],[1. 0. 0. 0.],0,"[14, 21, 21, 12, 0]"


In [11]:
df = pd.read_csv(
    'analysis_float_encoded_ds/quali_(4,8)_standard_[7. 2. 0. 0.],[1. 0. 0. 0.]all.csv'
)

# Walk through the context conditions in order of first appearance and list
# the distinct messages recorded under each of them.
column = 'context condition'
for context in df[column].unique():
    print(f"Context Condition: {context}")
    print("Unique Messages:")

    # rows that belong to the current context condition
    in_context = df[column] == context
    for message in df.loc[in_context, 'message'].unique():
        print(message)

    print()

Context Condition: 0
Unique Messages:
[14, 21, 10, 12, 0]
[14, 21, 4, 12, 0]
[14, 21, 27, 12, 0]
[14, 4, 10, 12, 0]
[18, 21, 22, 4, 0]
[14, 21, 21, 12, 0]
[14, 21, 10, 2, 0]
[18, 21, 10, 12, 0]



In [15]:
# Load the saved games for this concept and display the table.
df = pd.read_csv(
    'analysis_float_encoded_ds/quali_(5,4)_standard_[3. 1. 3. 0. 3.],[1. 0. 0. 0. 0.]all.csv'
)
df

Unnamed: 0,game_nr,object,fixed indices,context condition,message
0,2785,[3. 1. 3. 0. 3.],[1. 0. 0. 0. 0.],0,"[9, 12, 12, 11, 11, 0]"
1,2785,[3. 2. 0. 0. 1.],[1. 0. 0. 0. 0.],0,"[9, 12, 12, 11, 11, 0]"
2,2785,[3. 3. 1. 3. 1.],[1. 0. 0. 0. 0.],0,"[9, 12, 12, 11, 11, 0]"
3,2785,[3. 1. 1. 2. 2.],[1. 0. 0. 0. 0.],0,"[9, 12, 12, 11, 11, 0]"
4,2785,[3. 0. 0. 0. 0.],[1. 0. 0. 0. 0.],0,"[9, 12, 12, 11, 11, 0]"
...,...,...,...,...,...
75,70742,[3. 1. 1. 1. 0.],[1. 0. 0. 0. 0.],0,"[9, 12, 12, 11, 11, 0]"
76,70742,[3. 2. 3. 2. 2.],[1. 0. 0. 0. 0.],0,"[9, 12, 12, 11, 11, 0]"
77,70742,[3. 3. 1. 1. 1.],[1. 0. 0. 0. 0.],0,"[9, 12, 12, 11, 11, 0]"
78,70742,[3. 3. 0. 2. 2.],[1. 0. 0. 0. 0.],0,"[9, 12, 12, 11, 11, 0]"


In [16]:
df = pd.read_csv(
    'analysis_float_encoded_ds/quali_(5,4)_standard_[3. 1. 3. 0. 3.],[1. 0. 0. 0. 0.]all.csv'
)

# Walk through the context conditions in order of first appearance and list
# the distinct messages recorded under each of them.
column = 'context condition'
for context in df[column].unique():
    print(f"Context Condition: {context}")
    print("Unique Messages:")

    # rows that belong to the current context condition
    in_context = df[column] == context
    for message in df.loc[in_context, 'message'].unique():
        print(message)

    print()

Context Condition: 0
Unique Messages:
[9, 12, 12, 11, 11, 0]
[9, 12, 2, 11, 11, 0]



In [20]:
# Load the saved games for this concept and display the table.
df = pd.read_csv(
    'analysis_float_encoded_ds/quali_(3,16)_standard_[10.  8. 11.],[1. 0. 0.]all.csv'
)
df

Unnamed: 0,game_nr,object,fixed indices,context condition,message
0,1856,[10. 8. 11.],[1. 0. 0.],0,"[15, 23, 18, 0]"
1,1856,[10. 1. 15.],[1. 0. 0.],0,"[15, 23, 18, 0]"
2,1856,[10. 10. 1.],[1. 0. 0.],0,"[15, 23, 18, 0]"
3,1856,[10. 3. 6.],[1. 0. 0.],0,"[15, 23, 18, 0]"
4,1856,[10. 2. 5.],[1. 0. 0.],0,"[15, 23, 18, 0]"
...,...,...,...,...,...
95,79581,[10. 12. 14.],[1. 0. 0.],0,"[15, 38, 50, 0]"
96,79581,[10. 7. 15.],[1. 0. 0.],0,"[15, 38, 50, 0]"
97,79581,[10. 0. 1.],[1. 0. 0.],0,"[15, 38, 50, 0]"
98,79581,[10. 9. 1.],[1. 0. 0.],0,"[15, 38, 50, 0]"


In [21]:
df = pd.read_csv(
    'analysis_float_encoded_ds/quali_(3,16)_standard_[10.  8. 11.],[1. 0. 0.]all.csv'
)

# Walk through the context conditions in order of first appearance and list
# the distinct messages recorded under each of them.
column = 'context condition'
for context in df[column].unique():
    print(f"Context Condition: {context}")
    print("Unique Messages:")

    # rows that belong to the current context condition
    in_context = df[column] == context
    for message in df.loc[in_context, 'message'].unique():
        print(message)

    print()

Context Condition: 0
Unique Messages:
[15, 23, 18, 0]
[15, 23, 50, 0]
[15, 38, 50, 0]
[20, 23, 50, 0]

