In [3]:
from utils.load_results import *
from utils.plot_helpers import *

import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
plt.style.use('default')
import torch
from utils.analysis_from_interaction import *
from language_analysis_local import TopographicSimilarityConceptLevel, encode_target_concepts_for_topsim
import os
if not os.path.exists('analysis'):
    os.makedirs('analysis')
#import plotly.express as px

In [20]:
datasets = ['(3,4)', '(3,8)', '(3,16)', '(4,4)', '(4,8)', '(5,4)']
n_values = [4, 8, 16, 4, 8, 4]
n_attributes = [3, 3, 3, 4, 4, 5]
n_epochs = 300
n_datasets = len(datasets)
paths = ['results/' + d + '_game_size_10_vsf_3' for d in datasets]

In [2]:
datasets = ['(3,4)']
n_values = [4,]
n_attributes = [3,]
n_epochs = 300
n_datasets = len(datasets)
paths = ['results/' + d + '_game_size_10_vsf_3' for d in datasets]

In [6]:
context_unaware = False # whether original or context_unaware simulations are evaluated

setting = 'standard'
granularity_list = ['coarse', 'mixed', 'fine']
non_default_gran_list = ['coarse', 'fine']

### Determine vocab size

In [7]:
import sys
import os
from collections import Counter

# Check if the output file exists
if not os.path.exists('vocab_sizes.txt'):
    mode = 'w'  # If the file doesn't exist, open in write mode to create it
else:
    mode = 'a'  # If the file exists, open in append mode to add to it

# Open the file in the appropriate mode
with open('vocab_sizes.txt', mode) as f:
    # Redirect stdout to the file
    sys.stdout = f
    
    # go through all datasets
    for i, d in enumerate(datasets):
        print('dataset: '+d+'')
        for g in granularity_list:
            # select first run
            if g != 'mixed':
                print(g+ ' context condition')
                path_to_run = paths[i] + '/' + str(setting) +'/' + 'granularity_'+g+ '/'+ str(0) + '/'
                path_to_interaction_train = (path_to_run + 'interactions/train/epoch_' + str(n_epochs) + '/interaction_gpu0')
                path_to_interaction_val = (path_to_run + 'interactions/validation/epoch_' + str(n_epochs) + '/interaction_gpu0')
                interaction = torch.load(path_to_interaction_train)
                print(path_to_interaction_train)
                
                messages = interaction.message.argmax(dim=-1)
                messages = [msg.tolist() for msg in messages]
                all_symbols = [symbol for message in messages for symbol in message]
                symbol_counts = Counter(all_symbols)
                print(symbol_counts)
                print(len(symbol_counts), '\n')

# Restore the original stdout
sys.stdout = sys.__stdout__


### See which messages are produced for which concepts

In [22]:
datasets = ['(3,4)',]
n_values = [4]
n_attributes = [3]
n_epochs = 300
n_datasets = len(datasets)
paths = ['results/' + d + '_game_size_10_vsf_3' for d in datasets]

In [29]:
# go through all datasets
for i, d in enumerate(datasets):
    for g in non_default_gran_list:
        # select first run
        path_to_run = paths[i] + '/' + str(setting) +'/granularity_' + g +'/' + str(0) + '/'
        path_to_interaction_train = (path_to_run + 'interactions/train/epoch_' + str(n_epochs) + '/interaction_gpu0')
        path_to_interaction_val = (path_to_run + 'interactions/validation/epoch_' + str(n_epochs) + '/interaction_gpu0')
        interaction = torch.load(path_to_interaction_train)
        print(path_to_interaction_train)

        messages = interaction.message.argmax(dim=-1)
        messages = [msg.tolist() for msg in messages]
        sender_input = interaction.sender_input
        print(sender_input.shape)
        n_targets = int(sender_input.shape[1]/2)
        # get target objects and fixed vectors to re-construct concepts
        target_objects = sender_input[:, :n_targets]
        target_objects = k_hot_to_attributes(target_objects, n_values[i])
        # concepts are defined by a list of target objects (here one sampled target object) and a fixed vector
        (objects, fixed) = retrieve_concepts_sampling(target_objects, all_targets=True)
        concepts = list(zip(objects, fixed))

        # get distractor objects to re-construct context conditions
        distractor_objects = sender_input[:, n_targets:]
        distractor_objects = k_hot_to_attributes(distractor_objects, n_values[i])
        context_conds = retrieve_context_condition(objects, fixed, distractor_objects)
        print(context_conds)

        # get random qualitative samples
        #fixed_index = random.randint(0, n_attributes[i]-1) # define a fixed index for the concept
        n_fixed = random.randint(1, n_attributes[i]) # how many fixed attributes?
        #n_fixed = 3
        fixed_indices = random.sample(range(0, n_attributes[i]), k=n_fixed) # select which attributes are fixed
        #fixed_indices = [0, 2, 1]
        #fixed_value = random.randint(0, n_values[i]-1) # define a fixed value for this index
        fixed_values = random.choices(range(0, n_values[i]), k=n_fixed)
        #fixed_values = [0, 1, 2]
        print(n_fixed, fixed_indices, fixed_values)
        #index_threshold = 20000 # optional: define some index threshold to make sure that examples are not taken from the beginning of training
        # TODO: adapt this loop such that multiple indices can be fixed
        all_for_this_concept = []
        for idx, (t_objects, t_fixed) in enumerate(concepts):
            #if sum(t_fixed) == 1 and t_fixed[fixed_index] == 1:# and idx > index_threshold:
            if sum(t_fixed) == n_fixed and all(t_fixed[fixed_index] == 1 for fixed_index in fixed_indices):
                for t_object in t_objects:
                    if all(t_object[fixed_index] == fixed_values[j] for j, fixed_index in enumerate(fixed_indices)):
                        all_for_this_concept.append((idx, t_object, t_fixed, context_conds[idx], messages[idx]))
                        fixed = t_fixed
        #print(all_for_this_concept)                
        if len(all_for_this_concept) > 0:
            #sample = random.sample(all_for_this_concept, 20)
            sample = all_for_this_concept
            column_names = ['game_nr', 'object', 'fixed indices', 'context condition', 'message']
            df = pd.DataFrame(sample, columns=column_names)
            df.to_csv('analysis/quali_' + str(d) + '_' + str(setting) + '_'+g +'_' + str(sample[0][1]) + ',' + str(fixed) + 'all.csv', index=False)
            print('saved ' + 'analysis/quali_' + str(d) + '_' + str(setting) +'_' + g + '_' + str(sample[0][1]) + ',' + str(fixed) + 'all.csv')
        else:
            raise ValueError("sample for dataset " + str(d) + g + " could not be generated")

results/(3,4)_game_size_10_vsf_3/standard/granularity_coarse/0/interactions/train/epoch_300/interaction_gpu0
torch.Size([742, 20, 12])
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 

In [4]:
import pandas as pd
df = pd.read_csv('analysis/quali_(3,4)_standard_[0. 3. 2.],[1. 1. 1.].csv')
df

Unnamed: 0,game_nr,object,fixed indices,context condition,message
0,1384,[0. 3. 2.],[1. 1. 1.],2,"[1, 1, 1, 0]"
1,90,[0. 3. 2.],[1. 1. 1.],1,"[1, 1, 1, 0]"
2,1384,[0. 3. 2.],[1. 1. 1.],2,"[1, 1, 1, 0]"
3,1183,[0. 3. 2.],[1. 1. 1.],1,"[11, 1, 1, 0]"
4,1798,[0. 3. 2.],[1. 1. 1.],0,"[11, 1, 9, 0]"
5,1483,[0. 3. 2.],[1. 1. 1.],2,"[1, 1, 14, 0]"
6,389,[0. 3. 2.],[1. 1. 1.],0,"[1, 1, 14, 0]"
7,1535,[0. 3. 2.],[1. 1. 1.],0,"[11, 1, 1, 0]"
8,1535,[0. 3. 2.],[1. 1. 1.],0,"[11, 1, 1, 0]"
9,1699,[0. 3. 2.],[1. 1. 1.],1,"[1, 1, 9, 0]"


In [5]:
df = pd.read_csv('analysis\quali_(3,4)_standard_coarse_[3. 1. 2.],[1. 1. 1.]all.csv')
df

Unnamed: 0,game_nr,object,fixed indices,context condition,message
0,155,[3. 1. 2.],[1. 1. 1.],0,"[3, 3, 3, 0]"
1,155,[3. 1. 2.],[1. 1. 1.],0,"[3, 3, 3, 0]"
2,155,[3. 1. 2.],[1. 1. 1.],0,"[3, 3, 3, 0]"
3,155,[3. 1. 2.],[1. 1. 1.],0,"[3, 3, 3, 0]"
4,155,[3. 1. 2.],[1. 1. 1.],0,"[3, 3, 3, 0]"
...,...,...,...,...,...
85,740,[3. 1. 2.],[1. 1. 1.],0,"[3, 3, 3, 0]"
86,740,[3. 1. 2.],[1. 1. 1.],0,"[3, 3, 3, 0]"
87,740,[3. 1. 2.],[1. 1. 1.],0,"[3, 3, 3, 0]"
88,740,[3. 1. 2.],[1. 1. 1.],0,"[3, 3, 3, 0]"


In [6]:
df = pd.read_csv('analysis\quali_(3,4)_standard_fine_[0. 0. 0.],[1. 1. 1.]all.csv')
df

Unnamed: 0,game_nr,object,fixed indices,context condition,message
0,190,[0. 0. 0.],[1. 1. 1.],2,"[14, 12, 5, 0]"
1,190,[0. 0. 0.],[1. 1. 1.],2,"[14, 12, 5, 0]"
2,190,[0. 0. 0.],[1. 1. 1.],2,"[14, 12, 5, 0]"
3,190,[0. 0. 0.],[1. 1. 1.],2,"[14, 12, 5, 0]"
4,190,[0. 0. 0.],[1. 1. 1.],2,"[14, 12, 5, 0]"
...,...,...,...,...,...
75,582,[0. 0. 0.],[1. 1. 1.],2,"[14, 12, 5, 0]"
76,582,[0. 0. 0.],[1. 1. 1.],2,"[14, 12, 5, 0]"
77,582,[0. 0. 0.],[1. 1. 1.],2,"[14, 12, 5, 0]"
78,582,[0. 0. 0.],[1. 1. 1.],2,"[14, 12, 5, 0]"
