In [6]:
import os
import random

import numpy as np
import pandas as pd
import torch

from utils.analysis_from_interaction import *
# Create the output folders up front. `exist_ok=True` keeps the cell safe to re-run and avoids
# the check-then-create race of `os.path.exists` followed by `os.makedirs`.
# 'analysis' is created as well because the export cells of this notebook write their CSV
# files to 'analysis/...', and `DataFrame.to_csv` does not create missing folders.
for folder in ('qualitative_analysis', 'analysis'):
    os.makedirs(folder, exist_ok=True)

In [7]:
# Dataset configuration. The lists are index-aligned: entry k of each list describes dataset k,
# and a dataset is named '(<n_attributes>,<n_values>)'.
n_attributes = [3, 3, 3, 4, 4, 5]
n_values = [4, 8, 16, 4, 8, 4]
datasets = [f'({n_att},{n_val})' for n_att, n_val in zip(n_attributes, n_values)]
n_datasets = len(datasets)
n_epochs = 300
# One results folder per dataset.
paths = [f'results/{name}_game_size_10_vsf_3' for name in datasets]

In [9]:
# Choose which simulations are evaluated; `setting` is the name used for that variant in the
# result paths and output file names built further down.
context_unaware = False  # True -> context_unaware simulations, False -> original simulations
setting = 'context_unaware' if context_unaware else 'standard'

### See which messages are produced for which concepts
The following code uses the original interaction files, which are too large to upload to GitHub. Please reach out to the authors if you are interested in receiving the interaction files.
Alternatively, please check the folder 'qualitative_analysis' for a collection of CSV files that contain concepts and messages for all datasets.

In [13]:
# For every dataset: load the final-epoch training interaction of the first run, rebuild the
# concept and context condition of each game, draw one random concept specification and export
# every game whose target concept matches that specification to a CSV file.
# NOTE: `random` is not seeded in this cell, so each run draws different specifications.
output_dir = 'analysis'
# DataFrame.to_csv does not create missing folders, so make sure the target folder exists.
os.makedirs(output_dir, exist_ok=True)

# go through all datasets
for i, d in enumerate(datasets):
    # select first run
    path_to_run = paths[i] + '/' + str(setting) + '/' + str(0) + '/'
    path_to_interaction_train = (path_to_run + 'interactions/train/epoch_' + str(n_epochs) + '/interaction_gpu0')
    path_to_interaction_val = (path_to_run + 'interactions/validation/epoch_' + str(n_epochs) + '/interaction_gpu0')
    # SECURITY: torch.load unpickles the file; only load interaction files from a trusted source.
    interaction = torch.load(path_to_interaction_train)
    print(path_to_interaction_train)

    # one symbol id per message position (argmax over the last dimension)
    messages = interaction.message.argmax(dim=-1)
    messages = [msg.tolist() for msg in messages]
    sender_input = interaction.sender_input
    print(sender_input.shape)
    # the first half of dimension 1 holds the target objects, the second half the distractors
    n_targets = int(sender_input.shape[1]/2)
    # get target objects and fixed vectors to re-construct concepts
    target_objects = sender_input[:, :n_targets]
    target_objects = k_hot_to_attributes(target_objects, n_values[i])
    # concepts are defined by a list of target objects (here one sampled target object) and a fixed vector
    (objects, fixed) = retrieve_concepts_sampling(target_objects, all_targets=True)
    concepts = list(zip(objects, fixed))

    # get distractor objects to re-construct context conditions
    distractor_objects = sender_input[:, n_targets:]
    distractor_objects = k_hot_to_attributes(distractor_objects, n_values[i])
    context_conds = retrieve_context_condition(objects, fixed, distractor_objects)

    # get random qualitative samples
    n_fixed = random.randint(1, n_attributes[i]) # how many fixed attributes?
    fixed_indices = random.sample(range(0, n_attributes[i]), k=n_fixed) # select which attributes are fixed
    fixed_values = random.choices(range(0, n_values[i]), k=n_fixed) # value of each fixed attribute
    print(n_fixed, fixed_indices, fixed_values)

    # Fixed vector of the matching concepts. Kept in its own name so that `fixed` (the list of
    # all fixed vectors returned above) is not overwritten inside the loop.
    matched_fixed = None
    all_for_this_concept = []
    for idx, (t_objects, t_fixed) in enumerate(concepts):
        # the concept must fix exactly the sampled attributes ...
        if sum(t_fixed) == n_fixed and all(t_fixed[fixed_index] == 1 for fixed_index in fixed_indices):
            for t_object in t_objects:
                # ... and the target object must carry the sampled values on those attributes
                if all(t_object[fixed_index] == fixed_values[j] for j, fixed_index in enumerate(fixed_indices)):
                    all_for_this_concept.append((idx, t_object, t_fixed, context_conds[idx], messages[idx]))
                    matched_fixed = t_fixed

    if len(all_for_this_concept) > 0:
        # export all matches (no subsampling)
        sample = all_for_this_concept
        column_names = ['game_nr', 'object', 'fixed indices', 'context condition', 'message']
        df = pd.DataFrame(sample, columns=column_names)
        # build the file name once so the saved file and the log message cannot diverge
        output_path = (output_dir + '/quali_' + str(d) + '_' + str(setting) + '_' + str(sample[0][1])
                       + ',' + str(matched_fixed) + 'all.csv')
        df.to_csv(output_path, index=False)
        print('saved ' + output_path)
    else:
        # the randomly drawn specification does not occur in this dataset's training games
        raise ValueError("sample for dataset " + str(d) + " could not be generated")

results/(4,4)_game_size_10_vsf_3/standard/0/interactions/train/epoch_300/interaction_gpu0
torch.Size([12037, 20, 16])
4 [0, 1, 2, 3] [0, 0, 0, 3]
saved analysis/quali_(4,4)_standard_[0. 0. 0. 3.],[1. 1. 1. 1.]all.csv


### Case study: concepts on different levels of abstraction

In [149]:
# Case-study configuration: restrict the analysis to the single dataset '(4,4)'.
# The lists stay index-aligned, with one entry per dataset.
n_attributes = [4]
n_values = [4]
datasets = [f'({n_att},{n_val})' for n_att, n_val in zip(n_attributes, n_values)]
n_datasets = len(datasets)
n_epochs = 300
paths = [f'results/{name}_game_size_10_vsf_3' for name in datasets]

In [150]:
# Load the training interaction of the first run (index 0) of the case-study dataset and rebuild
# the concept, context condition and message of every game.
path_to_run = f'{paths[0]}/{setting}/0/'
path_to_interaction_train = f'{path_to_run}interactions/train/epoch_{n_epochs}/interaction_gpu0'
path_to_interaction_val = f'{path_to_run}interactions/validation/epoch_{n_epochs}/interaction_gpu0'
# NOTE: torch.load unpickles the file; only load interaction files from a trusted source.
interaction = torch.load(path_to_interaction_train)
print(path_to_interaction_train)

# messages as nested Python lists of symbol ids (argmax over the last dimension)
messages = interaction.message.argmax(dim=-1).tolist()
sender_input = interaction.sender_input
print(sender_input.shape)
# dimension 1 is split in half: targets first, distractors second
n_targets = sender_input.shape[1] // 2

# target objects and fixed vectors re-construct the concepts;
# a concept is a list of target objects together with a fixed vector
target_objects = k_hot_to_attributes(sender_input[:, :n_targets], n_values[0])
(objects, fixed) = retrieve_concepts_sampling(target_objects, all_targets=True)
concepts = list(zip(objects, fixed))

# distractor objects re-construct the context conditions
distractor_objects = k_hot_to_attributes(sender_input[:, n_targets:], n_values[0])
context_conds = retrieve_context_condition(objects, fixed, distractor_objects)

results/(4,4)_game_size_10_vsf_3/standard/0/interactions/train/epoch_300/interaction_gpu0
torch.Size([12037, 20, 16])


In [151]:
# Export a random sample (at most `max_samples` games) of all games whose target concept is
# `concept`, restricted to the attributes flagged with 1 in `fixed`.
concept = np.array([0., 1., 2., 1.])
fixed = np.array([1., 1., 1., 1.])
max_samples = 20

# Derive the matching criteria from `concept` and `fixed` instead of repeating them by hand,
# so the filter below can never get out of sync with the arrays used in the file name.
fixed_indices = [int(attr) for attr in np.flatnonzero(fixed == 1)]
fixed_values = [int(concept[attr]) for attr in fixed_indices]
n_fixed = len(fixed_indices)
print(n_fixed, fixed_indices, fixed_values)

all_for_this_concept = []
for idx, (t_objects, t_fixed) in enumerate(concepts):
    # the concept must fix exactly the requested attributes ...
    if sum(t_fixed) == n_fixed and all(t_fixed[fixed_index] == 1 for fixed_index in fixed_indices):
        for t_object in t_objects:
            # ... and the target object must carry the requested values on those attributes
            if all(t_object[fixed_index] == fixed_values[j] for j, fixed_index in enumerate(fixed_indices)):
                all_for_this_concept.append((idx, t_object, t_fixed, context_conds[idx], messages[idx]))

if len(all_for_this_concept) > 0:
    # random.sample raises ValueError when asked for more items than are available,
    # so cap the sample size at the number of matching games
    sample = random.sample(all_for_this_concept, min(max_samples, len(all_for_this_concept)))
    column_names = ['game_nr', 'object', 'fixed indices', 'context condition', 'message']
    df = pd.DataFrame(sample, columns=column_names)
    # DataFrame.to_csv does not create missing folders
    os.makedirs('analysis', exist_ok=True)
    # build the file name once so the saved file and the log message cannot diverge
    output_path = ('analysis/quali_' + str(datasets[0]) + '_' + str(setting) + '_' + str(concept)
                   + ',' + str(fixed) + '.csv')
    df.to_csv(output_path, index=False)
    print('saved ' + output_path)
else:
    raise ValueError("sample for dataset " + str(datasets[0]) + " could not be generated")

4 [0, 1, 2, 3] [0, 1, 2, 1]
saved analysis/quali_(4,4)_standard_[0. 1. 2. 1.],[1. 1. 1. 1.].csv


In [53]:
# Read the exported case-study sample back from disk and display it as the cell output.
case_study_csv = 'analysis/quali_(4,4)_standard_[0. 1. 2. 1.],[1. 1. 1. 1.].csv'
df = pd.read_csv(case_study_csv)
df

Unnamed: 0,game_nr,object,fixed indices,context condition,message
0,1129,[2. 3. 1.],[1. 1. 0.],0,"[6, 13, 10, 0]"
1,666,[2. 3. 3.],[1. 1. 0.],1,"[6, 13, 10, 0]"
2,1129,[2. 3. 0.],[1. 1. 0.],0,"[6, 13, 10, 0]"
3,1129,[2. 3. 0.],[1. 1. 0.],0,"[6, 13, 10, 0]"
4,253,[2. 3. 0.],[1. 1. 0.],0,"[13, 6, 13, 0]"
5,788,[2. 3. 1.],[1. 1. 0.],0,"[6, 13, 6, 0]"
6,497,[2. 3. 2.],[1. 1. 0.],0,"[6, 13, 6, 0]"
7,918,[2. 3. 1.],[1. 1. 0.],1,"[6, 13, 10, 0]"
8,78,[2. 3. 0.],[1. 1. 0.],0,"[6, 13, 10, 0]"
9,1636,[2. 3. 0.],[1. 1. 0.],1,"[13, 6, 10, 0]"
