##### This notebook extracts the concepts from each dataset encoding scheme and finds the concepts that overlap between the two. Some of these concepts are then randomly selected for the qualitative analysis.

In [None]:
# This notebook was used to compare the vague float-encoded datasets with the one-hot encoded datasets.

In [1]:
import os
import torch
import numpy as np
from collections import defaultdict
from utils.load_results import *
from utils.analysis_from_interaction import *

In [2]:
# Experiment grid for the one-hot encoded runs.
# `datasets`, `n_attributes` and `n_values` are parallel tuples: position k in
# each of them refers to the same dataset.
datasets = (
    '(3,4)', '(3,8)', '(3,16)',
    '(4,4)', '(4,8)',
    '(5,4)',
)

# Transpose the (n_attributes, n_values) pairs into the two parallel tuples.
n_attributes, n_values = zip(
    (3, 4), (3, 8), (3, 16),
    (4, 4), (4, 8),
    (5, 4),
)

# Epoch number that appears in the name of the interaction directory to load.
n_epochs = 300

# One results directory per dataset; every entry ends with a '/'.
paths = [
    f'results/one_hot_encoded_ds/{dataset_name}_game_size_10_vsf_3/'
    for dataset_name in datasets
]

In [3]:
# Switch between the two result settings. The resulting `setting` string is
# used as a directory name when the path to a run is assembled.
context_unaware = False
setting = 'context_unaware' if context_unaware else 'standard'

In [4]:
# Collect the distinct concepts found in the saved training interaction of
# every one-hot encoded dataset.
#
# After this cell, unique_concepts_all_one_hot_encoded_datasets[k] holds the
# de-duplicated (objects, fixed) pairs for datasets[k]. Both members of a pair
# are converted to (nested) tuples so that the pair is hashable.
unique_concepts_all_one_hot_encoded_datasets = []

for i, d in enumerate(datasets):
    print(f"Dataset: {d}")

    # Only process the first run (run = 0).
    # paths[i] already ends with '/', so the segments are joined with
    # os.path.join instead of '+' to avoid a doubled separator. The trailing
    # '' keeps path_to_run ending in a separator, as before.
    path_to_run = os.path.join(paths[i], str(setting), str(0), '')
    path_to_interaction_train = os.path.join(
        path_to_run, 'interactions', 'train', 'epoch_' + str(n_epochs), 'interaction_gpu0')
    # NOTE(review): torch.load unpickles the stored object. Recent PyTorch
    # releases default to weights_only=True, which may refuse non-tensor
    # objects -- confirm against the installed version.
    interaction = torch.load(path_to_interaction_train)

    # Extract unique concepts.
    # The first half of sender_input along axis 1 is treated as the targets.
    sender_input = interaction.sender_input
    n_targets = sender_input.shape[1] // 2
    target_objects = sender_input[:, :n_targets]
    target_objects = k_hot_to_attributes(target_objects, n_values[i])
    (objects, fixed) = retrieve_concepts_sampling(target_objects, all_targets=True)
    concepts = list(zip(objects, fixed))

    # .tolist() yields (possibly nested) lists, which are unhashable; turn each
    # inner list into a tuple so the pairs can be de-duplicated with a set.
    concepts_list = [
        (tuple(tuple(x) if isinstance(x, list) else x for x in obj.tolist()),
         tuple(tuple(x) if isinstance(x, list) else x for x in fixed_vec.tolist()))
        for obj, fixed_vec in concepts
    ]
    unique_concepts = list(set(concepts_list))

    unique_concepts_all_one_hot_encoded_datasets.append(unique_concepts)

# Save unique concepts as lists (one fresh list per dataset).
# Each concept is already a tuple of tuples here, so the inner conversion
# leaves its contents unchanged.
one_hot_unique_concepts_lists = []
for dataset_concepts in unique_concepts_all_one_hot_encoded_datasets:
    one_hot_unique_concepts_list = [tuple(tuple(x) if isinstance(x, list) else x for x in concept)
                            for concept in dataset_concepts]
    one_hot_unique_concepts_lists.append(one_hot_unique_concepts_list)

Dataset: (3,4)
Dataset: (3,8)
Dataset: (3,16)
Dataset: (4,4)
Dataset: (4,8)
Dataset: (5,4)


In [5]:
# Experiment grid for the vague (float encoded) runs.
# `datasets`, `n_attributes` and `n_values` are parallel tuples: position k in
# each of them refers to the same dataset.
datasets = (
    '(3,4)', '(3,8)', '(3,16)',
    '(4,4)', '(4,8)',
    '(5,4)',
)

# Transpose the (n_attributes, n_values) pairs into the two parallel tuples.
n_attributes, n_values = zip(
    (3, 4), (3, 8), (3, 16),
    (4, 4), (4, 8),
    (5, 4),
)

# Epoch number that appears in the name of the interaction directory to load.
n_epochs = 300

# One results directory per dataset; every entry ends with a '/'.
# Note that this rebinds `paths`, which an earlier cell also assigns.
paths = [
    f'results/vague_ds_results/{dataset_name}_game_size_10_vsf_3/'
    for dataset_name in datasets
]

In [6]:
# Switch between the two result settings. The resulting `setting` string is
# used as a directory name when the path to a run is assembled.
context_unaware = False
setting = 'context_unaware' if context_unaware else 'standard'

In [7]:
# Collect the distinct concepts found in the saved training interaction of
# every float encoded dataset.
#
# After this cell, unique_concepts_all_float_encoded_datasets[k] holds the
# de-duplicated (objects, fixed) pairs for datasets[k]. Both members of a pair
# are converted to (nested) tuples so that the pair is hashable.
unique_concepts_all_float_encoded_datasets = []

for i, d in enumerate(datasets):
    print(f"Dataset: {d}")

    # Only process the first run (run = 0).
    # paths[i] already ends with '/', so the segments are joined with
    # os.path.join instead of '+' to avoid a doubled separator. The trailing
    # '' keeps path_to_run ending in a separator, as before.
    path_to_run = os.path.join(paths[i], str(setting), str(0), '')
    path_to_interaction_train = os.path.join(
        path_to_run, 'interactions', 'train', 'epoch_' + str(n_epochs), 'interaction_gpu0')
    # NOTE(review): torch.load unpickles the stored object. Recent PyTorch
    # releases default to weights_only=True, which may refuse non-tensor
    # objects -- confirm against the installed version.
    interaction = torch.load(path_to_interaction_train)

    # Extract unique concepts.
    # The first half of sender_input along axis 1 is treated as the targets.
    sender_input = interaction.sender_input
    n_targets = sender_input.shape[1] // 2
    target_objects = sender_input[:, :n_targets]
    target_objects = k_hot_to_attributes(target_objects, n_values[i])
    (objects, fixed) = retrieve_concepts_sampling(target_objects, all_targets=True)
    concepts = list(zip(objects, fixed))

    # .tolist() yields (possibly nested) lists, which are unhashable; turn each
    # inner list into a tuple so the pairs can be de-duplicated with a set.
    concepts_list = [
        (tuple(tuple(x) if isinstance(x, list) else x for x in obj.tolist()),
         tuple(tuple(x) if isinstance(x, list) else x for x in fixed_vec.tolist()))
        for obj, fixed_vec in concepts
    ]
    unique_concepts = list(set(concepts_list))

    unique_concepts_all_float_encoded_datasets.append(unique_concepts)

# Save unique concepts as lists (one fresh list per dataset).
# Each concept is already a tuple of tuples here, so the inner conversion
# leaves its contents unchanged.
float_unique_concepts_lists = []
for dataset_concepts in unique_concepts_all_float_encoded_datasets:
    float_unique_concepts_list = [tuple(tuple(x) if isinstance(x, list) else x for x in concept)
                            for concept in dataset_concepts]
    float_unique_concepts_lists.append(float_unique_concepts_list)

Dataset: (3,4)
Dataset: (3,8)
Dataset: (3,16)
Dataset: (4,4)
Dataset: (4,8)
Dataset: (5,4)


In [5]:
# Show, per dataset, the float-encoded concepts treated as "general":
# those whose fixed vector sums to exactly 1.
for i, d in enumerate(datasets):
    print(f"Dataset: {d}")
    print("General Concepts:")
    for obj, fixed_indices in unique_concepts_all_float_encoded_datasets[i]:
        # Skip every concept that does not have exactly one fixed entry.
        if sum(fixed_indices) != 1:
            continue
        print(f"Object: {list(obj)}, Fixed Indices: {list(fixed_indices)}")
    print()

Dataset: (3,4)
General Concepts:
Object: [(3.0, 1.0, 1.0), (2.0, 0.0, 1.0), (1.0, 0.0, 1.0), (2.0, 1.0, 1.0), (0.0, 1.0, 1.0), (1.0, 1.0, 1.0), (3.0, 0.0, 1.0), (0.0, 0.0, 1.0), (3.0, 3.0, 1.0), (1.0, 2.0, 1.0)], Fixed Indices: [0.0, 0.0, 1.0]
Object: [(1.0, 3.0, 1.0), (1.0, 1.0, 0.0), (1.0, 3.0, 2.0), (1.0, 2.0, 1.0), (1.0, 1.0, 3.0), (1.0, 2.0, 3.0), (1.0, 0.0, 1.0), (1.0, 1.0, 1.0), (1.0, 2.0, 0.0), (1.0, 3.0, 0.0)], Fixed Indices: [1.0, 0.0, 0.0]
Object: [(1.0, 2.0, 1.0), (1.0, 0.0, 3.0), (1.0, 3.0, 2.0), (1.0, 1.0, 0.0), (1.0, 1.0, 2.0), (1.0, 0.0, 2.0), (1.0, 2.0, 3.0), (1.0, 1.0, 3.0), (1.0, 3.0, 1.0), (1.0, 3.0, 0.0)], Fixed Indices: [1.0, 0.0, 0.0]
Object: [(0.0, 2.0, 1.0), (0.0, 0.0, 2.0), (0.0, 0.0, 0.0), (0.0, 1.0, 0.0), (0.0, 0.0, 3.0), (0.0, 3.0, 2.0), (0.0, 1.0, 1.0), (0.0, 3.0, 3.0), (0.0, 3.0, 0.0), (0.0, 2.0, 0.0)], Fixed Indices: [1.0, 0.0, 0.0]
Object: [(2.0, 1.0, 0.0), (0.0, 1.0, 0.0), (1.0, 3.0, 0.0), (2.0, 0.0, 0.0), (0.0, 0.0, 0.0), (1.0, 1.0, 0.0), (1.0, 0.0, 0

In [8]:
# Intersect, dataset by dataset, the concepts seen under the one-hot encoding
# with those seen under the float encoding.
# NOTE(review): a concept is compared as a whole (objects, fixed) pair, so two
# concepts only match when their object tuples are identical element for
# element, order included -- confirm this is the intended notion of overlap.
overlapping_concepts = []
for onehot_concepts, float_concepts in zip(unique_concepts_all_one_hot_encoded_datasets,
                                           unique_concepts_all_float_encoded_datasets):
    # Normalise both members of every pair to tuples before building the sets.
    onehot_set = {tuple(tuple(part) for part in pair) for pair in onehot_concepts}
    float_set = {tuple(tuple(part) for part in pair) for pair in float_concepts}
    overlapping_concepts.append(list(onehot_set.intersection(float_set)))

# Report the shared concepts of each dataset, followed by a blank line.
for i, d in enumerate(datasets):
    print(f"Dataset: {d}")
    print("Overlapping Concepts:")
    for obj, fixed_indices in overlapping_concepts[i]:
        print(f"Object: {list(obj)}, Fixed Indices: {list(fixed_indices)}")
    print()

Dataset: (3,4)
Overlapping Concepts:
Object: [(1.0, 1.0, 3.0), (1.0, 1.0, 3.0), (1.0, 1.0, 3.0), (1.0, 1.0, 3.0), (1.0, 1.0, 3.0), (1.0, 1.0, 3.0), (1.0, 1.0, 3.0), (1.0, 1.0, 3.0), (1.0, 1.0, 3.0), (1.0, 1.0, 3.0)], Fixed Indices: [1.0, 1.0, 1.0]
Object: [(3.0, 3.0, 3.0), (3.0, 3.0, 3.0), (3.0, 3.0, 3.0), (3.0, 3.0, 3.0), (3.0, 3.0, 3.0), (3.0, 3.0, 3.0), (3.0, 3.0, 3.0), (3.0, 3.0, 3.0), (3.0, 3.0, 3.0), (3.0, 3.0, 3.0)], Fixed Indices: [1.0, 1.0, 1.0]
Object: [(2.0, 2.0, 2.0), (2.0, 2.0, 2.0), (2.0, 2.0, 2.0), (2.0, 2.0, 2.0), (2.0, 2.0, 2.0), (2.0, 2.0, 2.0), (2.0, 2.0, 2.0), (2.0, 2.0, 2.0), (2.0, 2.0, 2.0), (2.0, 2.0, 2.0)], Fixed Indices: [1.0, 1.0, 1.0]
Object: [(0.0, 2.0, 0.0), (0.0, 2.0, 0.0), (0.0, 2.0, 0.0), (0.0, 2.0, 0.0), (0.0, 2.0, 0.0), (0.0, 2.0, 0.0), (0.0, 2.0, 0.0), (0.0, 2.0, 0.0), (0.0, 2.0, 0.0), (0.0, 2.0, 0.0)], Fixed Indices: [1.0, 1.0, 1.0]
Object: [(0.0, 2.0, 2.0), (0.0, 2.0, 2.0), (0.0, 2.0, 2.0), (0.0, 2.0, 2.0), (0.0, 2.0, 2.0), (0.0, 2.0, 2.0), (0.0, 2.