In [7]:
import pickle
from utils.analysis_from_interaction import *
from egg.core.language_analysis import Disent
from language_analysis_local import TopographicSimilarityConceptLevel, encode_target_concepts_for_topsim

# calculate metrics from stored interactions

In [8]:
# Switch between the control simulations (True) and the original simulations (False).
control = False

if control:
    # Every control simulation uses the (4,8) dataset: 4 attributes with 8 values each.
    datasets = ('(4,8)',) * 7
    n_attributes = (4,) * 7
    n_values = (8,) * 7
    n_epochs = 300
    # One results directory per (balanced, vsf) combination that is evaluated.
    # Note that the combination (balanced=False, vsf=3) is not part of this list.
    paths = [
        f'results/(4,8)_sample_scaling_10_balanced_{balanced}_vsf_{vsf}/'
        for balanced, vsf in (
            (True, 1), (False, 1),
            (True, 2), (False, 2),
            (True, 3),
            (True, 4), (False, 4),
        )
    ]
else:
    # Original simulations: one entry per dataset, written as '(n_attributes,n_values)'.
    datasets = ('(3,4)', '(3,8)', '(3,16)', '(4,4)', '(4,8)', '(5,4)')
    n_attributes = (3, 3, 3, 4, 4, 5)
    n_values = (4, 8, 16, 4, 8, 4)
    # Alternative configuration kept for reference:
    #   n_epochs = 300
    #   paths = ['results/' + d + '_sample_scaling_10_balanced_False_vsf_3/' for d in datasets]
    n_epochs = 100
    paths = [f'results/{name}_game_size_10_vsf_3/' for name in datasets]



In [11]:
# Choose which simulation variant is evaluated: the context-unaware one (True)
# or the original one (False). `setting` is the matching sub-directory name.
context_unaware = True
setting = 'context_unaware' if context_unaware else 'standard'

### entropy scores: MI, effectiveness, efficiency

In [12]:
# Compute the information scores for every dataset and every run from the stored
# training interaction of the final epoch, and save them inside the run directory.
for d in range(len(datasets)):

    attributes = n_attributes[d]
    values = n_values[d]

    for run in range(5):

        # Run directory: <dataset path>/<setting>/<run>/ ; `setting` selects whether the
        # 'standard' or the 'context_unaware' simulations are evaluated.
        path_to_run = paths[d] + str(setting) + '/' + str(run) + '/'
        path_to_interaction = (path_to_run + 'interactions/train/epoch_' + str(n_epochs) + '/interaction_gpu0')
        # NOTE(review): on PyTorch >= 2.6 `torch.load` defaults to weights_only=True, which may
        # reject a pickled interaction object — confirm and pass weights_only=False if needed.
        interaction = torch.load(path_to_interaction)

        scores = information_scores(interaction, attributes, values, normalizer="arithmetic")

        # Write through a context manager so the file handle is flushed and closed
        # right away instead of being left open until garbage collection.
        with open(path_to_run + 'entropy_scores.pkl', 'wb') as f:
            pickle.dump(scores, f)
    

###  message length

In [20]:
# We evaluate the message length per hierarchy level after training, but
# you can also use the HierarchicalMessageLength callback and store the results.
# TODO: Message length results look weird, needs to be fixed!

for d in range(len(datasets)):

    attributes = n_attributes[d]

    for run in range(5):

        # Run directory: <dataset path>/<setting>/<run>/
        path_to_run = paths[d] + str(setting) + '/' + str(run) + '/'
        path_to_interaction = (path_to_run + 'interactions/train/epoch_' + str(n_epochs) + '/interaction_gpu0')
        interaction = torch.load(path_to_interaction)

        scores = message_length_per_hierarchy_level(interaction, attributes)

        # Write through a context manager so the file handle is closed right away.
        with open(path_to_run + 'message_length_hierarchical.pkl', 'wb') as f:
            pickle.dump(scores, f)

###  symbol redundancy

In [None]:
# Not yet implemented:
# Computes symbol redundancy and the symbol / attribute-value MI for every dataset and run
# from the stored training interaction of the final epoch.

for d in range(len(datasets)):

    attributes = n_attributes[d]
    values = n_values[d]
    # The vocab-size factor is the number after the last '_' in the dataset path
    # ('..._vsf_3/' -> 3). Parsing the whole token (instead of a single character)
    # also works for multi-digit factors and for paths without a trailing slash.
    vs_factor = int(paths[d].rstrip('/').rsplit('_', 1)[-1])
    vocab_size = (values + 1) * vs_factor + 1

    for run in range(5):

        # Run directory: <dataset path>/<setting>/<run>/
        path_to_run = paths[d] + str(setting) + '/' + str(run) + '/'
        # The 'symbols_pernsame.npy' file is no longer loaded here: its content was never
        # used, and loading it made the cell fail whenever the file was missing.
        path_to_interaction = (path_to_run + 'interactions/train/epoch_' + str(n_epochs) + '/interaction_gpu0')
        interaction = torch.load(path_to_interaction)
        redundancy, MI = symbol_frequency(interaction, attributes, values, vocab_size)

        scores = {'symbol_redundancy': redundancy, 'MI_symbol-attribute_value': MI}

        # Write through a context manager so the file handle is closed right away.
        with open(path_to_run + 'symbol_redundancy.pkl', 'wb') as f:
            pickle.dump(scores, f)

###  compositionality scores: topsim, posdis, bosdis

In [14]:
# topsim
# Although topsim values are stored throughout training if callbacks are verbose, we re-evaluate
# the final topsim scores with more data points.
# For every dataset and run, two scores are computed per mode ('train' / 'test'):
#   - 'topsim_<mode>':          over all concepts (encoded with encode_target_concepts_for_topsim)
#   - 'topsim_specific_<mode>': over the concepts whose attributes are all fixed, using the
#                               "edit" meaning distance
# The four scores are stored in '<run directory>/topsim_final.pkl'.

samples = 5000  # at most the first `samples` (meaning, message) pairs enter each computation
for d, dataset in enumerate(datasets):

    dim = [n_values[d]] * n_attributes[d]

    for run in range(5):
        print("dataset", dataset, "run", run)

        topsim_final = {}
        path_to_run = paths[d] + str(setting) + '/' + str(run) + '/'
        path_to_interaction_train = (path_to_run + 'interactions/train/epoch_' + str(n_epochs) + '/interaction_gpu0')
        path_to_interaction_test = (path_to_run + 'interactions/validation/epoch_' + str(n_epochs) + '/interaction_gpu0')
        # The 'test' scores are computed from the interaction stored under 'validation/'.
        interaction_paths = {'train': path_to_interaction_train, 'test': path_to_interaction_test}

        TOPSIM = TopographicSimilarityConceptLevel(dim, is_gumbel=True)

        for mode, path_to_interaction in interaction_paths.items():

            interaction = torch.load(path_to_interaction)

            # Discretize the messages: index of the largest entry along the last dimension.
            messages = interaction.message.argmax(dim=-1)
            sender_input = interaction.sender_input
            # The first half of dimension 1 of the sender input holds the target objects.
            n_targets = sender_input.shape[1] // 2
            # get target objects and fixed vectors to re-construct concepts
            target_objects = sender_input[:, :n_targets]
            target_objects = k_hot_to_attributes(target_objects, n_values[d])
            # concepts are defined by a list of target objects (here one sampled target object) and a fixed vector
            (objects, fixed) = retrieve_concepts_sampling(target_objects)
            # add one such that zero becomes an empty attribute for the calculation (_)
            objects = objects + 1
            concepts = torch.from_numpy(objects * (np.array(fixed)))
            # specific concepts: all attributes are fixed
            specific_idx = np.where(np.sum(fixed, axis=1) == n_attributes[d])[0]
            messages_specific = messages[specific_idx]
            concepts_specific = concepts[specific_idx]

            messages = [msg.tolist() for msg in messages]
            messages_specific = [msg.tolist() for msg in messages_specific]

            encoded_input = encode_target_concepts_for_topsim(sender_input)
            topsim = TOPSIM.compute_topsim(encoded_input[0:samples], messages[0:samples])
            topsim_specific = TOPSIM.compute_topsim(concepts_specific[0:samples], messages_specific[0:samples],
                                                    meaning_distance_fn="edit")
            print('... topsim computed')

            topsim_final['topsim_' + mode] = topsim
            topsim_final['topsim_specific_' + mode] = topsim_specific

        # Write through a context manager so the file handle is closed right away.
        with open(path_to_run + "topsim_final.pkl", "wb") as f:
            pickle.dump(topsim_final, f)
        print(topsim_final)

dataset (3,4) run 0
... topsim computed
... topsim computed
{'topsim_train': 0.4239018353653433, 'topsim_specific_train': 0.5103873491798231, 'topsim_test': 0.4371581732605145, 'topsim_specific_test': 0.5133992917589033}
dataset (3,4) run 1
... topsim computed
... topsim computed
{'topsim_train': 0.3934997453936548, 'topsim_specific_train': 0.4528229888327764, 'topsim_test': 0.4029663547474446, 'topsim_specific_test': 0.4598832100611965}
dataset (3,4) run 2
... topsim computed
... topsim computed
{'topsim_train': 0.4684454171670405, 'topsim_specific_train': 0.5228703913145929, 'topsim_test': 0.45989264302761873, 'topsim_specific_test': 0.516067972430117}
dataset (3,4) run 3
... topsim computed
... topsim computed
{'topsim_train': 0.4379705660962943, 'topsim_specific_train': 0.469714858941141, 'topsim_test': 0.4320143945098909, 'topsim_specific_test': 0.48134552598615715}
dataset (3,4) run 4
... topsim computed
... topsim computed
{'topsim_train': 0.4361416470989032, 'topsim_specific_tr

In [15]:
# use Disent callback from egg
# For every dataset and run, posdis and bosdis are computed from the stored training
# interaction of the final epoch, once for all concepts and once for the specific concepts
# only. The four scores are stored in '<run directory>/posdis_bosdis.pkl'.

for d in range(len(datasets)):

    dim = [n_values[d]] * n_attributes[d]
    # The vocab-size factor is the number after the last '_' in the dataset path
    # ('..._vsf_3/' -> 3). Parsing the whole token (instead of a single character)
    # also works for multi-digit factors and for paths without a trailing slash.
    vs_factor = int(paths[d].rstrip('/').rsplit('_', 1)[-1])
    vocab_size = (n_values[d] + 1) * vs_factor + 1

    print("data set", dim)

    for run in range(5):

        path_to_run = paths[d] + str(setting) + '/' + str(run) + '/'
        path_to_interaction_train = (path_to_run + 'interactions/train/epoch_' + str(n_epochs) + '/interaction_gpu0')
        interaction = torch.load(path_to_interaction_train)

        # Discretize the messages: index of the largest entry along the last dimension.
        messages = interaction.message.argmax(dim=-1)
        sender_input = interaction.sender_input
        # The first half of dimension 1 of the sender input holds the target objects.
        n_targets = sender_input.shape[1] // 2
        # get target objects and fixed vectors to re-construct concepts
        target_objects = sender_input[:, :n_targets]
        target_objects = k_hot_to_attributes(target_objects, n_values[d])
        # concepts are defined by a list of target objects (here one sampled target object) and a fixed vector
        (objects, fixed) = retrieve_concepts_sampling(target_objects)
        # add one such that zero becomes an empty attribute for the calculation (_)
        objects = objects + 1

        # concrete/specific concepts: where all attributes are fixed
        # (the mask is computed once and reused for concepts and messages)
        is_specific = torch.sum(torch.from_numpy(fixed), dim=1) == n_attributes[d]
        concepts_specific = torch.tensor(objects[is_specific])
        messages_specific = messages[is_specific]

        # NOTE(review): the general scores below use the shifted `objects` as they are, i.e.
        # without zeroing the attributes that are not fixed — confirm this is intended.
        posdis_bosdis = {
            'posdis_specific': Disent.posdis(concepts_specific, messages_specific),
            'bosdis_specific': Disent.bosdis(concepts_specific, messages_specific, vocab_size),
            'posdis': Disent.posdis(torch.from_numpy(objects), messages),
            'bosdis': Disent.bosdis(torch.from_numpy(objects), messages, vocab_size),
        }

        print(posdis_bosdis)

        # Write through a context manager so the file handle is closed right away.
        with open(path_to_run + "posdis_bosdis.pkl", "wb") as f:
            pickle.dump(posdis_bosdis, f)

data set [4, 4, 4]
{'posdis_specific': 0.0812758207321167, 'bosdis_specific': 0.21682985126972198, 'posdis': 0.05927928164601326, 'bosdis': 0.22567006945610046}
{'posdis_specific': 0.03443637862801552, 'bosdis_specific': 0.22426798939704895, 'posdis': 0.014325053431093693, 'bosdis': 0.20297060906887054}
{'posdis_specific': 0.17433571815490723, 'bosdis_specific': 0.33425045013427734, 'posdis': 0.1578224152326584, 'bosdis': 0.3052409291267395}
{'posdis_specific': 0.07186233997344971, 'bosdis_specific': 0.20578840374946594, 'posdis': 0.04527847096323967, 'bosdis': 0.2170572280883789}
{'posdis_specific': 0.10273190587759018, 'bosdis_specific': 0.2828274667263031, 'posdis': 0.07438632100820541, 'bosdis': 0.2684449851512909}
data set [8, 8, 8]
{'posdis_specific': 0.11353063583374023, 'bosdis_specific': 0.24216723442077637, 'posdis': 0.10144481062889099, 'bosdis': 0.24038828909397125}
{'posdis_specific': 0.2311098426580429, 'bosdis_specific': 0.29010918736457825, 'posdis': 0.19693471491336823

### co-occurrences

In [None]:
# Not yet implemented:
# Computes the co-occurrence scores per hierarchy level for every dataset and run from the
# stored training interaction of the final epoch and saves them inside the run directory.

for d in range(len(datasets)):

    attributes = n_attributes[d]
    values = n_values[d]
    # The vocab-size factor is the number after the last '_' in the dataset path
    # ('..._vsf_3/' -> 3). Parsing the whole token (instead of a single character)
    # also works for multi-digit factors and for paths without a trailing slash.
    vs_factor = int(paths[d].rstrip('/').rsplit('_', 1)[-1])

    for run in range(5):

        # Run directory: <dataset path>/<setting>/<run>/
        path_to_run = paths[d] + str(setting) + '/' + str(run) + '/'
        path_to_interaction = (path_to_run + 'interactions/train/epoch_' + str(n_epochs) + '/interaction_gpu0')
        interaction = torch.load(path_to_interaction)

        scores = cooccurrence_per_hierarchy_level(interaction, attributes, values, vs_factor)

        print(scores)

        # Write through a context manager so the file handle is closed right away.
        with open(path_to_run + 'normalized_cooccurrence.pkl', 'wb') as f:
            pickle.dump(scores, f)
    