In [1]:
import pickle
from utils.analysis_from_interaction import *
from egg.core.language_analysis import Disent
from language_analysis_local import TopographicSimilarityConceptLevel, encode_target_concepts_for_topsim
import os

# calculate metrics from stored interactions

In [2]:
# Full configuration: one entry per dataset, named '(n_attributes,n_values)'.
# All per-dataset sequences below are index-aligned with `datasets`.
datasets = ('(3,4)', '(3,8)', '(3,16)', '(4,4)', '(4,8)', '(5,4)')
n_attributes = (3, 3, 3, 4, 4, 5)
n_values = (4, 8, 16, 4, 8, 4)
max_mess_lens = (20, 20, 20, 20, 20, 20)
epochs = 300  # only read when early stopping is off
n_runs = 5
vsf = 0  # selects the '_vsf_<n>' results folder and the matching vocabulary sizes
if vsf in (0, 3):
    if vsf == 3:
        # NOTE(review): '(5,4)' is given 52 although the other 4-value datasets use 16 - confirm.
        vocab_sizes = [16, 28, 52, 16, 28, 52]
    else:
        vocab_sizes = [5, 9, 17, 5, 9, 5]
    # Both branches share the same folder layout; only the vsf suffix differs.
    paths = ['results/{}_game_size_10_vsf_{}/'.format(d, vsf) for d in datasets]

In [96]:
# Configuration restricted to the '(3,4)' dataset.
# All per-dataset sequences are index-aligned with `datasets`.
datasets = ('(3,4)', )
n_attributes = (3, )
n_values = (4, )
# Defined here as well so that cells reading max_mess_lens[d] do not rely on a value
# left over from a different configuration cell.
max_mess_lens = (20, ) * len(datasets)
epochs = 300  # only read when early stopping is off
n_runs = 5
vsf = 0  # selects the '_vsf_<n>' results folder and the matching vocabulary sizes
if vsf == 3:
    vocab_sizes = [16, ]
    paths = ['results/' + d + '_game_size_10_vsf_3/' for d in datasets]
elif vsf == 0:
    vocab_sizes = [5, ]
    paths = ['results/' + d + '_game_size_10_vsf_0/' for d in datasets]

In [104]:
# Configuration restricted to the '(3,8)' and '(4,4)' datasets.
# All per-dataset sequences are index-aligned with `datasets`.
datasets = ('(3,8)', '(4,4)')
n_attributes = (3,4)
n_values = (8, 4)
# Defined here as well so that cells reading max_mess_lens[d] do not rely on a value
# left over from a different configuration cell.
max_mess_lens = (20, ) * len(datasets)
# NOTE(review): epochs is 0 here while the other configuration cells use 300; it is only
# read when early stopping is off - confirm this is intended.
epochs = 0
n_runs = 5
vsf = 0  # selects the '_vsf_<n>' results folder and the matching vocabulary sizes
if vsf == 3:
    vocab_sizes = [28,16]
    paths = ['results/' + d + '_game_size_10_vsf_3/' for d in datasets]
elif vsf == 0:
    vocab_sizes = [9,5]
    paths = ['results/' + d + '_game_size_10_vsf_0/' for d in datasets]

In [182]:
# Configuration restricted to the '(3,16)' dataset.
# All per-dataset sequences are index-aligned with `datasets`.
datasets = ('(3,16)',)
n_attributes = (3,)
n_values = (16,)
# Defined here as well so that cells reading max_mess_lens[d] do not rely on a value
# left over from a different configuration cell.
max_mess_lens = (20, ) * len(datasets)
epochs = 300  # only read when early stopping is off
n_runs = 5
vsf = 0  # selects the '_vsf_<n>' results folder and the matching vocabulary sizes
if vsf == 3:
    vocab_sizes = [52,]
    paths = ['results/' + d + '_game_size_10_vsf_3/' for d in datasets]
elif vsf == 0:
    vocab_sizes = [17,]
    paths = ['results/' + d + '_game_size_10_vsf_0/' for d in datasets]

In [13]:
# Configuration restricted to the '(4,8)' and '(5,4)' datasets.
# All per-dataset sequences are index-aligned with `datasets`.
datasets = ('(4,8)', '(5,4)')
n_attributes = (4, 5)
n_values = (8, 4)
# Defined here as well so that cells reading max_mess_lens[d] do not rely on a value
# left over from a different configuration cell.
max_mess_lens = (20, ) * len(datasets)
epochs = 300  # only read when early stopping is off
n_runs = 5
vsf = 0  # selects the '_vsf_<n>' results folder and the matching vocabulary sizes
if vsf == 3:
    # NOTE(review): '(5,4)' is given 52 although it has 4 values per attribute - confirm.
    vocab_sizes = [28, 52]
    paths = ['results/' + d + '_game_size_10_vsf_3/' for d in datasets]
elif vsf == 0:
    vocab_sizes = [9, 5]
    paths = ['results/' + d + '_game_size_10_vsf_0/' for d in datasets]

In [9]:
# Flags describing which stored simulations are evaluated.
context_unaware = False  # whether original or context_unaware simulations are evaluated
zero_shot = False  # whether zero-shot simulations are evaluated
zero_shot_test = 'specific'  # 'generic' or 'specific'
test_interactions = True  # whether scores should be calculated on test interactions
test_mode = 'test'  # 'test' or 'test_fine' or 'test_sampled_unscaled' or 'test_load_train'
length_cost = True  # whether length_cost was applied; length cost runs have been run with early stopping
early_stopping = True  # only with length cost and sampled context
rsa = False
rsa_test = 'testtrainmixed'  # 'testtrain' for RSA tests on the test dataset, utterances are sampled from interactions during training
sampled_context = False
hierarchical = False
shared_context = True

# is_gumbel is handed to the analysis functions; it is switched off for RSA and test interactions.
is_gumbel = not (rsa or test_interactions)

# Build the sub-folder of each results path that holds the runs of the selected setting.
if context_unaware:
    condition = 'context_unaware'
elif length_cost:
    condition = 'context_aware'
else:
    condition = 'standard'
setting = ('length_cost/' if length_cost else '') + condition

# Optional sub-folders, appended in this fixed order.
for enabled, sub_dir in ((hierarchical, '/hierarchical'), (shared_context, '/shared_context')):
    if enabled:
        setting += sub_dir

# Zero-shot takes precedence over sampled context.
if zero_shot:
    setting += '/zero_shot/' + zero_shot_test
elif sampled_context:
    setting += '/sampled_context'

In [10]:
# Collect the final training epoch of every run: one inner list per dataset, one entry per run.
# With early stopping the value is the largest epoch key of the stored training loss;
# otherwise the configured number of `epochs` is used for every run.
n_epochs_all_data = []
for d in range(len(datasets)):

    n_epochs = []

    for run in range(n_runs):
        if early_stopping:
            path_to_run = paths[d] + str(setting) + '/' + str(run) + '/'
            with open(os.path.join(path_to_run, 'loss_and_metrics.pkl'), 'rb') as input_file:
                data = pickle.load(input_file)
            final_epoch = max(data['loss_train'].keys())
            n_epochs.append(final_epoch)
        else:
            n_epochs.append(epochs)

    n_epochs_all_data.append(n_epochs)
            

In [145]:
# Inspect the collected final epochs: one inner list per dataset, one entry per run.
n_epochs_all_data

[[1, 41, 169, 65, 36], [44, 45, 31, 56, 136], [30, 34, 48, 26, 51]]

In [146]:
# Manual override of the final epoch used for dataset 0, run 0.
# NOTE(review): this mutates the list in place and is lost as soon as the cell that builds
# n_epochs_all_data is re-run - confirm it is still needed for the current configuration.
n_epochs_all_data[0][0] = 300

## entropy scores: MI, effectiveness, efficiency

In [202]:
# Compute the information scores for every dataset/run and store them inside the run folder.
for d in range(len(datasets)):

    n_epochs = n_epochs_all_data[d]
    attributes = n_attributes[d]
    values = n_values[d]

    for run in range(n_runs):

        path_to_run = paths[d] + str(setting) + '/' + str(run) + '/'

        # Select the stored interaction and the matching suffix of the result file.
        if rsa:
            interaction_dir = 'interactions/rsa_' + rsa_test + '/epoch_0'
            suffix = '_rsa_' + rsa_test
        elif test_interactions:
            interaction_dir = 'interactions/' + test_mode + '/epoch_0'
            suffix = '_' + test_mode
        else:
            interaction_dir = 'interactions/train/epoch_' + str(n_epochs[run])
            suffix = ''
        path_to_interaction = path_to_run + interaction_dir + '/interaction_gpu0'

        interaction = torch.load(path_to_interaction)

        # NOTE(review): max_mess_len is fixed to 21 here while max_mess_lens in the
        # configuration holds 20 - presumably one extra position for EOS; confirm.
        scores = information_scores(interaction, attributes, values, normalizer="arithmetic",
                                    is_gumbel=is_gumbel, trim_eos=True, max_mess_len=21)

        # The context manager closes the file even if pickling fails.
        with open(path_to_run + 'entropy_scores' + suffix + '.pkl', 'wb') as output_file:
            pickle.dump(scores, output_file)

##  message length

In [203]:
# we evaluated message length per hierarchy level after training but
# you can also use the HierarchicalMessageLength callback and store the results

for d in range(len(datasets)):

    n_epochs = n_epochs_all_data[d]
    attributes = n_attributes[d]

    for run in range(n_runs):

        path_to_run = paths[d] + str(setting) + '/' + str(run) + '/'

        # Select the stored interaction and the matching suffix of the result files.
        if rsa:
            interaction_dir = 'interactions/rsa_' + rsa_test + '/epoch_0'
            suffix = '_rsa_' + rsa_test
        elif test_interactions:
            interaction_dir = 'interactions/' + test_mode + '/epoch_0'
            suffix = '_' + test_mode
        else:
            interaction_dir = 'interactions/train/epoch_' + str(n_epochs[run])
            suffix = ''
        path_to_interaction = path_to_run + interaction_dir + '/interaction_gpu0'

        interaction = torch.load(path_to_interaction)

        ml, ml_concept = message_length_per_hierarchy_level(interaction, attributes)
        ml_context, ml_fine_context, ml_coarse_context = message_length_per_context_condition(interaction, attributes)
        scores = {'ml_over_context': ml_context, 'ml_fine_context': ml_fine_context, 'ml_coarse_context': ml_coarse_context}

        # One result file per measure; the context manager closes each file after writing.
        results = (
            ('message_length', ml),
            ('message_length_hierarchical', ml_concept),
            ('message_length_over_context', scores),
        )
        for file_stem, result in results:
            with open(path_to_run + file_stem + suffix + '.pkl', 'wb') as output_file:
                pickle.dump(result, output_file)

## lexicon properties

In [209]:
# Compute lexicon informativeness and size for every dataset/run and store them per run.
distance = 'manhattan' # 'manhattan' or 'euclidean'
for d in range(len(datasets)):
    print(datasets[d])

    n_epochs = n_epochs_all_data[d]

    for run in range(n_runs):

        path_to_run = paths[d] + str(setting) + '/' + str(run) + '/'

        # Select the stored interaction and the matching suffix of the result file.
        if rsa:
            interaction_dir = 'interactions/rsa_' + rsa_test + '/epoch_0'
            suffix = '_rsa_' + rsa_test
        elif test_interactions:
            interaction_dir = 'interactions/' + test_mode + '/epoch_0'
            suffix = '_' + test_mode
        else:
            interaction_dir = 'interactions/train/epoch_' + str(n_epochs[run])
            suffix = ''
        path_to_interaction = path_to_run + interaction_dir + '/interaction_gpu0'

        interaction = torch.load(path_to_interaction)

        lex_info, unique_messages, num_concepts = informativeness_score(interaction, distance=distance)
        scores = {'lexicon informativeness': lex_info, 'lexicon size': unique_messages, 'number of concepts': num_concepts}
        print(scores)

        # The context manager closes the file even if pickling fails.
        with open(path_to_run + 'lexicon_properties_' + distance + suffix + '.pkl', 'wb') as output_file:
            pickle.dump(scores, output_file)

(3,4)
{'lexicon informativeness': nan, 'lexicon size': 742, 'number of concepts': 742}
{'lexicon informativeness': nan, 'lexicon size': 742, 'number of concepts': 742}
{'lexicon informativeness': nan, 'lexicon size': 742, 'number of concepts': 742}
{'lexicon informativeness': nan, 'lexicon size': 742, 'number of concepts': 742}
{'lexicon informativeness': nan, 'lexicon size': 742, 'number of concepts': 742}
(3,8)


KeyboardInterrupt: 

##  symbol redundancy

In [11]:
# Compute symbol redundancy and symbol/attribute-value MI for every dataset/run.
for d in range(len(datasets)):

    n_epochs = n_epochs_all_data[d]

    attributes = n_attributes[d]
    values = n_values[d]
    vocab_size = vocab_sizes[d]
    max_mess_len = max_mess_lens[d]

    for run in range(n_runs):

        path_to_run = paths[d] + str(setting) + '/' + str(run) + '/'

        # Select the stored interaction and the matching suffix of the result file.
        if rsa:
            interaction_dir = 'interactions/rsa_' + rsa_test + '/epoch_0'
            suffix = '_rsa_' + rsa_test
        elif test_interactions:
            interaction_dir = 'interactions/' + test_mode + '/epoch_0'
            suffix = '_' + test_mode
        else:
            interaction_dir = 'interactions/train/epoch_' + str(n_epochs[run])
            suffix = ''
        path_to_interaction = path_to_run + interaction_dir + '/interaction_gpu0'

        interaction = torch.load(path_to_interaction)
        redundancy, MI = symbol_frequency(interaction, attributes, values, vocab_size, max_mess_len, is_gumbel, trim_eos=True)

        scores = {'symbol_redundancy': redundancy, 'MI_symbol-attribute_value': MI}

        # The context manager closes the file even if pickling fails.
        with open(path_to_run + 'symbol_redundancy' + suffix + '.pkl', 'wb') as output_file:
            pickle.dump(scores, output_file)

  return symbol_frequency / att_val_frequency, mutual_information
  return symbol_frequency / att_val_frequency, mutual_information
  return symbol_frequency / att_val_frequency, mutual_information
  return symbol_frequency / att_val_frequency, mutual_information
  return symbol_frequency / att_val_frequency, mutual_information
  return symbol_frequency / att_val_frequency, mutual_information
  return symbol_frequency / att_val_frequency, mutual_information
  return symbol_frequency / att_val_frequency, mutual_information
  return symbol_frequency / att_val_frequency, mutual_information
  return symbol_frequency / att_val_frequency, mutual_information
  return symbol_frequency / att_val_frequency, mutual_information
  return symbol_frequency / att_val_frequency, mutual_information
  return symbol_frequency / att_val_frequency, mutual_information
  return symbol_frequency / att_val_frequency, mutual_information
  return symbol_frequency / att_val_frequency, mutual_information


##  compositionality scores: topsim, posdis, bosdis

### topsim

In [102]:
# topsim for train and validation
# although topsim values are stored throughout training if callbacks are verbose, we reevaluate the
# final topsim scores with more data points
# not yet implemented for rsa

# Only the first `samples` items of each interaction are used (no shuffling is applied).
samples = 5000
for d, dataset in enumerate(datasets):

    n_epochs = n_epochs_all_data[d]

    dim = [n_values[d]] * n_attributes[d]

    for run in range(n_runs):
        print("dataset", dataset, "run", run)

        topsim_final = {}
        path_to_run = paths[d] + str(setting) + '/' + str(run) + '/'
        epoch_dir = '/epoch_' + str(n_epochs[run]) + '/interaction_gpu0'
        interaction_paths = (
            ('train', path_to_run + 'interactions/train' + epoch_dir),
            ('val', path_to_run + 'interactions/validation' + epoch_dir),
        )

        TOPSIM = TopographicSimilarityConceptLevel(dim, is_gumbel=True)

        for mode, path_to_interaction in interaction_paths:

            interaction = torch.load(path_to_interaction)

            messages = interaction.message.argmax(dim=-1)
            sender_input = interaction.sender_input
            n_targets = int(sender_input.shape[1]/2)
            # get target objects and fixed vectors to re-construct concepts
            target_objects = sender_input[:, :n_targets]
            target_objects = k_hot_to_attributes(target_objects, n_values[d])
            # concepts are defined by a list of target objects (here one sampled target object) and a fixed vector
            (objects, fixed) = retrieve_concepts_sampling(target_objects)
            # add one such that zero becomes an empty attribute for the calculation (_)
            objects = objects + 1
            concepts = torch.from_numpy(objects * (np.array(fixed)))

            # number of fixed attributes per concept: all fixed = specific, exactly one fixed = generic
            n_fixed = np.sum(fixed, axis=1)
            specific_idx = np.where(n_fixed == n_attributes[d])[0]
            messages_specific = messages[specific_idx]
            concepts_specific = concepts[specific_idx]

            generic_idx = np.where(n_fixed == 1)[0]
            messages_generic = messages[generic_idx]
            concepts_generic = concepts[generic_idx]

            messages = [msg.tolist() for msg in messages]
            messages_specific = [msg.tolist() for msg in messages_specific]
            messages_generic = [msg.tolist() for msg in messages_generic]

            encoded_input = encode_target_concepts_for_topsim(sender_input)

            # default: hausdorff distance
            topsim_final['topsim_' + mode] = TOPSIM.compute_topsim(encoded_input[0:samples], messages[0:samples])

            # specific/generic topsim is skipped for zero-shot simulations
            if not zero_shot:
                topsim_final['topsim_specific_' + mode] = TOPSIM.compute_topsim(
                    concepts_specific[0:samples], messages_specific[0:samples], meaning_distance_fn="edit")
                topsim_final['topsim_generic_' + mode] = TOPSIM.compute_topsim(
                    concepts_generic[0:samples], messages_generic[0:samples], meaning_distance_fn="edit")

            print('... topsim computed')

        # The context manager closes the file even if pickling fails.
        with open(path_to_run + "topsim_final.pkl", "wb") as output_file:
            pickle.dump(topsim_final, output_file)
        print(topsim_final)

dataset (3,4) run 0
... topsim computed
... topsim computed
{'topsim_train': 0.1722713793139702, 'topsim_specific_train': 0.1807396955211413, 'topsim_generic_train': 0.20757333373337575, 'topsim_val': 0.15958917050286012, 'topsim_specific_val': 0.2018166777861541, 'topsim_generic_val': 0.1492896365544714}
dataset (3,4) run 1
... topsim computed
... topsim computed
{'topsim_train': 0.18948264947930848, 'topsim_specific_train': 0.20810605059521692, 'topsim_generic_train': 0.13773017420723788, 'topsim_val': 0.16497707180925605, 'topsim_specific_val': 0.20688069802647163, 'topsim_generic_val': 0.09452930075446025}
dataset (3,4) run 2
... topsim computed
... topsim computed
{'topsim_train': 0.20925988041822158, 'topsim_specific_train': 0.21721522942584634, 'topsim_generic_train': 0.1623523671780775, 'topsim_val': 0.19311371048882245, 'topsim_specific_val': 0.19424749322313073, 'topsim_generic_val': 0.12531673054886083}
dataset (3,4) run 3
... topsim computed
... topsim computed
{'topsim_tra

In [None]:
# topsim for test interactions

if test_interactions:

    # Only the first `samples` items of the interaction are used.
    samples = 5000
    for d, dataset in enumerate(datasets):

        dim = [n_values[d]] * n_attributes[d]

        for run in range(n_runs):
            print("dataset", dataset, "run", run)

            topsim_final = {}
            path_to_run = paths[d] + str(setting) + '/' + str(run) + '/'
            mode = 'test'
            path_to_interaction_test = (path_to_run + 'interactions/' + test_mode + '/epoch_0/interaction_gpu0')

            TOPSIM = TopographicSimilarityConceptLevel(dim, is_gumbel=True)

            interaction = torch.load(path_to_interaction_test)

            # NOTE(review): the configuration sets is_gumbel = False when test_interactions is
            # enabled, yet the messages are decoded with argmax over the last dimension here -
            # confirm that the stored test messages are still one-hot/probability vectors.
            messages = interaction.message.argmax(dim=-1)
            sender_input = interaction.sender_input

            messages = [msg.tolist() for msg in messages]

            encoded_input = encode_target_concepts_for_topsim(sender_input)

            topsim = TOPSIM.compute_topsim(encoded_input[0:samples], messages[0:samples])

            print('... topsim computed')

            topsim_final['topsim_' + mode] = topsim

            # The result file is suffixed with the evaluated test mode, like the other test results.
            with open(path_to_run + "topsim_final_" + test_mode + ".pkl", "wb") as output_file:
                pickle.dump(topsim_final, output_file)
            print(topsim_final)

#### Topsim over time

In [None]:
# Topsim on consecutive chunks of `samples` messages of the final validation interaction.
for d, dataset in enumerate(datasets):

    n_epochs = n_epochs_all_data[d]
    # dimensions of the current dataset (one entry per attribute)
    dim = [n_values[d]] * n_attributes[d]

    for run in range(n_runs):
        print("dataset", dataset, "run", run)

        path_to_run = paths[d] + str(setting) + '/' + str(run) + '/'
        # NOTE(review): only the validation interaction is evaluated and a single list is
        # stored per run - confirm whether the training interaction should be covered as well.
        path_to_interaction_val = (path_to_run + 'interactions/validation/epoch_' + str(n_epochs[run]) + '/interaction_gpu0')
        interaction = torch.load(path_to_interaction_val)

        messages = interaction.message.argmax(dim=-1)
        sender_input = interaction.sender_input
        messages = [msg.tolist() for msg in messages]
        encoded_input = encode_target_concepts_for_topsim(sender_input)
        TOPSIM = TopographicSimilarityConceptLevel(dim, is_gumbel=True)

        samples = 5000
        # number of chunks, rounding up so that a shorter last chunk is included
        num_batches = len(messages) // samples + (len(messages) % samples > 0)
        topsim_over_time = []

        for i in range(num_batches):
            start, stop = i * samples, (i + 1) * samples
            topsim = TOPSIM.compute_topsim(encoded_input[start:stop], messages[start:stop])
            topsim_over_time.append(topsim)

        # The context manager closes the file even if pickling fails.
        with open(path_to_run + "topsim_over_time.pkl", "wb") as output_file:
            pickle.dump(topsim_over_time, output_file)

### Posdis and Bosdis

In [None]:
# use Disent callback from egg

for d in range(len(datasets)):

    n_epochs = n_epochs_all_data[d]

    dim = [n_values[d]] * n_attributes[d]
    # Use the configured vocabulary size: deriving it from the vsf digit of the folder name
    # via (n_values + 1) * vsf + 1 evaluates to 1 for the vsf_0 folders.
    vocab_size = vocab_sizes[d]

    print("data set", dim)

    for run in range(n_runs):

        posdis_bosdis = {}

        # paths[d] already ends with '/', so no extra separator is inserted
        path_to_run = paths[d] + str(setting) + '/' + str(run) + '/'
        path_to_interaction_train = (path_to_run + 'interactions/train/epoch_' + str(n_epochs[run]) + '/interaction_gpu0')
        interaction = torch.load(path_to_interaction_train)

        messages = interaction.message.argmax(dim=-1)
        sender_input = interaction.sender_input
        n_targets = int(sender_input.shape[1]/2)
        # get target objects and fixed vectors to re-construct concepts
        target_objects = sender_input[:, :n_targets]
        target_objects = k_hot_to_attributes(target_objects, n_values[d])
        # concepts are defined by a list of target objects (here one sampled target object) and a fixed vector
        (objects, fixed) = retrieve_concepts_sampling(target_objects)
        # add one such that zero becomes an empty attribute for the calculation (_)
        objects = objects + 1

        # number of fixed attributes per concept
        n_fixed = torch.sum(torch.from_numpy(fixed), dim=1)

        # concrete/specific concepts: where all attributes are fixed
        is_specific = n_fixed == n_attributes[d]
        concepts_specific = torch.tensor(objects[is_specific])
        messages_specific = messages[is_specific]

        # generic concepts: where only one attribute is fixed
        # NOTE(review): the rows are taken from `objects` without applying `fixed`, so the
        # non-fixed attributes keep the sampled object's values - confirm this is intended.
        is_generic = n_fixed == 1
        concepts_generic = torch.tensor(objects[is_generic])
        messages_generic = messages[is_generic]

        posdis_bosdis['posdis_specific'] = Disent.posdis(concepts_specific, messages_specific)
        posdis_bosdis['bosdis_specific'] = Disent.bosdis(concepts_specific, messages_specific, vocab_size)
        posdis_bosdis['posdis_generic'] = Disent.posdis(concepts_generic, messages_generic)
        posdis_bosdis['bosdis_generic'] = Disent.bosdis(concepts_generic, messages_generic, vocab_size)
        posdis_bosdis['posdis'] = Disent.posdis(torch.from_numpy(objects), messages)
        posdis_bosdis['bosdis'] = Disent.bosdis(torch.from_numpy(objects), messages, vocab_size)

        print(posdis_bosdis)

        # The context manager closes the file even if pickling fails.
        with open(path_to_run + "posdis_bosdis.pkl", "wb") as output_file:
            pickle.dump(posdis_bosdis, output_file)

#### Posdis and bosdis concept x context

In [None]:
# bosdis concept x context
from utils.analysis_from_interaction import bosdis
for d in range(len(datasets)):

    n_epochs = n_epochs_all_data[d]

    attributes = n_attributes[d]
    values = n_values[d]
    # Use the configured vocabulary size: deriving it from the vsf digit of the folder name
    # via (n_values + 1) * vsf + 1 evaluates to 1 for the vsf_0 folders.
    vocab_size = vocab_sizes[d]

    for run in range(n_runs):

        # paths[d] already ends with '/', so no extra separator is inserted
        path_to_run = paths[d] + str(setting) + '/' + str(run) + '/'
        path_to_interaction = (path_to_run + 'interactions/train/epoch_' + str(n_epochs[run]) + '/interaction_gpu0')
        interaction = torch.load(path_to_interaction)

        scores = bosdis(interaction, attributes, values, vocab_size)

        # The context manager closes the file even if pickling fails.
        with open(path_to_run + 'bosdis_scores.pkl', 'wb') as output_file:
            pickle.dump(scores, output_file)

In [None]:
# posdis concept x context
from utils.analysis_from_interaction import posdis
for d in range(len(datasets)):

    n_epochs = n_epochs_all_data[d]

    attributes = n_attributes[d]
    values = n_values[d]
    # Use the configured vocabulary size: deriving it from the vsf digit of the folder name
    # via (n_values + 1) * vsf + 1 evaluates to 1 for the vsf_0 folders.
    vocab_size = vocab_sizes[d]

    for run in range(n_runs):
        # paths[d] already ends with '/', so no extra separator is inserted
        path_to_run = paths[d] + str(setting) + '/' + str(run) + '/'
        print(path_to_run)
        path_to_interaction = (path_to_run + 'interactions/train/epoch_' + str(n_epochs[run]) + '/interaction_gpu0')
        interaction = torch.load(path_to_interaction)

        scores = posdis(interaction, attributes, values, vocab_size)

        # The context manager closes the file even if pickling fails.
        with open(path_to_run + 'posdis_scores.pkl', 'wb') as output_file:
            pickle.dump(scores, output_file)

## co-occurrences

In [None]:
# Not yet implemented:

for d in range(len(datasets)):

    # final epochs of the current dataset; without this the value of a previously run cell is used
    n_epochs = n_epochs_all_data[d]

    # the vsf digit is read from the folder name (second-to-last character of the path)
    vs_factor = int(paths[d][-2])

    attributes = n_attributes[d]
    values = n_values[d]

    for run in range(n_runs):

        path_to_run = paths[d] + str(setting) + '/' + str(run) + '/'
        path_to_interaction = (path_to_run + 'interactions/train/epoch_' + str(n_epochs[run]) + '/interaction_gpu0')
        interaction = torch.load(path_to_interaction)

        scores = cooccurrence_per_hierarchy_level(interaction, attributes, values, vs_factor)

        print(scores)

        # The context manager closes the file even if pickling fails.
        with open(path_to_run + 'normalized_cooccurrence.pkl', 'wb') as output_file:
            pickle.dump(scores, output_file)
    