In [None]:
import pickle
import numpy as np
import math
import itertools
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

In [None]:
# Sentinel name used as the padding entry of every vocabulary in this notebook.
pad_token = '#PAD_TOKEN'

# Entity type name -> integer index (the padding entry comes last).
type_to_ix = {
    'person': 0,
    'user': 1,
    'song': 2,
    pad_token: 3,
}

# Relation name -> integer index; the unknown/end markers and the padding
# entry follow the four real relations.
relation_to_ix = {
    'song_person': 0,
    'person_song': 1,
    'user_song': 2,
    'song_user': 3,
    '#UNK_RELATION': 4,
    '#END_RELATION': 5,
    pad_token: 6,
}

Construct mappings from entities, types, and relations to indices

In [None]:
# The entity vocabulary is the union of all songs, users, and persons.
# The names are taken from the keys of the relation dicts rather than from the
# id_txt files, since the latter seemed to be missing some songs.
with open('../song_data/song_user.dict', 'rb') as handle:
    songs1 = set(pickle.load(handle).keys())

with open('../song_data/song_person.dict', 'rb') as handle:
    songs2 = set(pickle.load(handle).keys())

with open('../song_data/user_song.dict', 'rb') as handle:
    users = set(pickle.load(handle).keys())

with open('../song_data/person_song.dict', 'rb') as handle:
    persons = set(pickle.load(handle).keys())

songs = songs1 | songs2
entities = songs | users | persons

# All entities share a single index space, so a name must not appear in more
# than one category; otherwise two distinct entities would share an index.
assert len(entities) == len(songs) + len(persons) + len(users), \
    'entity names overlap across songs, users, and persons'

# The padding entry is appended after the real entities; if it were already
# an entity name, the assignment below would silently overwrite that index.
assert pad_token not in entities, 'pad_token collides with an entity name'

# A set iterates in hash order, and string hashes are randomized per
# interpreter process, so enumerating the set directly assigns different
# indices on every run. Enumerate a sorted sequence instead so that re-running
# the notebook reproduces the same mapping. Sorting by repr gives a total
# order even if the names are a mix of types (e.g. int and str ids).
entity_to_ix = {
    entity: idx for idx, entity in enumerate(sorted(entities, key=repr))
}
entity_to_ix[pad_token] = len(entity_to_ix)
    

Construct inverse idx mappings

In [None]:
# Flip each name -> index table so that an index can be decoded back to its name.
ix_to_type = dict(zip(type_to_ix.values(), type_to_ix.keys()))
ix_to_relation = dict(zip(relation_to_ix.values(), relation_to_ix.keys()))
ix_to_entity = dict(zip(entity_to_ix.values(), entity_to_ix.keys()))

Save idx mappings as .dict files

In [None]:
# Output file name -> lookup table to pickle. Each table is written to its
# own file in the current working directory, in the order listed here.
mappings_to_save = {
    'type_to_ix.dict': type_to_ix,
    'relation_to_ix.dict': relation_to_ix,
    'entity_to_ix.dict': entity_to_ix,
    'ix_to_type.dict': ix_to_type,
    'ix_to_relation.dict': ix_to_relation,
    'ix_to_entity.dict': ix_to_entity,
}

for file_name, mapping in mappings_to_save.items():
    with open(file_name, 'wb') as handle:
        pickle.dump(mapping, handle, protocol=pickle.HIGHEST_PROTOCOL)

Construct relation and type dictionaries, replacing entity names with their indices

In [None]:
def convert_to_ids(rel_dict, key_id_dict, value_id_dict):
    """Translate a relation dictionary from names to ids.

    Args:
        rel_dict: mapping whose keys are names and whose values are
            iterables of names.
        key_id_dict: lookup table applied to every key of ``rel_dict``.
        value_id_dict: lookup table applied to every element of every value
            of ``rel_dict``.

    Returns:
        A new dict mapping each translated key to a list of translated
        values, in the iteration order of the original value. ``rel_dict``
        itself is not modified.

    Note:
        Lookups use plain subscripting, so with ordinary dicts a name that is
        missing from a lookup table raises ``KeyError``.
    """
    converted = {}
    for name, neighbours in rel_dict.items():
        # Resolve the key first so a bad key fails before its values are read.
        mapped_key = key_id_dict[name]
        converted[mapped_key] = [value_id_dict[item] for item in neighbours]
    return converted

def _read_pickle(path):
    """Return the object stored in the pickle file at ``path``."""
    with open(path, 'rb') as source:
        return pickle.load(source)


# Load each relation dict and re-express it in entity indices. Both the keys
# and the values are entities, so the same lookup table is used for both.
song_user = _read_pickle('../song_data/song_user.dict')
song_user_ix = convert_to_ids(song_user, entity_to_ix, entity_to_ix)

user_song = _read_pickle('../song_data/user_song.dict')
user_song_ix = convert_to_ids(user_song, entity_to_ix, entity_to_ix)

song_person = _read_pickle('../song_data/song_person.dict')
song_person_ix = convert_to_ids(song_person, entity_to_ix, entity_to_ix)

person_song = _read_pickle('../song_data/person_song.dict')
person_song_ix = convert_to_ids(person_song, entity_to_ix, entity_to_ix)

# Re-key the entity -> type table by entity index. Only the keys are
# translated; the type values are carried over exactly as loaded.
entity_type = _read_pickle('../song_data/id_type.dict')
entity_ix_type = {
    entity_to_ix[name]: kind for name, kind in entity_type.items()
}

In [None]:
# Output file name -> index-based dictionary to pickle. Each dictionary is
# written to its own file in the current working directory, in this order.
converted_to_save = {
    'song_user_ix.dict': song_user_ix,
    'user_song_ix.dict': user_song_ix,
    'song_person_ix.dict': song_person_ix,
    'person_song_ix.dict': person_song_ix,
    'entity_ix_type.dict': entity_ix_type,
}

for file_name, converted in converted_to_save.items():
    with open(file_name, 'wb') as handle:
        pickle.dump(converted, handle, protocol=pickle.HIGHEST_PROTOCOL)