In [None]:
import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)
import torch
from collections import OrderedDict
import numpy as np
from src.dataloader.pointwise.data_fetcher import DataFetcher
from src.utils.utils import create_node2vec_embedding_layer
from tqdm import tqdm
from src.model.pointwise.model_fm import HomophilyContentCNNFM

In [None]:
import gensim

from src.constants import DICTIONARY_PATH, MAX_LENGTH_USER_REPRESENATION, MAX_LENGTH_COMMENT_SECTION, ROOT_PATH
import pickle 

In [None]:
from src.model.pointwise.deepconn_model_fm import DeepCoNNFM

In [None]:
from src.testing.evaluation_data import EvaluationData
from src.testing.evaluation_dataset import EvaluationDataset

In [None]:
# Select GPU 0 as the current CUDA device; the `.cuda()` calls further down
# place the model and its inputs on it.
torch.cuda.set_device(0)

In [None]:
# Run configuration. Every path is left as an empty placeholder here.
# Passed to create_node2vec_embedding_layer.
user_embedding_path = ''
# Array file read with np.load.
validation_author_path = ''
# Folder of the checkpoint; keyed_vectors.txt is written into it as well.
model_folder_path = ''
model_filename = ''
# Plain string concatenation: model_folder_path must carry its own trailing
# path separator.
model_path = model_folder_path + model_filename
# Pickle file read with pickle.load.
comment_id_to_author_dict_path = ''
# Not referenced by any of the visible cells.
root_path = ''
# First and second positional argument of DataFetcher.
training_path = ''
train_set_path = ''

In [None]:
# Build the node2vec embedding layer from the file at user_embedding_path.
# Of the four returned values only author_to_pos_dict is used by later cells
# (it is handed to EvaluationData).
# NOTE(review): the meaning of the second argument `True` is defined by
# create_node2vec_embedding_layer and is not visible here.
NODE2VEC_EMB_DIM, num_authors, node2vec_emb_layer, author_to_pos_dict = create_node2vec_embedding_layer(
        user_embedding_path, True)

In [None]:
# Author ids that the export loop further down iterates over.
validation_authors = np.load(validation_author_path)

In [None]:
# Lookup handed to EvaluationData below (presumably comment id -> author,
# judging by its name).
# NOTE: unpickling can execute arbitrary code, so only point this at a file
# from a trusted source.
# The context manager guarantees the file handle is closed once loading is done.
with open(comment_id_to_author_dict_path, 'rb') as dict_file:
    comment_id_to_author_dict = pickle.load(dict_file)

In [None]:
# Load the gensim dictionary stored at DICTIONARY_PATH, then hand it to the
# DataFetcher together with the two training data locations.
token_dictionary = gensim.corpora.Dictionary.load(DICTIONARY_PATH)
data_fetcher = DataFetcher(training_path, train_set_path, token_dictionary)

# EvaluationData is built from the fetcher and the two author lookups;
# EvaluationDataset is instantiated without arguments.
evaluation_data = EvaluationData(
    data_fetcher,
    comment_id_to_author_dict,
    author_to_pos_dict,
)
evaluation_dataset = EvaluationDataset()

In [None]:
def load_own_model(path):
    """Rebuild a HomophilyContentCNNFM from a saved checkpoint.

    The checkpoint at ``path`` is expected to hold a ``'config'`` entry (the
    stored hyper-parameters) and a ``'state_dict'`` entry (the trained
    weights). Only whitelisted hyper-parameters are forwarded to the model
    constructor; every other config entry is ignored.

    The node2vec embedding layer is created afresh from the module-level
    ``user_embedding_path``.

    Args:
        path: Location of a checkpoint file readable by ``torch.load``.

    Returns:
        A tuple ``(latent_factors_user, model)``: the ``'latent_factors_user'``
        value from the checkpoint config and the model with the stored
        weights loaded.

    Raises:
        KeyError: If the checkpoint has no ``'config'`` / ``'state_dict'``
            entry, or the config has no ``'latent_factors_user'`` entry.
    """
    print('Load HomophilyCoNN')
    # Only the embedding dimension and the layer itself are needed here.
    emb_dim, _, emb_layer, _ = create_node2vec_embedding_layer(
        user_embedding_path,
        True)

    # Deserialize onto the CPU so loading does not depend on the GPU the
    # checkpoint was saved from; load_state_dict copies the values into the
    # model's own parameters afterwards.
    checkpoint = torch.load(path, map_location='cpu')
    saved_config = checkpoint['config']
    state_dict = checkpoint['state_dict']

    # NOTE(review): the first four names are also passed positionally below;
    # if the checkpoint config ever contained them they would likely be passed
    # twice. Confirm against the HomophilyContentCNNFM signature.
    # NOTE(review): 'number of kernels' looks like a comment that ended up as a
    # key. It is kept so the whitelist matches the previous behaviour.
    allowed_keys = {
        'node2vec_emb_layer',
        'NODE2VEC_EMB_DIM',
        'MAX_LENGTH_USER_REPRESENATION',
        'MAX_LENGTH_COMMENT_SECTION',
        'dropout',
        'user_num_kernels',
        'number of kernels',
        'section_num_kernels',
        'user_kernel_size',  # number of words in window
        'section_kernel_size',
        'latent_factors_deepconn',  # embedding size
        'freeze_embeddings',
        'latent_factors_user',
        'latent_factors_section',
    }

    # Build a filtered copy instead of deleting keys from the loaded config.
    config = {key: value for key, value in saved_config.items() if key in allowed_keys}
    print(config)

    model = HomophilyContentCNNFM(emb_layer,
                                  emb_dim,
                                  MAX_LENGTH_USER_REPRESENATION,
                                  MAX_LENGTH_COMMENT_SECTION,
                                  **config)
    # The state dict keys must match the model's parameter names as they are;
    # keys carrying a `module.` prefix are not stripped here.
    model.load_state_dict(state_dict)
    return config['latent_factors_user'], model


In [None]:
# Restore the trained model; latent_factors_user becomes the vector size of
# the KeyedVectors container created in the next cell.
latent_factors_user, model = load_own_model(model_path)

In [None]:
# Empty KeyedVectors container, sized by latent_factors_user, that the export
# loop below fills with one vector per author.
from gensim.models import KeyedVectors
keyed_vectors = KeyedVectors(vector_size=latent_factors_user)

In [None]:
# Move the model to the GPU and call eval() on it before any representations
# are extracted.
model.cuda()
model.eval()

In [None]:
# Export one learned user representation per validation author into
# keyed_vectors. No gradients are needed for this, so the loop runs under
# torch.no_grad() to keep autograd from recording a graph on every forward pass.
with torch.no_grad():
    for author_id in tqdm(validation_authors):
        # NOTE(review): 'asdqwe' looks like a dummy value for the second
        # argument; confirm against DataFetcher.get_user_representation.
        user_rep = data_fetcher.get_user_representation(author_id, 'asdqwe')
        user_content, user_emb, user_emb_offsets = evaluation_data.get_author_data(author_id, user_rep)
        user_emb, user_emb_offsets = user_emb.cuda(), user_emb_offsets.cuda()
        # unsqueeze(0) adds a leading dimension of size 1 to the content
        # tensor; [0] below takes the first row of the result.
        user_emb = model.get_user_rep(user_content.unsqueeze(0).cuda(), user_emb, user_emb_offsets)
        keyed_vectors.add(str(author_id), user_emb.cpu().detach().numpy()[0])

In [None]:
# Write the collected vectors in word2vec format into the model folder
# (plain string concatenation, so model_folder_path needs a trailing separator).
keyed_vectors.save_word2vec_format(model_folder_path + 'keyed_vectors.txt')

In [None]:
# Report where the vectors were written (same path expression as the save call).
print('Output Path:', model_folder_path + 'keyed_vectors.txt')