In [None]:
import os
import sys
# Put the parent of the current working directory on sys.path (once) so that
# the `src.*` imports below resolve when the notebook runs from a subfolder.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)
import torch
from collections import OrderedDict
import numpy as np
from src.dataloader.pointwise.data_fetcher import DataFetcher
from src.utils.utils import create_node2vec_embedding_layer
from tqdm import tqdm

In [None]:
import gensim
from src.constants import DICTIONARY_PATH
import pickle

In [None]:
from src.model.pointwise.deepconn_model_fm import DeepCoNNFM

In [None]:
from src.testing.evaluation_data import EvaluationData
from src.testing.evaluation_dataset import EvaluationDataset

In [None]:
# Select the CUDA device that later `.cuda()` calls in this notebook will use.
GPU_INDEX = 0
torch.cuda.set_device(GPU_INDEX)

In [None]:
# --- Run configuration: fill in every path before executing the notebook ---

# Input files
user_embedding_path = ''             # passed to create_node2vec_embedding_layer
validation_author_path = ''          # read with np.load
comment_id_to_author_dict_path = ''  # read with pickle.load
training_data_path = ''              # first DataFetcher argument
train_set_all_path = ''              # second DataFetcher argument
root_path = ''                       # not referenced by the cells shown in this notebook

# Checkpoint location. Folder and filename are concatenated as-is, so the
# folder must already end with a path separator.
deepconn_folder_path = ''
deepconn_filename = ''
deepconn_path = f'{deepconn_folder_path}{deepconn_filename}'

In [None]:
# The helper returns four values, unpacked below. What the positional `True`
# flag controls is defined by create_node2vec_embedding_layer (not shown in
# this notebook) -- TODO confirm and pass it by keyword.
node2vec_outputs = create_node2vec_embedding_layer(user_embedding_path, True)
(NODE2VEC_EMB_DIM,
 num_authors,
 node2vec_emb_layer,
 author_to_pos_dict) = node2vec_outputs

In [None]:
# Author ids to export; the export loop further down iterates over this array.
validation_authors = np.load(file=validation_author_path)



In [None]:

# NOTE: unpickling can execute arbitrary code -- only point this at a trusted file.
# The context manager guarantees the file handle is closed once loading finishes
# (or fails), instead of leaving it open for the lifetime of the kernel.
with open(comment_id_to_author_dict_path, 'rb') as pickle_file:
    comment_id_to_author_dict = pickle.load(pickle_file)

In [None]:
# Load the gensim dictionary first so the DataFetcher construction reads as a
# plain three-argument call.
gensim_dictionary = gensim.corpora.Dictionary.load(DICTIONARY_PATH)
data_fetcher = DataFetcher(training_data_path, train_set_all_path, gensim_dictionary)

# EvaluationData combines the fetcher with the comment-id -> author mapping and
# the author -> position mapping produced by the node2vec helper.
evaluation_data = EvaluationData(
    data_fetcher,
    comment_id_to_author_dict,
    author_to_pos_dict,
)
evaluation_dataset = EvaluationDataset()

In [None]:
def load_deep_conn_model(path):
    """Rebuild a DeepCoNNFM model from a checkpoint file.

    The checkpoint must be a mapping with two entries:
      * 'config'     -- keyword arguments for ``DeepCoNNFM``, plus optionally the
                        training-only keys 'pairwise', 'learning_rate' and
                        'batch_size', which are dropped before construction.
      * 'state_dict' -- the saved weights. Keys may carry a leading 'module.'
                        prefix (presumably from an nn.DataParallel wrapper --
                        confirm against the training script); it is removed.

    Args:
        path: Location of the checkpoint, as accepted by ``torch.load``.

    Returns:
        A tuple ``(latent_factors_user, model)`` where ``latent_factors_user``
        is ``config['user_latent_factors1']`` and ``model`` is the DeepCoNNFM
        instance with the checkpoint weights loaded (on CPU; the caller moves
        it to the GPU).

    Raises:
        KeyError: if 'config', 'state_dict' or 'user_latent_factors1' is missing.
    """
    # Deserialize onto the CPU so loading does not depend on the GPU layout of
    # the machine that wrote the checkpoint.
    checkpoint = torch.load(path, map_location='cpu')
    # Work on a copy so the checkpoint's own config dict is left untouched.
    config = dict(checkpoint['config'])
    state_dict = checkpoint['state_dict']
    latent_factors_user = config['user_latent_factors1']

    # Training-only settings are not constructor arguments; tolerate their
    # absence instead of failing with a KeyError.
    for training_only_key in ('pairwise', 'learning_rate', 'batch_size'):
        config.pop(training_only_key, None)

    # Strip the 'module.' prefix only where it is actually present, so keys
    # saved without it are not truncated.
    prefix = 'module.'
    new_state_dict = OrderedDict(
        (k[len(prefix):] if k.startswith(prefix) else k, v)
        for k, v in state_dict.items()
    )

    model = DeepCoNNFM(**config)
    # Start from the freshly initialised weights and overlay the checkpoint, so
    # parameters missing from the checkpoint keep their initial values.
    model_dict = model.state_dict()
    model_dict.update(new_state_dict)
    model.load_state_dict(model_dict)
    return latent_factors_user, model


In [None]:
# Restore the trained model together with the size of its user representation.
(latent_factors_user, model) = load_deep_conn_model(path=deepconn_path)

In [None]:
from gensim.models import KeyedVectors
# Empty vector store sized to the `user_latent_factors1` value returned by
# load_deep_conn_model; the export loop below fills it one author at a time.
keyed_vectors = KeyedVectors(vector_size=latent_factors_user)

In [None]:
# Move the model to the GPU and switch it to evaluation mode before the export
# loop runs. As the last expression of the cell, model.eval() is also what the
# notebook displays.
model.cuda()
model.eval()

In [None]:
# Export one learned user representation per validation author into `keyed_vectors`.
# This is inference only, so run it under no_grad(): autograd then records no
# graph for the forward passes, which saves memory and time.
with torch.no_grad():
    for author_id in tqdm(validation_authors):
        # NOTE(review): the second argument 'asd' is passed through unchanged;
        # its meaning is defined by DataFetcher.get_user_representation -- confirm.
        user_rep = data_fetcher.get_user_representation(author_id, 'asd')
        # Only the content tensor is fed to the model; the other two returned
        # values are not used in this loop.
        user_content, _node2vec_emb, _emb_offsets = evaluation_data.get_author_data(author_id, user_rep)
        # unsqueeze(0) adds a leading dimension of size 1 before the forward pass.
        user_emb = model.get_user_rep(user_content.unsqueeze(0).cuda())
        # [0] drops that leading dimension again, leaving a single vector to store.
        keyed_vectors.add(str(author_id), user_emb.cpu().detach().numpy()[0])

In [None]:
# Write the collected vectors in word2vec text format next to the checkpoint.
# The folder path is concatenated as-is, so it must end with a path separator.
keyed_vectors_path = deepconn_folder_path + 'keyed_vectors.txt'
keyed_vectors.save_word2vec_format(keyed_vectors_path)

In [None]:
# Report where the exported vectors were written.
output_path = deepconn_folder_path + 'keyed_vectors.txt'
print('Output Path:', output_path)