In [8]:
import os
import sys
# Project root: becomes the working directory (the relative 'data/...' and 'models/...'
# paths in this notebook resolve against it) and is added to the import search path so
# the local `utils` and `models` packages can be imported.
PROJECT_ROOT = '/home/peitian_zhang/Codes/NR'
os.chdir(PROJECT_ROOT)
# Guard the append so that re-running this cell does not stack duplicate sys.path entries.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

import math
import torch
import torch.nn as nn
import torch.optim as optim
from datetime import datetime
from torchtext.vocab import GloVe
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from utils.MIND import MIND_iter,MIND_map
from utils.utils import getLoss,getLabel,constructBasicDict,run_eval,run_train
from models.KNRM import KNRMModel

# Run configuration. The same dict is passed to the dataset and model constructors below.
hparams = {
    'mode': 'demo',            # selects the MIND<mode>_train / MIND<mode>_dev directories
    'name': 'knrm',            # used in the checkpoint file name
    'train_embedding': False,
    'epochs': 10,              # passed to run_train and used in the checkpoint file name
    'batch_size': 5,           # batch size of both DataLoaders
    'title_size': 15,
    'his_size': 50,
    'npratio': 4,
    'embedding_dim': 300,
    'metrics': 'group_auc,ndcg@5,ndcg@10,mean_mrr',
    'device': 'cuda:1',
    'attrs': ['title'],
}

# Every split directory is <data root>/MIND<mode>_<split>/ and holds news.tsv and behaviors.tsv.
_mind_dir = '/home/peitian_zhang/Data/MIND/MIND{}_{}/'
_train_dir = _mind_dir.format(hparams['mode'], 'train')
_dev_dir = _mind_dir.format(hparams['mode'], 'dev')

news_file_train = _train_dir + 'news.tsv'
news_file_test = _dev_dir + 'news.tsv'
news_file_pair = (news_file_train, news_file_test)

behavior_file_train = _train_dir + 'behaviors.tsv'
behavior_file_test = _dev_dir + 'behaviors.tsv'
behavior_file_pair = (behavior_file_train, behavior_file_test)

# Checkpoint path, relative to the working directory.
# NOTE(review): save_path is not referenced again in the visible cells.
save_path = 'models/model_params/{name}_{mode}_{epochs}.model'.format(**hparams)

# Call constructBasicDict only when the vocab pickle for this mode / attribute set is
# missing (presumably the helper creates it -- confirm in utils.utils).
vocab_pickle = 'data/dictionaries/vocab_{}_{}.pkl'.format(hparams['mode'], '_'.join(hparams['attrs']))
if not os.path.exists(vocab_pickle):
    constructBasicDict(
        news_file_pair,
        behavior_file_pair,
        hparams['mode'],
        hparams['attrs'],
    )

# Use the configured GPU when CUDA is usable; otherwise fall back to the CPU.
# NOTE(review): hparams['device'] itself is left unchanged on fallback -- confirm that
# nothing downstream reads it directly.
if torch.cuda.is_available():
    device = torch.device(hparams['device'])
else:
    device = torch.device("cpu")

# Training-split dataset (MIND_map) and dev-split dataset (MIND_iter, built with mode='test').
dataset_train = MIND_map(
    hparams=hparams,
    news_file=news_file_train,
    behaviors_file=behavior_file_train,
)
dataset_test = MIND_iter(
    hparams=hparams,
    news_file=news_file_test,
    behaviors_file=behavior_file_test,
    mode='test',
)

# Load GloVe vectors (cached under .vector_cache) into the training dataset's vocabulary.
# NOTE(review): dim is hard-coded to 300 rather than read from hparams['embedding_dim'];
# the two agree under the current configuration only.
embedding = GloVe(cache='.vector_cache', dim=300)
vocab = dataset_train.vocab
vocab.load_vectors(embedding)

# Training loader: shuffled, three worker processes, final partial batch dropped.
loader_train = DataLoader(
    dataset_train,
    batch_size=hparams['batch_size'],
    shuffle=True,
    pin_memory=True,
    num_workers=3,
    drop_last=True,
)
# Evaluation loader: loaded in the main process (num_workers=0), no shuffling.
# NOTE(review): drop_last=True also discards the final partial batch here, so up to
# batch_size - 1 test samples are excluded from the metrics -- confirm this is intended.
loader_test = DataLoader(
    dataset_test,
    batch_size=hparams['batch_size'],
    pin_memory=True,
    num_workers=0,
    drop_last=True,
)

In [5]:
# Build the KNRM model from the shared config and vocabulary, then move it to the device.
knrmModel = KNRMModel(hparams=hparams, vocab=vocab)
knrmModel = knrmModel.to(device)
# Switch to training mode; as the cell's last expression this also displays the module repr.
knrmModel.train()

KNRMModel(
  (CosSim): CosineSimilarity()
  (softmax): Softmax(dim=-1)
  (learningToRank): Linear(in_features=1000, out_features=1, bias=True)
)

In [6]:
# Train only while the model is in training mode (set by knrmModel.train() in an earlier cell).
# NOTE(review): the recorded loss stays around 1.61 (about ln 5) across all ten epochs,
# which is what a uniform guess over npratio + 1 = 5 candidates would give, if that is
# the candidate count per sample -- check that the model is actually learning.
if knrmModel.training:
    print("training...")
    loss_func = getLoss(knrmModel)
    optimizer = optim.Adam(knrmModel.parameters(), lr=1e-3)
    knrmModel = run_train(
        knrmModel,
        loader_train,
        optimizer,
        loss_func,
        epochs=hparams['epochs'],
        interval=10,
    )

training...
epoch 0 , step 390 , loss: 1.6246: : 400it [00:05, 77.92it/s]
epoch 1 , step 390 , loss: 1.6589: : 400it [00:04, 89.25it/s]
epoch 2 , step 390 , loss: 1.6277: : 400it [00:04, 89.41it/s]
epoch 3 , step 390 , loss: 1.6328: : 400it [00:04, 84.75it/s]
epoch 4 , step 390 , loss: 1.6247: : 400it [00:04, 89.69it/s]
epoch 5 , step 390 , loss: 1.6207: : 400it [00:04, 91.66it/s]
epoch 6 , step 390 , loss: 1.6116: : 400it [00:04, 87.85it/s]
epoch 7 , step 390 , loss: 1.6418: : 400it [00:04, 85.48it/s]
epoch 8 , step 390 , loss: 1.6070: : 400it [00:04, 89.41it/s]
epoch 9 , step 390 , loss: 1.6135: : 400it [00:04, 83.32it/s]


In [9]:
print("evaluating...")
knrmModel.eval()
# cdd_size is overridden to 1 for evaluation -- presumably so each candidate is scored
# on its own; confirm against KNRMModel.
knrmModel.cdd_size = 1

# Keep the metrics under a name and show them as the cell's last expression.
eval_metrics = run_eval(knrmModel, loader_test)
eval_metrics

0it [00:00, ?it/s]evaluating...
3622it [00:13, 265.78it/s]
evaluation results:{'group_auc': 0.5353, 'ndcg@5': 0.2523, 'ndcg@10': 0.3165, 'mean_mrr': 0.2409}


{'group_auc': 0.5353, 'ndcg@5': 0.2523, 'ndcg@10': 0.3165, 'mean_mrr': 0.2409}