In [1]:
import torch
import torch.optim as optim
from torchtext.vocab import FastText
from utils.preprocess import MINDIterator
from utils.utils import getVocab,getLoss,getLabel
from models.NPA import NPAModel
from utils.utils import run_eval

## setting up the *NPA* model

### define paths and hyperparameters, load data

all these hyper parameters are fixed according to the paper [23](https://dl.acm.org/doi/abs/10.1145/3292500.3330665)

- *mode*: data to read (*small*/*large*)

- *batch_size*: size of each minibatch

- *title_size*: max word capacity of title

- *his_size*: max record capacity of click history

- *npratio*: number of negtive sampling

- *dropout_p*: probability of dropout layer

- *filter_num*: number of kernels in 1D CNN, which is also embedding dimension of news/user

- *embedding_dim*: word embedding dimension

- *user_dim*: user id embedding dimension

- *preference_dim*: user preference embedding dimension

In [2]:
news_file = r'D:\Data\NR_data\MINDsmall_train\news.tsv'
behavior_file = r'D:\Data\NR_data\dev\behaviors_small.tsv'
hparams = {
    'mode':'small',
    'batch_size':100,
    'title_size':30,
    'his_size':50,   
    'npratio':4,     
    'dropout_p':0.2,
    'filter_num':400,
    'embedding_dim':300,
    'user_dim':50,
    'preference_dim':200,
    'metrics':'group_auc,ndcg@4,mean_mrr',
}

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

iterator = MINDIterator(hparams=hparams)
train = iterator.load_data_from_file(news_file,behavior_file)

# torchtext.Vocab.vocab object
vocab = iterator.word_dict
embedding = FastText('simple',cache='.vector_cache')
vocab.load_vectors(embedding)

In [3]:
npaModel = NPAModel(vocab=vocab,hparams=hparams)

# migrate the model to GPU
npaModel.to(device).train()

NPAModel(
  (userProject): Linear(in_features=1, out_features=50, bias=True)
  (wordQueryProject): Linear(in_features=50, out_features=200, bias=True)
  (newsQueryProject): Linear(in_features=50, out_features=200, bias=True)
  (wordPrefProject): Linear(in_features=200, out_features=400, bias=True)
  (newsPrefProject): Linear(in_features=200, out_features=400, bias=True)
  (CNN): Conv1d(300, 400, kernel_size=(3,), stride=(1,), padding=(1,))
  (RELU): ReLU()
  (DropOut): Dropout(p=0.2, inplace=False)
)

### training

In [5]:
loss_func = getLoss(npaModel)
optimizer = optim.Adam(npaModel.parameters(),lr=0.0002)

num = 0
for epoch in range(10):
    train = iterator.load_data_from_file(news_file,behavior_file)

    for x in train:
        pred = npaModel(x)
        label = getLabel(npaModel,x)
        loss = loss_func(pred,label)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        num+=1
    
    print("epoch:{},loss:{}".format(epoch,loss))

epoch:0,loss:1.5922387838363647
epoch:1,loss:1.5638729333877563
epoch:2,loss:1.583019733428955
epoch:3,loss:1.5708460807800293
epoch:4,loss:1.5582536458969116
epoch:5,loss:1.572494387626648
epoch:6,loss:1.5632704496383667
epoch:7,loss:1.5356812477111816
epoch:8,loss:1.5512973070144653
epoch:9,loss:1.5153663158416748


### test & evaluate

In [6]:
npaModel.eval()

iterator.npratio = -1
test = iterator.load_data_from_file(news_file,behavior_file)

npaModel.npratio = -1
run_eval(npaModel,test)

{'group_auc': 0.6353, 'ndcg@4': 0.3636, 'mean_mrr': 0.3698}