In [1]:
import os

import torch
import torch.optim as optim
from torchtext.vocab import FastText
from tqdm import tqdm

from models.NPA import NPAModel
from utils.preprocess import MINDIterator
from utils.utils import getVocab,getLoss,getLabel,constructBasicDict
from utils.utils import run_eval

## setting up the *NPA* model

### define paths and hyperparameters, load data

All of these hyperparameters are set according to the paper [23](https://dl.acm.org/doi/abs/10.1145/3292500.3330665).

- *mode*: data to read (*small*/*large*)

- *batch_size*: size of each minibatch

- *title_size*: max word capacity of title

- *his_size*: max record capacity of click history

- *npratio*: number of negative samples per positive sample

- *dropout_p*: probability of dropout layer

- *filter_num*: number of kernels in 1D CNN, which is also embedding dimension of news/user

- *embedding_dim*: word embedding dimension

- *user_dim*: user id embedding dimension

- *preference_dim*: user preference embedding dimension

In [2]:
# Hyperparameters consumed by both the data iterator and the model.
hparams = {
    'mode':'small',
    'batch_size':5,
    'title_size':30,
    'his_size':50,
    'npratio':4,
    'dropout_p':0.2,
    'filter_num':400,
    'embedding_dim':300,
    'user_dim':50,
    'preference_dim':200,
    'metrics':'group_auc,ndcg@4,mean_mrr',
    'gpu':'cuda:0'
}

# customize your paths here
news_file = r'D:\Data\NR_data\dev\news.tsv'
behavior_file_train = r'D:\Data\NR_data\dev\behaviors_train.tsv'
behavior_file_test = r'D:\Data\NR_data\dev\behaviors_test.tsv'

# Build the user2id / word2id / news2id dictionaries only if the cached
# vocabulary file for this mode does not exist yet.
vocab_path = f"data/vocab_{hparams['mode']}.pkl"
if not os.path.exists(vocab_path):
    constructBasicDict(news_file,behavior_file_train,hparams['mode'])

# Fall back to CPU when CUDA is not available.
device = torch.device(hparams['gpu']) if torch.cuda.is_available() else torch.device("cpu")
# Keep hparams in sync with the device actually selected: the iterator and the
# model are built from hparams alone, so a stale 'cuda:0' would defeat the
# CPU fallback above. On a CUDA machine this is a no-op ('cuda:0' -> 'cuda:0').
# NOTE(review): assumes MINDIterator/NPAModel read hparams['gpu'] -- confirm.
hparams['gpu'] = str(device)

iterator = MINDIterator(hparams=hparams)

# word vocabulary exposed by the iterator; pretrained FastText vectors are
# attached to it so the model can initialise its word embeddings from them
vocab = iterator.word_dict
embedding = FastText('simple',cache='.vector_cache')
vocab.load_vectors(embedding)

In [3]:
# Open a batch iterator over the training behaviors and pull the first
# minibatch to inspect its structure (shown as the cell output: a dict with
# fields such as 'impression_index_batch', 'user_index_batch' and
# 'clicked_title_batch'). The training cell rebuilds `train` for every epoch,
# so the batch consumed here is not lost.
train = iterator.load_data_from_file(news_file,behavior_file_train)
next(train)

{'impression_index_batch': [429, 429, 227, 266, 376],
 'user_index_batch': tensor([[429],
         [429],
         [228],
         [267],
         [377]], device='cuda:0'),
 'clicked_title_batch': tensor([[[    0,     0,     0,  ...,     0,     0,     0],
          [    0,     0,     0,  ...,     0,     0,     0],
          [    0,     0,     0,  ...,     0,     0,     0],
          ...,
          [ 3427,  1969,    31,  ...,     0,     0,     0],
          [   94,  1016,   206,  ...,     0,     0,     0],
          [  234,  1541,    11,  ...,     0,     0,     0]],
 
         [[    0,     0,     0,  ...,     0,     0,     0],
          [    0,     0,     0,  ...,     0,     0,     0],
          [    0,     0,     0,  ...,     0,     0,     0],
          ...,
          [ 3427,  1969,    31,  ...,     0,     0,     0],
          [   94,  1016,   206,  ...,     0,     0,     0],
          [  234,  1541,    11,  ...,     0,     0,     0]],
 
         [[    0,     0,     0,  ...,     0,    

In [None]:
# instantiate the NPA network from the hyperparameters and the vocabulary
# (which already carries the loaded word vectors)
npaModel = NPAModel(hparams=hparams, vocab=vocab)

# move the parameters onto the selected device ...
npaModel.to(device)
# ... and switch to training mode (last expression, so the model is displayed)
npaModel.train()

### training

In [None]:
# Make sure the model is in training mode even if the evaluation cell
# (which calls .eval()) was executed before re-running this cell.
# NOTE(review): the evaluation cell also sets npratio = -1 on the iterator and
# the model; restart the kernel (or restore npratio) before retraining.
npaModel.train()

loss_func = getLoss(npaModel)
optimizer = optim.Adam(npaModel.parameters(),lr=0.0002)

for epoch in range(10):
    # start a fresh pass over the training behaviors for this epoch
    train = iterator.load_data_from_file(news_file,behavior_file_train)
    tqdm_ = tqdm(train)
    step = 0
    epoch_loss = 0.0

    for x in tqdm_:
        # clear gradients first so nothing stale (e.g. from an interrupted
        # run of this cell) leaks into this update
        optimizer.zero_grad()

        pred = npaModel(x)
        label = getLabel(npaModel,x)
        loss = loss_func(pred,label)
        loss.backward()
        optimizer.step()

        # Accumulate a plain Python float: adding the loss tensor itself would
        # keep every step's autograd history alive for the whole epoch.
        batch_loss = loss.item()
        epoch_loss += batch_loss
        step += 1

        if step % 100 == 0:
            # total_loss is the running mean loss of the epoch,
            # data_loss is the loss of the current minibatch
            tqdm_.set_description(
                "step {:d} , epoch {:d} , total_loss: {:.4f}, data_loss: {:.4f}".format(step,epoch, epoch_loss / step, batch_loss))

### test & evaluate

In [None]:
# switch the model to evaluation mode
npaModel.eval()

# npratio = -1 on both the iterator and the model selects the evaluation
# setting. NOTE(review): presumably this disables negative sampling so every
# impression candidate is scored -- confirm against MINDIterator / NPAModel.
# These assignments persist in the kernel; restore npratio before retraining.
iterator.npratio = -1
test = iterator.load_data_from_file(news_file,behavior_file_test)

npaModel.npratio = -1

# No gradients are needed for evaluation; disabling autograd avoids building
# graphs and reduces memory use.
with torch.no_grad():
    eval_result = run_eval(npaModel,test)

# last expression of the cell so the metrics are still displayed
eval_result