In [1]:
import torch
import torch.optim as optim
from torchtext.vocab import FastText
from utils.preprocess import MINDIterator
from utils.utils import getVocab,getLoss,getLabel
from models.NPA import NPAModel

## setting up the *NPA* model

### define paths and hyperparameters, load data

All of these hyperparameters are set according to the paper [23](https://dl.acm.org/doi/abs/10.1145/3292500.3330665).

- *mode*: data to read (*small*/*large*)

- *batch_size*: size of each minibatch

- *title_size*: maximum number of words kept per news title

- *his_size*: maximum number of clicked news items kept in a user's click history

- *npratio*: number of negative samples drawn for each positive (clicked) sample

- *dropout_p*: probability of dropout layer

- *filter_num*: number of kernels in 1D CNN, which is also embedding dimension of news/user

- *embedding_dim*: word embedding dimension

- *user_dim*: user id embedding dimension

- *preference_dim*: user preference embedding dimension

In [2]:
# Location of the MIND-small training split.
# NOTE(review): this is an absolute, machine-specific path; point DATA_DIR at
# your local copy of the dataset before running the notebook.
DATA_DIR = r'D:\Data\NR_data\MINDsmall_train'
news_file = DATA_DIR + r'\news.tsv'
behavior_file = DATA_DIR + r'\behaviors.tsv'

# Hyperparameters passed to both the data iterator and the model.
# The trailing "#<number>" notes are the original author's alternative values
# (presumably the paper's settings - TODO confirm).
hparams = {
    'mode': 'small',
    'batch_size': 5,        #100
    'title_size': 20,       #30
    'his_size': 20,         #50
    'npratio': 3,           #4
    'dropout_p': 0.2,
    'filter_num': 400,
    'embedding_dim': 300,
    'user_dim': 50,         # a missing comma here previously made this cell a SyntaxError
    'preference_dim': 200,
}

# Run on the GPU when torch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build the MIND iterator from the hyperparameters and open the training data.
iterator = MINDIterator(hparams=hparams)
train = iterator.load_data_from_file(news_file, behavior_file)

# Word vocabulary exposed by the iterator (original note: a torchtext vocab object).
vocab = iterator.word_dict

# Attach pretrained FastText 'simple' vectors to the vocabulary,
# using .vector_cache as the cache directory.
embedding = FastText('simple', cache='.vector_cache')
vocab.load_vectors(embedding)

In [3]:
# Pull one minibatch from the training iterator; the cells below reuse it as
# the sample input for the forward pass, the loss and the data inspection.
record = next(train)

In [4]:
# Instantiate the NPA model from the hyperparameters and the word vocabulary.
npaModel = NPAModel(hparams=hparams, vocab=vocab)

# Move the model's parameters to the selected device (GPU if available).
# As the last expression of the cell, the returned module is displayed.
npaModel.to(device)

NPAModel(
  (userProject): Linear(in_features=1, out_features=50, bias=True)
  (wordQueryProject): Linear(in_features=50, out_features=200, bias=True)
  (newsQueryProject): Linear(in_features=50, out_features=200, bias=True)
  (wordPrefProject): Linear(in_features=200, out_features=400, bias=True)
  (newsPrefProject): Linear(in_features=200, out_features=400, bias=True)
  (CNN): Conv1d(300, 400, kernel_size=(3,), stride=(1,), padding=(1,))
  (RELU): ReLU()
  (DropOut): Dropout(p=0.2, inplace=False)
)

### a prediction example
*The training, testing and evaluation phases will be added later.*

In [5]:
# Forward pass of the model on the sample minibatch.
# The recorded output is a 5x4 tensor of scores - presumably one row per batch
# item and one column per candidate (1 positive + npratio negatives); TODO confirm.
c = npaModel(record)
# Bare expression so the notebook displays the scores.
c

tensor([[-1.3550, -1.3247, -1.5654, -1.3199],
        [-1.4652, -1.1928, -1.7035, -1.2604],
        [-1.3407, -1.3079, -1.4717, -1.4338],
        [-1.3793, -1.3189, -1.6652, -1.2321],
        [-1.2793, -1.6684, -1.2879, -1.3573]], device='cuda:0',
       grad_fn=<SqueezeBackward0>)

In [6]:
# Loss function chosen for this model by the project helper, plus an Adam
# optimizer over all model parameters.
# NOTE(review): the optimizer is only used by the disabled training loop below.
loss_func = getLoss(npaModel)
optimizer = optim.Adam(npaModel.parameters(), lr=0.002)

# Smoke test: loss of the single prediction `c` computed in the previous cell
# against the labels derived from the same minibatch.
label = getLabel(npaModel, record)
loss = loss_func(c, label)

# Training loop sketch, kept disabled until the training phase is implemented:
# for epoch in range(2):
#     for x in train:
#         pred = npaModel(x)
#         label = getLabel(npaModel,x)
#         loss = loss_func(pred,label)
#         loss.backward()
#         optimizer.step()
#         optimizer.zero_grad()
#     print(loss)

# Display the loss of the sample minibatch.
loss

tensor(1.3639, device='cuda:0', grad_fn=<NllLossBackward>)

### a glance of data

In [66]:
# Pick the three fields of the sample minibatch that are inspected here.
# NOTE(review): `label` is also bound by the loss cell; this rebinds it.
candidate = record['candidate_title_batch']
history = record['clicked_title_batch']
label = record['labels']

# Display the shapes of the candidate and history title batches, then the labels.
candidate.shape, history.shape, label

(torch.Size([5, 4, 20]),
 torch.Size([5, 20, 20]),
 tensor([[1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.]]))