In [1]:
import os
import sys
os.chdir('../')
sys.path.append('../')

import torch
from utils.utils import evaluate,train,prepare,my_collate
from utils.MIND import MIND_news
from models.SFI_FIM import SFIModel_pipeline1
from torchtext.vocab import GloVe

In [14]:
hparams = {
    'scale':'large',
    'mode':'train',
    'name':'sfi-fim',
    'batch_size':100,
    'title_size':20,
    'his_size':50,
    'npratio':4,
    'dropout_p':0.2,
    'embedding_dim':300,
    'filter_num':150,
    'epochs':1,
    'metrics':'group_auc,mean_mrr,ndcg@5,ndcg@10',
    'device':'cpu',
    'attrs': ['title'],
    'integration':'harmony',
    'k':10,
    'save_step':0,
    'save_each_epoch':True,
    'train_embedding':True
}

device = torch.device(hparams['device'])
# torch.cuda.set_device(hparams['device'])

path='/home/peitian_zhang/Data/MIND'
news_file_train = path+'/MIND'+hparams['scale']+'_train/news.tsv'
news_file_test = path+'/MIND'+hparams['scale']+'_dev/news.tsv'
behavior_file_train = path+'/MIND'+hparams['scale']+'_train/behaviors.tsv'
behavior_file_test = path+'/MIND'+hparams['scale']+'_dev/behaviors.tsv'
mind_news = MIND_news(hparams, news_file_train)
news_loader = torch.utils.data.DataLoader(mind_news,batch_size=hparams['batch_size'],pin_memory=True,num_workers=8,drop_last=False,collate_fn=my_collate)

vocab = mind_news.vocab
embedding = GloVe(dim=300,cache='.vector_cache')
vocab.load_vectors(embedding)

In [4]:
record = next(iter(news_loader))

In [6]:
record['candidate_title'].shape

torch.Size([100, 1, 20])

In [3]:
hparams['select'] = 'pipeline'
sfiModel_pipeline1 = SFIModel_pipeline1(hparams, vocab=vocab, pipeline=True).to(device)

sfiModel_pipeline1.load_state_dict(torch.load('/home/peitian_zhang/Codes/News-Recommendation/models/model_params/sfi-fim-pipeline_large_epoch1_[hs=50,topk=30].model'))
sfiModel_pipeline1.cdd_size = 1

In [4]:
news_num_dict = {
    'small': 51282,
    'large': 101527
}
news_num = 101527

model = sfiModel_pipeline1
news_reprs = torch.zeros((news_num + 1,model.filter_num))
news_embeddings = torch.zeros((news_num + 1,model.signal_length,model.level,model.filter_num))

In [6]:
from tqdm import tqdm

for i,x in tqdm(enumerate(news_loader)):
    embedding, repr = sfiModel_pipeline1(x)
    for i in range(embedding.shape[0]):
        news_reprs[x['news_id'][i]] = repr[i].to('cpu')
        news_embeddings[x['news_id'][i]] = embedding[i].to('cpu')

1016it [00:41, 24.77it/s]


In [17]:
torch.save(news_reprs, 'data/tensors/news_reprs_{}_{}.tensor'.format(hparams['scale'],hparams['mode']))
torch.save(news_embeddings, 'data/tensors/news_embeddings_{}_{}.tensor'.format(hparams['scale'],hparams['mode']))

In [2]:
from models.SFI_FIM import SFIModel_pipeline2

In [3]:
hparams = {
    'scale':'large',
    'mode':'train',
    'name':'sfi-fim',
    'batch_size':100,
    'title_size':20,
    'his_size':50,
    'npratio':4,
    'dropout_p':0.2,
    'embedding_dim':300,
    'filter_num':150,
    'epochs':1,
    'metrics':'group_auc,mean_mrr,ndcg@5,ndcg@10',
    'device':'cuda:0',
    'attrs': ['title'],
    'integration':'harmony',
    'k':10,
    'save_step':0,
    'save_each_epoch':True,
    'train_embedding':True,
    'news_id':True
}
device = torch.device(hparams['device'])
vocab,loader_train,loader_test = prepare(hparams)

In [7]:
import torch
import math
import torch.nn as nn
import torch.nn.functional as F
class SFIModel_pipeline2(nn.Module):
    def __init__(self,hparams,vocab):
        super().__init__()
        self.name = hparams['name']
        self.metrics = hparams['metrics']

        self.cdd_size = (hparams['npratio'] + 1) if hparams['npratio'] > 0 else 1
        self.his_size =hparams['his_size']
        self.batch_size = hparams['batch_size']
        
        # concatenate category embedding and subcategory embedding
        self.signal_length = hparams['title_size']# + 1 + 1
        self.filter_num = hparams['filter_num']
        self.k = hparams['k']
        self.level = 3

        self.device = hparams['device']

        self.news_reprs = torch.load('data/tensors/news_reprs_{}_{}.tensor'.format(hparams['scale'],hparams['mode'])).to(self.device)
        self.news_embeddings = torch.load('data/tensors/news_embeddings_{}_{}.tensor'.format(hparams['scale'],hparams['mode'])).to(self.device)

        # elements in the slice along dim will sum up to 1 
        self.softmax = nn.Softmax(dim=-1)
        self.learningToRank = nn.Linear(self.his_size,1)
        self.SeqCNN3D = nn.Sequential(
            nn.Conv3d(in_channels=3,out_channels=32,kernel_size=[3,3,3],padding=1),
            nn.ReLU(),
            nn.MaxPool3d(kernel_size=[3,3,3],stride=[3,3,3]),
            nn.Conv3d(in_channels=32,out_channels=16,kernel_size=[3,3,3],padding=1),
            nn.ReLU(),
            nn.MaxPool3d(kernel_size=[3,3,3],stride=[3,3,3])
        )
        self.learningToRank = nn.Linear(int((int((self.k - 3)/3 + 1) - 3)/3 + 1) * 2 * 2 * 16,1)
    
    def _scaled_dp_attention(self,query,key,value):
        """ calculate scaled attended output of values
        
        Args:
            query: tensor of [*, query_num, key_dim]
            key: tensor of [batch_size, *, key_num, key_dim]
            value: tensor of [batch_size, *, key_num, value_dim]
        
        Returns:
            attn_output: tensor of [batch_size, *, query_num, value_dim]
        """

        # make sure dimension matches
        assert query.shape[-1] == key.shape[-1]
        key = key.transpose(-2,-1)

        attn_weights = torch.matmul(query,key)/torch.sqrt(torch.tensor([self.embedding_dim],dtype=torch.float,device=self.device))
        attn_weights = self.softmax(attn_weights)
        
        attn_output = torch.matmul(attn_weights,value)
        return attn_output.squeeze(dim=-2)

    def _fusion(self,cdd_news_reprs,his_news_reprs):
        """ construct fusion tensor between candidate news repr and history news repr at each dilation level

        Args:
            cdd_news_reprs: tensor of [batch_size, cdd_size, signal_length, level, filter_num]
            his_activated: tensor of [batch_size, cdd_size, k, signal_length, level, filter_num]

        Returns:
            fusion_tensor: tensor of [batch_size, *], where * is derived from MaxPooling with no padding
        """

        # [batch_size, cdd_size, his_size, level, signal_length, signal_length]
        cdd_news_reprs = cdd_news_reprs.transpose(-2,-3)
        his_news_reprs = his_news_reprs.transpose(-2,-3)

        fusion_tensor = torch.matmul(cdd_news_reprs.unsqueeze(dim=2),his_news_reprs.transpose(-2,-1)) / math.sqrt(self.filter_num)
        # print(fusion_tensor.shape)
        
        # reshape the tensor in order to feed into 3D CNN pipeline
        fusion_tensor = fusion_tensor.view(-1, self.k, self.level, self.signal_length, self.signal_length).transpose(1,2)

        fusion_tensor = self.SeqCNN3D(fusion_tensor).view(self.batch_size,self.cdd_size,-1)
        
        return fusion_tensor

    def _click_predictor(self,fusion_tensors):
        """ calculate batch of click probabolity

        Args:
            fusion_tensors: tensor of [batch_size, cdd_size, 320]
        
        Returns:
            score: tensor of [batch_size, npratio+1], which is normalized click probabilty
        """
        score = self.learningToRank(fusion_tensors).squeeze(dim=-1)
        if self.cdd_size > 1:
            score = nn.functional.log_softmax(score,dim=1)
        else:
            score = torch.sigmoid(score).squeeze(dim=-1)
        return score

    def forward(self,x):
        if x['candidate_title'].shape[0] != self.batch_size:
            self.batch_size = x['candidate_title'].shape[0]
    
        cdd_news_id = x['cdd_id'].long().to(self.device)
        cdd_repr = self.news_reprs[cdd_news_id]
        cdd_embedding = self.news_embeddings[cdd_news_id]

        his_news_id = x['his_id'].long().to(self.device)
        his_repr = self.news_reprs[his_news_id]
        his_embedding = self.news_embeddings[his_news_id]

        attn_weights = self.softmax(torch.bmm(cdd_repr, his_repr.transpose(-1,-2)))
        _, attn_weights_sorted = attn_weights.detach().sort(dim=-1, descending=True)
        attn_focus = F.one_hot(attn_weights_sorted[:,:,:self.k], num_classes=self.his_size).float()

        # [bs, cs, k, sl, 3, fn]
        his_activated = torch.matmul(attn_focus, his_embedding.view(self.batch_size, 1, self.his_size,-1)).view(self.batch_size, self.cdd_size, self.k, self.signal_length, self.level, self.filter_num)

        fusion_tensors = self._fusion(cdd_embedding, his_activated)
        
        score = self._click_predictor(fusion_tensors)
        return score

sfiModel_pipeline2 = SFIModel_pipeline2(hparams,vocab).to(device)

In [5]:
record = next(iter(loader_train))

In [8]:
sfiModel_pipeline2(record)

tensor([[-1.5548, -1.5655, -1.6545, -1.6259, -1.6509],
        [-1.5772, -1.7154, -1.5231, -1.6466, -1.5954],
        [-1.6432, -1.5850, -1.5375, -1.6722, -1.6148],
        [-1.5898, -1.6590, -1.6568, -1.5039, -1.6466],
        [-1.6129, -1.6411, -1.6591, -1.5614, -1.5763],
        [-1.7899, -1.8136, -1.7710, -1.3867, -1.3867],
        [-1.5782, -1.5762, -1.6296, -1.7041, -1.5656],
        [-1.5427, -1.5558, -1.5720, -1.7073, -1.6810],
        [-1.5488, -1.5780, -1.6173, -1.6390, -1.6686],
        [-1.5846, -1.6976, -1.5588, -1.6253, -1.5866],
        [-1.5742, -1.5649, -1.5686, -1.7373, -1.6125],
        [-1.8695, -1.9126, -1.4580, -1.4580, -1.4580],
        [-1.6036, -1.6276, -1.5974, -1.6340, -1.5855],
        [-1.6335, -1.6748, -1.6027, -1.5524, -1.5880],
        [-1.6114, -1.6235, -1.6264, -1.5760, -1.6107],
        [-1.5237, -1.6428, -1.6186, -1.6075, -1.6602],
        [-1.6724, -1.6080, -1.7124, -1.4577, -1.6159],
        [-1.6445, -1.5939, -1.5843, -1.6482, -1.5785],
        [-

In [9]:
train(sfiModel_pipeline2, hparams, loader_train, loader_test)

training...
0it [00:00, ?it/s]


RuntimeError: CUDA out of memory. Tried to allocate 3.40 GiB (GPU 0; 11.75 GiB total capacity; 8.33 GiB already allocated; 1.73 GiB free; 8.93 GiB reserved in total by PyTorch)