# `train_util`

In [None]:
import json
import os
import random
import shutil

import numpy as np
import torch


def save_checkpoint(state, is_best, checkpoint):
    """Save a training state to ``<checkpoint>/last.pth.tar``.

    When ``is_best`` is true, the file that was just written is also copied
    to ``<checkpoint>/best.pth.tar``.

    Args:
        state: (dict) contains model's state_dict, may contain other keys such as epoch, optimizer state_dict
        is_best: (bool) True if it is the best model seen till now
        checkpoint: (string) folder where parameters are to be saved
    """
    filepath = os.path.join(checkpoint, 'last.pth.tar')
    if not os.path.exists(checkpoint):
        print("Checkpoint Directory does not exist! Making directory {}".format(checkpoint))
        # makedirs also creates missing parent folders, which mkdir would
        # reject; exist_ok guards against a concurrent creation.
        os.makedirs(checkpoint, exist_ok=True)
    else:
        print("Checkpoint Directory exists! ")
    torch.save(state, filepath)
    if is_best:
        shutil.copyfile(filepath, os.path.join(checkpoint, 'best.pth.tar'))


def save_checkpoint_by_epoch(model, optimizer, epoch, checkpoint):
    """Save a per-epoch checkpoint to ``<checkpoint>/model.ep<epoch>``.

    The file normally holds a dict with the keys ``epoch``,
    ``model_state_dict`` and ``optimizer_state_dict``. If building or writing
    that dict fails, only ``model.state_dict()`` is written to the same path.

    Args:
        model: (torch.nn.Module) model whose parameters are saved
        optimizer: (torch.optim) optimizer whose state is saved with the model
        epoch: (int) epoch no
        checkpoint: (string) folder where parameters are to be saved
    """
    filepath = os.path.join(checkpoint, 'model.ep{0}'.format(epoch))
    if not os.path.exists(checkpoint):
        print("Checkpoint Directory does not exist! Making directory {}".format(checkpoint))
        # makedirs also creates missing parent folders.
        os.makedirs(checkpoint, exist_ok=True)
    else:
        print("Checkpoint Directory exists! ")
    try:
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
        }, filepath)
        print('Train Done!')
    except Exception:
        # Best-effort fallback: keep at least the model weights. Catching
        # Exception (not a bare except) lets Ctrl-C / SystemExit propagate.
        print('False')
        torch.save(model.state_dict(), filepath)

    


def load_checkpoint(checkpoint, model, optimizer=None):
    """Load model (and optionally optimizer) state from a checkpoint file.

    Two key layouts are accepted: ``state_dict`` / ``optim_dict`` and
    ``model_state_dict`` / ``optimizer_state_dict`` (the layout written by
    ``save_checkpoint_by_epoch``). The first layout wins when both exist.

    Args:
        checkpoint: (string) filename which needs to be loaded
        model: (torch.nn.Module) model for which the parameters are loaded
        optimizer: (torch.optim) optional: resume optimizer from checkpoint

    Returns:
        The object returned by ``torch.load`` for the checkpoint file.

    Raises:
        FileNotFoundError: if ``checkpoint`` does not exist.
    """
    if not os.path.exists(checkpoint):
        # Raising a bare string is itself a TypeError; raise a real exception.
        raise FileNotFoundError("File doesn't exist {}".format(checkpoint))
    # NOTE: torch.load unpickles the file -- only load checkpoints you trust.
    state = torch.load(checkpoint)

    model_key = 'state_dict' if 'state_dict' in state else 'model_state_dict'
    model.load_state_dict(state[model_key])

    if optimizer is not None:
        optim_key = 'optim_dict' if 'optim_dict' in state else 'optimizer_state_dict'
        optimizer.load_state_dict(state[optim_key])

    return state

# `eval_util`

In [None]:
from sklearn.metrics import (
    roc_auc_score,
)
import numpy as np


def group_labels(labels, preds, group_keys):
    """Split labels and predictions into groups that share a group key.

    Args:
        labels (list): ground truth label list.
        preds (list): prediction score list.
        group_keys (list): group key list, aligned with labels and preds.

    Returns:
        all_labels: one list of labels per distinct key.
        all_preds: one list of predictions per distinct key, in the same
            key order as all_labels.
    """
    # Group order follows the iteration order of the key set.
    unique_keys = list(set(group_keys))
    labels_by_key = {key: [] for key in unique_keys}
    preds_by_key = {key: [] for key in unique_keys}

    for label, pred, key in zip(labels, preds, group_keys):
        labels_by_key[key].append(label)
        preds_by_key[key].append(pred)

    all_labels = [labels_by_key[key] for key in unique_keys]
    all_preds = [preds_by_key[key] for key in unique_keys]
    return all_labels, all_preds


def mrr_score(y_true, y_score):
    """Compute the mean reciprocal rank of the positive labels.

    Items are ranked by descending ``y_score``; each label is divided by its
    1-based rank and the total is normalised by the sum of the labels.

    Args:
        y_true (numpy.ndarray): ground-truth labels.
        y_score (numpy.ndarray): predicted scores.

    Returns:
        numpy.ndarray: mrr scores.
    """
    descending = np.argsort(y_score)[::-1]
    ranked_labels = np.take(y_true, descending)
    ranks = np.arange(len(ranked_labels)) + 1
    reciprocal = ranked_labels / ranks
    return np.sum(reciprocal) / np.sum(ranked_labels)


def ndcg_score(y_true, y_score, k=10):
    """Compute the normalised DCG at k.

    The DCG of the predicted ordering is divided by the DCG obtained when
    the labels themselves are used as scores.

    Args:
        y_true (numpy.ndarray): ground-truth labels.
        y_score (numpy.ndarray): predicted scores.
        k (int): cut-off passed through to dcg_score.

    Returns:
        numpy.ndarray: ndcg scores.
    """
    ideal_dcg = dcg_score(y_true, y_true, k)
    achieved_dcg = dcg_score(y_true, y_score, k)
    return achieved_dcg / ideal_dcg


def dcg_score(y_true, y_score, k=10):
    """Compute the discounted cumulative gain at k.

    Args:
        y_true (numpy.ndarray): ground-truth labels.
        y_score (numpy.ndarray): predicted scores.
        k (int): cut-off; clipped to the size of the last axis of y_true.

    Returns:
        numpy.ndarray: dcg scores.
    """
    k = min(np.shape(y_true)[-1], k)
    top_k = np.argsort(y_score)[::-1][:k]
    top_labels = np.take(y_true, top_k)
    # gain 2^label - 1, discounted by log2(rank + 1) with 1-based ranks
    gains = np.power(2, top_labels) - 1
    discounts = np.log2(np.arange(2, len(top_labels) + 2))
    return np.sum(gains / discounts)


def cal_metric(labels, preds, metrics):
    """Calculate the requested metrics over grouped labels and predictions.

    FIXME:
        refactor this with the reco metrics and make it explicit.

    Args:
        labels (list): one list of labels per group.
        preds (list): one list of prediction scores per group, aligned
            with labels.
        metrics (list): metric names. Supported: "auc", "mean_mrr",
            "group_auc", and "ndcg" optionally followed by "@k1;k2;..."
            (for example "ndcg@5;10"; plain "ndcg" uses k = 1 and 2).

    Returns:
        dict: metric name -> value rounded to 4 decimals.

    Raises:
        ValueError: if a metric name is not supported.
    """
    res = {}
    for metric in metrics:
        if metric == "auc":
            # AUC over all groups pooled together.
            tmp_labels, tmp_preds = [], []
            for l, p in zip(labels, preds):
                tmp_labels += l
                tmp_preds += p
            auc = roc_auc_score(np.asarray(tmp_labels), np.asarray(tmp_preds))
            res["auc"] = round(auc, 4)
        elif metric == "mean_mrr":
            mean_mrr = np.mean(
                [
                    mrr_score(each_labels, each_preds)
                    for each_labels, each_preds in zip(labels, preds)
                ]
            )
            res["mean_mrr"] = round(mean_mrr, 4)
        elif metric.startswith("ndcg"):  # format like:  ndcg@2;4;6;8
            ndcg_list = [1, 2]
            ks = metric.split("@")
            if len(ks) > 1:
                ndcg_list = [int(token) for token in ks[1].split(";")]
            for k in ndcg_list:
                ndcg_temp = np.mean(
                    [
                        ndcg_score(each_labels, each_preds, k)
                        for each_labels, each_preds in zip(labels, preds)
                    ]
                )
                res["ndcg@{0}".format(k)] = round(ndcg_temp, 4)
        elif metric == "group_auc":
            auc_list = []
            for each_labels, each_preds in zip(labels, preds):
                try:
                    score = roc_auc_score(each_labels, each_preds)
                except ValueError:
                    # Only ValueError is treated as the degenerate case, so
                    # unrelated failures and Ctrl-C are no longer swallowed.
                    print("There are only zero labels")
                    score = 0.0
                else:
                    # NOTE(review): some scikit-learn releases appear to
                    # return NaN with a warning instead of raising for
                    # single-class groups -- confirm against the installed
                    # version. Such groups are scored 0.0 as well.
                    if np.isnan(score):
                        print("There are only zero labels")
                        score = 0.0
                auc_list.append(score)
            group_auc = np.mean(auc_list)
            res["group_auc"] = round(group_auc, 4)
        else:
            raise ValueError("not define this metric {0}".format(metric))
    return res

# `gather`

In [None]:
import os
import argparse
from tqdm import tqdm
import json
import scipy.stats as ss
import numpy as np
import pandas as pd
import math
import torch



def gather(output_path, input_file, flag, validate=False, save=True): ## ('result/', validate=True, save=False)
    """Group saved predictions by impression, then evaluate and/or export them.

    Reads a JSON file with the keys ``imp``, ``labels`` and ``preds`` (the
    layout written by ``validate``) and groups labels and predictions by
    impression id.

    Args:
        output_path (str): directory holding ``input_file``; the output
            files are written here too. A trailing separator is optional.
        input_file (str): name of the JSON prediction file.
        flag: suffix used in the output names ``score-<flag>.txt`` and
            ``result-<flag>.txt``.
        validate (bool): if True, print group_auc, mean_mrr, ndcg@5 and
            ndcg@10 computed by ``cal_metric``.
        save (bool): if True, write the score file (impression + raw
            predictions) and the result file (impression + ranks).
    """
    # os.path.join works with or without a trailing separator on output_path.
    with open(os.path.join(output_path, input_file), 'r', encoding='utf-8') as f:
        cur_result = json.load(f)

    imp_indexes = list(cur_result['imp'])
    labels = list(cur_result['labels'])
    preds = list(cur_result['preds'])

    # Group order (and so output row order) follows the key-set iteration order.
    all_keys = list(set(imp_indexes))
    labels_by_imp = {k: [] for k in all_keys}
    preds_by_imp = {k: [] for k in all_keys}
    for l, p, k in zip(labels, preds, imp_indexes):
        labels_by_imp[k].append(l)
        preds_by_imp[k].append(p)

    if validate:
        all_labels = [labels_by_imp[k] for k in all_keys]
        all_preds = [preds_by_imp[k] for k in all_keys]

        metric_list = ["group_auc", "mean_mrr", "ndcg@5;10"]
        ret = cal_metric(all_labels, all_preds, metric_list)
        for metric, val in ret.items():
            print("Eval - {}: {}".format(metric, val))

    if save:
        final_arr = []
        for k, imp_preds in preds_by_imp.items():
            imp_labels = labels_by_imp[k]
            # Rank 1 = highest score. rankdata's default gives tied scores
            # their average rank, which astype(int) then truncates.
            rank = ss.rankdata(-np.array(imp_preds)).astype(int).tolist()
            assert len(rank) == len(imp_labels)

            final_arr.append([
                k,
                ','.join(map(str, np.array(imp_labels).astype(int))),
                ','.join(map(str, np.array(imp_preds).astype(float))),
                '[' + ','.join(map(str, rank)) + ']',
            ])

        fdf = pd.DataFrame(final_arr, columns=['impression', 'labels', 'preds', 'ranks'])
        fdf.drop(columns=['labels', 'ranks']).to_csv(
            os.path.join(output_path, 'score-{}.txt'.format(flag)), sep=' ', index=False)
        fdf.drop(columns=['labels', 'preds']).to_csv(
            os.path.join(output_path, 'result-{}.txt'.format(flag)), header=None, sep=' ', index=False)

# `config`

In [None]:
import json
import pickle
import numpy as np

class ModelConfig():
    """Hyper-parameters and data-derived sizes used to build the model.

    Args:
        root: directory containing ``tracks_dict.jsonl`` (parsed as a single
            JSON document) and ``emb.npy`` (the word-embedding matrix).
    """

    def __init__(self, root):
        # The context manager closes the file handle deterministically.
        with open('{}/tracks_dict.jsonl'.format(root), 'r', encoding='utf-8') as f:
            tracks_dict = json.load(f)
        self.tracks_num = len(tracks_dict)
        self.word_emb = np.load('{}/emb.npy'.format(root))
        self.word_num = len(self.word_emb)

        self.pos_hist_length = 30
        self.max_lyric_len = 100
        self.neg_count = 4
        self.word_dim = 300
        self.hidden_size = 300
        self.head_num = 6
        self.dropout = 0.2

# `modules`

In [None]:
import math
import torch
import torch.nn as nn
import torch.nn.functional as F


class SelfAttend(nn.Module):
    """Attention pooling: collapse a sequence into one weighted-sum vector."""

    def __init__(self, embedding_size: int) -> None:
        super().__init__()
        # Attribute names double as state_dict keys, so they stay as they are.
        self.h1 = nn.Sequential(nn.Linear(embedding_size, 200), nn.Tanh())
        self.gate_layer = nn.Linear(200, 1)

    def forward(self, seqs, seq_masks=None):
        """
        :param seqs: shape [batch_size, seq_length, embedding_size]
        :param seq_masks: optional, shape [batch_size, seq_length]; positions
            equal to 0 are excluded from the softmax
        :return: seqs weighted by the attention scores and summed over dim 1,
            i.e. [batch_size, embedding_size] for the input shape above
        """
        scores = self.gate_layer(self.h1(seqs)).squeeze(-1)
        if seq_masks is not None:
            # a large negative score gives masked positions ~0 weight
            scores = scores.masked_fill(seq_masks == 0, -1e9)
        weights = F.softmax(scores, dim=-1).unsqueeze(-1)
        return torch.sum(seqs * weights, dim=1)

class TitleEncoder(nn.Module):
    """Encode token-id sequences, and sequences of encoded items, into vectors."""

    def __init__(self, cfg):
        super().__init__()
        self.cfg = cfg
        hidden, heads = cfg.hidden_size, cfg.head_num

        # Pretrained word vectors; freeze=False keeps them trainable.
        self.word_embedding = nn.Embedding.from_pretrained(torch.FloatTensor(cfg.word_emb), freeze=False)

        # Word-level attention stack.
        self.mh_self_attn = nn.MultiheadAttention(hidden, num_heads=heads)
        self.word_self_attend = SelfAttend(hidden)

        # History-level attention stack.
        self.user_mh_self_attn = nn.MultiheadAttention(hidden, num_heads=heads)
        self.pos_self_attend = SelfAttend(hidden)

        self.dropout = nn.Dropout(cfg.dropout)
        self.word_layer_norm = nn.LayerNorm(hidden)
        self.user_layer_norm = nn.LayerNorm(hidden)

    def _extract_hidden_rep(self, seqs):
        """
        Embed the tokens and run one residual self-attention layer.
        :param seqs: [*, seq_length] token ids
        :return: [*, seq_length, hidden_size]
        """
        embedded = self.dropout(self.word_embedding(seqs))

        # The attention module is fed with the first two axes swapped, and
        # its output is swapped back before the residual connection.
        swapped = embedded.permute(1, 0, 2)
        attended, _ = self.mh_self_attn(swapped, swapped, swapped)
        attended = self.dropout(attended.permute(1, 0, 2))

        return self.word_layer_norm(attended + embedded)

    def encode_news(self, seqs):
        """
        Args:
            seqs: [*, max_news_len]
        Returns:
            [*, hidden_size]
        """
        # [*, hidden_size] after attention pooling over the token axis
        return self.word_self_attend(self._extract_hidden_rep(seqs))

    def encode_user(self, seqs):
        """
        Args:
            seqs: [*, max_hist_len, hidden_size]
        Returns:
            [*, hidden_size]
        """
        swapped = seqs.permute(1, 0, 2)
        attended, _ = self.user_mh_self_attn(swapped, swapped, swapped)
        attended = attended.permute(1, 0, 2)

        normed = self.user_layer_norm(attended + seqs)
        return self.pos_self_attend(normed)


class NodesEncoder(nn.Module):
    """Pool a sequence of history vectors into a single vector."""

    def __init__(self, cfg):
        super().__init__()
        self.cfg = cfg
        hidden, heads = cfg.hidden_size, cfg.head_num

        self.user_mh_self_attn = nn.MultiheadAttention(hidden, num_heads=heads)
        # nodes_mh_self_attn and dropout are not referenced by forward();
        # they are still created, so the module's parameters are unchanged.
        self.nodes_mh_self_attn = nn.MultiheadAttention(hidden, num_heads=heads)

        self.pos_self_attend = SelfAttend(hidden)

        self.dropout = nn.Dropout(cfg.dropout)
        self.user_layer_norm = nn.LayerNorm(hidden)

    def forward(self, pos): ## neg, pos_nodes, neg_nodes
        """
        Args:
            pos: [*, max_hist_len, hidden_size]
        Returns:
            [*, hidden_size]
        """
        swapped = pos.permute(1, 0, 2)
        attended, _ = self.user_mh_self_attn(swapped, swapped, swapped)
        attended = attended.permute(1, 0, 2)

        # residual connection + layer norm, then attention pooling
        normed = self.user_layer_norm(attended + pos)
        return self.pos_self_attend(normed) ## pos_s, pos_s_nodes, neg_s, neg_s_nodes, pos_c, pos_c_nodes, neg_c, neg_c_nodes

class MaskedSelfAttend(nn.Module):
    """Scaled dot-product self-attention where a position cannot attend to itself."""

    def __init__(self, hidden_size, mask_len) -> None:
        super().__init__()
        # Boolean identity matrix: True on the diagonal marks the
        # (i, i) scores that get masked out in forward().
        diagonal = torch.eye(mask_len) == 1
        self.mask = nn.Parameter(diagonal, requires_grad=False)
        self.hidden_size = hidden_size

    def forward(self, q):
        # q (batch_size, seq_len, hidden_size)
        scores = torch.matmul(q, q.permute(0, 2, 1)) / math.sqrt(self.hidden_size)
        scores = scores.masked_fill(self.mask, -1e9)
        weights = torch.softmax(scores, dim=-1)
        return torch.matmul(weights, q)

class Multihead_bandti(nn.Module):
    """Scale each head-sized slice of ``s1`` by a weight predicted from ``refer``."""

    def __init__(self, cfg):
        super().__init__()

        self.head_num = cfg.head_num
        self.head_dim = cfg.hidden_size // cfg.head_num
        self.hidden_size = cfg.hidden_size

        # Maps a 2 * hidden_size vector to one scalar weight per head.
        self.policy_1 = nn.Sequential(
            nn.Linear(cfg.hidden_size * 2, cfg.hidden_size),
            nn.Tanh(),
            nn.Linear(cfg.hidden_size, self.head_num),
        )

    def forward(self, refer, s1, s2, s3, s4):
        # s2, s3 and s4 are accepted but not used.
        group_size = refer.size(1)
        head_weights = self.policy_1(refer).unsqueeze(-1)

        per_head = s1.view(-1, group_size, self.head_num, self.head_dim)
        weighted = head_weights * per_head
        return weighted.reshape(-1, group_size, self.hidden_size)

# `pnrec`

In [None]:
import math
import torch
import torch.nn as nn
import torch.nn.functional as F


class PNRec(nn.Module):
    """Score candidate items against a user's history of items."""

    def __init__(self, cfg):
        super().__init__()

        self.title_encoder = TitleEncoder(cfg)
        self.news_encoder = NodesEncoder(cfg)
        self.cfg = cfg
        # Predicts one scalar gate per candidate from [history summary, candidate].
        self.policy_pos_s = nn.Sequential(
            nn.Linear(cfg.hidden_size * 2, cfg.hidden_size),
            nn.Tanh(),
            nn.Linear(cfg.hidden_size, 1),
        )

        # news_embedding is not referenced by forward().
        self.news_embedding = nn.Embedding(cfg.tracks_num, cfg.hidden_size)

        self.title_self_attend = SelfAttend(cfg.hidden_size)

    def forward(self, data, test_mode=False):
        """Return one score per candidate.

        data[3] holds the candidate token ids and data[4] the history token
        ids; both are reshaped to rows of cfg.max_lyric_len tokens. In test
        mode there is one candidate per row, otherwise cfg.neg_count + 1.
        """
        cfg = self.cfg
        hidden = cfg.hidden_size
        n_candidates = 1 if test_mode else cfg.neg_count + 1

        candidates = self.title_encoder.encode_news(data[3].reshape(-1, cfg.max_lyric_len))
        candidates = candidates.reshape(-1, n_candidates, hidden)

        history = self.title_encoder.encode_news(data[4].reshape(-1, cfg.max_lyric_len))
        history = history.reshape(-1, cfg.pos_hist_length, hidden)

        # Two summaries of the history, each copied once per candidate.
        title_v = self.title_self_attend(history)
        title_v = title_v.repeat(1, n_candidates).view(-1, n_candidates, hidden)

        pos_s = self.news_encoder(history)
        pos_s = pos_s.repeat(1, n_candidates).view(-1, n_candidates, hidden)

        gate = self.policy_pos_s(torch.cat([title_v, candidates], dim=-1))
        gated_history = gate * pos_s

        ###return torch.sum(torch.cat([news_final, node_final], dim=-1) * target_all, dim=-1)
        return torch.sum(gated_history * candidates, dim=-1)

# `training`

In [None]:
import os
import argparse
import json
import pickle
from tqdm import tqdm
import time
import torch
import numpy as np
import torch.distributed as dist
import torch.multiprocessing as mp
import torch.nn.functional as F
import logging
from torch.utils.data import Dataset, TensorDataset, DataLoader 
import math
from random import sample

def run(train_dataset, valid_dataset, is_break=False, model_path=None):
    """
    Train the model, saving a checkpoint and evaluating after every epoch.

    :param train_dataset: dataset used for training
    :param valid_dataset: dataset used for the per-epoch validation
    :param is_break: True to resume an interrupted run from ``model_path``
    :param model_path: checkpoint written by ``save_checkpoint_by_epoch``;
        required when ``is_break`` is True
    :return: None
    """
    batch_size = 128
    epochs = 10
    lr = 0.001
    # NOTE(review): the original also set weight_decay = 1e-6 (and a port)
    # but never used them; Adam still runs without weight decay here.
    root = "data"
    result_path = 'result/train/'
    checkpoint_path = 'checkpoint/'

    if is_break and model_path is None:
        raise ValueError("model_path is required when is_break=True")

    mc = ModelConfig(root)

    # validate() writes into result_path; create it up front so a missing
    # folder cannot fail the run after a full training epoch.
    os.makedirs(result_path, exist_ok=True)

    # Build Dataloader
    train_data_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
    valid_data_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)

    # Build model.
    model = PNRec(mc)

    # Build optimizer.
    steps_one_epoch = len(train_data_loader)
    train_steps = epochs * steps_one_epoch
    print("Total train steps: ", train_steps)
    optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)

    # -1 means "no finished epoch": a fresh run starts at epoch 0. Without
    # this default a fresh run raised UnboundLocalError.
    epoch_break = -1

    # Load model if break
    if is_break:
        checkpoint = torch.load(model_path)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        epoch_break = checkpoint['epoch']
        print('Load model done!')

    # Training and validation
    for epoch in range(epoch_break + 1, epochs):
        train(epoch, model, train_data_loader, optimizer, steps_one_epoch)
        save_checkpoint_by_epoch(model, optimizer, epoch, checkpoint_path)
        validate(result_path, epoch, model, valid_data_loader)
        gather(result_path, 'tmp-{}.json'.format(epoch), epoch, validate=True, save=True)

def train(epoch, model, loader, optimizer, steps_one_epoch):
    """
    Run one training epoch.

    :param epoch: int, the epoch number (used only in the progress-bar label)
    :param model: model called on each batch
    :param loader: train data loader; element 1 of each batch is used as the
        cross-entropy target
    :param optimizer: optimizer stepped once per batch
    :param steps_one_epoch: the number of iterations in one epoch
    :return: None
    """
    model.train()
    model.zero_grad()

    progress = tqdm(enumerate(loader), total=len(loader), desc="epoch-{} train".format(epoch))
    for step, batch in progress:
        if step >= steps_one_epoch:
            break

        logits = model(batch).squeeze()
        F.cross_entropy(logits, batch[1]).backward()
        optimizer.step()

        # clear gradients before the next batch
        model.zero_grad()


def validate(result_path, epoch, model, valid_data_loader, fast_dev=False, top_k=20):
    """Score the validation set and dump the predictions to JSON.

    Writes ``<result_path>tmp-<epoch>.json`` with the keys ``imp`` (element 0
    of each batch), ``labels`` (element 1) and ``preds`` (model outputs in
    test mode).

    :param result_path: output folder prefix, joined by plain concatenation
    :param epoch: epoch number, used in the progress label and file name
    :param model: model called as ``model(data, test_mode=True)``
    :param valid_data_loader: loader yielding the validation batches
    :param fast_dev: unused
    :param top_k: unused
    """
    model.eval()

    # Setting the tqdm progress bar
    data_iter = tqdm(enumerate(valid_data_loader),
                     desc="epoch_test %d" % epoch,
                     total=len(valid_data_loader),
                     bar_format="{l_bar}{r_bar}")

    preds, truths, imp_ids = [], [], []
    with torch.no_grad():
        for _, data in data_iter:
            imp_ids += data[0].numpy().tolist()
            truths += data[1].numpy().tolist()

            pred = model(data, test_mode=True)
            if pred.dim() > 1:
                pred = pred.squeeze()

            if pred.dim() == 0:
                # A one-sample batch squeezes to a scalar. Keep it as a
                # float: the old fallback used int(), which truncated it.
                preds.append(pred.item())
            else:
                preds += pred.numpy().tolist()

    tmp_dict = {'imp': imp_ids, 'labels': truths, 'preds': preds}

    with open(result_path + 'tmp-{}.json'.format(epoch), 'w', encoding='utf-8') as f:
        json.dump(tmp_dict, f)

# ***RUN***

In [None]:
# Mount Google Drive at /content/drive (Colab only).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
%cd '/content/drive/MyDrive/Những năm tháng Đại học/[4] DS300.N11 - Hệ khuyến nghị/ReSys_/DRPN'

/content/drive/.shortcut-targets-by-id/127eMhUqzE2w-Fg1IDb9eLKYQHPsZSeG2/Những năm tháng Đại học/[4] DS300.N11 - Hệ khuyến nghị/ReSys_/DRPN


In [None]:
# Load the serialized train and dev sets; paths are relative to the working directory.
# NOTE: torch.load unpickles the files -- only load data you trust.
trainset = torch.load('data/train/train.pt')
devset = torch.load('data/dev/dev.pt')

In [None]:
# Start a fresh run: is_break keeps its default (False), so no checkpoint is loaded.
run(trainset, devset)

Total train steps:  4470


UnboundLocalError: ignored

In [None]:
# Checkpoint to resume from: the file save_checkpoint_by_epoch writes for epoch 3.
save_model_path = 'checkpoint/model.ep{}'.format(3)

In [None]:
# Resume: is_break=True makes run() restore model and optimizer state from save_model_path
# and continue with the epoch after the one stored in the checkpoint.
run(trainset, devset, True, save_model_path)

Total train steps:  4470
Load model done!


epoch-4 train: 100%|██████████| 447/447 [4:03:32<00:00, 32.69s/it]


Checkpoint Directory exists! 
Train Done!


epoch_test 4: 100%|| 106/106 [17:07<00:00,  9.69s/it]


Eval - group_auc: 0.9377
Eval - mean_mrr: 0.2337
Eval - ndcg@5: 0.9546
Eval - ndcg@10: 0.9492


epoch-5 train:   9%|▊         | 38/447 [20:27<3:39:02, 32.13s/it]

In [None]:
# run(trainset, devset)
# Re-evaluate the saved epoch-0 predictions (result/train/tmp-0.json): print the metrics
# and write the score-0.txt / result-0.txt files.
gather('result/train/', 'tmp-{}.json'.format(0), 0, validate=True, save=True)

# Validate a saved checkpoint after an interrupted run

In [None]:
import os
import argparse
import json
import pickle
from tqdm import tqdm
import time
import torch
import numpy as np
import torch.distributed as dist
import torch.multiprocessing as mp
import torch.nn.functional as F
import logging
from torch.utils.data import Dataset, TensorDataset, DataLoader 
import math
from random import sample

def run_validate_in_train(epoch, valid_dataset):
    """
    Validate the checkpoint stored at ``checkpoint/model-<epoch>``.

    Rebuilds the model, loads the saved weights, writes the predictions via
    ``validate`` and reports/exports the metrics via ``gather``.

    :param epoch: epoch number; selects the checkpoint and names the outputs
    :param valid_dataset: dataset to evaluate on
    :return: None
    """
    batch_size = 128
    root = "data"
    result_path = 'result/train/'
    save_model_path = 'checkpoint/model-{}'.format(epoch)
    print(save_model_path)

    # validate() writes into result_path; make sure it exists.
    os.makedirs(result_path, exist_ok=True)

    # Build Dataloader
    valid_data_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)

    # Load model to validate (the config is built once; it loads the embeddings).
    model = PNRec(ModelConfig(root))
    pretrained_model = torch.load(save_model_path)
    # save_checkpoint_by_epoch wraps the weights in a dict under
    # 'model_state_dict'. Passing that wrapper to load_state_dict with
    # strict=False would silently load nothing, so unwrap it first.
    if isinstance(pretrained_model, dict) and 'model_state_dict' in pretrained_model:
        pretrained_model = pretrained_model['model_state_dict']
    model.load_state_dict(pretrained_model, strict=False)

    validate(result_path, epoch, model, valid_data_loader)
    gather(result_path, 'tmp-{}.json'.format(epoch), epoch, validate=True, save=True)


def validate(result_path, epoch, model, valid_data_loader, fast_dev=False, top_k=20):
    """Score the validation set and dump the predictions to JSON.

    Writes ``<result_path>tmp-<epoch>.json`` with the keys ``imp`` (element 0
    of each batch), ``labels`` (element 1) and ``preds`` (model outputs in
    test mode).

    :param result_path: output folder prefix, joined by plain concatenation
    :param epoch: epoch number, used in the progress label and file name
    :param model: model called as ``model(data, test_mode=True)``
    :param valid_data_loader: loader yielding the validation batches
    :param fast_dev: unused
    :param top_k: unused
    """
    model.eval()

    # Setting the tqdm progress bar
    data_iter = tqdm(enumerate(valid_data_loader),
                     desc="epoch_test %d" % epoch,
                     total=len(valid_data_loader),
                     bar_format="{l_bar}{r_bar}")

    preds, truths, imp_ids = [], [], []
    with torch.no_grad():
        for _, data in data_iter:
            imp_ids += data[0].numpy().tolist()
            truths += data[1].numpy().tolist()

            pred = model(data, test_mode=True)
            if pred.dim() > 1:
                pred = pred.squeeze()

            if pred.dim() == 0:
                # A one-sample batch squeezes to a scalar. Keep it as a
                # float: the old fallback used int(), which truncated it.
                preds.append(pred.item())
            else:
                preds += pred.numpy().tolist()

    tmp_dict = {'imp': imp_ids, 'labels': truths, 'preds': preds}

    with open(result_path + 'tmp-{}.json'.format(epoch), 'w', encoding='utf-8') as f:
        json.dump(tmp_dict, f)

In [None]:
# Mount Google Drive at /content/drive (Colab only).
from google.colab import drive
drive.mount('/content/drive')

%cd '/content/drive/MyDrive/Những năm tháng Đại học/[4] DS300.N11 - Hệ khuyến nghị/ReSys_/DRPN'

In [None]:
# Set the epoch whose checkpoint should be validated here.
epoch = 0
# NOTE(review): run_validate_in_train builds this same path itself; this variable is not passed to it.
save_model_path = 'checkpoint/model-{}'.format(epoch)

In [None]:
# Load the dev set and validate the checkpoint of the selected epoch.
devset = torch.load('data/dev/dev.pt')
# run_validate_in_train takes (epoch, valid_dataset); the dataset argument was missing.
run_validate_in_train(epoch, devset)