In [1]:
%matplotlib inline
%matplotlib inline
import torch
import torch.nn.functional as F
from torch.optim.optimizer import Optimizer
import math
import random
import numpy as np
import pandas as pd
from torch.utils.data import Dataset
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
import tqdm
from matplotlib import pyplot as plt
from copy import deepcopy
import os
import datetime
import pickle
import copy
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

In [2]:
# Seed every RNG this notebook draws from (Python `random` for negative sampling,
# NumPy for batch shuffling, PyTorch for weight init and dropout) so that a fresh
# Restart & Run All repeats the same run.
seed = 1
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
# "cuda:0" is the first GPU left visible by CUDA_VISIBLE_DEVICES (set in the import
# cell). Fall back to the CPU when no GPU is available so the notebook still runs
# instead of failing at the first `.to(device)` call.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [3]:
# Notebook-wide settings read as globals by the cells below.
# Width of the input window; the grid-search cell rebinds this name in its outer loop.
session_length = 19
# Number of rows per batch requested from dataset.get_batch inside train().
batch_size = 512
# train() prints the running mean batch loss every `plot_num` batches.
plot_num = 500
# NOTE: reassigned to 50 at the top of the training cell, so this value is not the one train() sees.
epochs = 30

In [4]:
# this part is different from POEM
class SessionData(object):
    """A single session: its running index, its raw id and its ordered item indexes."""

    def __init__(self,session_index,session_id,items_indexes):
        self.session_index = session_index
        self.session_id = session_id
        self.item_list = items_indexes

    def generate_seq_datas(self,session_length,padding_idx=0,predict_length=1):
        """Expand the session into fixed-width (history + target) training rows.

        For every position i with a successor, one row is produced holding the
        last `session_length` items up to and including item i (left-padded
        with `padding_idx` when the history is shorter), followed by item i+1
        as the target. Each row therefore has `session_length + 1` entries.

        A session holding exactly one item gets that item duplicated (the list
        is extended in place) so that it still yields one row. An empty session
        yields no rows.

        Args:
            session_length: number of history slots in every row.
            padding_idx: value used to left-pad short histories.
            predict_length: only 1 is implemented; any other value yields no rows.

        Returns:
            (session_index, rows) where rows is a list of lists of ints.
        """
        sessions = []
        if len(self.item_list) == 1:
            # The original subscripted the bound method (`append[...]`), which raised
            # TypeError instead of duplicating the only item.
            self.item_list.append(self.item_list[0])
        if predict_length==1:
            for i in range(len(self.item_list)-1):
                if i < session_length:
                    # History shorter than the window: pad on the left.
                    train_data = [padding_idx] * (session_length-i-1)
                    train_data.extend(self.item_list[:i+1])
                else:
                    # History at least as long as the window: keep the most recent items.
                    train_data = list(self.item_list[i+1-session_length:i+1])
                train_data.append(self.item_list[i+1])
                sessions.append(train_data)
        # predict_length != 1 is not implemented yet; no rows are produced for it.
        return self.session_index,sessions

    def __str__(self):
        info = " session index = {}\n session id = {} \n the length of item list= {} \n the fisrt item index in item list is {}".format(self.session_index,self.session_id,len(self.item_list),self.item_list[0])
        return info
class SessionDataSet(object):
    """Item/session vocabularies plus the train and test sessions read from two pickles.

    Raw item ids are mapped to consecutive integer indexes starting at 1 (the
    padding entry "<pad>" is registered under `padding_idx`). Sessions are
    numbered consecutively across the train file and then the test file.
    """

    def __init__(self,train_file,test_file,padding_idx=0):
        super(SessionDataSet,self).__init__()
        self.index_count = 0
        self.session_count = 0
        self.train_count = 0
        self.test_count = 0
        self.max_session_length = 0

        self.padding_idx = padding_idx
        self.item2index = dict()
        self.index2item = dict()
        self.session2index = dict()
        self.index2session = dict()
        # item index -> number of occurrences seen while loading
        self.item_total_num = dict()
        self.item2index["<pad>"] = padding_idx
        self.index2item[padding_idx] = "<pad>"
        self.train_data = self.load_data(train_file)
        print("training set is loaded, # index: ",len(self.item2index.keys()))
        self.train_count = self.session_count
        print("train_session_num",self.train_count)
        self.test_data = self.load_data(test_file)
        print("testing set is loaded, # index: ",len(self.index2item.keys()))
        print("# item",self.index_count)
        self.test_count = self.session_count-self.train_count
        print("# test session:",self.test_count)
        # Caches filled lazily by get_all_training_data / get_all_testing_data.
        self.all_training_data = []
        self.all_testing_data = []
        self.all_meta_training_data = []
        self.all_meta_testing_data = []
        self.train_session_length = 0
        self.test_session_length = 0

    def _item_index(self,item_id):
        """Return the index of `item_id`, registering it on first sight, and count the occurrence."""
        index = self.item2index.get(item_id)
        if index is None:
            self.index_count+=1
            index = self.index_count
            self.item2index[item_id] = index
            self.index2item[index] = item_id
            self.item_total_num[index] = 0
        self.item_total_num[index] = self.item_total_num.get(index,0) + 1
        return index

    def _register_session(self,session_id,item_indexes):
        """Assign the next session index to `session_id` and wrap its items in a SessionData."""
        self.session_count+=1
        self.session2index[session_id] = self.session_count
        self.index2session[self.session_count] = session_id
        if len(item_indexes)>self.max_session_length:
            self.max_session_length = len(item_indexes)
        return SessionData(self.session_count,session_id,item_indexes)

    def load_data(self,file_path):
        """Read one pickle and return its sessions as a list of SessionData.

        The pickle is indexed as data[0] (session ids), data[1] (item
        sequences) and data[2] (labels). Only the first row of each run of
        identical consecutive session ids is used; that row's items followed by
        its label form the session's item list.

        Unlike the previous version, the final session is also recorded in
        `index2session` and counted for `max_session_length`, and the file
        handle is closed. An input without rows returns an empty list.
        """
        # NOTE: pickle.load can execute arbitrary code; only load files you trust.
        with open(file_path, 'rb') as handle:
            data = pickle.load(handle)
        session_ids = data[0]
        session_data = data[1]
        session_label = data[2]

        result_data = []
        lenth = len(session_ids)
        print("# session",lenth)

        started = False
        last_session_id = None
        session_item_indexes = []
        new_session = None

        for session_id,items,target_item in zip(session_ids,session_data,session_label):
            if started and session_id==last_session_id:
                # Later rows of the same session are skipped.
                continue
            if started:
                new_session = self._register_session(last_session_id,session_item_indexes)
                result_data.append(new_session)
            started = True
            last_session_id = session_id
            session_item_indexes = [self._item_index(item_id) for item_id in items]
            session_item_indexes.append(self._item_index(target_item))

        if not started:
            return result_data

        new_session = self._register_session(last_session_id,session_item_indexes)
        result_data.append(new_session)
        print("loaded")
        print(new_session)

        return result_data

    def get_batch(self,batch_size,session_length=10,predict_length=1,all_data=None,phase="train",neg_num=1,sampling_mathod="random"):
        """Yield consecutive batches of at most `batch_size` rows.

        With phase == "train" the rows are shuffled with NumPy's global RNG and
        each batch is [sessions, targets, negatives]; any other phase keeps the
        row order and yields [sessions, targets]. `all_data` defaults to the
        cached train/test matrix for `session_length`. Nothing is yielded when
        there are no rows.

        Raises:
            ValueError: if `batch_size` is not positive.
        """
        if batch_size <= 0:
            raise ValueError("batch_size must be a positive integer")

        is_train = phase == "train"
        if all_data is None:
            if is_train:
                all_data = self.get_all_training_data(session_length)
            else:
                all_data = self.get_all_testing_data(session_length)
        if is_train:
            all_data = all_data[np.random.permutation(all_data.shape[0])]

        for start in range(0,all_data.shape[0],batch_size):
            batch = all_data[start:start+batch_size]
            if is_train:
                yield self.divid_and_extend_negative_samples(batch,session_length=session_length,predict_length=predict_length,neg_num=neg_num,method=sampling_mathod)
            else:
                yield [batch[:,:session_length],batch[:,session_length:]]

    def divid_and_extend_negative_samples(self,batch_data,session_length,predict_length=1,neg_num=1,method="random"):
        """Split a batch into sessions and targets and attach sampled negatives.

        With method == "random", `neg_num` distinct item indexes from
        [1, index_count] that do not occur in the row are drawn per row with
        Python's `random` module.

        Returns:
            [session_items, target_item, neg_items] as NumPy arrays.

        Raises:
            ValueError: if a row leaves fewer than `neg_num` candidate items
                (the previous version looped forever in that case).
        """
        neg_items = []
        if method == "random":
            for session_and_target in batch_data:
                taken = set(np.asarray(session_and_target).tolist())
                free_items = self.index_count - sum(1 for value in taken if 1 <= value <= self.index_count)
                if neg_num > free_items:
                    raise ValueError("cannot draw %d negative items: only %d candidates are left" % (neg_num, free_items))
                neg_item = []
                for _ in range(neg_num):
                    rand_item = random.randint(1,self.index_count)
                    while rand_item in taken:
                        rand_item = random.randint(1,self.index_count)
                    neg_item.append(rand_item)
                    taken.add(rand_item)
                neg_items.append(neg_item)
        else:
            # NOTE(review): this branch is unfinished. It ranks the batch's items by
            # frequency and draws one per row, but discards the draw, so `neg_items`
            # stays empty. Kept as-is because the intended sampling scheme is unknown.
            total_list = set()
            for session in batch_data:
                for i in session:
                    total_list.add(i)
            total_list = list(total_list)
            total_list =  sorted(total_list, key=lambda item: self.item_total_num[item],reverse=True)
            for i,session in enumerate(batch_data):
                np.random.choice(total_list)
        session_items = batch_data[:,:session_length]
        target_item = batch_data[:,session_length:]
        neg_items = np.array(neg_items)
        return [session_items,target_item,neg_items]

    def _build_all_data(self,session_list,session_length):
        """Stack the rows generated by every session in `session_list` into one array."""
        all_sessions = []
        for session_data in session_list:
            # The first session_length columns are the session, the last column is the target item.
            session_index,sessions = session_data.generate_seq_datas(session_length,padding_idx=self.padding_idx)
            if sessions is not None:
                all_sessions.extend(sessions)
        return np.array(all_sessions)

    def get_all_training_data(self,session_length,predict_length=1):
        """Return (and cache) the training matrix for the given `session_length`."""
        if len(self.all_training_data)!=0 and self.train_session_length==session_length:
            return self.all_training_data
        print("Start building the all training dataset")
        all_sessions = self._build_all_data(self.train_data,session_length)
        self.all_training_data = all_sessions
        self.train_session_length=session_length
        print("The total number of training samples is：",all_sessions.shape)
        return all_sessions

    def get_all_testing_data(self,session_length,predict_length=1):
        """Return (and cache) the testing matrix for the given `session_length`."""
        if len(self.all_testing_data)!=0 and self.test_session_length==session_length:
            return self.all_testing_data
        all_sessions = self._build_all_data(self.test_data,session_length)
        self.all_testing_data = all_sessions
        self.test_session_length=session_length
        print("The total number of testing samples is：",all_sessions.shape)
        return all_sessions

    def __getitem__(self,idx):
        # Placeholder: not implemented, returns None.
        pass

    def __len__(self):
        # Placeholder: not implemented, returns None.
        pass

In [5]:
# Build the dataset from one train/test pickle pair; un-comment a different line to switch corpus.
# NOTE(review): every train path lacks the "srgnn" directory that the matching test path has —
# confirm both files come from the same preprocessing run.
# dataset = SessionDataSet(train_file="../../data/retailrocket/train.txt",test_file="../../data/srgnn/retailrocket/test.txt")
# dataset = SessionDataSet(train_file="../../data/diginetica/train.txt",test_file="../../data/srgnn/diginetica/test.txt")
dataset = SessionDataSet(train_file="../../data/yoochoose1_4/train.txt",test_file="../../data/srgnn/yoochoose1_4/test.txt")
# dataset = SessionDataSet(train_file="../../data/yoochoose1_64/train.txt",test_file="../../data/srgnn/yoochoose1_64/test.txt")

# session 749947
loaded
 session index = 294620
 session id = 1131204 
 the length of item list= 2 
 the fisrt item index in item list is 23118
training set is loaded, # index:  48990
train_session_num 294620
# session 28445
loaded
 session index = 306825
 session id = 1582915 
 the length of item list= 6 
 the fisrt item index in item list is 4767
testing set is loaded, # index:  48990
# item 48989
# test session: 12205


In [6]:
def bpr_loss(r):
    """Summed BPR loss, i.e. sum(-log(sigmoid(r))) over every element of `r`.

    Uses `logsigmoid` so that strongly negative scores give a large finite
    loss instead of `inf` (sigmoid underflows to 0 and log(0) is -inf).
    """
    return -F.logsigmoid(r).sum()
def get_hit_num(pred,y_truth):
    """Count how often a ground-truth item appears in its row of predictions.

    pred: numpy array (batch_size, k) of predicted item indexes.
    y_truth: sequence (batch_size, groundtruth_num) of true item indexes.
    Every (row, truth) pair adds the number of positions in that row equal to
    the truth value.
    """
    total_hits = 0
    for row, truths in enumerate(y_truth):
        predicted = pred[row]
        for truth in truths:
            total_hits += np.sum(predicted == truth)
    return total_hits

def get_rr(pred,y_truth):
    """Sum of reciprocal ranks: each hit at 0-based position p in its row adds 1/(p+1)."""
    rr_total = 0.
    for row, truths in enumerate(y_truth):
        for truth in truths:
            # positions in this row where the prediction equals the truth value
            positions = np.nonzero(pred[row] == truth)[0]
            for position in positions:
                rr_total += 1/(position+1)
    return rr_total

def get_dcg(pred,y_truth):
    """Per-row DCG of `pred`, using only the first ground-truth item of each row.

    A position scores 1 when it equals y_truth[row][0] and 0 otherwise; the
    gain 2**score - 1 is divided by log2(position + 2) and summed per row.
    Returns an array with one value per row.
    """
    row_count = len(y_truth)
    relevance = np.zeros_like(pred)
    for row in range(row_count):
        # mark every position of this row that matches the row's first truth item
        relevance[row][pred[row] == y_truth[row][0]] = 1
    position_discount = np.log2(np.arange(pred.shape[1]) + 2)
    discounts = np.tile(position_discount,(row_count,1))
    gain = 2 ** relevance - 1
    return np.sum(gain / discounts,axis=1)

def get_ndcg(pred,y_truth):
    """Sum over rows of DCG(pred) / DCG(ideal ranking).

    The ideal ranking of a row is its ground truth followed by -1 fillers
    (one fewer filler than `pred` has columns).
    """
    actual = get_dcg(pred, y_truth)
    filler = np.zeros_like(pred)[:,:-1]-1
    ideal_ranking = np.concatenate((y_truth,filler),axis=1)
    ideal = get_dcg(ideal_ranking, y_truth)
    return np.sum(actual / ideal)

def dcg_score(y_pre, y_true, k):
    """DCG@k of one ranked list: a position among the first k scores 1 when its item is in `y_true`."""
    y_pre_score = np.zeros(k)
    for rank, pre_tag in enumerate(y_pre[:k]):
        if pre_tag in y_true:
            y_pre_score[rank] = 1
    gain = 2 ** y_pre_score - 1
    discounts = np.log2(np.arange(k) + 2)
    return np.sum(gain / discounts)


def ndcg_score(y_pre, y_true, k=5):
    """NDCG@k of one ranked list: DCG of `y_pre` divided by the DCG of `y_true` ranked against itself.

    Returns 0.0 when the ideal DCG is zero (empty `y_true` or k == 0); the
    previous version divided 0 by 0 and returned nan in that case.
    """
    idcg = dcg_score(y_true, y_true, k)
    if idcg == 0:
        return 0.0
    dcg = dcg_score(y_pre, y_true, k)
    return dcg / idcg

# Shared training criterion: train() applies it to the model's raw item scores and the
# target item indexes shifted down by one (the score matrix has no column for padding).
loss_function = torch.nn.CrossEntropyLoss()

In [7]:
class NARM(torch.nn.Module):
    """GRU session encoder with an attention read-out that scores every non-padding item.

    The session is embedded and run through a GRU. The last hidden state is the
    "global" summary; an attention-weighted sum of all GRU outputs is the
    "local" summary. Their concatenation is matched against a bilinear
    projection of the item embeddings (row 0, the padding row, is excluded), so
    score column j belongs to item index j + 1.

    `batch_size` is only stored; `posNum` and `activate` are accepted for
    signature compatibility and are not used.
    """

    def __init__(self, itemNum, hidden_size, embedding_dim, batch_size, layerNum = 1,padding_idx=0,posNum=11, dropout=0.5,embedding_dropout=0.25,activate="tanh"):
        super(NARM, self).__init__()
        self.itemNum = itemNum
        self.hidden_size = hidden_size
        self.batch_size = batch_size
        self.layerNum = layerNum
        self.embedding_dim = embedding_dim
        self.item_embedding = torch.nn.Embedding(itemNum, self.embedding_dim, padding_idx=padding_idx)
        torch.nn.init.constant_(self.item_embedding.weight[0],0)
        self.embedding_dropout = torch.nn.Dropout(embedding_dropout)
        self.gru = torch.nn.GRU(self.embedding_dim, self.hidden_size, self.layerNum)
        self.a_1 = torch.nn.Linear(self.hidden_size, self.hidden_size, bias=False)
        self.a_2 = torch.nn.Linear(self.hidden_size, self.hidden_size, bias=False)
        self.v_t = torch.nn.Linear(self.hidden_size, 1, bias=False)
        self.ct_dropout = torch.nn.Dropout(dropout)
        self.b = torch.nn.Linear(self.embedding_dim, 2 * self.hidden_size, bias=False)
        # Kept for backward compatibility only. Internal tensors are created on the
        # device that actually holds the parameters (see _param_device), so the model
        # also works when it lives on the CPU of a CUDA-capable machine.
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    def _param_device(self):
        """Device currently holding the model parameters."""
        return self.item_embedding.weight.device

    def _score_items(self, seq, lengths, use_dropout):
        """Shared body of forward / predict_top_k; returns item scores of shape (batch, itemNum - 1).

        `use_dropout` selects whether the two dropout modules are applied at
        all (they still follow the module's train/eval mode when applied).
        """
        seq = seq.t()  # (batch, time) -> (time, batch), the layout the GRU expects
        hidden = self.init_hidden(seq.size(1))
        embs = self.item_embedding(seq)
        if use_dropout:
            embs = self.embedding_dropout(embs)
        # pack_padded_sequence needs the lengths on the CPU.
        lengths = torch.as_tensor(lengths).cpu()
        embs = pack_padded_sequence(embs, lengths)
        gru_out, hidden = self.gru(embs, hidden)
        # total_length keeps the time axis as wide as `seq`, so the padding mask below
        # still lines up when every length in the batch is shorter than the padded width.
        gru_out, lengths = pad_packed_sequence(gru_out, total_length=seq.size(0))

        # last layer's final hidden state
        ht = hidden[-1]
        gru_out = gru_out.permute(1, 0, 2)

        c_global = ht
        q1 = self.a_1(gru_out.contiguous().view(-1, self.hidden_size)).view(gru_out.size())
        q2 = self.a_2(ht)

        # 1.0 at real items, 0.0 at padding (index 0)
        mask = (seq.permute(1, 0) > 0).to(dtype=q1.dtype)
        q2_expand = q2.unsqueeze(1).expand_as(q1)
        q2_masked = mask.unsqueeze(2).expand_as(q1) * q2_expand

        alpha = self.v_t(torch.sigmoid(q1 + q2_masked).view(-1, self.hidden_size)).view(mask.size())
        c_local = torch.sum(alpha.unsqueeze(2).expand_as(gru_out) * gru_out, 1)

        c_t = torch.cat([c_local, c_global], 1)
        if use_dropout:
            c_t = self.ct_dropout(c_t)

        # skip row 0 (padding): column j of the scores is item index j + 1
        item_embs = self.item_embedding.weight[1:]
        return torch.matmul(c_t, self.b(item_embs).permute(1, 0))

    def forward(self, seq, lengths):
        """Score all items for each session.

        Args:
            seq: integer tensor (batch, time) of item indexes, 0 meaning padding.
            lengths: per-session lengths handed to pack_padded_sequence
                (sorted in decreasing order, as that function requires by default).

        Returns:
            Tensor (batch, itemNum - 1) of unnormalised scores.
        """
        return self._score_items(seq, lengths, use_dropout=True)

    def init_hidden(self, batch_size):
        """Zero initial GRU state of shape (layerNum, batch_size, hidden_size) on the model's device."""
        return torch.zeros((self.layerNum, batch_size, self.hidden_size), requires_grad=True, device=self._param_device())

    def predict_top_k(self,seq, lengths, k=20):
        """Return the column indexes of the k best-scored items per session (no dropout applied)."""
        scores = self._score_items(seq, lengths, use_dropout=False)
        result = torch.topk(scores,k,dim=-1)[1]
        return result

# CIKM S >= 2  
    HR@20=0.63722  MRR@20=0.29634, session_length=19, hidden_size=100, lr=0.0010, embedding_dim=100, embedding_dropout=0.25, dropout=0.50
        HR@1=0.18599  MRR@1=0.18599  NDCG@1=0.18599
        HR@5=0.42754  MRR@5=0.27494  NDCG@5=0.31293
        HR@10=0.53296  MRR@10=0.28907  NDCG@10=0.34708
        HR@20=0.63722  MRR@20=0.29634  NDCG@20=0.37349
# RR S >= 2   
    HR@20=0.61160  MRR@20=0.34716, session_length=19, hidden_size=100, lr=0.0020, embedding_dim=100, embedding_dropout=0.25, dropout=0.50
        HR@1=0.25558  MRR@1=0.25558  NDCG@1=0.25558
        HR@5=0.46015  MRR@5=0.33150  NDCG@5=0.36357
        HR@10=0.53946  MRR@10=0.34214  NDCG@10=0.38927
        HR@20=0.61160  MRR@20=0.34716  NDCG@20=0.40753
# RSC64 S >= 2   
    HR@20=0.70543  MRR@20=0.30329, session_length=19, hidden_size=100, lr=0.0010, embedding_dim=100, embedding_dropout=0.25, dropout=0.50
        HR@1=0.17045  MRR@1=0.17045  NDCG@1=0.17045
        HR@5=0.46873  MRR@5=0.27826  NDCG@5=0.32558
        HR@10=0.59816  MRR@10=0.29570  NDCG@10=0.36761
        HR@20=0.70543  MRR@20=0.30329  NDCG@20=0.39491
# RSC4 S >= 2   
    HR@20=0.71781  MRR@20=0.30821, hyper-parameters: current model hyper-parameters: session_length=19, hidden_size=100, lr=0.0010, embedding_dim=100, embedding_dropout=0.25, dropout=0.50
        HR@1=0.17330  MRR@1=0.17330  NDCG@1=0.17330
        HR@5=0.47427  MRR@5=0.28242  NDCG@5=0.33010
        HR@10=0.60947  MRR@10=0.30059  NDCG@10=0.37394
        HR@20=0.71781  MRR@20=0.30821  NDCG@20=0.40146

In [8]:
epochs=50
def train(args):
    """Train one NARM model and return the best checkpoint seen on the test split.

    Reads the notebook globals `dataset`, `batch_size`, `device`, `epochs`,
    `plot_num` and `loss_function`.

    Args:
        args: dict of hyper-parameters. Recognised keys and their defaults:
            hidden_size (100), embedding_dim (100), dropout (0.5),
            embedding_dropout (0.5), lr (1e-3), session_length (20) and
            patience (falls back to the global `patience` when absent).

    Returns:
        (best_model, best_hr, best_mrr): a deep copy of the model at the epoch
        with the highest HR@20 + MRR@20, and those two metric values.
        best_model is None when no epoch scored above 0.
    """
    # Each option falls back to its own default. The previous version guarded
    # embedding_dim with the "hidden_size" key and embedding_dropout with "dropout".
    hidden_size = args.get("hidden_size", 100)
    embedding_dim = args.get("embedding_dim", 100)
    dropout = args.get("dropout", 0.5)
    embedding_dropout = args.get("embedding_dropout", 0.5)
    lr = args.get("lr", 1e-3)
    session_length = args.get("session_length", 20)
    patience_limit = args["patience"] if "patience" in args else patience
    model = NARM(hidden_size=hidden_size, embedding_dim=embedding_dim,itemNum=dataset.index_count+1, batch_size=batch_size,posNum=session_length+1, padding_idx=0, dropout=dropout,embedding_dropout=embedding_dropout).to(device)
    opti = torch.optim.Adam(model.parameters(),lr=lr)
    best_model_hr = 0.0
    best_model_mrr = 0.0
    best_r1m = 0.0
    best_model = None
    # Must exist before the first comparison: an epoch that does not improve on 0.0
    # previously raised UnboundLocalError.
    no_improvement_epoch = 0
    predict_nums = [1,5,10,20]
    max_k = max(predict_nums)
    for epoch in range(epochs):
        batch_losses = []
        for i,batch_data in enumerate(dataset.get_batch(batch_size,session_length,phase="train")):
            sessions = torch.tensor(batch_data[0]).to(device)
            # reshape(-1) keeps a 1-d target even for a batch of one row (squeeze() made it 0-d);
            # -1 shifts item indexes to score columns (there is no padding column).
            target_items = torch.tensor(batch_data[1]).reshape(-1).to(device)-1
            lengths = torch.full((sessions.shape[0],), session_length, dtype=torch.long)
            result_pos = model(sessions,lengths)
            loss = loss_function(result_pos,target_items)
            opti.zero_grad()
            loss.backward()
            opti.step()
            batch_losses.append(loss.item())
            if i % plot_num == 0:
                stamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
                print("[%s] [%d/%d] %d mean_batch_loss : %0.6f" % (stamp, epoch+1, epochs, i, np.mean(batch_losses)))
                batch_losses = []
        with torch.no_grad():
            start_test_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            print("Start predicting",start_test_time)
            rrs = [0 for _ in range(len(predict_nums))]
            hit_nums = [0 for _ in range(len(predict_nums))]
            ndcgs = [0 for _ in range(len(predict_nums))]
            for i,batch_data in enumerate(dataset.get_batch(batch_size,session_length,phase="test")):
                sessions = torch.tensor(batch_data[0]).to(device)
                target_items = np.array(batch_data[1])-1
                lengths = torch.full((sessions.shape[0],), session_length, dtype=torch.long)
                y_pred = model.predict_top_k(sessions,lengths,max_k).cpu().numpy()

                for j,predict_num in enumerate(predict_nums):
                    hit_nums[j]+=get_hit_num(y_pred[:,:predict_num],target_items)
                    rrs[j]+=get_rr(y_pred[:,:predict_num],target_items)
                    ndcgs[j]+=get_ndcg(y_pred[:,:predict_num],target_items)

            end_test_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

            test_total = len(dataset.all_testing_data)
            hrs = [hit_num/test_total for hit_num in hit_nums]
            mrrs = [rr/test_total for rr in rrs]
            mndcgs = [ndcg/test_total for ndcg in ndcgs]
            # Model selection uses HR + MRR at the largest cut-off.
            if hrs[-1] + mrrs[-1] > best_r1m:
                print("change best")
                best_model = deepcopy(model)
                best_model_hr = hrs[-1]
                best_model_mrr = mrrs[-1]
                best_r1m = hrs[-1] + mrrs[-1]
                no_improvement_epoch = 0
            else:
                no_improvement_epoch +=1
            print("testing finish [%s] "%end_test_time)
            for k,predict_num in enumerate(predict_nums):
                print("\tHR@%d=%.5f  MRR@%d=%.5f  NDCG@%d=%.5f"%(predict_num,hrs[k],predict_num,mrrs[k],predict_num,mndcgs[k]))
        if no_improvement_epoch>=patience_limit:
            print("early stopping")
            break
    return best_model,best_model_hr,best_model_mrr

In [9]:
# Grid search: train one model per hyper-parameter combination and keep the one with
# the highest HR@20 + MRR@20.
hidden_sizes = [100]
embedding_dims = [100]
dropouts = [0.5]
embedding_dropouts = [0.25]
lrs = [3e-3,2e-3,5e-3]
session_lengths = [19]
patience = 5
best_params = ""
best_all_model = None
best_all_hr = 0.0
best_all_mrr = 0.0
best_all_r1m = 0.0
for session_length in session_lengths:
    for hidden_size in hidden_sizes:
        for embedding_dim in embedding_dims:
            for dropout in dropouts:
                for embedding_dropout in embedding_dropouts:
                    for lr in lrs:
                        # One description string, printed before and after the run.
                        params_description = "current model hyper-parameters: session_length=%d, hidden_size=%d, lr=%.4f, embedding_dim=%d, embedding_dropout=%.2f, dropout=%.2f\n" % (session_length,hidden_size,lr,embedding_dim,embedding_dropout,dropout)
                        print(params_description)
                        args = {
                            "session_length": session_length,
                            "hidden_size": hidden_size,
                            "embedding_dim": embedding_dim,
                            "dropout": dropout,
                            "embedding_dropout": embedding_dropout,
                            "patience": patience,
                            "lr": lr,
                        }
                        best_model,best_model_hr,best_model_mrr = train(args)
                        if best_model_hr + best_model_mrr > best_all_r1m:
                            print("best model change")
                            best_all_r1m = best_model_hr + best_model_mrr
                            best_all_hr = best_model_hr
                            best_all_mrr = best_model_mrr
                            best_all_model = best_model
                            best_params = params_description
                        # drop the reference so a non-winning model copy can be freed
                        best_model = None
                        print(params_description)
                        print("current model HR@20=%.5f  MRR@20=%.5f"%(best_model_hr,best_model_mrr))
                        # Report the overall best HR here; the previous version printed the
                        # current run's HR next to the overall best MRR.
                        print("the best result so far. HR@20=%.5f  MRR@20=%.5f, %s \n"%(best_all_hr,best_all_mrr,best_params))
print("The best result HR@20=%.5f  MRR@20=%.5f, hyper-parameters: %s. "%(best_all_hr,best_all_mrr,best_params))
print("over.")

current model hyper-parameters: session_length=19, hidden_size=100, lr=0.0030, embedding_dim=100, embedding_dropout=0.25, dropout=0.50

Start building the all training dataset
The total number of training samples is： (749947, 20)
[2019-12-25 20:33:42] [1/50] 0 mean_batch_loss : 19.477875
[2019-12-25 20:33:52] [1/50] 500 mean_batch_loss : 9.517089
[2019-12-25 20:34:02] [1/50] 1000 mean_batch_loss : 8.867698
Start predicting 2019-12-25 20:34:11
The total number of testing samples is： (28445, 20)
change best
testing finish [2019-12-25 20:34:13] 
	HR@1=0.26388  MRR@1=0.26388  NDCG@1=0.26388
	HR@5=0.32491  MRR@5=0.28867  NDCG@5=0.29782
	HR@10=0.34133  MRR@10=0.29088  NDCG@10=0.30315
	HR@20=0.35521  MRR@20=0.29186  NDCG@20=0.30668
[2019-12-25 20:34:13] [2/50] 0 mean_batch_loss : 8.189659
[2019-12-25 20:34:23] [2/50] 500 mean_batch_loss : 7.957202
[2019-12-25 20:34:33] [2/50] 1000 mean_batch_loss : 7.712500
Start predicting 2019-12-25 20:34:42
change best
testing finish [2019-12-25 20:34:44] 

change best
testing finish [2019-12-25 20:42:56] 
	HR@1=0.25323  MRR@1=0.25323  NDCG@1=0.25323
	HR@5=0.45867  MRR@5=0.32990  NDCG@5=0.36202
	HR@10=0.53465  MRR@10=0.34009  NDCG@10=0.38664
	HR@20=0.60826  MRR@20=0.34525  NDCG@20=0.40532
[2019-12-25 20:42:56] [19/50] 0 mean_batch_loss : 4.145279
[2019-12-25 20:43:05] [19/50] 500 mean_batch_loss : 4.036162
[2019-12-25 20:43:15] [19/50] 1000 mean_batch_loss : 4.107106
Start predicting 2019-12-25 20:43:24
testing finish [2019-12-25 20:43:26] 
	HR@1=0.25414  MRR@1=0.25414  NDCG@1=0.25414
	HR@5=0.45678  MRR@5=0.32968  NDCG@5=0.36137
	HR@10=0.53517  MRR@10=0.34021  NDCG@10=0.38679
	HR@20=0.60679  MRR@20=0.34521  NDCG@20=0.40494
[2019-12-25 20:43:27] [20/50] 0 mean_batch_loss : 3.774957
[2019-12-25 20:43:36] [20/50] 500 mean_batch_loss : 4.007042
[2019-12-25 20:43:46] [20/50] 1000 mean_batch_loss : 4.084434
Start predicting 2019-12-25 20:43:55
testing finish [2019-12-25 20:43:57] 
	HR@1=0.25298  MRR@1=0.25298  NDCG@1=0.25298
	HR@5=0.45727  MRR@

[2019-12-25 20:55:05] [10/50] 500 mean_batch_loss : 4.855823
[2019-12-25 20:55:21] [10/50] 1000 mean_batch_loss : 4.874726
Start predicting 2019-12-25 20:55:36
change best
testing finish [2019-12-25 20:55:38] 
	HR@1=0.25632  MRR@1=0.25632  NDCG@1=0.25632
	HR@5=0.44110  MRR@5=0.32507  NDCG@5=0.35399
	HR@10=0.50526  MRR@10=0.33368  NDCG@10=0.37478
	HR@20=0.56709  MRR@20=0.33800  NDCG@20=0.39045
[2019-12-25 20:55:39] [11/50] 0 mean_batch_loss : 4.647040
[2019-12-25 20:55:54] [11/50] 500 mean_batch_loss : 4.717923
[2019-12-25 20:56:09] [11/50] 1000 mean_batch_loss : 4.736305
Start predicting 2019-12-25 20:56:24
change best
testing finish [2019-12-25 20:56:27] 
	HR@1=0.25611  MRR@1=0.25611  NDCG@1=0.25611
	HR@5=0.44127  MRR@5=0.32516  NDCG@5=0.35412
	HR@10=0.51134  MRR@10=0.33463  NDCG@10=0.37689
	HR@20=0.57402  MRR@20=0.33901  NDCG@20=0.39278
[2019-12-25 20:56:27] [12/50] 0 mean_batch_loss : 4.658372
[2019-12-25 20:56:42] [12/50] 500 mean_batch_loss : 4.598761
[2019-12-25 20:56:58] [12/50]

[2019-12-25 21:10:11] [28/50] 1000 mean_batch_loss : 4.006223
Start predicting 2019-12-25 21:10:26
change best
testing finish [2019-12-25 21:10:29] 
	HR@1=0.25590  MRR@1=0.25590  NDCG@1=0.25590
	HR@5=0.46068  MRR@5=0.33208  NDCG@5=0.36414
	HR@10=0.53799  MRR@10=0.34247  NDCG@10=0.38920
	HR@20=0.61030  MRR@20=0.34752  NDCG@20=0.40753
[2019-12-25 21:10:29] [29/50] 0 mean_batch_loss : 3.672319
[2019-12-25 21:10:46] [29/50] 500 mean_batch_loss : 3.944469
[2019-12-25 21:11:04] [29/50] 1000 mean_batch_loss : 3.985588
Start predicting 2019-12-25 21:11:16
change best
testing finish [2019-12-25 21:11:19] 
	HR@1=0.25688  MRR@1=0.25688  NDCG@1=0.25688
	HR@5=0.46128  MRR@5=0.33285  NDCG@5=0.36486
	HR@10=0.53623  MRR@10=0.34283  NDCG@10=0.38908
	HR@20=0.61020  MRR@20=0.34800  NDCG@20=0.40782
[2019-12-25 21:11:19] [30/50] 0 mean_batch_loss : 3.839088
[2019-12-25 21:11:34] [30/50] 500 mean_batch_loss : 3.913616
[2019-12-25 21:11:49] [30/50] 1000 mean_batch_loss : 3.984500
Start predicting 2019-12-25 

Start predicting 2019-12-25 21:24:02
change best
testing finish [2019-12-25 21:24:04] 
	HR@1=0.24619  MRR@1=0.24619  NDCG@1=0.24619
	HR@5=0.43635  MRR@5=0.31737  NDCG@5=0.34706
	HR@10=0.50786  MRR@10=0.32694  NDCG@10=0.37021
	HR@20=0.57669  MRR@20=0.33173  NDCG@20=0.38763
[2019-12-25 21:24:04] [7/50] 0 mean_batch_loss : 4.589965
[2019-12-25 21:24:17] [7/50] 500 mean_batch_loss : 4.585111
[2019-12-25 21:24:32] [7/50] 1000 mean_batch_loss : 4.639550
Start predicting 2019-12-25 21:24:46
change best
testing finish [2019-12-25 21:24:48] 
	HR@1=0.24728  MRR@1=0.24728  NDCG@1=0.24728
	HR@5=0.44103  MRR@5=0.31906  NDCG@5=0.34944
	HR@10=0.51492  MRR@10=0.32904  NDCG@10=0.37346
	HR@20=0.58365  MRR@20=0.33381  NDCG@20=0.39084
[2019-12-25 21:24:48] [8/50] 0 mean_batch_loss : 4.465520
[2019-12-25 21:25:00] [8/50] 500 mean_batch_loss : 4.455627
[2019-12-25 21:25:14] [8/50] 1000 mean_batch_loss : 4.543701
Start predicting 2019-12-25 21:25:28
change best
testing finish [2019-12-25 21:25:30] 
	HR@1=0.2

[2019-12-25 21:37:13] [25/50] 500 mean_batch_loss : 3.885161
[2019-12-25 21:37:27] [25/50] 1000 mean_batch_loss : 4.017419
Start predicting 2019-12-25 21:37:40
testing finish [2019-12-25 21:37:43] 
	HR@1=0.24869  MRR@1=0.24869  NDCG@1=0.24869
	HR@5=0.45259  MRR@5=0.32457  NDCG@5=0.35648
	HR@10=0.53092  MRR@10=0.33507  NDCG@10=0.38186
	HR@20=0.60390  MRR@20=0.34019  NDCG@20=0.40037
[2019-12-25 21:37:43] [26/50] 0 mean_batch_loss : 3.937309
[2019-12-25 21:37:57] [26/50] 500 mean_batch_loss : 3.865547
[2019-12-25 21:38:11] [26/50] 1000 mean_batch_loss : 3.954139
Start predicting 2019-12-25 21:38:23
testing finish [2019-12-25 21:38:26] 
	HR@1=0.24946  MRR@1=0.24946  NDCG@1=0.24946
	HR@5=0.45080  MRR@5=0.32398  NDCG@5=0.35558
	HR@10=0.53036  MRR@10=0.33464  NDCG@10=0.38134
	HR@20=0.60285  MRR@20=0.33970  NDCG@20=0.39971
[2019-12-25 21:38:26] [27/50] 0 mean_batch_loss : 3.513484
[2019-12-25 21:38:40] [27/50] 500 mean_batch_loss : 3.860877
[2019-12-25 21:38:54] [27/50] 1000 mean_batch_loss : 