In [1]:
%matplotlib inline
%matplotlib inline
import torch
import torch.nn.functional as F
from torch.optim.optimizer import Optimizer
import math
import random
import numpy as np
import pandas as pd
from torch.utils.data import Dataset
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
import tqdm
from matplotlib import pyplot as plt
from copy import deepcopy
import os
import datetime
import pickle
import copy
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

In [2]:
seed = 1
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
device = torch.device("cuda:0")
# device = torch.device("cpu")

In [3]:
session_length = 19
batch_size = 512
plot_num = 50000
epochs = 30

In [4]:

class SessionData(object):
    def __init__(self,session_index,session_id,items_indexes):
        self.session_index = session_index
        self.session_id = session_id
        self.item_list = items_indexes

    def generate_seq_datas(self,session_length,padding_idx=0,predict_length=1):
        sessions = []
        if len(self.item_list)<2:
            self.item_list.append[self.item_list[0]]
        if predict_length==1:
            # when session length>=3
            for i in range(1,len(self.item_list)-1):
#             # when session length >=2
#             for i in range(len(self.item_list)-1):
                if i <session_length:
                    train_data = [0 for _ in range(session_length-i-1)]
                    train_data.extend(self.item_list[:i+1])
                    train_data.append(self.item_list[i+1])
                else:
                    train_data = self.item_list[i+1-session_length:i+1]
                    train_data.append(self.item_list[i+1])
                sessions.append(train_data)
        else:

            pass
        return self.session_index,sessions
    def __str__(self):
        info = " session index = {}\n session id = {} \n the length of item list= {} \n the fisrt item index in item list is {}".format(self.session_index,self.session_id,len(self.item_list),self.item_list[0])
        return info
class SessionDataSet(object):
    def __init__(self,train_file,test_file,padding_idx=0):
        super(SessionDataSet,self).__init__()
        self.index_count = 0
        self.session_count = 0
        self.train_count = 0
        self.test_count = 0
        self.max_session_length = 0

        self.padding_idx = padding_idx
        self.item2index = dict()
        self.index2item = dict()
        self.session2index = dict()
        self.index2session = dict()
        self.item_total_num = dict()
        self.item2index["<pad>"] = padding_idx
        self.index2item[padding_idx] = "<pad>"
        self.train_data = self.load_data(train_file)
        print("training set is loaded, # index: ",len(self.item2index.keys()))
        self.train_count = self.session_count
        print("train_session_num",self.train_count)
        self.test_data = self.load_data(test_file)
        print("testing set is loaded, # index: ",len(self.index2item.keys()))
        print("# item",self.index_count)
        self.test_count = self.session_count-self.train_count
        print("# test session:",self.test_count)
        self.all_training_data = []
        self.all_testing_data = []
        self.all_meta_training_data = []
        self.all_meta_testing_data = []
        self.train_session_length = 0
        self.test_session_length = 0
    
    def load_data(self,file_path):
        data =  pickle.load(open(file_path, 'rb'))
        session_ids = data[0]
        session_data = data[1]
        session_label = data[2]

        result_data = []
        lenth = len(session_ids)
        print("# session",lenth)

        last_session_id = session_ids[0]
        
        session_item_indexes = []

        for item_id in session_data[0]:
            if item_id not in self.item2index.keys():
                self.index_count+=1
                self.item2index[item_id] = self.index_count
                self.index2item[self.index_count] = item_id
                self.item_total_num[self.index_count] = 0
            session_item_indexes.append(self.item2index[item_id])
            self.item_total_num[self.item2index[item_id]] += 1
        target_item = session_label[0]
        if target_item not in self.item2index.keys():
            self.index_count+=1
            self.item2index[target_item] = self.index_count
            self.index2item[self.index_count] = target_item
            self.item_total_num[self.index_count] = 0
        session_item_indexes.append(self.item2index[target_item])
        self.item_total_num[self.item2index[target_item]] += 1

        for session_id,items,target_item in zip(session_ids,session_data,session_label):
            if session_id!=last_session_id:

                self.session_count+=1
                self.session2index[last_session_id] = self.session_count
                self.index2session[self.session_count] = last_session_id
                last_session_id = session_id
                if len(session_item_indexes)>self.max_session_length:
                    self.max_session_length = len(session_item_indexes)
                new_session = SessionData(self.session_count,last_session_id,session_item_indexes)
                result_data.append(new_session)
                session_item_indexes = []
                for item_id in items:
                    if item_id not in self.item2index.keys():
                        self.index_count+=1
                        self.item2index[item_id] = self.index_count
                        self.index2item[self.index_count] = item_id
                        self.item_total_num[self.index_count] = 0
                    session_item_indexes.append(self.item2index[item_id])
                    self.item_total_num[self.item2index[item_id]] += 1
                if target_item not in self.item2index.keys():
                    self.index_count+=1
                    self.item2index[target_item] = self.index_count
                    self.index2item[self.index_count] = target_item
                    self.item_total_num[self.index_count] = 0
                session_item_indexes.append(self.item2index[target_item])
                self.item_total_num[self.item2index[target_item]] += 1
            else:
                continue

        self.session_count+=1
        self.session2index[last_session_id] = self.session_count
        new_session = SessionData(self.session_count,last_session_id,session_item_indexes)
        result_data.append(new_session)
        print("loaded")
        print(new_session)
        
        return result_data
    

    def get_batch(self,batch_size,session_length=10,predict_length=1,all_data=None,phase="train",neg_num=1,sampling_mathod="random"):

        if phase == "train":
            if all_data is None:
                all_data = self.get_all_training_data(session_length)
            indexes = np.random.permutation(all_data.shape[0])
            all_data = all_data[indexes]
        else:
            if all_data is None:
                all_data = self.get_all_testing_data(session_length)
        
        sindex = 0
        eindex = batch_size
        while eindex < all_data.shape[0]:
            batch = all_data[sindex: eindex]

            temp = eindex
            eindex = eindex + batch_size
            sindex = temp
            if phase =="train":
                batch = self.divid_and_extend_negative_samples(batch,session_length=session_length,predict_length=predict_length,neg_num=neg_num,method=sampling_mathod)
            else:
                batch = [batch[:,:session_length],batch[:,session_length:]]
            yield batch

        if eindex >= all_data.shape[0]:
            batch = all_data[sindex:]
            if phase =="train":
                batch = self.divid_and_extend_negative_samples(batch,session_length=session_length,predict_length=predict_length,neg_num=neg_num,method=sampling_mathod)
            else:
                batch = [batch[:,:session_length],batch[:,session_length:]]
            yield batch
    
    def divid_and_extend_negative_samples(self,batch_data,session_length,predict_length=1,neg_num=1,method="random"):
        """
        divid and extend negative samples
        """
        neg_items = []
        if method == "random":
            for session_and_target in batch_data:
                neg_item = []
                for i in range(neg_num):
                    rand_item = random.randint(1,self.index_count)
                    while rand_item in session_and_target or rand_item in neg_item:
                        rand_item = random.randint(1,self.index_count)
                    neg_item.append(rand_item)
                neg_items.append(neg_item)
        else:

            total_list = set()
            for session in batch_data:
                for i in session:
                    total_list.add(i) 
            total_list = list(total_list)
            total_list =  sorted(total_list, key=lambda item: self.item_total_num[item],reverse=True)
            for i,session in enumerate(batch_data):
                np.random.choice(total_list)
        session_items = batch_data[:,:session_length]
        target_item = batch_data[:,session_length:]
        neg_items = np.array(neg_items)
        return [session_items,target_item,neg_items]
    
    def get_all_training_data(self,session_length,predict_length=1):
        if len(self.all_training_data)!=0 and self.train_session_length==session_length:
#             print("The build is complete and there is no need to repeat the build")
            return self.all_training_data
        print("Start building the all training dataset")
        all_sessions = []
        for session_data in self.train_data:
            session_index,sessions = session_data.generate_seq_datas(session_length,padding_idx=self.padding_idx)
            if sessions is not None:
                all_sessions.extend(sessions)
        all_sessions = np.array(all_sessions)
        self.all_training_data = all_sessions
        self.train_session_length=session_length
        print("The total number of training samples is：",all_sessions.shape)
        return all_sessions
    
    def get_all_testing_data(self,session_length,predict_length=1):
        if len(self.all_testing_data)!=0 and self.test_session_length==session_length:
            return self.all_testing_data
        all_sessions = []
        for session_data in self.test_data:
            session_index,sessions = session_data.generate_seq_datas(session_length,padding_idx=self.padding_idx)
            if sessions is not None:
                all_sessions.extend(sessions)
        all_sessions = np.array(all_sessions)
        self.all_testing_data = all_sessions
        self.test_session_length=session_length
        print("The total number of testing samples is：",all_sessions.shape)
        return all_sessions

    def __getitem__(self,idx):
        pass
    
    def __len__(self):
        pass

In [5]:
# dataset = SessionDataSet(train_file="../../data/retailrocket_gcsan_my/train.txt",test_file="../../data/retailrocket_gcsan_my/test.txt")
dataset = SessionDataSet(train_file="../../data/diginetica_gcsan_my/train.txt",test_file="../../data/diginetica_gcsan_my/test.txt")
# dataset = SessionDataSet(train_file="../../data/yoochoose1_4_gcsan_my/train.txt",test_file="../../data/yoochoose1_4_gcsan_my/test.txt")
# dataset = SessionDataSet(train_file="../../data/yoochoose1_64_gcsan_my/train.txt",test_file="../../data/yoochoose1_64_gcsan_my/test.txt")

# session 526135
loaded
 session index = 132501
 session id = 598664 
 the length of item list= 5 
 the fisrt item index in item list is 15612
training set is loaded, # index:  40841
train_session_num 132501
# session 44279
loaded
 session index = 143847
 session id = 600240 
 the length of item list= 4 
 the fisrt item index in item list is 2093
testing set is loaded, # index:  40841
# item 40840
# test session: 11346


In [6]:
def bpr_loss(r):
    return torch.sum(-torch.log(torch.sigmoid(r)))
def get_hit_num(pred,y_truth):
    """
        pred: numpy type(batch_size,k) 
        y_truth: list type (batch_size,groudtruth_num)
    """

    hit_num = 0
    for i in range(len(y_truth)):
        for value in y_truth[i]:
            hit_num += np.sum(pred[i]==value)
    return hit_num

def get_rr(pred,y_truth):
    rr=0.
    for i in range(len(y_truth)):
        for value in y_truth[i]:
            hit_indexes = np.where(pred[i]==value)[0]
            for hit_index in hit_indexes:
                rr += 1/(hit_index+1)
    return rr

def get_dcg(pred,y_truth):
    y_pred_score = np.zeros_like(pred)

    for i in range(len(y_truth)):

        for j,y_pred in enumerate(pred[i]):
            if y_pred == y_truth[i][0]:
                y_pred_score[i][j]=1
    gain = 2 ** y_pred_score - 1
    discounts = np.tile(np.log2(np.arange(pred.shape[1]) + 2),(len(y_truth),1))
    dcg = np.sum(gain / discounts,axis=1)
    return dcg

def get_ndcg(pred,y_truth):
    dcg = get_dcg(pred, y_truth)
    idcg = get_dcg(np.concatenate((y_truth,np.zeros_like(pred)[:,:-1]-1),axis=1), y_truth)
    ndcg = np.sum(dcg / idcg)

    return ndcg

def dcg_score(y_pre, y_true, k):
    y_pre_score = np.zeros(k)
    if len(y_pre) > k:
        y_pre = y_pre[:k]
    for i in range(len(y_pre)):
        pre_tag = y_pre[i]
        if pre_tag in y_true:
            y_pre_score[i] = 1
    gain = 2 ** y_pre_score - 1
    discounts = np.log2(np.arange(k) + 2)
    return np.sum(gain / discounts)


def ndcg_score(y_pre, y_true, k=5):
    dcg = dcg_score(y_pre, y_true, k)
    idcg = dcg_score(y_true, y_true, k)
    return dcg / idcg

loss_function = torch.nn.CrossEntropyLoss()

In [7]:
class NARM(torch.nn.Module):
    def __init__(self, itemNum, hidden_size, embedding_dim, batch_size, layerNum = 1,padding_idx=0,posNum=11, dropout=0.5,embedding_dropout=0.25,activate="tanh"):
        super(NARM, self).__init__()
        self.itemNum = itemNum
        self.hidden_size = hidden_size
        self.batch_size = batch_size
        self.layerNum = layerNum
        self.embedding_dim = embedding_dim
        self.item_embedding = torch.nn.Embedding(itemNum, self.embedding_dim, padding_idx=padding_idx)
        torch.nn.init.constant_(self.item_embedding.weight[0],0)
        self.embedding_dropout = torch.nn.Dropout(embedding_dropout)
        self.gru = torch.nn.GRU(self.embedding_dim, self.hidden_size, self.layerNum)
        self.a_1 = torch.nn.Linear(self.hidden_size, self.hidden_size, bias=False)
        self.a_2 = torch.nn.Linear(self.hidden_size, self.hidden_size, bias=False)
        self.v_t = torch.nn.Linear(self.hidden_size, 1, bias=False)
        self.ct_dropout = torch.nn.Dropout(dropout)
        self.b = torch.nn.Linear(self.embedding_dim, 2 * self.hidden_size, bias=False)
        #self.sf = torch.nn.Softmax()
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    def forward(self, seq, lengths):
        seq = seq.t()
        hidden = self.init_hidden(seq.size(1))
        embs = self.embedding_dropout(self.item_embedding(seq))
#         print("before",embs.shape)
        embs = pack_padded_sequence(embs, lengths)
#         print("after",embs.shape)
        gru_out, hidden = self.gru(embs, hidden)

        gru_out, lengths = pad_packed_sequence(gru_out)
#         print("after aa",gru_out,lengths)
        # fetch the last hidden state of last timestamp
        ht = hidden[-1]
        gru_out = gru_out.permute(1, 0, 2)

        c_global = ht
        q1 = self.a_1(gru_out.contiguous().view(-1, self.hidden_size)).view(gru_out.size())  
        q2 = self.a_2(ht)

        mask = torch.where(seq.permute(1, 0) > 0, torch.tensor([1.], device = self.device), torch.tensor([0.], device = self.device))
        q2_expand = q2.unsqueeze(1).expand_as(q1)
        q2_masked = mask.unsqueeze(2).expand_as(q1) * q2_expand

        alpha = self.v_t(torch.sigmoid(q1 + q2_masked).view(-1, self.hidden_size)).view(mask.size())
        c_local = torch.sum(alpha.unsqueeze(2).expand_as(gru_out) * gru_out, 1)

        c_t = torch.cat([c_local, c_global], 1)
        c_t = self.ct_dropout(c_t)
        
        item_embs = self.item_embedding.weight[1:]
        scores = torch.matmul(c_t, self.b(item_embs).permute(1, 0))

        return scores

    def init_hidden(self, batch_size):
        return torch.zeros((self.layerNum, batch_size, self.hidden_size), requires_grad=True).to(self.device)
    
    def predict_top_k(self,seq, lengths, k=20):
        seq = seq.t()
        hidden = self.init_hidden(seq.size(1))
        embs = self.item_embedding(seq)
#         print("embs.shape",embs.shape)
        embs = pack_padded_sequence(embs, lengths)
        gru_out, hidden = self.gru(embs, hidden)
        gru_out, lengths = pad_packed_sequence(gru_out)

        # fetch the last hidden state of last timestamp
        ht = hidden[-1]
        gru_out = gru_out.permute(1, 0, 2)

        c_global = ht
        q1 = self.a_1(gru_out.contiguous().view(-1, self.hidden_size)).view(gru_out.size())  
        q2 = self.a_2(ht)

        mask = torch.where(seq.permute(1, 0) > 0, torch.tensor([1.], device = self.device), torch.tensor([0.], device = self.device))
        q2_expand = q2.unsqueeze(1).expand_as(q1)
        q2_masked = mask.unsqueeze(2).expand_as(q1) * q2_expand

        alpha = self.v_t(torch.sigmoid(q1 + q2_masked).view(-1, self.hidden_size)).view(mask.size())
        c_local = torch.sum(alpha.unsqueeze(2).expand_as(gru_out) * gru_out, 1)

        c_t = torch.cat([c_local, c_global], 1)
#         c_t = self.ct_dropout(c_t)
        
        item_embs = self.item_embedding.weight[1:]
        scores = torch.matmul(c_t, self.b(item_embs).permute(1, 0))
        result = torch.topk(scores,k,dim=-1)[1]
        return result

# CIKM S >= 3   Total 867.4‬s Avg 17.35s
    HR@20=0.64258  MRR@20=0.29960, hyper-parameters: current model hyper-parameters: session_length=19, hidden_size=100, lr=0.0010, embedding_dim=100, embedding_dropout=0.25, dropout=0.50
        HR@1=0.18919  MRR@1=0.18919  NDCG@1=0.18919
        HR@5=0.43032  MRR@5=0.27794  NDCG@5=0.31586
        HR@10=0.53732  MRR@10=0.29225  NDCG@10=0.35049
        HR@20=0.64258  MRR@20=0.29960  NDCG@20=0.37716
# RR S >= 3   Total 522.1s Avg 11.60s
    HR@20=0.53992  MRR@20=0.28436, hyper-parameters: current model hyper-parameters: session_length=19, hidden_size=100, lr=0.0010, embedding_dim=100, embedding_dropout=0.25, dropout=0.50
        HR@1=0.19898  MRR@1=0.19898  NDCG@1=0.19898
        HR@5=0.38931  MRR@5=0.26896  NDCG@5=0.29891
        HR@10=0.46643  MRR@10=0.27927  NDCG@10=0.32387
        HR@20=0.53992  MRR@20=0.28436  NDCG@20=0.34243
        
# RSC64 S >= 3   Total 205.6s Avg 7.34s
    HR@20=0.69375  MRR@20=0.27877, hyper-parameters: current model hyper-parameters: session_length=19, hidden_size=100, lr=0.0010, embedding_dim=100, embedding_dropout=0.25, dropout=0.50
        HR@1=0.14854  MRR@1=0.14854  NDCG@1=0.14854
        HR@5=0.43585  MRR@5=0.25130  NDCG@5=0.29709
        HR@10=0.58163  MRR@10=0.27086  NDCG@10=0.34434
        HR@20=0.69375  MRR@20=0.27877  NDCG@20=0.37284
# RSC4 S >= 3   Total 2,672.2‬s Avg 92.14s
    HR@20=0.71441  MRR@20=0.28263，session_length=19, hidden_size=100, lr=0.0010, embedding_dim=100, embedding_dropout=0.25, dropout=0.50
        HR@1=0.14575  MRR@1=0.14575  NDCG@1=0.14575
        HR@5=0.45036  MRR@5=0.25469  NDCG@5=0.30324
        HR@10=0.59594  MRR@10=0.27422  NDCG@10=0.35042
        HR@20=0.71441  MRR@20=0.28263  NDCG@20=0.38060

In [8]:
epochs=50
def train(args):
    hidden_size = args["hidden_size"] if "hidden_size" in args.keys() else 100
    embedding_dim = args["embedding_dim"] if "hidden_size" in args.keys() else 100
    dropout = args["dropout"] if "dropout" in args.keys()  else 0.5
    embedding_dropout = args["embedding_dropout"] if "dropout" in args.keys()  else 0.5
    lr = args["lr"] if "lr" in args.keys()  else 1e-3
    session_length = args["session_length"] if "session_length" in args.keys() else 20
    patience = args["patience"] if "patience" in args.keys() else 5
    model = NARM(hidden_size=hidden_size, embedding_dim=embedding_dim,itemNum=dataset.index_count+1, batch_size=batch_size,posNum=session_length+1, padding_idx=0, dropout=dropout,embedding_dropout=embedding_dropout).to(device)
    opti = torch.optim.Adam(model.parameters(),lr=lr)
    best_model_hr = 0.0
    best_model_mrr = 0.0
    best_r1m = 0.0
    best_model = None
    first_loss = 0.0
    predict_nums = [1,5,10,20]
    no_improvement_epoch = 0
    start_train_time = datetime.datetime.now()
    for epoch in range(epochs):
        batch_losses = []
        epoch_losses = []
        for i,batch_data in enumerate(dataset.get_batch(batch_size,session_length,phase="train")):
            sessions = torch.tensor(batch_data[0]).to(device)
            target_items = torch.tensor(batch_data[1]).squeeze().to(device)-1
            result_pos = model(sessions,torch.tensor(session_length).unsqueeze(0).repeat(target_items.shape[0]))
            loss = loss_function(result_pos,target_items)
            opti.zero_grad()
            loss.backward()
            opti.step()
            batch_losses.append(loss.cpu().detach().numpy())
            epoch_losses.append(loss.cpu().detach().numpy())
            if i % plot_num == 0:
                time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
                print("[%s] [%d/%d] %d mean_batch_loss : %0.6f" % (time, epoch+1, epochs, i, np.mean(batch_losses)))
                batch_losses = []
        with torch.no_grad():
            start_test_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            print("Start predicting",start_test_time)
            rrs = [0 for _ in range(len(predict_nums))]
            hit_nums = [0 for _ in range(len(predict_nums))]
            ndcgs = [0 for _ in range(len(predict_nums))]
            for i,batch_data in enumerate(dataset.get_batch(batch_size,session_length,phase="test")):
                sessions = torch.tensor(batch_data[0]).to(device)
                target_items = np.array(batch_data[1])-1
                y_pred = model.predict_top_k(sessions,torch.tensor(session_length).unsqueeze(0).repeat(target_items.shape[0]),20).cpu().numpy()
#                 print(y_pred[:2],target_items[:2])
                
                for j,predict_num in enumerate(predict_nums):
                    hit_nums[j]+=get_hit_num(y_pred[:,:predict_num],target_items)
                    rrs[j]+=get_rr(y_pred[:,:predict_num],target_items)
                    ndcgs[j]+=get_ndcg(y_pred[:,:predict_num],target_items)
                    
            end_test_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            
            hrs = [hit_num/len(dataset.all_testing_data) for hit_num in hit_nums]
            mrrs = [rr/len(dataset.all_testing_data) for rr in rrs]
            mndcgs = [ndcg/len(dataset.all_testing_data) for ndcg in ndcgs]
            if hrs[-1] + mrrs[-1] > best_r1m:
                print("change best")
                best_model = deepcopy(model)
                best_model_hr = hrs[-1]
                best_model_mrr = mrrs[-1]
                best_r1m = hrs[-1] + mrrs[-1]
                no_improvement_epoch = 0
            else:
                no_improvement_epoch +=1
            print("testing finish [%s] "%end_test_time)
            for k,predict_num in enumerate(predict_nums):
                print("\tHR@%d=%.5f  MRR@%d=%.5f  NDCG@%d=%.5f"%(predict_num,hrs[k],predict_num,mrrs[k],predict_num,mndcgs[k]))
        if no_improvement_epoch>=patience:
            print("early stopping")
            break
    end_train_time = datetime.datetime.now()
    print("training and testting over, Total time",end_train_time-start_train_time)
    return best_model,best_model_hr,best_model_mrr

In [9]:
hidden_sizes = [10,20,50,100,200,300]
# embedding_dims = [100]
dropouts = [0.5]
embedding_dropouts = [0.25]
lrs = [3e-3]
session_lengths = [20]
patience = 5
best_params = ""
best_all_model = 0.0
best_all_hr = 0.0
best_all_mrr = 0.0
best_all_r1m = 0.0
for session_length in session_lengths:
    for hidden_size in hidden_sizes:
        for dropout in dropouts:
            for embedding_dropout in embedding_dropouts:
                for lr in lrs:
                    args = {}
                    print("current model hyper-parameters: session_length=%d, hidden_size=%d, lr=%.4f, embedding_dim=%d, embedding_dropout=%.2f, dropout=%.2f\n" % (session_length,hidden_size,lr,hidden_size,embedding_dropout,dropout))
                    args["session_length"] = session_length
                    args["hidden_size"] = hidden_size
                    args["embedding_dim"] = hidden_size
                    args["dropout"] = dropout
                    args["embedding_dropout"] = embedding_dropout
                    args["patience"] = patience
                    args["lr"] = lr
                    best_model,best_model_hr,best_model_mrr = train(args)
                    if best_model_hr + best_model_mrr > best_all_r1m:
                        print("best model change")
                        best_all_r1m = best_model_hr + best_model_mrr
                        best_all_hr = best_model_hr
                        best_all_mrr = best_model_mrr
                        best_all_model = best_model
                        best_params = "current model hyper-parameters: session_length=%d, hidden_size=%d, lr=%.4f, embedding_dim=%d, embedding_dropout=%.2f, dropout=%.2f\n" % (session_length,hidden_size,lr,hidden_size,embedding_dropout,dropout)
                    best_model = None
                    print("current model hyper-parameters: session_length=%d, hidden_size=%d, lr=%.4f, embedding_dim=%d, embedding_dropout=%.2f, dropout=%.2f\n" % (session_length,hidden_size,lr,hidden_size,embedding_dropout,dropout))
                    print("current model HR@20=%.5f  MRR@20=%.5f"%(best_model_hr,best_model_mrr))
                    print("the best result so far. HR@20=%.5f  MRR@20=%.5f, %s \n"%(best_model_hr,best_all_mrr,best_params))
print("The best result HR@20=%.5f  MRR@20=%.5f, hyper-parameters: %s. "%(best_all_hr,best_all_mrr,best_params))
print("over.")

current model hyper-parameters: session_length=20, hidden_size=10, lr=0.0030, embedding_dim=10, embedding_dropout=0.25, dropout=0.50

Start building the all training dataset
The total number of training samples is： (526135, 21)
[2020-03-02 15:04:29] [1/50] 0 mean_batch_loss : 13.777618
Start predicting 2020-03-02 15:04:41
The total number of testing samples is： (44279, 21)
change best
testing finish [2020-03-02 15:04:44] 
	HR@1=0.00346  MRR@1=0.00346  NDCG@1=0.00346
	HR@5=0.00976  MRR@5=0.00565  NDCG@5=0.00667
	HR@10=0.01482  MRR@10=0.00632  NDCG@10=0.00829
	HR@20=0.02279  MRR@20=0.00687  NDCG@20=0.01031
[2020-03-02 15:04:44] [2/50] 0 mean_batch_loss : 10.182892
Start predicting 2020-03-02 15:04:56
change best
testing finish [2020-03-02 15:04:59] 
	HR@1=0.00813  MRR@1=0.00813  NDCG@1=0.00813
	HR@5=0.02080  MRR@5=0.01244  NDCG@5=0.01450
	HR@10=0.03105  MRR@10=0.01378  NDCG@10=0.01778
	HR@20=0.04569  MRR@20=0.01478  NDCG@20=0.02147
[2020-03-02 15:04:59] [3/50] 0 mean_batch_loss : 9.72779

Start predicting 2020-03-02 15:10:35
testing finish [2020-03-02 15:10:38] 
	HR@1=0.04819  MRR@1=0.04819  NDCG@1=0.04819
	HR@5=0.15594  MRR@5=0.08469  NDCG@5=0.10225
	HR@10=0.24156  MRR@10=0.09594  NDCG@10=0.12975
	HR@20=0.34951  MRR@20=0.10333  NDCG@20=0.15693
[2020-03-02 15:10:38] [26/50] 0 mean_batch_loss : 7.160785
Start predicting 2020-03-02 15:10:50
change best
testing finish [2020-03-02 15:10:53] 
	HR@1=0.04578  MRR@1=0.04578  NDCG@1=0.04578
	HR@5=0.15822  MRR@5=0.08360  NDCG@5=0.10197
	HR@10=0.24361  MRR@10=0.09488  NDCG@10=0.12946
	HR@20=0.35369  MRR@20=0.10247  NDCG@20=0.15723
[2020-03-02 15:10:53] [27/50] 0 mean_batch_loss : 7.068726
Start predicting 2020-03-02 15:11:05
testing finish [2020-03-02 15:11:08] 
	HR@1=0.04573  MRR@1=0.04573  NDCG@1=0.04573
	HR@5=0.15775  MRR@5=0.08358  NDCG@5=0.10185
	HR@10=0.24513  MRR@10=0.09515  NDCG@10=0.13002
	HR@20=0.35279  MRR@20=0.10256  NDCG@20=0.15715
[2020-03-02 15:11:08] [28/50] 0 mean_batch_loss : 6.811332
Start predicting 2020-03-02 

Start predicting 2020-03-02 15:16:37
change best
testing finish [2020-03-02 15:16:40] 
	HR@1=0.09049  MRR@1=0.09049  NDCG@1=0.09049
	HR@5=0.25854  MRR@5=0.14926  NDCG@5=0.17630
	HR@10=0.36026  MRR@10=0.16271  NDCG@10=0.20906
	HR@20=0.47332  MRR@20=0.17054  NDCG@20=0.23762
[2020-03-02 15:16:40] [15/50] 0 mean_batch_loss : 6.030202
Start predicting 2020-03-02 15:16:52
change best
testing finish [2020-03-02 15:16:55] 
	HR@1=0.09054  MRR@1=0.09054  NDCG@1=0.09054
	HR@5=0.26344  MRR@5=0.15078  NDCG@5=0.17864
	HR@10=0.36774  MRR@10=0.16461  NDCG@10=0.21227
	HR@20=0.48334  MRR@20=0.17257  NDCG@20=0.24143
[2020-03-02 15:16:55] [16/50] 0 mean_batch_loss : 6.019308
Start predicting 2020-03-02 15:17:07
change best
testing finish [2020-03-02 15:17:10] 
	HR@1=0.08973  MRR@1=0.08973  NDCG@1=0.08973
	HR@5=0.26326  MRR@5=0.15065  NDCG@5=0.17852
	HR@10=0.36796  MRR@10=0.16457  NDCG@10=0.21232
	HR@20=0.48483  MRR@20=0.17263  NDCG@20=0.24181
[2020-03-02 15:17:10] [17/50] 0 mean_batch_loss : 6.136656
Star

Start predicting 2020-03-02 15:22:59
testing finish [2020-03-02 15:23:02] 
	HR@1=0.10409  MRR@1=0.10409  NDCG@1=0.10409
	HR@5=0.30516  MRR@5=0.17458  NDCG@5=0.20690
	HR@10=0.42253  MRR@10=0.19022  NDCG@10=0.24483
	HR@20=0.55094  MRR@20=0.19913  NDCG@20=0.27730
[2020-03-02 15:23:02] [40/50] 0 mean_batch_loss : 5.447358
Start predicting 2020-03-02 15:23:14
change best
testing finish [2020-03-02 15:23:17] 
	HR@1=0.10477  MRR@1=0.10477  NDCG@1=0.10477
	HR@5=0.30635  MRR@5=0.17574  NDCG@5=0.20808
	HR@10=0.42492  MRR@10=0.19146  NDCG@10=0.24631
	HR@20=0.55354  MRR@20=0.20044  NDCG@20=0.27890
[2020-03-02 15:23:17] [41/50] 0 mean_batch_loss : 5.206198
Start predicting 2020-03-02 15:23:29
change best
testing finish [2020-03-02 15:23:32] 
	HR@1=0.10450  MRR@1=0.10450  NDCG@1=0.10450
	HR@5=0.30879  MRR@5=0.17575  NDCG@5=0.20865
	HR@10=0.42962  MRR@10=0.19180  NDCG@10=0.24764
	HR@20=0.55735  MRR@20=0.20067  NDCG@20=0.27994
[2020-03-02 15:23:33] [42/50] 0 mean_batch_loss : 5.334809
Start predicting

change best
testing finish [2020-03-02 15:29:16] 
	HR@1=0.16312  MRR@1=0.16312  NDCG@1=0.16312
	HR@5=0.36439  MRR@5=0.23586  NDCG@5=0.26778
	HR@10=0.47009  MRR@10=0.24998  NDCG@10=0.30197
	HR@20=0.58070  MRR@20=0.25768  NDCG@20=0.32996
[2020-03-02 15:29:16] [13/50] 0 mean_batch_loss : 4.769242
Start predicting 2020-03-02 15:29:30
change best
testing finish [2020-03-02 15:29:33] 
	HR@1=0.16511  MRR@1=0.16511  NDCG@1=0.16511
	HR@5=0.37415  MRR@5=0.24131  NDCG@5=0.27434
	HR@10=0.47926  MRR@10=0.25533  NDCG@10=0.30831
	HR@20=0.58746  MRR@20=0.26285  NDCG@20=0.33569
[2020-03-02 15:29:33] [14/50] 0 mean_batch_loss : 4.924292
Start predicting 2020-03-02 15:29:47
change best
testing finish [2020-03-02 15:29:50] 
	HR@1=0.16373  MRR@1=0.16373  NDCG@1=0.16373
	HR@5=0.37736  MRR@5=0.24136  NDCG@5=0.27516
	HR@10=0.48513  MRR@10=0.25576  NDCG@10=0.31003
	HR@20=0.59471  MRR@20=0.26342  NDCG@20=0.33781
[2020-03-02 15:29:50] [15/50] 0 mean_batch_loss : 4.821004
Start predicting 2020-03-02 15:30:05
chan

testing finish [2020-03-02 15:36:29] 
	HR@1=0.17198  MRR@1=0.17198  NDCG@1=0.17198
	HR@5=0.41485  MRR@5=0.26064  NDCG@5=0.29898
	HR@10=0.52788  MRR@10=0.27569  NDCG@10=0.33550
	HR@20=0.63678  MRR@20=0.28327  NDCG@20=0.36305
[2020-03-02 15:36:29] [38/50] 0 mean_batch_loss : 4.159074
Start predicting 2020-03-02 15:36:43
change best
testing finish [2020-03-02 15:36:47] 
	HR@1=0.17315  MRR@1=0.17315  NDCG@1=0.17315
	HR@5=0.41699  MRR@5=0.26202  NDCG@5=0.30056
	HR@10=0.53057  MRR@10=0.27721  NDCG@10=0.33731
	HR@20=0.64089  MRR@20=0.28491  NDCG@20=0.36526
[2020-03-02 15:36:47] [39/50] 0 mean_batch_loss : 4.154287
Start predicting 2020-03-02 15:37:01
change best
testing finish [2020-03-02 15:37:04] 
	HR@1=0.17354  MRR@1=0.17354  NDCG@1=0.17354
	HR@5=0.41819  MRR@5=0.26281  NDCG@5=0.30144
	HR@10=0.53289  MRR@10=0.27823  NDCG@10=0.33864
	HR@20=0.64225  MRR@20=0.28582  NDCG@20=0.36629
[2020-03-02 15:37:04] [40/50] 0 mean_batch_loss : 4.058253
Start predicting 2020-03-02 15:37:18
testing finish [

Start predicting 2020-03-02 15:44:13
change best
testing finish [2020-03-02 15:44:16] 
	HR@1=0.17855  MRR@1=0.17855  NDCG@1=0.17855
	HR@5=0.40078  MRR@5=0.25993  NDCG@5=0.29496
	HR@10=0.50688  MRR@10=0.27411  NDCG@10=0.32929
	HR@20=0.61472  MRR@20=0.28159  NDCG@20=0.35655
[2020-03-02 15:44:16] [13/50] 0 mean_batch_loss : 4.254182
Start predicting 2020-03-02 15:44:34
change best
testing finish [2020-03-02 15:44:38] 
	HR@1=0.17864  MRR@1=0.17864  NDCG@1=0.17864
	HR@5=0.40642  MRR@5=0.26244  NDCG@5=0.29827
	HR@10=0.51345  MRR@10=0.27672  NDCG@10=0.33288
	HR@20=0.62036  MRR@20=0.28420  NDCG@20=0.35998
[2020-03-02 15:44:38] [14/50] 0 mean_batch_loss : 3.933518
Start predicting 2020-03-02 15:44:56
change best
testing finish [2020-03-02 15:44:59] 
	HR@1=0.17776  MRR@1=0.17776  NDCG@1=0.17776
	HR@5=0.40929  MRR@5=0.26238  NDCG@5=0.29892
	HR@10=0.51557  MRR@10=0.27659  NDCG@10=0.33331
	HR@20=0.62224  MRR@20=0.28398  NDCG@20=0.36027
[2020-03-02 15:44:59] [15/50] 0 mean_batch_loss : 3.877083
Star

Start predicting 2020-03-02 15:54:44
change best
testing finish [2020-03-02 15:54:50] 
	HR@1=0.16683  MRR@1=0.16683  NDCG@1=0.16683
	HR@5=0.30895  MRR@5=0.21846  NDCG@5=0.24095
	HR@10=0.37950  MRR@10=0.22778  NDCG@10=0.26367
	HR@20=0.45107  MRR@20=0.23277  NDCG@20=0.28178
[2020-03-02 15:54:51] [4/50] 0 mean_batch_loss : 6.158460
Start predicting 2020-03-02 15:55:42
change best
testing finish [2020-03-02 15:55:48] 
	HR@1=0.16459  MRR@1=0.16459  NDCG@1=0.16459
	HR@5=0.33747  MRR@5=0.22758  NDCG@5=0.25490
	HR@10=0.42431  MRR@10=0.23915  NDCG@10=0.28296
	HR@20=0.51286  MRR@20=0.24528  NDCG@20=0.30533
[2020-03-02 15:55:48] [5/50] 0 mean_batch_loss : 5.391572
Start predicting 2020-03-02 15:56:40
change best
testing finish [2020-03-02 15:56:45] 
	HR@1=0.15816  MRR@1=0.15816  NDCG@1=0.15816
	HR@5=0.35008  MRR@5=0.22777  NDCG@5=0.25815
	HR@10=0.44622  MRR@10=0.24053  NDCG@10=0.28917
	HR@20=0.54617  MRR@20=0.24749  NDCG@20=0.31448
[2020-03-02 15:56:45] [6/50] 0 mean_batch_loss : 4.874934
Start p

Start predicting 2020-03-02 16:16:50
testing finish [2020-03-02 16:16:56] 
	HR@1=0.06310  MRR@1=0.06310  NDCG@1=0.06310
	HR@5=0.16202  MRR@5=0.09863  NDCG@5=0.11435
	HR@10=0.21861  MRR@10=0.10618  NDCG@10=0.13265
	HR@20=0.27842  MRR@20=0.11032  NDCG@20=0.14776
[2020-03-02 16:16:56] [4/50] 0 mean_batch_loss : 8.013636
Start predicting 2020-03-02 16:17:48
change best
testing finish [2020-03-02 16:17:54] 
	HR@1=0.09860  MRR@1=0.09860  NDCG@1=0.09860
	HR@5=0.23801  MRR@5=0.14864  NDCG@5=0.17082
	HR@10=0.31270  MRR@10=0.15857  NDCG@10=0.19493
	HR@20=0.39113  MRR@20=0.16402  NDCG@20=0.21477
[2020-03-02 16:17:54] [5/50] 0 mean_batch_loss : 6.762211
Start predicting 2020-03-02 16:18:46
change best
testing finish [2020-03-02 16:18:52] 
	HR@1=0.12243  MRR@1=0.12243  NDCG@1=0.12243
	HR@5=0.28485  MRR@5=0.18081  NDCG@5=0.20663
	HR@10=0.37130  MRR@10=0.19242  NDCG@10=0.23466
	HR@20=0.46498  MRR@20=0.19893  NDCG@20=0.25836
[2020-03-02 16:18:52] [6/50] 0 mean_batch_loss : 5.872862
Start predicting 20

testing finish [2020-03-02 16:42:36] 
	HR@1=0.15472  MRR@1=0.15472  NDCG@1=0.15472
	HR@5=0.40448  MRR@5=0.24499  NDCG@5=0.28460
	HR@10=0.52011  MRR@10=0.26049  NDCG@10=0.32207
	HR@20=0.62540  MRR@20=0.26788  NDCG@20=0.34878
early stopping
training and testting over, Total time 0:28:33.733335
current model hyper-parameters: session_length=20, hidden_size=300, lr=0.0030, embedding_dim=300, embedding_dropout=0.25, dropout=0.50

current model HR@20=0.63057  MRR@20=0.27289
the best result so far. HR@20=0.63057  MRR@20=0.29136, current model hyper-parameters: session_length=20, hidden_size=100, lr=0.0030, embedding_dim=100, embedding_dropout=0.25, dropout=0.50
 

The best result HR@20=0.64674  MRR@20=0.29136, hyper-parameters: current model hyper-parameters: session_length=20, hidden_size=100, lr=0.0030, embedding_dim=100, embedding_dropout=0.25, dropout=0.50
. 
over.
