In [1]:
%matplotlib inline
import copy
import datetime
import itertools
import math
import os
import pickle
import random
from copy import deepcopy

import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
import tqdm
from matplotlib import pyplot as plt
from torch.optim.optimizer import Optimizer
from torch.utils.data import Dataset
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [2]:
seed = 1
# Seed every random number generator this notebook draws from so that runs are repeatable.
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
# Use the first CUDA device when one is available; otherwise fall back to the CPU so the
# notebook still runs (slowly) on machines without a GPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [3]:
# Number of samples per mini-batch; train() uses it for both the training and the test pass.
batch_size = 512
# train() prints the mean batch loss every `plot_num` batches.
plot_num = 50000
# Number of training epochs. NOTE: this value is reassigned to 50 right before train() is defined.
epochs = 30

In [4]:
class SessionData(object):
    """One session: its running index, its original id and its ordered item indexes."""

    def __init__(self,session_index,session_id,items_indexes):
        self.session_index = session_index
        self.session_id = session_id
        self.item_list = items_indexes

    def generate_seq_datas(self,session_length,padding_idx=0,predict_length=1):
        """Cut the session into fixed-length (history + target) samples.

        For every position ``i`` in ``1 .. len(item_list)-2`` one sample is produced:
        the items up to and including ``i`` (left-padded with ``padding_idx`` or
        truncated to the most recent ``session_length`` items) followed by item
        ``i+1`` as the prediction target. Each sample therefore has
        ``session_length + 1`` entries.

        Args:
            session_length: number of history slots in each sample.
            padding_idx: value used to left-pad short histories.
            predict_length: only ``1`` is supported; any other value yields no samples.

        Returns:
            ``(session_index, samples)`` where ``samples`` is a list of lists.
            Sessions with fewer than three items yield an empty list.
        """
        sessions = []
        if not self.item_list:
            # Nothing to predict from an empty session.
            return self.session_index,sessions
        if len(self.item_list)<2:
            # A single-item session is extended by repeating its only item
            # (the original code subscripted `append` here and raised TypeError).
            self.item_list.append(self.item_list[0])
        if predict_length==1:
            # Starting at 1 means a sample always has at least two history items,
            # i.e. only sessions of length >= 3 contribute.
            for i in range(1,len(self.item_list)-1):
                history = self.item_list[max(0,i+1-session_length):i+1]
                padding = [padding_idx]*(session_length-len(history))
                sessions.append(padding+history+[self.item_list[i+1]])
        return self.session_index,sessions

    def __str__(self):
        # Guard against an empty item list so printing never raises.
        first_item = self.item_list[0] if self.item_list else None
        info = " session index = {}\n session id = {} \n the length of item list= {} \n the fisrt item index in item list is {}".format(self.session_index,self.session_id,len(self.item_list),first_item)
        return info

In [5]:
class SessionDataSet(object):
    """Item vocabulary plus the train/test sessions loaded from two pickle files.

    Items receive consecutive integer indexes starting at 1 in order of first
    appearance (``padding_idx`` is registered up front for the ``"<pad>"`` token),
    sessions are numbered in load order across both files, and the fixed-length
    training/testing sample matrices are built lazily and cached per
    ``session_length``.
    """

    def __init__(self,train_file,test_file,padding_idx=0):
        super(SessionDataSet,self).__init__()
        self.index_count = 0
        self.session_count = 0
        self.train_count = 0
        self.test_count = 0
        self.max_session_length = 0

        self.padding_idx = padding_idx
        self.item2index = dict()
        self.index2item = dict()
        self.session2index = dict()
        self.index2session = dict()
        # item index -> number of occurrences seen while loading
        self.item_total_num = dict()
        self.item2index["<pad>"] = padding_idx
        self.index2item[padding_idx] = "<pad>"
        self.train_data = self.load_data(train_file)
        print("training set is loaded, # index: ",len(self.item2index.keys()))
        self.train_count = self.session_count
        print("train_session_num",self.train_count)
        self.test_data = self.load_data(test_file)
        print("testing set is loaded, # index: ",len(self.index2item.keys()))
        print("# item",self.index_count)
        self.test_count = self.session_count-self.train_count
        print("# test session:",self.test_count)
        # Caches filled by get_all_training_data / get_all_testing_data.
        self.all_training_data = []
        self.all_testing_data = []
        self.all_meta_training_data = []
        self.all_meta_testing_data = []
        self.train_session_length = 0
        self.test_session_length = 0

    def _index_item(self,item_id):
        """Return the index of ``item_id`` (registering it if unseen) and count one occurrence."""
        if item_id not in self.item2index:
            self.index_count+=1
            self.item2index[item_id] = self.index_count
            self.index2item[self.index_count] = item_id
            self.item_total_num[self.index_count] = 0
        index = self.item2index[item_id]
        self.item_total_num[index] += 1
        return index

    def _index_row(self,items,target_item):
        """Convert one raw row (history items followed by its target) into item indexes."""
        indexes = [self._index_item(item_id) for item_id in items]
        indexes.append(self._index_item(target_item))
        return indexes

    def _register_session(self,session_id,item_indexes):
        """Assign the next session index to ``session_id`` and wrap it in a SessionData."""
        self.session_count+=1
        self.session2index[session_id] = self.session_count
        self.index2session[self.session_count] = session_id
        if len(item_indexes)>self.max_session_length:
            self.max_session_length = len(item_indexes)
        return SessionData(self.session_count,session_id,item_indexes)

    def load_data(self,file_path):
        """Load sessions from a pickle file.

        The pickle is read as a sequence whose first three entries are parallel
        sequences: session ids, item-id lists and target items. Rows sharing a
        session id are expected to be consecutive; only the FIRST row of each run
        (its items followed by its target) is kept as the session.

        Args:
            file_path: path of the pickle file.

        Returns:
            A list of SessionData objects in file order (empty if the file has no rows).
        """
        # NOTE(security): pickle.load can execute arbitrary code - only load trusted files.
        with open(file_path, 'rb') as data_file:
            data = pickle.load(data_file)
        session_ids = data[0]
        session_data = data[1]
        session_label = data[2]

        result_data = []
        print("# session",len(session_ids))
        if len(session_ids)==0:
            return result_data

        last_session_id = session_ids[0]
        session_item_indexes = self._index_row(session_data[0],session_label[0])

        for session_id,items,target_item in zip(session_ids,session_data,session_label):
            if session_id!=last_session_id:
                # Close the finished session under ITS OWN id before switching to the
                # new one (previously the new id was attached to the old items).
                result_data.append(self._register_session(last_session_id,session_item_indexes))
                last_session_id = session_id
                session_item_indexes = self._index_row(items,target_item)

        # The last session is registered exactly like the others, so it is also
        # recorded in index2session and considered for max_session_length.
        new_session = self._register_session(last_session_id,session_item_indexes)
        result_data.append(new_session)
        print("loaded")
        print(new_session)

        return result_data

    def get_batch(self,batch_size,session_length=10,predict_length=1,all_data=None,phase="train",neg_num=1,sampling_mathod="random"):
        """Yield mini-batches of samples.

        Args:
            batch_size: maximum number of rows per batch (the last batch may be smaller).
            session_length: number of history columns in each sample row.
            predict_length: forwarded to the negative sampler.
            all_data: optional 2-D array of samples; built (and cached) from the
                loaded sessions when omitted.
            phase: ``"train"`` shuffles the rows and adds negative samples; any other
                value iterates in order without negatives.
            neg_num: negatives per row (training only).
            sampling_mathod: negative sampling method (training only).

        Yields:
            ``[sessions, targets, negatives]`` in the training phase, otherwise
            ``[sessions, targets]``. Nothing is yielded when there are no rows.
        """
        if phase == "train":
            if all_data is None:
                all_data = self.get_all_training_data(session_length)
            all_data = all_data[np.random.permutation(all_data.shape[0])]
        elif all_data is None:
            all_data = self.get_all_testing_data(session_length)

        for start in range(0,all_data.shape[0],batch_size):
            batch = all_data[start:start+batch_size]
            if phase =="train":
                yield self.divid_and_extend_negative_samples(batch,session_length=session_length,predict_length=predict_length,neg_num=neg_num,method=sampling_mathod)
            else:
                yield [batch[:,:session_length],batch[:,session_length:]]

    def divid_and_extend_negative_samples(self,batch_data,session_length,predict_length=1,neg_num=1,method="random"):
        """Split a batch into (sessions, targets) and draw negative items for each row.

        With ``method="random"`` every row gets ``neg_num`` distinct item indexes,
        drawn uniformly from ``1 .. index_count``, that do not occur in that row.

        Returns:
            ``[session_items, target_item, neg_items]`` where the first two are column
            slices of ``batch_data`` and ``neg_items`` is a numpy array.
        """
        neg_items = []
        if method == "random":
            for session_and_target in batch_data:
                # Set membership keeps the rejection test cheap; the sequence of
                # random draws is unchanged.
                taken = set(session_and_target.tolist())
                neg_item = []
                for _ in range(neg_num):
                    rand_item = random.randint(1,self.index_count)
                    while rand_item in taken:
                        rand_item = random.randint(1,self.index_count)
                    taken.add(rand_item)
                    neg_item.append(rand_item)
                neg_items.append(neg_item)
        else:
            # NOTE(review): this popularity-ordered branch looks unfinished - the drawn
            # item is discarded and `neg_items` stays empty. Behaviour kept as-is.
            total_list = set()
            for session in batch_data:
                for i in session:
                    total_list.add(i)
            total_list = sorted(total_list, key=lambda item: self.item_total_num[item],reverse=True)
            for _ in batch_data:
                np.random.choice(total_list)
        session_items = batch_data[:,:session_length]
        target_item = batch_data[:,session_length:]
        neg_items = np.array(neg_items)
        return [session_items,target_item,neg_items]

    def _build_samples(self,session_list,session_length):
        """Concatenate the fixed-length samples of every session into one numpy array."""
        all_sessions = []
        for session_data in session_list:
            # The first session_length entries of a sample are the session history,
            # the remaining entry is the target item.
            session_index,sessions = session_data.generate_seq_datas(session_length,padding_idx=self.padding_idx)
            if sessions is not None:
                all_sessions.extend(sessions)
        return np.array(all_sessions)

    def get_all_training_data(self,session_length,predict_length=1):
        """Return (and cache) all training samples for ``session_length``.

        ``predict_length`` is accepted for interface compatibility but is not used.
        """
        if len(self.all_training_data)!=0 and self.train_session_length==session_length:
            # Already built for this session length.
            return self.all_training_data
        print("Start building the all training dataset")
        all_sessions = self._build_samples(self.train_data,session_length)
        self.all_training_data = all_sessions
        self.train_session_length=session_length
        print("The total number of training samples is：",all_sessions.shape)
        return all_sessions

    def get_all_testing_data(self,session_length,predict_length=1):
        """Return (and cache) all testing samples for ``session_length``.

        ``predict_length`` is accepted for interface compatibility but is not used.
        """
        if len(self.all_testing_data)!=0 and self.test_session_length==session_length:
            return self.all_testing_data
        all_sessions = self._build_samples(self.test_data,session_length)
        self.all_testing_data = all_sessions
        self.test_session_length=session_length
        print("The total number of testing samples is：",all_sessions.shape)
        return all_sessions

    # NOTE(review): __getitem__ and __len__ are unimplemented placeholders; __len__
    # returning None makes len(dataset) and bool(dataset) raise TypeError.
    def __getitem__(self,idx):
        pass

    def __len__(self):
        pass

In [6]:
# Build the dataset from one train/test pickle pair; the commented-out lines are the alternative datasets.
# NOTE(review): train_file points under ../data/ while test_file points under ../data/srgnn/ - confirm both roots are intended.
# dataset = SessionDataSet(train_file="../data/retailrocket_gcsan_my/train.txt",test_file="../data/srgnn/retailrocket_gcsan_my/test.txt")
dataset = SessionDataSet(train_file="../data/diginetica_gcsan_my/train.txt",test_file="../data/srgnn/diginetica_gcsan_my/test.txt")
# dataset = SessionDataSet(train_file="../data/yoochoose1_4_gcsan_my/train.txt",test_file="../data/srgnn/yoochoose1_4_gcsan_my/test.txt")
# dataset = SessionDataSet(train_file="../data/yoochoose1_64_gcsan_my/train.txt",test_file="../data/srgnn/yoochoose1_64_gcsan_my/test.txt")

# session 433648
loaded
 session index = 126168
 session id = 946108 
 the length of item list= 3 
 the fisrt item index in item list is 3314
training set is loaded, # index:  36969
train_session_num 126168
# session 15132
loaded
 session index = 130903
 session id = 1582915 
 the length of item list= 6 
 the fisrt item index in item list is 12498
testing set is loaded, # index:  36969
# item 36968
# test session: 4735


In [7]:
def bpr_loss(r):
    """Return the summed BPR loss ``sum(-log(sigmoid(r)))`` of the score differences ``r``.

    ``logsigmoid`` is used instead of ``log(sigmoid(.))`` so that strongly negative
    scores do not underflow to ``log(0) = -inf``.
    """
    return torch.sum(-F.logsigmoid(r))
def get_hit_num(pred,y_truth):
    """Count how many ground-truth items occur in the predictions of their own row.

    pred: numpy array (batch_size, k) of predicted items.
    y_truth: per-row collection of ground-truth items (batch_size, groundtruth_num).
    """
    return sum(
        np.sum(pred[row] == value)
        for row, truths in enumerate(y_truth)
        for value in truths
    )

def get_rr(pred,y_truth):
    """Sum the reciprocal ranks (1-based) at which ground-truth items appear in their row of ``pred``."""
    total = 0.
    for row, truths in enumerate(y_truth):
        for value in truths:
            # Positions (0-based) of this ground-truth item inside the row's predictions.
            for position in np.nonzero(pred[row] == value)[0]:
                total += 1/(position+1)
    return total

def get_dcg(pred,y_truth):
    """Per-row DCG of ``pred`` where only the first ground-truth item of each row is relevant.

    Returns a 1-D array with one DCG value per row.
    """
    relevance = np.zeros_like(pred)
    for row in range(len(y_truth)):
        # 1 where the prediction equals the row's first ground-truth item, 0 elsewhere.
        relevance[row] = pred[row] == y_truth[row][0]
    gain = 2 ** relevance - 1
    rank_discount = np.log2(np.arange(2, pred.shape[1] + 2))
    discounts = np.tile(rank_discount,(len(y_truth),1))
    return np.sum(gain / discounts,axis=1)

def get_ndcg(pred,y_truth):
    """Sum over rows of DCG(pred) divided by the DCG of an ideal ranking.

    The ideal ranking places the ground truth first and fills the remaining
    slots with -1. ``y_truth`` is concatenated column-wise with that filler,
    so it has to be a 2-D array with the same number of rows as ``pred``.
    """
    actual = get_dcg(pred, y_truth)
    filler = np.zeros_like(pred)[:,:-1]-1
    ideal_ranking = np.concatenate((y_truth,filler),axis=1)
    ideal = get_dcg(ideal_ranking, y_truth)
    return np.sum(actual / ideal)

def dcg_score(y_pre, y_true, k):
    """DCG@k of the ranked list ``y_pre`` with binary relevance given by membership in ``y_true``."""
    relevance = np.zeros(k)
    # Only the first k predictions can contribute.
    for rank, tag in enumerate(y_pre[:k]):
        if tag in y_true:
            relevance[rank] = 1
    gain = 2 ** relevance - 1
    discounts = np.log2(np.arange(k) + 2)
    return np.sum(gain / discounts)


def ndcg_score(y_pre, y_true, k=5):
    """NDCG@k: DCG of ``y_pre`` divided by the DCG obtained when ``y_true`` itself is the ranking."""
    achieved = dcg_score(y_pre, y_true, k)
    ideal = dcg_score(y_true, y_true, k)
    return achieved / ideal

# Training criterion: train() applies it to the model's item scores and the target item indexes.
loss_function = torch.nn.CrossEntropyLoss()

In [8]:
# SelfAttention Layer
class SelfAttention(torch.nn.Module):
    """Self-attention pooling over a (possibly padded) sequence.

    Inputs are expected to have ``2 * hidden_size`` features per position (the
    linear layers are created with that width). ``forward`` returns one pooled
    vector per sequence together with the total attention each position received.

    Args:
        method: ``"dot"`` (separate query/key projections) or ``"general"``
            (a single projection compared against the raw inputs).
        hidden_size: half of the per-position feature width.
        activate: ``"relu"``, ``"tanh"``, ``"elu"`` or ``"selu"``; any other value
            selects the sigmoid.
        dropout: dropout probability applied to the projections while training.

    Raises:
        ValueError: if ``method`` is not ``"dot"`` or ``"general"``.
    """

    def __init__(self, method, hidden_size,activate="selu",dropout=0):
        super(SelfAttention, self).__init__()
        self.config = list()
        # Attention scoring variant in use.
        self.method = method
        # Hidden size; the attention layers operate on vectors twice this wide.
        self.hidden_size = hidden_size
        if self.method not in ['dot', 'general']:
            raise ValueError(self.method, "Attention method do not exists.")

        feature_size = self.hidden_size*2
        if self.method == "dot":
            self.query = torch.nn.Linear(feature_size, feature_size)
            self.key = torch.nn.Linear(feature_size, feature_size)
            torch.nn.init.constant_(self.query.bias,0)
            torch.nn.init.constant_(self.key.bias,0)

        if self.method == "general":
            self.attention = torch.nn.Linear(feature_size, feature_size)
            torch.nn.init.constant_(self.attention.bias,0)

        if activate == "relu":
            self.activate = torch.relu
        elif activate == "tanh":
            self.activate = torch.tanh
        elif activate == "elu":
            self.activate = torch.nn.ELU()
        elif activate == "selu":
            self.activate = torch.selu
        else:
            self.activate = torch.sigmoid

        self.dropout = torch.nn.Dropout(dropout)
        # Gradient clipping used to be called here; it had no effect because no
        # gradients exist at construction time, so it was dropped.

    def _regularize(self, tensor, is_train):
        """Apply dropout only while training."""
        return self.dropout(tensor) if is_train else tensor

    def dot_score(self, encoder_output,is_train=True,weights=None):
        """Pairwise scores ``query @ key^T``.

        When ``weights`` is given it supplies ``(W_q, b_q, W_k, b_k)`` used instead of
        the module's own layers, and no dropout is applied.
        """
        if weights is None:
            query = self._regularize(self.activate(self.query(encoder_output)),is_train)
            key = self._regularize(self.activate(self.key(encoder_output)),is_train)
        else:
            query = self.activate(torch.matmul(encoder_output,weights[0].t())+weights[1])
            key = self.activate(torch.matmul(encoder_output,weights[2].t())+weights[3])
        return query.bmm(key.permute(0, 2, 1))

    def general_score(self, encoder_output,is_train=True,weights=None):
        """Pairwise scores ``x @ f(x)^T`` with a single projection ``f``.

        When ``weights`` is given it supplies ``(W, b)`` used instead of the module's
        own layer, and no dropout is applied.
        """
        if weights is None:
            energy = self._regularize(self.activate(self.attention(encoder_output)),is_train)
        else:
            energy = self.activate(torch.matmul(encoder_output,weights[0].t())+weights[1])
        return encoder_output.bmm(energy.permute(0, 2, 1))

    def forward(self, encoder_outputs, mask=None,is_train=True):
        """Pool a sequence with self-attention.

        Args:
            encoder_outputs: tensor of shape (batch_size, length, dim).
            mask: optional (batch_size, length) tensor, true/non-zero at padded positions.
            is_train: apply dropout to the projections when true.

        Returns:
            ``(outputs, sa_weights)`` with shapes (batch_size, dim) and
            (batch_size, length). ``outputs`` is the mean of the attended vectors
            over the non-padded positions; ``sa_weights[b, j]`` is the attention
            position ``j`` received summed over all query positions.
        """
        if self.method == "general":
            attention_energies = self.general_score(encoder_outputs,is_train=is_train)
        else:
            # Only "dot" is left; the constructor rejects every other method.
            attention_energies = self.dot_score(encoder_outputs,is_train=is_train)

        # (batch_size, length, length), scaled by sqrt(hidden_size).
        attention_energies = attention_energies / torch.sqrt(torch.tensor(self.hidden_size, dtype=torch.float))
        if mask is not None:
            valid = 1 - mask.float()
            # 0 where both the query and the key position are real items, 1 otherwise.
            new_mask = 1 - valid.unsqueeze(2).bmm(valid.unsqueeze(1))

            attention_energies = attention_energies - new_mask*1e12
            weights = F.softmax(attention_energies, dim=2)
            # Rows belonging to padded positions become uniform after the softmax; zero them.
            weights = weights*(1-new_mask)
            # (batch_size, length, length) x (batch_size, length, dim) -> (batch_size, length, dim)
            outputs = weights.bmm(encoder_outputs)
            # Average over the real items; the clamp keeps fully padded rows at 0 instead of NaN.
            valid_count = valid.sum(dim=1).clamp(min=1).view(-1,1,1)
            outputs = (outputs / valid_count).sum(dim=1)
        else:
            weights = F.softmax(attention_energies, dim=2)
            # (batch_size, length, length) x (batch_size, length, dim) -> (batch_size, dim)
            outputs = weights.bmm(encoder_outputs).sum(dim=1) / encoder_outputs.shape[1]
        # No squeeze here: squeezing used to drop the length axis for sequences of length 1.
        sa_weights = weights.sum(dim=1)
        return outputs, sa_weights
    

In [9]:
class POEM(torch.nn.Module):
    """Session-based recommender scoring every item for a padded session.

    A session is a (batch_size, length) tensor of item indexes, padded with
    ``padding_idx``; the most recent item is read from the last column. Row 0 of
    the item embedding is reserved for padding (it is zero-initialised and
    excluded from the output), so column ``j`` of the returned scores refers to
    item index ``j + 1``.

    Args:
        hidden_size: embedding width.
        itemNum: number of rows of the item embedding (including the padding row).
        posNum: number of rows of the position tables; must exceed the session length.
        padding_idx: index marking padded slots.
        dropout: dropout probability used during ``forward``.
        attention_method: passed to SelfAttention (``"dot"`` or ``"general"``).
        head_num: stored but not used by this implementation.
        activate: ``"sigmoid"``, ``"tanh"``, ``"relu"`` or ``"elu"``; anything else selects selu.
        session_length: stored but not used by this implementation.
        delta: scale applied to the normalised session representation.
    """

    def __init__(self, hidden_size=64, itemNum=0, posNum=0, padding_idx=0, dropout=0.5,attention_method="dot",head_num=4,
                 activate="selu",session_length=20,delta=16.0):
        super(POEM, self).__init__()
        self.padding_idx = padding_idx
        self.hidden_size = hidden_size
        self.head_num = head_num
        self.delta = delta
        self.session_length = session_length
        if activate == "sigmoid":
            self.activate = torch.sigmoid
        elif activate == "tanh":
            self.activate = torch.tanh
        elif activate == "relu":
            self.activate = torch.relu
        elif activate == "elu":
            self.activate = torch.nn.ELU()
        else:
            self.activate = torch.selu
        self.dropout = torch.nn.Dropout(dropout)

        self.item_embedding = torch.nn.Embedding(itemNum, hidden_size, padding_idx=self.padding_idx,max_norm=1.5)

        self.position_embedding = torch.nn.Embedding(posNum,hidden_size,padding_idx=self.padding_idx,max_norm=1.5)

        # One scalar weight per position.
        self.position_weights = torch.nn.Embedding(posNum,1,padding_idx=self.padding_idx)

        # No explicit device placement: the submodule follows the parent when the
        # caller moves the model with `.to(...)`.
        self.self_attention = SelfAttention(attention_method, hidden_size,activate=activate,dropout=dropout)
        torch.nn.init.constant_(self.item_embedding.weight[0],0)
        torch.nn.init.constant_(self.position_embedding.weight[0],0)
        torch.nn.init.constant_(self.position_weights.weight,1)
        torch.nn.init.constant_(self.position_weights.weight[0],0)

        self.gen_mlp = torch.nn.Linear(hidden_size*2, hidden_size)
        self.cur_mlp = torch.nn.Linear(hidden_size, hidden_size)
        self.deep_mlp = torch.nn.Linear(hidden_size*3, hidden_size,bias=False)

    def _score_items(self, session, is_train):
        """Compute the (batch_size, itemNum - 1) score matrix; dropout is applied only when ``is_train``."""
        regularize = self.dropout if is_train else (lambda tensor: tensor)

        valid = (session!=self.padding_idx).float()
        # Number of real items per session; clamped so an all-padding row divides by 1, not 0.
        length = valid.sum(dim=1,keepdim=True).clamp(min=1)
        mask = valid.unsqueeze(2)
        session_item_embeddings = F.normalize(self.item_embedding(session),dim=-1)*mask
        # Positions count down from `length` to 1, so the last column always has position 1.
        positions = session.shape[1] - torch.arange(0,session.shape[1],device=session.device).unsqueeze(0).expand_as(session)
        session_position_embeddings = regularize(self.position_embedding(positions))*mask
        session_item_vecs = torch.cat((session_item_embeddings,session_position_embeddings), dim=2)
        attention_mask = (session == self.padding_idx)
        sa_output, sa_weights = self.self_attention(session_item_vecs, attention_mask,is_train=is_train)
        session_position_weights = regularize(self.position_weights(positions))*mask
        # Item embeddings weighted by position weight and received attention, averaged over real items.
        weighted_items = session_item_embeddings * session_position_weights * sa_weights.unsqueeze(2)
        psa_output = torch.sum(weighted_items, dim=1)/length
        last_item = session_item_embeddings[:,-1]
        gen_output = regularize(self.activate(self.gen_mlp(sa_output)))
        cur_output = regularize(self.activate(self.cur_mlp(last_item)))
        deep_output = regularize(self.activate(self.deep_mlp(torch.cat((sa_output,last_item),1))))
        session_output = F.normalize(gen_output * cur_output + deep_output + psa_output,dim=-1)
        session_output = session_output*self.delta
        # Skip row 0 (padding) so that score column j corresponds to item index j + 1.
        item_embedding_weight = F.normalize(self.item_embedding.weight[1:],dim=-1)
        return torch.matmul(session_output,item_embedding_weight.t())

    def forward(self, session,item=None,bpr_loss=False,neg_num=50):
        """Return training-time item scores (dropout active) of shape (batch_size, itemNum - 1).

        ``item``, ``bpr_loss`` and ``neg_num`` are accepted for interface
        compatibility but are not used.
        """
        return self._score_items(session, is_train=True)

    def predict_top_k(self, session, k=20):
        """Return the column indexes of the ``k`` best-scoring items per session (no dropout).

        Add 1 to a returned value to obtain the item index.
        """
        result = self._score_items(session, is_train=False)
        return torch.topk(result,k,dim=1)[1]
        

In [10]:
# Overrides the epoch count set in the configuration cell.
epochs=50
def train(args):
    """Train a POEM model with early stopping and return the best epoch's model.

    After every epoch the model is evaluated on the test split at cut-offs
    1, 5, 10 and 20; the epoch with the highest HR@20 + MRR@20 is kept.

    Args:
        args: dict of hyper-parameters. Recognised keys (default in brackets):
            ``hidden_size`` [100], ``dropout`` [0.5], ``method`` ["general"],
            ``lr`` [5e-4], ``weight_decay`` [1e-5], ``amsgrad`` [True],
            ``session_length`` [20], ``delta`` [20], ``patience`` [5].

    Returns:
        ``(best_model, best_model_hr, best_model_mrr)`` - a deep copy of the model at
        its best epoch together with its HR@20 and MRR@20. ``best_model`` is None if
        no epoch scored above zero.
    """
    hidden_size = args.get("hidden_size",100)
    dropout = args.get("dropout",0.5)
    attention_method = args.get("method","general")
    lr = args.get("lr",5e-4)
    weight_decay = args.get("weight_decay",1e-5)
    amsgrad = args.get("amsgrad",True)
    session_length = args.get("session_length",20)
    delta = args.get("delta",20)
    patience = args.get("patience",5)
    # itemNum/posNum include the padding row, hence the +1.
    model = POEM(hidden_size=hidden_size, itemNum=dataset.index_count+1, posNum=session_length+1, padding_idx=0, dropout=dropout,
                 activate="selu",attention_method=attention_method,session_length=session_length,delta=delta).to(device)
    opti = torch.optim.Adam(model.parameters(),lr=lr,weight_decay=weight_decay,amsgrad=amsgrad)
    best_model_hr = 0.0
    best_model_mrr = 0.0
    best_r1m = 0.0
    best_model = None
    predict_nums = [1,5,10,20]
    no_improvement_epoch = 0
    for epoch in range(epochs):
        batch_losses = []
        for i,batch_data in enumerate(dataset.get_batch(batch_size,session_length,phase="train")):
            sessions = torch.tensor(batch_data[0]).to(device)
            # Flatten (batch, 1) -> (batch,). A bare squeeze() would also drop the batch
            # axis when the last batch holds a single sample. The -1 maps item index j to
            # score column j-1 because the model does not score the padding item.
            target_items = torch.tensor(batch_data[1]).reshape(-1).to(device)-1
            result_pos = model(sessions)
            loss = loss_function(result_pos,target_items)
            opti.zero_grad()
            loss.backward()
            opti.step()
            batch_losses.append(loss.item())
            if i % plot_num == 0:
                timestamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
                print("[%s] [%d/%d] %d mean_batch_loss : %0.6f" % (timestamp, epoch+1, epochs, i, np.mean(batch_losses)))
                batch_losses = []
        with torch.no_grad():
            start_test_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            print("Start predicting",start_test_time)
            rrs = [0 for _ in range(len(predict_nums))]
            hit_nums = [0 for _ in range(len(predict_nums))]
            ndcgs = [0 for _ in range(len(predict_nums))]
            for i,batch_data in enumerate(dataset.get_batch(batch_size,session_length,phase="test")):

                sessions = torch.tensor(batch_data[0]).to(device)
                target_items = np.array(batch_data[1])-1
                # Rank the top 20 once and evaluate every cut-off on a prefix of it.
                y_pred = model.predict_top_k(sessions,20).cpu().numpy()

                for j,predict_num in enumerate(predict_nums):
                    hit_nums[j]+=get_hit_num(y_pred[:,:predict_num],target_items)
                    rrs[j]+=get_rr(y_pred[:,:predict_num],target_items)
                    ndcgs[j]+=get_ndcg(y_pred[:,:predict_num],target_items)

            end_test_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

            test_sample_num = len(dataset.all_testing_data)
            hrs = [hit_num/test_sample_num for hit_num in hit_nums]
            mrrs = [rr/test_sample_num for rr in rrs]
            mndcgs = [ndcg/test_sample_num for ndcg in ndcgs]
            # Model selection criterion: HR@20 + MRR@20.
            if hrs[-1] + mrrs[-1] > best_r1m:
                print("change best")
                best_model = deepcopy(model)
                best_model_hr = hrs[-1]
                best_model_mrr = mrrs[-1]
                best_r1m = hrs[-1] + mrrs[-1]
                no_improvement_epoch = 0
            else:
                no_improvement_epoch +=1
            print("testing finish [%s] "%end_test_time)
            for k,predict_num in enumerate(predict_nums):
                print("\tHR@%d=%.5f  MRR@%d=%.5f  NDCG@%d=%.5f"%(predict_num,hrs[k],predict_num,mrrs[k],predict_num,mndcgs[k]))
        if no_improvement_epoch>=patience:
            print("early stopping")
            break
    return best_model,best_model_hr,best_model_mrr

# CIKM-Session>2
    HR@20=0.67804  MRR@20=0.33200, hyper-parameters: session_length-20, hidden_size-100, lr-0.0010,delta=16.0, amsgrad-True, method-general, dropout-0.5, weight_decay-0.000000. 
        HR@1=0.21631  MRR@1=0.21631  NDCG@1=0.21631
        HR@5=0.46975  MRR@5=0.31073  NDCG@5=0.35037
        HR@10=0.57556  MRR@10=0.32485  NDCG@10=0.38459
        HR@20=0.67804  MRR@20=0.33200  NDCG@20=0.41054
# RR-Session>2
    HR@20=0.57600  MRR@20=0.31254, hyper-parameters: session_length-20, hidden_size-100, lr-0.0010,delta=16.0, amsgrad-True, method-general, dropout-0.5, weight_decay-0.000000. 
        HR@1=0.22548  MRR@1=0.22548  NDCG@1=0.22548
        HR@5=0.42030  MRR@5=0.29653  NDCG@5=0.32730
        HR@10=0.49914  MRR@10=0.30720  NDCG@10=0.35294
        HR@20=0.57600  MRR@20=0.31254  NDCG@20=0.37238
# RSC64-Session>2
    HR@20=0.71005  MRR@20=0.29621, hyper-parameters: session_length-20, hidden_size-100, lr-0.0005,delta=16.0, amsgrad-True, method-general, dropout-0.5, weight_decay-0.000000. 
        HR@1=0.16425  MRR@1=0.16425  NDCG@1=0.16425
        HR@5=0.46047  MRR@5=0.26957  NDCG@5=0.31691
        HR@10=0.60094  MRR@10=0.28852  NDCG@10=0.36253
        HR@20=0.71005  MRR@20=0.29621  NDCG@20=0.39027
# RSC4-Session>2
    HR@20=0.72369  MRR@20=0.30340, hyper-parameters: session_length-20, hidden_size-100, lr-0.0005,delta=16.0, amsgrad-True, method-general, dropout-0.3, weight_decay-0.000000. 
        HR@1=0.16948  MRR@1=0.16948  NDCG@1=0.16948
        HR@5=0.46912  MRR@5=0.27635  NDCG@5=0.32416
        HR@10=0.61063  MRR@10=0.29543  NDCG@10=0.37012
        HR@20=0.72369  MRR@20=0.30340  NDCG@20=0.39886

In [11]:
# Hyper-parameter grid: every combination of the lists below is trained once.
hidden_sizes = [100]
dropouts = [0.5]
attention_methods = ["general"]
lrs = [1e-3]
session_lengths = [20]
weight_decays = [0]
patience = 5
deltas = [16.0]
amsgrads = [True]
best_params = ""
# Best model over the whole grid (None until a run scores above zero).
best_all_model = None
best_all_hr = 0.0
best_all_mrr = 0.0
best_all_r1m = 0.0
# itertools.product iterates in the same order as nesting the loops in this sequence.
for session_length,hidden_size,amsgrad,attention_method,dropout,weight_decay,lr,delta in itertools.product(
        session_lengths,hidden_sizes,amsgrads,attention_methods,dropouts,weight_decays,lrs,deltas):
    param_values = (session_length,hidden_size,lr,delta,str(amsgrad),attention_method,dropout,weight_decay)
    print("current model hyper-parameters: session_length=%d, hidden_size=%d, lr=%.4f,delta=%.1f, amsgrad=%s, method=%s, dropout=%.1f, weight_decay=%.6f. \n" % param_values)
    args = {
        "session_length": session_length,
        "hidden_size": hidden_size,
        "amsgrad": amsgrad,
        "method": attention_method,
        "dropout": dropout,
        "weight_decay": weight_decay,
        "lr": lr,
        "delta": delta,
        "patience": patience,
    }
    best_model,best_model_hr,best_model_mrr = train(args)
    # Same selection criterion as inside train(): HR@20 + MRR@20.
    if best_model_hr + best_model_mrr > best_all_r1m:
        print("best model change")
        best_all_r1m = best_model_hr + best_model_mrr
        best_all_hr = best_model_hr
        best_all_mrr = best_model_mrr
        best_all_model = best_model
        best_params = "session_length-%d, hidden_size-%d, lr-%.4f,delta=%.1f, amsgrad-%s, method-%s, dropout-%.1f, weight_decay-%.6f"%param_values
    # Drop this run's reference so a model that was not selected can be freed.
    best_model = None
    print("current model hyper-parameters: session_length=%d, hidden_size=%d, lr=%.4f,delta=%.1f, amsgrad=%s, method=%s, dropout=%.1f, weight_decay=%.6f. \n" % param_values)
    print("current model HR@20=%.5f  MRR@20=%.5f."%(best_model_hr,best_model_mrr))
    print("the best result so far. HR@20=%.5f  MRR@20=%.5f， hyper-parameters: %s. \n"%(best_all_hr,best_all_mrr,best_params))
print("The best result HR@20=%.5f  MRR@20=%.5f, hyper-parameters: %s. "%(best_all_hr,best_all_mrr,best_params))
print("over.")

current model hyper-parameters: session_length=20, hidden_size=100, lr=0.0010,delta=16.0, amsgrad=True, method=general, dropout=0.5, weight_decay=0.000000. 

Start building the all training dataset
The total number of training samples is： (433648, 21)
[2019-12-26 20:34:16] [1/50] 0 mean_batch_loss : 9.858598
Start predicting 2019-12-26 20:34:31
The total number of testing samples is： (15132, 21)
change best
testing finish [2019-12-26 20:34:32] 
	HR@1=0.20011  MRR@1=0.20011  NDCG@1=0.20011
	HR@5=0.35065  MRR@5=0.26428  NDCG@5=0.28624
	HR@10=0.37483  MRR@10=0.26753  NDCG@10=0.29408
	HR@20=0.39671  MRR@20=0.26905  NDCG@20=0.29961
[2019-12-26 20:34:32] [2/50] 0 mean_batch_loss : 6.961129
Start predicting 2019-12-26 20:34:44
change best
testing finish [2019-12-26 20:34:45] 
	HR@1=0.21425  MRR@1=0.21425  NDCG@1=0.21425
	HR@5=0.38805  MRR@5=0.28305  NDCG@5=0.30944
	HR@10=0.43438  MRR@10=0.28933  NDCG@10=0.32452
	HR@20=0.47363  MRR@20=0.29209  NDCG@20=0.33449
[2019-12-26 20:34:45] [3/50] 0 mea

In [12]:
# Display the learned per-position scalar weights of the best model found by the search (row 0 is the padding position).
best_all_model.position_weights.weight

Parameter containing:
tensor([[0.0000],
        [0.8468],
        [0.6264],
        [1.2987],
        [1.3130],
        [1.9952],
        [1.8754],
        [2.1132],
        [2.0780],
        [1.9817],
        [2.0239],
        [1.9664],
        [1.9574],
        [1.9945],
        [1.8578],
        [1.8428],
        [1.9018],
        [1.7739],
        [1.8916],
        [1.9151],
        [1.6250]], device='cuda:0', requires_grad=True)

In [13]:
# best_all_model.gate_weights.weight