In [None]:
import pandas as pd
import numpy as np

import torch
from torch import nn
import torch.nn.functional as F
from torch.autograd import Variable
import torch.optim as optim

import time
import datetime
import os
from tqdm import tqdm

In [None]:
# Output directory, created up front if it does not exist yet.
# exist_ok=True avoids the check-then-create race of a separate os.path.exists test.
result_dir = "Results"
os.makedirs(result_dir, exist_ok=True)

In [None]:
class Dataset(object):
    """Click log read from a CSV file, with every item mapped to a contiguous index.

    Attributes:
        data: DataFrame of the CSV rows joined with the item map and sorted by
            (SessionId, Time). Rows whose ItemId is absent from the item map
            are dropped by the inner join.
        itemMap: DataFrame with the columns ItemId and ItemIdx.
        sessionsArray: cumulative row offsets of the sessions in ``data``;
            session ``i`` occupies rows ``sessionsArray[i]:sessionsArray[i + 1]``.
        nItems: number of rows in ``itemMap``.
    """

    def __init__(self, path, itemMap=None):
        """Load ``path`` and index its items.

        Args:
            path: CSV file with the columns SessionId, ItemId and Time.
            itemMap: optional item map to reuse (e.g. the training map when
                loading evaluation data). When omitted, a new map is built
                from the unique item ids of this file in order of appearance.
        """
        self.sessionKey = 'SessionId'
        self.itemKey = 'ItemId'
        self.timeKey = 'Time'
        self.ItemIdx = 'ItemIdx'

        column_types = {self.sessionKey: int, self.itemKey: int, self.timeKey: float}
        raw_frame = pd.read_csv(path, sep=',', dtype=column_types)

        # Build the item map unless the caller supplied one.
        if itemMap is None:
            unique_ids = raw_frame[self.itemKey].unique()
            itemMap = pd.DataFrame({
                self.itemKey: unique_ids,
                self.ItemIdx: np.arange(len(unique_ids)),
            })
        self.itemMap = itemMap

        # Attach the item index to every click, then order clicks per session by time.
        joined = pd.merge(raw_frame, self.itemMap, on=self.itemKey, how='inner')
        self.data = joined.sort_values([self.sessionKey, self.timeKey])

        # Session start offsets: a leading 0 followed by the running total of session sizes.
        clicks_per_session = self.data.groupby(self.sessionKey).size()
        self.sessionsArray = np.r_[0, clicks_per_session.cumsum().values]

        self.nItems = len(self.itemMap)

class DataGenerator():
    """Session-parallel mini-batch generator over a ``Dataset``.

    ``batchSize`` sessions are walked in parallel; at every step the current
    item of each active session is the input and the following item of the
    same session is the target. When a session runs out of items its slot is
    refilled with the next session (sessions are ordered by their earliest
    timestamp).

    Iterating yields:
        * training mode (``isTrain=True``):
          ``(input, target, negatives, finished_mask, valid_mask)``
        * evaluation mode (``isTrain=False``):
          ``(input, target, finished_mask, valid_mask)``

    ``input`` / ``target`` / ``negatives`` are ``torch.LongTensor`` objects of
    item indices. ``finished_mask`` and ``valid_mask`` are NumPy boolean arrays
    sized like the *previous* batch: ``finished_mask`` marks slots whose
    session was just replaced, ``valid_mask`` marks slots that are still in
    use. Both arrays are reset in place after each yield, so consumers must
    read (or copy) them before requesting the next batch.

    Note: ``__iter__`` indexes the first ``batchSize`` sessions directly, so
    the dataset needs at least ``batchSize`` sessions.
    """

    def __init__(self, dataset, isTrain=True, batchSize=32, nSample=2048, sampleBuffer=10000000):
        """
        Args:
            dataset: object exposing ``data``, ``nItems``, ``sessionsArray``,
                ``itemMap``, ``itemKey``, ``sessionKey`` and ``timeKey``.
            isTrain: when True, popularity-based negative samples are
                generated and yielded with every batch.
            batchSize: number of sessions processed in parallel.
            nSample: number of negative samples drawn per batch.
            sampleBuffer: total number of negative samples generated at once;
                ``sampleBuffer // nSample`` batches are served per refill.
        """
        self.data = dataset.data
        self.nItems = dataset.nItems
        self.sessionsArray = dataset.sessionsArray

        self.batchSize = batchSize
        self.nSample = nSample
        self.sampleBuffer = sampleBuffer
        self.isTrain = isTrain

        # Negative sampling proportional to item popularity is only needed for training.
        if self.isTrain:
            self.popularArray = self.createPopular(dataset.itemMap, dataset.itemKey)
        # Session positions ordered by each session's earliest timestamp.
        self.sessionByTime = np.argsort(self.data.groupby(dataset.sessionKey)[dataset.timeKey].min().values)
        # Estimated number of batches; consumers pass it to tqdm as the progress total.
        self.totalIters = ((len(self.data) - len(self.sessionsArray)) // self.batchSize)

    def __iter__(self):
        """Yield one batch per time step; see the class docstring for the tuple layout."""
        data_items = self.data.ItemIdx.values
        sessions_array = self.sessionsArray
        session_by_time = self.sessionByTime

        iterators = np.arange(self.batchSize)
        max_iterator = iterators.max()
        start_positions = sessions_array[session_by_time[iterators]]
        end_positions = sessions_array[session_by_time[iterators] + 1]
        num_sessions = len(sessions_array) - 1
        finished_mask = (end_positions - start_positions <= 1)
        valid_mask = (iterators < num_sessions)

        # Repeat until every session has been consumed.
        finished = False
        while not finished:
            # All active sessions can advance together for (shortest remaining length - 1) steps.
            min_length = (end_positions - start_positions).min()
            out_index = data_items[start_positions]

            for i in range(min_length - 1):
                in_index = out_index  # (active batch size,)
                out_index = data_items[start_positions + i + 1]  # (active batch size,)

                input_tensor = torch.LongTensor(in_index)
                output_tensor = torch.LongTensor(out_index)

                if self.isTrain:
                    negative_sample_tensor = torch.LongTensor(self._get_sample())
                    yield input_tensor, output_tensor, negative_sample_tensor, finished_mask, valid_mask
                else:
                    # Evaluation batches carry no negative samples.
                    yield input_tensor, output_tensor, finished_mask, valid_mask

                # The masks only describe the transition into the first step of a run.
                finished_mask[:] = False
                valid_mask[:] = True

            start_positions += min_length - 1
            finished_mask = (end_positions - start_positions <= 1)
            num_finished = finished_mask.sum()
            # Hand the next unseen sessions to the slots that just finished.
            iterators[finished_mask] = max_iterator + np.arange(1, num_finished + 1)
            max_iterator += num_finished

            valid_mask = (iterators < num_sessions)

            if valid_mask.sum() == 0:
                finished = True
                break

            # Point exhausted slots at session 0 so the lookups below stay in range;
            # those slots are dropped right after.
            iterators[~valid_mask] = 0
            session_updates = session_by_time[iterators[finished_mask]]
            start_positions[finished_mask] = sessions_array[session_updates]
            end_positions[finished_mask] = sessions_array[session_updates + 1]
            iterators = iterators[valid_mask]
            start_positions = start_positions[valid_mask]
            end_positions = end_positions[valid_mask]

    def _get_sample(self):
        """Return the next row of pre-generated negative samples, refilling the buffer when exhausted."""
        if self.samplePointer == self.generatelength:
            self.negativSamples = self.generateNegSamples(self.popularArray, self.generatelength)
            self.samplePointer = 0
        sample = self.negativSamples[self.samplePointer]
        self.samplePointer += 1
        return sample

    def generateNegSamples(self, popularArray, length):
        """Draw ``length`` rows of ``nSample`` item indices by inverting the cumulative distribution ``popularArray``."""
        sample = np.searchsorted(popularArray, np.random.rand(self.nSample * length))
        sample = sample.reshape((length, self.nSample))
        return sample

    def createPopular(self, itemMap, itemKey):
        """Build the cumulative popularity distribution and fill the first negative-sample buffer.

        Args:
            itemMap: DataFrame whose ``itemKey`` column lists the item ids in index order.
            itemKey: name of the item id column.

        Returns:
            1-D array of cumulative click shares, ordered like ``itemMap``.
        """
        popularArray = self.data.groupby(itemKey).size()
        itemIds = itemMap.loc[:, itemKey].values

        # Cumulative probability distribution over items, ordered like the item map.
        popularArray = popularArray[itemIds].values
        popularArray = popularArray.cumsum() / popularArray.sum()

        # Number of batches one buffer can serve = buffer size // samples per batch.
        self.generatelength = (self.sampleBuffer // self.nSample)
        self.negativSamples = self.generateNegSamples(popularArray, self.generatelength)
        self.samplePointer = 0

        return popularArray

In [None]:
# Location of the input CSV files.
dataFolder = '../../../data/'
trainDataFile = 'Train.csv'
validDataFile = 'Valid.csv'

# Column names of the click log.
sessionKey = 'SessionId'
itemKey = 'ItemId'
timeKey = 'Time'

trainPath = os.path.join(dataFolder, trainDataFile)
validPath = os.path.join(dataFolder, validDataFile)

# The validation set reuses the training item map so item indices agree between the two.
trainDataSet = Dataset(trainPath)
validDataSet = Dataset(validPath, itemMap=trainDataSet.itemMap)

In [None]:
class GRU4Rec(nn.Module):
    """Single-step GRU recommender: one-hot item input -> GRU -> linear layer -> tanh scores.

    The module places itself on the GPU when CUDA is available, otherwise on the CPU.
    """

    def __init__(self, inputSize, outputSize):
        """
        Args:
            inputSize: width of the one-hot input vector (number of items).
            outputSize: number of items scored by the output layer.
        """
        super(GRU4Rec, self).__init__()

        self.inputSize = inputSize
        self.outputSize = outputSize
        self.batchSize = 32
        self.hiddenSize = 100
        self.nLayers = 1
        # The following settings are stored but not read anywhere in this class.
        self.sigma = 0.0
        self.negative = True

        self.embeddingDim = -1

        self.dropoutHidden = 0.0
        self.dropoutEmbed = 0.0
        self.initAsNormal = False

        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        self.gru = nn.GRU(self.inputSize, self.hiddenSize, self.nLayers, bias=False, dropout=self.dropoutHidden)
        self.linear = nn.Linear(self.hiddenSize, self.outputSize)
        self.Tanh = nn.Tanh()

        self.initParams()

        self.to(self.device)

        # Reusable one-hot scratch buffer. It is a plain attribute (not a registered
        # buffer), so it is allocated directly on the target device, and it is as
        # wide as the GRU input.
        self.onehotBuffer = torch.zeros(self.batchSize, self.inputSize, device=self.device)

    def forward(self, input, hidden, target=None):
        """Score items for one time step.

        Args:
            input: LongTensor of item indices, one per active session.
            hidden: GRU hidden state of shape [nLayers, batch, hiddenSize].
            target: optional LongTensor of item indices. When given, only the
                score columns of these items are returned; when omitted, the
                scores of all items are returned.

        Returns:
            (scores, new_hidden); scores is [batch, len(target)] or [batch, outputSize].
        """
        # 1-of-N encoding
        embedded = self.onehotEncode(input)  # [batch, inputSize]
        embedded = embedded.unsqueeze(0)  # [1, batch, inputSize]

        output, hNew = self.gru(embedded, hidden)  # [1, batch, hiddenSize]
        output = output.view(-1, output.size(-1))  # [batch, hiddenSize]
        output = self.linear(output)  # [batch, outputSize]
        if target is not None:
            output = output[:, target.view(-1)]  # [batch, len(target)]
        output = self.Tanh(output)

        return output, hNew

    def onehotEncode(self, input):
        """Return a float one-hot matrix [len(input), inputSize].

        The result is a view of a shared scratch buffer whenever the batch fits
        into it, so it is overwritten by the next call.
        """
        index = input.view(-1, 1)
        rows = index.size(0)
        if rows <= self.onehotBuffer.size(0) and self.onehotBuffer.device == index.device:
            onehot = self.onehotBuffer[:rows]
            onehot.zero_()
        else:
            # Batch larger than the scratch buffer (or on another device): allocate a fresh matrix.
            onehot = torch.zeros(rows, self.inputSize, device=index.device)
        onehot.scatter_(1, index, 1)

        return onehot

    def initHidden(self, batchSize):
        """Return a zero hidden state [nLayers, batchSize, hiddenSize] on the model device."""
        h0 = torch.zeros(self.nLayers, int(batchSize), self.hiddenSize, device=self.device)
        return h0

    def resetHidden(self, hidden, finishedMask, validMask):
        """Zero the state of slots whose session was replaced and drop slots no longer in use.

        Args:
            hidden: hidden state [nLayers, batch, hiddenSize].
            finishedMask: boolean array-like [batch]; True where the state must be reset.
            validMask: boolean array-like [batch]; False where the slot must be removed.

        Returns:
            The updated hidden state, detached from the autograd graph.
        """
        # Detach first so the in-place reset never touches the autograd history.
        hidden = hidden.detach()
        finished = torch.as_tensor(finishedMask, dtype=torch.bool, device=hidden.device)
        valid = torch.as_tensor(validMask, dtype=torch.bool, device=hidden.device)

        if finished.any():
            hidden[:, finished, :] = 0

        if not valid.all():
            hidden = hidden[:, valid, :]

        return hidden

    def initParams(self):
        """Initialise every GRU gate matrix and the output layer; biases are set to zero."""
        for name, param in self.gru.named_parameters():
            if 'weight' in name:
                # The three gate matrices are stacked along dim 0; initialise each separately.
                WR, WZ, WN = param.chunk(3, 0)
                self.initMatrix(WR)
                self.initMatrix(WZ)
                self.initMatrix(WN)

            elif 'bias' in name:
                param.data.zero_()

        self.initMatrix(self.linear.weight)
        self.linear.bias.data.zero_()

    def initMatrix(self, param):
        """Fill ``param`` in place with U(-s, s), where s = sqrt(6 / sum(param.shape))."""
        shape = list(param.shape)
        sigma = np.sqrt(6.0 / np.sum(shape))
        param.data.uniform_(-sigma, sigma)

In [None]:
class LossFunction(nn.Module):
    """Thin wrapper that forwards every call to a TOP1Loss instance."""

    def __init__(self):
        super().__init__()
        self._lossFn = TOP1Loss()

    def forward(self, input, target=None):
        """Return the wrapped loss evaluated on ``input`` (``target`` is passed through)."""
        loss = self._lossFn(input, target)
        return loss

class TOP1Loss(nn.Module):
    """TOP1 ranking loss over a square score matrix.

    Row ``i`` of ``input`` holds the scores of one sample; the diagonal entry
    ``input[i, i]`` is treated as the positive score and the remaining entries
    of the row as negative scores.
    """

    def __init__(self):
        super().__init__()

    def forward(self, input, target=None):
        """Return mean(sigmoid(score - positive)) + mean(sigmoid(score ** 2)); ``target`` is unused."""
        positives = input.diag().view(-1, 1).expand_as(input)
        ranking_term = torch.sigmoid(input - positives).mean()
        regularisation_term = torch.sigmoid(input ** 2).mean()
        return ranking_term + regularisation_term

class BPRLoss(nn.Module):
    """BPR loss over a square score matrix.

    The diagonal entry of each row is treated as the positive score and is
    compared against every entry of the same row.
    """

    def __init__(self):
        super().__init__()

    def forward(self, input, target=None):
        """Return -mean(logsigmoid(positive - score)); ``target`` is unused."""
        positives = input.diag().view(-1, 1).expand_as(input)
        margin = positives - input
        return -F.logsigmoid(margin).mean()

In [None]:
class Optimizer:
    """Adagrad optimiser with fixed hyper-parameters (lr=0.05, eps=1e-6, no weight decay)."""

    def __init__(self, params):
        """Create the underlying ``torch.optim.Adagrad`` for ``params``."""
        adagrad_settings = dict(lr=0.05, weight_decay=0, eps=1e-6)
        self.optimizer = optim.Adagrad(params, **adagrad_settings)

    def zero_grad(self):
        """Clear the gradients of all optimised parameters."""
        self.optimizer.zero_grad()

    def step(self):
        """Apply one Adagrad update."""
        self.optimizer.step()

In [None]:
def getRecall(indices, targets):
    """Fraction of rows of ``indices`` that contain the row's target item.

    Args:
        indices: [batch, k] tensor of recommended item indices.
        targets: [batch] tensor of the true next items.

    Returns:
        Number of matches divided by the batch size, or 0 when nothing matches.
    """
    expanded_targets = targets.view(-1, 1).expand_as(indices)
    hit_positions = (expanded_targets == indices).nonzero()
    if len(hit_positions) == 0:
        return 0
    return float(hit_positions.size(0)) / expanded_targets.size(0)

def getMrr(indices, targets):
    """Mean reciprocal rank of the target item within each row of ``indices``.

    Args:
        indices: [batch, k] tensor of recommended item indices, best first.
        targets: [batch] tensor of the true next items.

    Returns:
        0-dim tensor: sum of 1 / rank over all matches, divided by the batch size
        (rows without a match contribute 0).
    """
    expanded_targets = targets.view(-1, 1).expand_as(indices)
    hit_positions = (expanded_targets == indices).nonzero()
    # The last column of each hit is its 0-based position inside the row.
    one_based_ranks = (hit_positions[:, -1] + 1).float()
    reciprocal_sum = one_based_ranks.reciprocal().sum()
    return reciprocal_sum.data / expanded_targets.size(0)
    
def calc(indices, targets, k=20):
    """Return (recall@k, mrr@k) for a score matrix.

    Args:
        indices: [batch, nItems] tensor of scores (despite the name); the k
            highest-scoring columns of each row become the recommendations.
        targets: [batch] tensor of the true next items.
        k: number of recommendations per row.
    """
    top_items = torch.topk(indices, k, -1)[1]
    return getRecall(top_items, targets), getMrr(top_items, targets)

In [None]:
class Evaluation(object):
    """Computes mean loss, recall@k and MRR@k of a model over a validation generator."""

    def __init__(self, model, lossFunc=None, k=20):
        """
        Args:
            model: network exposing ``device``, ``initHidden``, ``resetHidden`` and ``eval``.
            lossFunc: optional loss module; when None no loss is computed.
            k: cut-off used for recall and MRR.
        """
        self.model = model
        self.lossFunc = lossFunc
        self.topk = k
        self.device = model.device

    def evalute(self, validGenerator):
        """Run one pass over ``validGenerator`` without gradient tracking.

        The generator must yield ``(input, target, finishedMask, validMask)``.
        NOTE(review): this relies on ``model(input, hidden)`` returning scores
        for every item when no target is passed - confirm against the model.

        Returns:
            (meanLoss, meanRecall, meanMrr); each is 0 when no value was collected.
        """
        self.model.eval()
        losses = []
        recalls = []
        mrrs = []
        with torch.no_grad():
            hidden = self.model.initHidden(validGenerator.batchSize)
            batches = tqdm(validGenerator,
                           total=validGenerator.totalIters,
                           miniters=1000, position=0, leave=True)
            for input, target, finishedMask, validMask in batches:
                input = input.to(self.device)
                target = target.to(self.device)
                hidden = self.model.resetHidden(hidden, finishedMask, validMask)
                logit, hidden = self.model(input, hidden)

                if self.lossFunc is not None:
                    # The ranking losses expect a [batch, batch] matrix whose diagonal
                    # holds each row's target score, so select the target columns first.
                    loss = self.lossFunc(logit[:, target.view(-1)], target)
                    lossValue = loss.item()
                    if not np.isnan(lossValue):
                        losses.append(lossValue)

                # topk raises when k exceeds the number of scored items; clamp it.
                recall, mrr = calc(logit, target, k=min(self.topk, logit.size(-1)))
                recalls.append(recall)
                mrrs.append(float(mrr))

        # np.mean of an empty list is NaN (with a warning); report 0 instead.
        meanLoss = np.mean(losses) if losses else 0
        meanRecall = np.mean(recalls) if recalls else 0.0
        meanMrr = np.mean(mrrs) if mrrs else 0.0

        return meanLoss, meanRecall, meanMrr

In [None]:
class Trainer(object):
    """Runs the train / validate / checkpoint cycle for a model."""

    def __init__(self, model, trainGenerator, validGenerator, optim, lossFunc, topN, resultDir):
        """
        Args:
            model: network to train; must expose ``device``, ``initHidden`` and ``resetHidden``.
            trainGenerator: iterable yielding (input, target, negative, finishedMask, validMask).
            validGenerator: generator handed to the evaluator after every epoch.
            optim: object with ``step()`` and ``zero_grad()``.
            lossFunc: loss module called as ``lossFunc(logit, target)``.
            topN: cut-off for the validation metrics.
            resultDir: directory that receives the checkpoints.
        """
        self.model = model
        self.device = model.device
        self.optim = optim
        self.lossFunc = lossFunc
        self.topN = topN
        self.resultDir = resultDir

        self.trainGenerator = trainGenerator
        self.validGenerator = validGenerator

        self.evalutor = Evaluation(self.model, self.lossFunc, k=topN)

    def train(self, nEpochs=10):
        """Train for ``nEpochs`` epochs; after each one evaluate, print a summary and save a checkpoint."""
        for epoch in range(nEpochs):
            st = time.time()
            print('Start Epoch #', epoch)

            trainLoss = self.trainEpoch(epoch)
            validLoss, recall, mrr = self.evalutor.evalute(self.validGenerator)

            elapsed = time.time() - st
            print("Epoch: {}, train loss: {:.4f}, validloss: {:.4f}, recall: {:.4f}, mrr: {:.4f}, time: {}".format(epoch, trainLoss, validLoss, recall, mrr, elapsed))
            self.saveModel(epoch, validLoss, trainLoss, recall, mrr)

    def trainEpoch(self, epoch):
        """Run one pass over the training generator and return the mean of the recorded batch losses."""
        self.model.train()
        fullBatch = float(self.trainGenerator.batchSize)
        hidden = self.model.initHidden(fullBatch)
        epochLosses = []

        progress = tqdm(enumerate(self.trainGenerator),
                        total=self.trainGenerator.totalIters,
                        miniters=1000, position=0, leave=True)
        for _, batch in progress:
            # The negative samples are part of the batch but are not used here.
            input, target, negative, finishedMask, validMask = batch
            input = input.to(self.device)
            target = target.to(self.device)
            hidden = self.model.resetHidden(hidden, finishedMask, validMask)
            logit, hidden = self.model(input, hidden, target)  # logit: [batch, batch]

            # Weight the loss by the share of batch slots that are still in use.
            loss = self.lossFunc(logit, target)
            loss = (float(len(input)) / fullBatch) * loss

            # Batches with a NaN loss are neither recorded nor used for an update.
            if not np.isnan(loss.item()):
                epochLosses.append(loss.item())
                loss.backward()
                self.optim.step()
                self.optim.zero_grad()

        return np.mean(epochLosses)

    def saveModel(self, epoch, validLoss, trainLoss, recall, mrr):
        """Save the model, the optimiser and the epoch metrics to ``model_<epoch>.pt`` in the result directory."""
        modelName = os.path.join(self.resultDir, "model_{0:05d}.pt".format(epoch))
        checkPoints = {
              'model': self.model,
              'epoch': epoch,
              'optim': self.optim,
              'validLoss': validLoss,
              'trainLoss': trainLoss,
              'recall': recall,
              'mrr': mrr
        }
        torch.save(checkPoints, modelName)
        print("Save model as %s" % modelName)

In [None]:
# Input and output widths of the network both equal the number of items in the training item map.
inputSize = trainDataSet.nItems 
outputSize = inputSize 

In [None]:
# Build the network; GRU4Rec selects CUDA when available, otherwise the CPU.
model = GRU4Rec(inputSize=inputSize, outputSize=outputSize)

In [None]:
# Adagrad-based optimiser wrapper over all model parameters.
optimizer = Optimizer(model.parameters())   

In [None]:
# LossFunction delegates to TOP1Loss.
lossFunc = LossFunction()

In [None]:
# Batch generators: training mode (the default) for the training set, evaluation mode for validation.
trainGenerator = DataGenerator(trainDataSet)
validGenerator = DataGenerator(validDataSet, isTrain=False)

In [None]:
# Cut-off for recall@N / MRR@N during validation.
topN = 20
# Save checkpoints into the directory created at the top of the notebook,
# instead of repeating the "Results" literal.
resultDir = result_dir

In [None]:
# Wire the model, generators, optimiser and loss together.
trainer = Trainer(model, trainGenerator=trainGenerator, validGenerator=validGenerator, optim=optimizer, lossFunc=lossFunc, topN=topN, resultDir=resultDir)

In [None]:
# Number of training epochs.
nEpochs = 1

In [None]:
# Each epoch trains, evaluates on the validation generator, prints the metrics and saves a checkpoint.
trainer.train(nEpochs)

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class BPRLoss(nn.Module):
    """BPR loss for inputs whose first column is the positive score.

    Note: this definition replaces the earlier ``BPRLoss`` in this notebook,
    which reads the positive score from the diagonal and accepts a ``target``
    argument. The two follow different input conventions.
    """

    def __init__(self):
        super().__init__()

    def forward(self, input):
        """Return -mean(logsigmoid(positive - negative)).

        Assumes ``input[:, 0]`` holds the positive score of each row and
        ``input[:, 1:]`` the negative scores.
        """
        # Slicing with :1 keeps the column dimension, giving shape [batch, 1].
        positive_scores = input[:, :1]
        negative_scores = input[:, 1:]

        # Broadcast the positive score against every negative of the same row.
        margin = positive_scores - negative_scores
        return -F.logsigmoid(margin).mean()

# Build a sample input: 32 rows of random scores with 2080 columns each.
# BPRLoss.forward above treats column 0 as the positive score and the
# remaining 2079 columns as negative scores.
sample_input = torch.randn(32, 2080)  # random prediction scores

# Instantiate the loss function
loss_function = BPRLoss()

# Compute the loss
loss = loss_function(sample_input)

# Display the resulting loss (the input is random and unseeded, so the value differs between runs)
loss


In [None]:
# Column 0 of sample_input reshaped to a column vector; the last line displays its shape.
positive_preds = sample_input[:, 0].unsqueeze(1) 
positive_preds.shape

In [None]:
# Draw a fresh random 32 x 2080 tensor, replacing the previous sample_input.
sample_input = torch.randn(32, 2080)

In [None]:
# Display the whole tensor.
sample_input

In [None]:
# Display column 0 of sample_input.
sample_input[:, 0]

In [None]:
# Display column 1 of sample_input.
sample_input[:, 1]

In [None]:
# Display column 2 of sample_input.
sample_input[:, 2]