# Initialization

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import datetime as dt
import numpy as np
from copy import deepcopy
import copy
import pickle

In [None]:
import argparse
import torch
import numpy as np
import os
import datetime
import torch.nn as nn
import torchvision
from torch.autograd import Variable
import torch.nn.functional as F
import torch.optim as optim
from torch.nn import TransformerEncoder
from torch.nn import TransformerEncoderLayer
from torch.nn import Module, Parameter
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split
from tqdm import tqdm
import time
import math

from collections import OrderedDict
from typing import List, Tuple, Union
import matplotlib.pyplot as plt

# Use the first CUDA GPU when PyTorch reports one; otherwise fall back to CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda:0")
else:
    DEVICE = torch.device("cpu")
print(f"Training on {DEVICE}")

In [None]:
import random
# One fixed seed shared by every random number generator used in this notebook.
SEED = 42

# Seed PyTorch, NumPy and the stdlib generator (same order as before).
for _seed_rng in (torch.manual_seed, np.random.seed, random.seed):
    _seed_rng(SEED)

In [None]:
class Options:
    """Plain container of hyperparameters, exposed as instance attributes.

    The constructor takes no arguments; every field is set to a fixed default
    and can be overwritten on the instance afterwards.
    """

    def __init__(self):
        # --- data -----------------------------------------------------------
        self.dataset = 'diginetica'   # dataset name
        self.batchSize = 100          # mini-batch size

        # --- model ----------------------------------------------------------
        self.hiddenSize = 96          # embedding / transformer model width
        self.nhead = 2                # number of attention heads
        self.layer = 1                # number of stacked encoder layers
        self.feedforward = 4          # feed-forward width = hiddenSize * feedforward

        # --- optimisation ---------------------------------------------------
        self.epoch = 12               # presumably the number of epochs; not read in this section
        self.lr = 0.001               # Adam learning rate
        self.lr_dc = 0.1              # StepLR decay factor (gamma)
        self.lr_dc_step = 3           # StepLR step size
        self.l2 = 1e-5                # Adam weight decay
        self.patience = 12            # presumably early-stopping patience; not read in this section

# Instantiate the default configuration.
opt = Options()

# Hyperparameters are plain attributes; for example, print the dataset name:
print(opt.dataset)

In [None]:
# Per-shard training and validation splits: one list entry per shard found on
# disk, kept in the same order in both lists.
train_data = []
valid_data = []

# Directory holding the pickled shards, and how many shard indices to probe.
shard_dir = './SR_SAN_Diginetica'
num_shards = 19

for i in range(num_shards):
    train_path = f'{shard_dir}/train_{i}.txt'
    test_path = f'{shard_dir}/test_{i}.txt'

    # Probe the very files that are loaded below. The previous check looked at
    # a different directory (built from an undefined `working_dir`), so it
    # could neither run nor guarantee that these files exist.
    if not (os.path.isfile(train_path) and os.path.isfile(test_path)):
        continue

    print(i)
    # NOTE(security): pickle.load can execute arbitrary code; only load shard
    # files that come from a trusted source.
    with open(train_path, 'rb') as train_file:
        tr_data = pickle.load(train_file)
    with open(test_path, 'rb') as test_file:
        ts_data = pickle.load(test_file)

    train_data.append(tr_data)
    valid_data.append(ts_data)

print(f"len train: {len(train_data)}")
print(f"len validation: {len(valid_data)}")

In [None]:
def data_masks(all_usr_pois, item_tail):
    """Right-pad every sequence to the longest length and build matching masks.

    Args:
        all_usr_pois: list of item sequences (each a list).
        item_tail: one-element list holding the padding value, e.g. ``[0]``;
            it is repeated to fill each sequence up to the maximum length.

    Returns:
        A tuple ``(us_pois, us_msks, len_max)`` where ``us_pois`` are the
        padded sequences, ``us_msks`` hold 1 for real positions and 0 for
        padding, and ``len_max`` is the longest original length. An empty
        input yields ``([], [], 0)`` instead of raising.
    """
    us_lens = [len(upois) for upois in all_usr_pois]
    # default=0 keeps an empty dataset from crashing inside max().
    len_max = max(us_lens, default=0)
    us_pois = [
        upois + item_tail * (len_max - le)
        for upois, le in zip(all_usr_pois, us_lens)
    ]
    us_msks = [[1] * le + [0] * (len_max - le) for le in us_lens]
    return us_pois, us_msks, len_max

In [None]:
class Data():
    """Padded session dataset with mini-batch slicing and per-session graphs.

    Attributes:
        inputs: array of zero-padded item sequences, one row per session.
        mask: array of the same shape, 1 for real positions and 0 for padding.
        len_max: length of the longest original sequence.
        targets: array of target items, one per session.
        length: number of sessions.
        shuffle: whether ``generate_batch`` reshuffles the sessions.
        graph: stored as given; not used by this class.
    """

    def __init__(self, data, shuffle=False, graph=None):
        """Build the dataset from ``data = (sequences, targets)``.

        Args:
            data: indexable whose item 0 is the list of item sequences and
                item 1 the matching targets.
            shuffle: reshuffle the sessions on every ``generate_batch`` call.
            graph: optional object stored on the instance unchanged.
        """
        inputs, mask, len_max = data_masks(data[0], [0])
        self.inputs = np.asarray(inputs)
        self.mask = np.asarray(mask)
        self.len_max = len_max
        self.targets = np.asarray(data[1])
        self.length = len(inputs)
        self.shuffle = shuffle
        self.graph = graph

    def generate_batch(self, batch_size):
        """Return index arrays that partition the dataset into mini-batches.

        When ``shuffle`` is set, inputs, masks and targets are permuted in
        place (consistently) before slicing.

        Args:
            batch_size: maximum number of sessions per batch.

        Returns:
            A list of integer index arrays; all have ``batch_size`` entries
            except possibly the last. An empty dataset yields ``[]``.
        """
        if self.length == 0:
            # Nothing to slice; np.split with zero sections would raise.
            return []
        if self.shuffle:
            shuffled_arg = np.arange(self.length)
            np.random.shuffle(shuffled_arg)
            self.inputs = self.inputs[shuffled_arg]
            self.mask = self.mask[shuffled_arg]
            self.targets = self.targets[shuffled_arg]
        # Consecutive index ranges; the final one is cut at the dataset end.
        return [
            np.arange(start, min(start + batch_size, self.length))
            for start in range(0, self.length, batch_size)
        ]

    def get_slice(self, i):
        """Build the batch for the sessions selected by index array ``i``.

        For every selected session the sorted unique items become the graph
        nodes (padding value 0 included when present), and consecutive items
        in the sequence become directed edges, stopping at the first padding.

        Args:
            i: index array (e.g. one element of ``generate_batch``'s result).

        Returns:
            ``(alias_inputs, A, items, mask, targets)`` where
            ``alias_inputs`` maps each sequence position to its node index,
            ``A`` is a list of ``(n, 2n)`` arrays holding the in-normalised
            and out-normalised adjacency side by side (``n`` = largest node
            count in the batch), ``items`` are the node lists zero-padded to
            ``n``, and ``mask`` / ``targets`` are the selected rows.
        """
        inputs, mask, targets = self.inputs[i], self.mask[i], self.targets[i]

        # One pass per session: sorted unique nodes plus, for every sequence
        # position, the index of its item inside that node array.
        uniques = [np.unique(u_input, return_inverse=True) for u_input in inputs]
        max_n_node = max(len(node) for node, _ in uniques)

        items, A, alias_inputs = [], [], []
        for u_input, (node, alias) in zip(inputs, uniques):
            items.append(node.tolist() + (max_n_node - len(node)) * [0])

            u_A = np.zeros((max_n_node, max_n_node))
            for pos in range(len(u_input) - 1):
                if u_input[pos + 1] == 0:
                    # Reached the padding: no further transitions.
                    break
                u_A[alias[pos], alias[pos + 1]] = 1

            # Normalise by in-degree / out-degree, treating 0 as 1 to avoid
            # division by zero for isolated nodes.
            u_sum_in = np.sum(u_A, 0)
            u_sum_in[np.where(u_sum_in == 0)] = 1
            u_A_in = np.divide(u_A, u_sum_in)
            u_sum_out = np.sum(u_A, 1)
            u_sum_out[np.where(u_sum_out == 0)] = 1
            u_A_out = np.divide(u_A.transpose(), u_sum_out)
            A.append(np.concatenate([u_A_in, u_A_out]).transpose())

            alias_inputs.append(alias.tolist())
        return alias_inputs, A, items, mask, targets

    def __len__(self):
        """Return the number of sessions."""
        return self.length

In [None]:
class SelfAttentionNetwork(Module):
    """Item embedding followed by a transformer encoder, with its own optimiser.

    The module also owns the loss function, an Adam optimiser and a StepLR
    scheduler so that training code can drive everything through the model.
    """

    def __init__(self, opt, n_node):
        """Create layers, optimiser and scheduler, then re-initialise weights.

        Args:
            opt: configuration object providing ``hiddenSize``, ``batchSize``,
                ``nhead``, ``feedforward``, ``layer``, ``lr``, ``l2``,
                ``lr_dc_step`` and ``lr_dc``.
            n_node: number of rows in the embedding table.
        """
        super().__init__()
        self.hidden_size = opt.hiddenSize
        self.n_node = n_node
        self.batch_size = opt.batchSize

        # Layers (creation order matters for parameter order and RNG draws).
        self.embedding = nn.Embedding(self.n_node, self.hidden_size)
        self.transformerEncoderLayer = TransformerEncoderLayer(
            d_model=self.hidden_size,
            nhead=opt.nhead,
            dim_feedforward=self.hidden_size * opt.feedforward,
        )
        self.transformerEncoder = TransformerEncoder(
            self.transformerEncoderLayer, opt.layer
        )

        # Training utilities.
        self.loss_function = nn.CrossEntropyLoss()
        self.optimizer = torch.optim.Adam(
            self.parameters(), lr=opt.lr, weight_decay=opt.l2
        )
        self.scheduler = torch.optim.lr_scheduler.StepLR(
            self.optimizer, step_size=opt.lr_dc_step, gamma=opt.lr_dc
        )

        self.reset_parameters()

    def reset_parameters(self):
        """Fill every parameter with U(-1/sqrt(hidden_size), 1/sqrt(hidden_size))."""
        bound = 1.0 / math.sqrt(self.hidden_size)
        for param in self.parameters():
            param.data.uniform_(-bound, bound)

    def compute_scores(self, hidden, mask):
        """Score all items (except embedding row 0) against the last real position.

        Args:
            hidden: per-position hidden states, indexed as ``[batch, position]``.
            mask: 0/1 tensor whose row sums give the sequence lengths.

        Returns:
            Tensor of dot products between each sequence's last-position
            hidden state and embedding rows ``1:``.
        """
        batch_index = torch.arange(mask.shape[0]).long()
        last_position = torch.sum(mask, 1) - 1
        last_hidden = hidden[batch_index, last_position]  # batch_size x latent_size
        # Row 0 is skipped; presumably it is the padding item.
        candidates = self.embedding.weight[1:]  # n_nodes x latent_size
        return torch.matmul(last_hidden, candidates.transpose(1, 0))

    def forward(self, inputs, A):
        """Embed ``inputs`` and run them through the transformer encoder.

        Args:
            inputs: integer tensor of item ids, batch dimension first.
            A: accepted for interface compatibility; not used here.

        Returns:
            Hidden states with the same leading two dimensions as ``inputs``.
        """
        embedded = self.embedding(inputs)
        # The encoder is called with the first two axes swapped, then the
        # result is swapped back to batch-first.
        encoded = self.transformerEncoder(embedded.transpose(0, 1).contiguous())
        return encoded.transpose(0, 1).contiguous()

In [None]:
def trans_to_cuda(variable):
    """Return ``variable`` moved to the GPU when CUDA is available, else unchanged."""
    return variable.cuda() if torch.cuda.is_available() else variable

In [None]:
def trans_to_cpu(variable):
    """Return ``variable`` moved to the CPU when CUDA is available, else unchanged."""
    if not torch.cuda.is_available():
        return variable
    return variable.cpu()

In [None]:
def forward(model, i, data):
    """Run ``model`` on one mini-batch of ``data`` and score the candidates.

    Args:
        model: network exposing ``__call__(items, A)`` and
            ``compute_scores(seq_hidden, mask)``.
        i: index array selecting the batch, passed to ``data.get_slice``.
        data: dataset object providing ``get_slice``.

    Returns:
        ``(targets, scores)`` where ``targets`` is returned exactly as
        ``get_slice`` produced it and ``scores`` is the output of
        ``model.compute_scores``.
    """
    alias_inputs, A, items, mask, targets = data.get_slice(i)

    # Convert through NumPy with an explicit dtype: integer ids never pass
    # through float32 (which cannot represent every integer above 2**24), and
    # a list of arrays is not converted element by element.
    alias_inputs = trans_to_cuda(
        torch.as_tensor(np.asarray(alias_inputs), dtype=torch.long)
    )
    items = trans_to_cuda(torch.as_tensor(np.asarray(items), dtype=torch.long))
    A = trans_to_cuda(torch.as_tensor(np.asarray(A), dtype=torch.float))
    mask = trans_to_cuda(torch.as_tensor(np.asarray(mask), dtype=torch.long))

    hidden = model(items, A)

    # Map node-level hidden states back to sequence positions:
    # seq_hidden[b, t] = hidden[b, alias_inputs[b, t]].
    batch_index = torch.arange(
        alias_inputs.shape[0], device=alias_inputs.device
    ).unsqueeze(1)
    seq_hidden = hidden[batch_index, alias_inputs]
    return targets, model.compute_scores(seq_hidden, mask)

In [None]:
def train_test(model, train_data, test_data, top_k=5):
    """Train ``model`` for one pass over ``train_data``, then evaluate it.

    Args:
        model: network exposing ``batch_size``, ``optimizer``,
            ``loss_function`` and ``scheduler``.
        train_data: dataset used for the training pass.
        test_data: dataset used for evaluation.
        top_k: size of the recommendation list used for the metrics
            (defaults to 5, the previously hard-coded value).

    Returns:
        ``(hit, mrr)``: hit rate and mean reciprocal rank within the top
        ``top_k`` predictions, both as percentages. With an empty
        ``test_data`` both are NaN (mean of an empty list).

    Side effects:
        Updates the model weights, prints progress, and advances the learning
        rate scheduler by one step per call.
    """
    print('start training: ', datetime.datetime.now())
    model.train()
    total_loss = 0.0
    slices = train_data.generate_batch(model.batch_size)
    log_every = int(len(slices) / 5 + 1)
    for j, i in enumerate(slices):
        model.optimizer.zero_grad()
        targets, scores = forward(model, i, train_data)
        targets = trans_to_cuda(
            torch.as_tensor(np.asarray(targets), dtype=torch.long)
        )
        # Targets are shifted by one because scores skip embedding row 0.
        loss = model.loss_function(scores, targets - 1)
        loss.backward()
        model.optimizer.step()
        # Accumulate a Python float; adding the tensor itself would keep a
        # graph-attached running sum alive for the whole epoch.
        total_loss += loss.item()
        if j % log_every == 0:
            print('[%d/%d] Loss: %.4f' % (j, len(slices), loss.item()))
    print('\tLoss:\t%.3f' % total_loss)

    print('start predicting: ', datetime.datetime.now())
    model.eval()
    hit, mrr = [], []
    slices = test_data.generate_batch(model.batch_size)
    # No gradients are needed for evaluation.
    with torch.no_grad():
        for i in slices:
            targets, scores = forward(model, i, test_data)
            sub_scores = scores.topk(top_k)[1]
            sub_scores = trans_to_cpu(sub_scores).detach().numpy()
            for score, target in zip(sub_scores, targets):
                # Positions (0-based) at which the shifted target appears.
                rank = np.where(score == target - 1)[0]
                hit.append(rank.size > 0)
                mrr.append(1 / (rank[0] + 1) if rank.size else 0)
    hit = np.mean(hit) * 100
    mrr = np.mean(mrr) * 100
    model.scheduler.step()
    return hit, mrr