# Initialization

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import datetime as dt
import numpy as np
from copy import deepcopy
import copy
import pickle

In [2]:
import argparse
import torch
import numpy as np
import os
import datetime
import torch.nn as nn
import torchvision
from torch.autograd import Variable
import torch.nn.functional as F
import torch.optim as optim
from torch.nn import TransformerEncoder
from torch.nn import TransformerEncoderLayer
from torch.nn import Module, Parameter
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split
from tqdm import tqdm
import time
import math

from collections import OrderedDict
from typing import List, Tuple, Union
import matplotlib.pyplot as plt

DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Training on {DEVICE}")

Training on cuda:0


In [3]:
import random
SEED = 42

torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)

In [4]:
class Options:
    def __init__(self):
      self.dataset = 'diginetica'
      self.batchSize = 32
      self.hiddenSize = 96
      self.nhead = 2
      self.layer = 1
      self.feedforward = 4
      self.epoch = 12
      self.lr = 0.001
      self.lr_dc = 0.1
      self.lr_dc_step = 3
      self.l2 = 1e-5
      self.patience = 12

opt = Options()

# Now you can access parameters like this:
print(opt.dataset)

diginetica


In [5]:
train_data = []
valid_data = []

for i in range(45):
  if os.path.isfile(f'./SR_SAN_Diginetica/train_{i}.txt'):
    print(i)
    tr_data = pickle.load(open(f'./SR_SAN_Diginetica/train_{i}.txt', 'rb'))
    ts_data = pickle.load(open(f'./SR_SAN_Diginetica/test_{i}.txt', 'rb'))

    train_data.append(tr_data)
    valid_data.append(ts_data)

print(f"len train: {len(train_data)}")
print(f"len validation: {len(valid_data)}")

0
3
7
16
len train: 4
len validation: 4


In [6]:
# def data_masks(all_usr_pois, item_tail):
#   us_lens = [len(upois) for upois in all_usr_pois]
#   len_max = max(us_lens)
#   us_pois = [upois + item_tail * (len_max - le) for upois, le in zip(all_usr_pois, us_lens)]
#   us_msks = [[1] * le + [0] * (len_max - le) for le in us_lens]
#   return us_pois, us_msks, len_max

In [7]:
def data_masks(all_usr_pois, item_tail):
    if not all_usr_pois or all(len(upois) == 0 for upois in all_usr_pois):
        raise ValueError("Input all_usr_pois is empty or contains only empty lists")

    us_lens = [len(upois) for upois in all_usr_pois]
    len_max = max(us_lens)
    us_pois = [upois + item_tail * (len_max - le) for upois, le in zip(all_usr_pois, us_lens)]
    us_msks = [[1] * le + [0] * (len_max - le) for le in us_lens]
    return us_pois, us_msks, len_max

In [8]:
class Data():
    def __init__(self, data, shuffle=False, graph=None):
      inputs = data[0]
      inputs, mask, len_max = data_masks(inputs, [0])
      self.inputs = np.asarray(inputs)
      self.mask = np.asarray(mask)
      self.len_max = len_max
      self.targets = np.asarray(data[1])
      self.length = len(inputs)
      self.shuffle = shuffle
      self.graph = graph

    def generate_batch(self, batch_size):
      if self.shuffle:
        shuffled_arg = np.arange(self.length)
        np.random.shuffle(shuffled_arg)
        self.inputs = self.inputs[shuffled_arg]
        self.mask = self.mask[shuffled_arg]
        self.targets = self.targets[shuffled_arg]
      n_batch = int(self.length / batch_size)
      if self.length % batch_size != 0:
        n_batch += 1
      slices = np.split(np.arange(n_batch * batch_size), n_batch)
      slices[-1] = slices[-1][:(self.length - batch_size * (n_batch - 1))]
      return slices

    def get_slice(self, i):
      inputs, mask, targets = self.inputs[i], self.mask[i], self.targets[i]
      items, n_node, A, alias_inputs = [], [], [], []
      for u_input in inputs:
        n_node.append(len(np.unique(u_input)))
      max_n_node = np.max(n_node)
      for u_input in inputs:
        node = np.unique(u_input)
        items.append(node.tolist() + (max_n_node - len(node)) * [0])
        u_A = np.zeros((max_n_node, max_n_node))
        for i in np.arange(len(u_input) - 1):
          if u_input[i + 1] == 0:
            break
          u = np.where(node == u_input[i])[0][0]
          v = np.where(node == u_input[i + 1])[0][0]
          u_A[u][v] = 1
        u_sum_in = np.sum(u_A, 0)
        u_sum_in[np.where(u_sum_in == 0)] = 1
        u_A_in = np.divide(u_A, u_sum_in)
        u_sum_out = np.sum(u_A, 1)
        u_sum_out[np.where(u_sum_out == 0)] = 1
        u_A_out = np.divide(u_A.transpose(), u_sum_out)
        u_A = np.concatenate([u_A_in, u_A_out]).transpose()
        A.append(u_A)
        alias_inputs.append([np.where(node == i)[0][0] for i in u_input])
      return alias_inputs, A, items, mask, targets

    def __len__(self):
      return self.length  # or return len(self.inputs)

In [9]:
class SelfAttentionNetwork(Module):
  def __init__(self, opt, n_node):
    super(SelfAttentionNetwork, self).__init__()
    self.hidden_size = opt.hiddenSize
    self.n_node = n_node
    self.batch_size = opt.batchSize
    self.embedding = nn.Embedding(self.n_node, self.hidden_size)
    self.transformerEncoderLayer = TransformerEncoderLayer(d_model=self.hidden_size, nhead=opt.nhead,dim_feedforward=self.hidden_size * opt.feedforward)
    self.transformerEncoder = TransformerEncoder(self.transformerEncoderLayer, opt.layer)
    self.loss_function = nn.CrossEntropyLoss()
    self.optimizer = torch.optim.Adam(self.parameters(), lr=opt.lr, weight_decay=opt.l2)
    self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=opt.lr_dc_step, gamma=opt.lr_dc)
    self.reset_parameters()

  def reset_parameters(self):
    stdv = 1.0 / math.sqrt(self.hidden_size)
    for weight in self.parameters():
      weight.data.uniform_(-stdv, stdv)

  def compute_scores(self, hidden, mask):
    ht = hidden[torch.arange(mask.shape[0]).long(), torch.sum(mask, 1) - 1]  # batch_size x latent_size
    b = self.embedding.weight[1:]  # n_nodes x latent_size
    scores = torch.matmul(ht, b.transpose(1, 0))
    return scores

  def forward(self, inputs, A):
    hidden = self.embedding(inputs)
    hidden = hidden.transpose(0,1).contiguous()
    hidden = self.transformerEncoder(hidden)
    hidden = hidden.transpose(0,1).contiguous()
    return hidden

In [10]:
def trans_to_cuda(variable):
  if torch.cuda.is_available():
    return variable.cuda()
  else:
    return variable

In [11]:
def trans_to_cpu(variable):
  if torch.cuda.is_available():
    return variable.cpu()
  else:
    return variable

In [12]:
def forward(model, i, data):
  alias_inputs, A, items, mask, targets = data.get_slice(i)
  alias_inputs = trans_to_cuda(torch.Tensor(alias_inputs).long())
  items = trans_to_cuda(torch.Tensor(items).long())
  A = trans_to_cuda(torch.Tensor(A).float())
  mask = trans_to_cuda(torch.Tensor(mask).long())
  hidden = model(items, A)
  get = lambda i: hidden[i][alias_inputs[i]]
  seq_hidden = torch.stack([get(i) for i in torch.arange(len(alias_inputs)).long()])
  return targets, model.compute_scores(seq_hidden, mask)

In [13]:
def test(model, test_data, device):
    print('start predicting: ', datetime.datetime.now())
    model.to(device)
    model.eval()
    hit, mrr = [], []
    slices = test_data.generate_batch(model.batch_size)

    for i in slices:
        targets, scores = forward(model, i, test_data)
        # targets = targets.to(device)  # Ensure targets are on the correct device
        sub_scores = scores.topk(5)[1]
        sub_scores = sub_scores.cpu().detach().numpy()  # Move to CPU if necessary

        for score, target, mask in zip(sub_scores, targets, test_data.mask):
            hit.append(np.isin(target - 1, score))
            mrr_index = np.where(score == target - 1)[0]
            mrr.append(0 if len(mrr_index) == 0 else 1 / (mrr_index[0] + 1))

    hit = np.mean(hit) * 100
    mrr = np.mean(mrr) * 100

    # model.scheduler.step()  # Typically used in training, not testing

    return hit, mrr


In [14]:
def train(model, train_data, test_data, epochs, device):
    print('start training: ', datetime.datetime.now())

    model.to(device)
    model.train()

    for epoch in range(epochs):
        total_loss = 0.0
        slices = train_data.generate_batch(model.batch_size)
        num_batches = len(slices)

        for i, j in zip(slices, np.arange(num_batches)):
            model.optimizer.zero_grad()
            targets, scores = forward(model, i, train_data)
            targets = trans_to_cuda(torch.Tensor(targets).long())
            # targets = targets.to(device)  # Ensure targets are on the correct device
            loss = model.loss_function(scores, targets - 1)
            loss.backward()
            model.optimizer.step()
            total_loss += loss.item()  # Use .item() to get the scalar value

            if j % int(num_batches / 5 + 1) == 0:
                print(f'[{j}/{num_batches}] Loss: {loss.item():.4f}')

        avg_loss = total_loss / num_batches
        print(f"Epoch: {epoch}, Average Loss: {avg_loss:.4f}")

        # Evaluate the model
        hit, mrr = test(model, test_data, device)

        model.scheduler.step()
    
    return hit, mrr

In [15]:
hit_list = []
mrr_list = []

if opt.dataset == 'diginetica':
  n_node = 889
else:
  n_node = 37484

#iterate over all clients
for i in range(len(train_data)):
  print(f"Client {i}")
  train_data_i = Data(train_data[i], shuffle=True)
  test_data_i = Data(valid_data[i], shuffle=False)
  model = trans_to_cuda(SelfAttentionNetwork(opt, n_node))
  hit, mrr = train(model, train_data_i, test_data_i, opt.epoch, DEVICE)
  hit_list.append(hit)
  mrr_list.append(mrr)

  #print hit and mrr for each client
  print(f"Hit: {hit:.4f}")
  print(f"MRR: {mrr:.4f}")

#print average hit and mrr over all clients
print(f"Average Hit: {np.mean(hit_list):.4f}")
print(f"Average MRR: {np.mean(mrr_list):.4f}")

Client 0




start training:  2023-12-04 02:36:27.021106


  A = trans_to_cuda(torch.Tensor(A).float())


[0/1] Loss: 6.8144
Epoch: 0, Average Loss: 6.8144
start predicting:  2023-12-04 02:36:28.683648
[0/1] Loss: 6.7674
Epoch: 1, Average Loss: 6.7674
start predicting:  2023-12-04 02:36:28.800969
[0/1] Loss: 6.7176
Epoch: 2, Average Loss: 6.7176
start predicting:  2023-12-04 02:36:28.811938
[0/1] Loss: 6.6711
Epoch: 3, Average Loss: 6.6711
start predicting:  2023-12-04 02:36:28.820942
[0/1] Loss: 6.6666
Epoch: 4, Average Loss: 6.6666
start predicting:  2023-12-04 02:36:28.829918
[0/1] Loss: 6.6620
Epoch: 5, Average Loss: 6.6620
start predicting:  2023-12-04 02:36:28.838866
[0/1] Loss: 6.6575
Epoch: 6, Average Loss: 6.6575
start predicting:  2023-12-04 02:36:28.848627
[0/1] Loss: 6.6570
Epoch: 7, Average Loss: 6.6570
start predicting:  2023-12-04 02:36:28.857604
[0/1] Loss: 6.6565
Epoch: 8, Average Loss: 6.6565
start predicting:  2023-12-04 02:36:28.867578
[0/1] Loss: 6.6561
Epoch: 9, Average Loss: 6.6561
start predicting:  2023-12-04 02:36:28.876554
[0/1] Loss: 6.6560
Epoch: 10, Average Lo

ValueError: Input all_usr_pois is empty or contains only empty lists

In [18]:
valid_data[2]

([], [])