In [1]:
import copy
import torch
import torch.nn as nn
import os
import time
import warnings
import numpy as np
import sys
import torch.backends.cudnn as cudnn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.optim import Optimizer
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader, Dataset, random_split
import random
import pandas as pd

from sklearn.cluster import AgglomerativeClustering
from sklearn.preprocessing import label_binarize

In [2]:
# Per-client interaction logs; each array holds one entry per client.
# NOTE(review): allow_pickle=True unpickles arbitrary objects -- only load files you trust.
train_data = np.load('./train_data_diginetica.npy', allow_pickle=True)
# The held-out split is read from the *test* file but referred to as `valid_data` below.
valid_data = np.load('./test_data_diginetica.npy', allow_pickle=True)

In [3]:
# Experiment configuration. Everything a reader might tune lives in this cell.
dataset = 'diginetica'
# Attack codes -- A1: label_poison, A2: gaussian_attack, A3: scaling_attack, A4: reverse_attack.
# NOTE(review): 'B' matches none of the codes above; presumably "benign" -- confirm.
attack_type = 'B'
local_learning_rate = 0.01
local_steps = 1
data_path = "."
learning_rate_decay_gamma = 0.99
learning_rate_decay = False
future_test = False
mu = 1
global_rounds = 100
# One federated client per entry of the held-out split.
num_clients = len(valid_data)
join_ratio = 1.0
attack_ratio = 0.0
algorithm = "FedCHAR"
future_ratio = 0.0
finetune_rounds = 0
eval_gap = 1
detailed_info = False
partition = "nature"
initial_rounds = 10
# Clustering settings (AgglomerativeClustering is imported at the top of the notebook).
n_clusters = 3
metric = 'cosine'
linkage = 'complete'

In [4]:
# Seed every random number generator the notebook relies on.
seed = 42

random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)

# Ask cuDNN for deterministic kernels and switch off its autotuner.
cudnn.deterministic = True
cudnn.benchmark = False

In [28]:
# Hyper-parameters of the session-based recommender.
# NOTE(review): 889 is hard-coded; presumably the number of distinct items -- confirm it
# covers every index in `universal_item_map`, otherwise the embedding lookup goes out of range.
input_size = 889
hidden_size = 100
# The model scores every item, so the output width equals the input vocabulary.
output_size = input_size
batch_size = 32
# Cut-off passed to torch.topk when computing the ranking metrics.
K = 5

In [6]:
# Prefer the first CUDA device and fall back to the CPU when none is available.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda:0")
else:
    DEVICE = torch.device("cpu")
print(f"Training on {DEVICE}")

Training on cuda:0


In [7]:
# Stack every client's training rows into a single DataFrame.
# The resulting columns are positional integers (the item column is read as `[2]` below).
train_combined = pd.DataFrame(np.concatenate(train_data))
train_combined.shape

(1455, 14)

In [8]:
# Distinct item ids across all clients, in order of first appearance.
# Column 2 of the stacked frame is read as the item id.
all_unique_items = train_combined[2].unique()

# Shared lookup table: each item id gets a contiguous index starting at 0.
universal_item_map = pd.DataFrame(
    {
        'item_idx': np.arange(len(all_unique_items)),
        'itemId': all_unique_items,
    }
)

In [9]:
class GRUDataset(Dataset):
    """Session dataset that yields (input sequence, next-item targets) pairs.

    Each sample is one session: the item indices ordered by time. The input is
    the session without its last item and the target is the session without its
    first item, i.e. the input shifted by one position.

    Args:
        data: DataFrame with at least the session, item and time columns.
        itemmap: DataFrame with the `item_key` column and an `item_idx` column.
        session_key: Name of the session-id column in `data`.
        item_key: Name of the item-id column shared by `data` and `itemmap`.
        time_key: Name of the timestamp column in `data`.

    Attributes:
        data: Merged frame (with `item_idx`), sorted by session and time.
        sessions: Series indexed by session id holding lists of item indices.
    """

    # A session needs one item to feed the model and one item to predict.
    MIN_SESSION_LENGTH = 2

    def __init__(self, data, itemmap, session_key='sessionId', item_key='itemId', time_key='time'):
        self.itemmap = itemmap
        self.session_key = session_key
        self.item_key = item_key
        self.time_key = time_key

        # The inner merge attaches `item_idx` and drops interactions whose item
        # is missing from the map.
        merged = pd.merge(data, self.itemmap, on=self.item_key, how='inner')

        # Sort without `inplace` so the step produces a fresh, clearly owned frame.
        self.data = merged.sort_values([self.session_key, self.time_key])

        # One list of item indices per session, in chronological order.
        sessions = self.data.groupby(self.session_key)['item_idx'].apply(list)

        # Sessions shorter than two items would produce empty input/target
        # tensors (and a zero-length batch if collated together), so drop them.
        self.sessions = sessions[sessions.map(len) >= self.MIN_SESSION_LENGTH]

    def __len__(self):
        """Return the number of usable sessions."""
        return len(self.sessions)

    def __getitem__(self, index):
        """Return `(sequence, target)` long tensors for the session at `index`."""
        session_items = self.sessions.iloc[index]
        sequence = torch.tensor(session_items[:-1], dtype=torch.long)
        target = torch.tensor(session_items[1:], dtype=torch.long)
        return sequence, target

In [10]:
def collate_fn(batch):
    """Pad a list of `(sequence, target)` pairs into two batch-first tensors.

    Inputs are right-padded with 0 and targets with -1, so downstream code can
    recognise padded target positions by the value -1.

    NOTE(review): 0 is also a valid item index in `universal_item_map`, so padded
    input positions are indistinguishable from item 0.
    """
    inputs = [pair[0] for pair in batch]
    labels = [pair[1] for pair in batch]
    return (
        pad_sequence(inputs, batch_first=True, padding_value=0),
        pad_sequence(labels, batch_first=True, padding_value=-1),
    )

def get_loader(data, itemmap, batch_size=32, shuffle=True):
    """Wrap `data` in a GRUDataset and return a DataLoader that pads each batch.

    Args:
        data: Interaction frame handed to GRUDataset.
        itemmap: Item-id to item-index table handed to GRUDataset.
        batch_size: Number of sessions per batch.
        shuffle: Whether the loader reshuffles sessions on every pass.
    """
    return DataLoader(
        GRUDataset(data, itemmap=itemmap),
        batch_size=batch_size,
        shuffle=shuffle,
        collate_fn=collate_fn,
    )


In [29]:
# # test get_loader
# train_loader = get_loader(train_data[0], universal_item_map, batch_size=batch_size, shuffle=True)
# x, y = next(iter(train_loader))
# y.shape

torch.Size([5, 8])

In [11]:
class PerturbedGradientDescent(Optimizer):
  """SGD step with a proximal pull towards a reference set of parameters.

  Each update uses `grad + mu * (param - reference)` as the descent direction,
  scaled by `lr`.

  Args:
    params: Iterable of parameters (or parameter groups) to optimise.
    lr: Step size.
    mu: Weight of the proximal term.
  """

  def __init__(self, params, lr=0.01, mu=0.0):
    defaults = dict(lr=lr, mu=mu)
    super().__init__(params, defaults)

  @torch.no_grad()
  def step(self, global_params, device):
    """Apply one update.

    Args:
      global_params: Iterable of reference tensors, paired positionally with
        the parameters of each group.
      device: Device the reference tensors are moved to before use.

    NOTE(review): the pairing restarts from the beginning of `global_params`
    for every parameter group when a list is passed; with a single group
    (the default) this is not an issue.
    """
    for group in self.param_groups:
      for p, g in zip(group['params'], global_params):
        # Parameters that received no gradient (frozen or unused) are skipped,
        # as the built-in optimisers do; the zip still consumes their partner
        # so later pairs stay aligned.
        if p.grad is None:
          continue
        g = g.to(device)
        d_p = p.grad.data + group['mu'] * (p.data - g.data)
        p.data.add_(d_p, alpha=-group['lr'])

In [12]:
class GRUModel(nn.Module):
    """Embedding -> GRU -> linear scorer over the item vocabulary."""

    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        """
        Initialize the GRU model.

        Args:
            input_size (int): Number of rows in the embedding table (item vocabulary size)
            hidden_size (int): Embedding width and number of features in the hidden state `h`
            output_size (int): The size of the output layer (number of items)
            num_layers (int, optional): Number of recurrent layers. Default: 1
        """
        super(GRUModel, self).__init__()

        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # Embedding layer
        self.embedding = nn.Embedding(input_size, hidden_size)

        # GRU layer
        self.gru = nn.GRU(hidden_size, hidden_size, num_layers, batch_first=True)

        # Fully connected layer
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        """
        Forward pass through the model.

        Args:
            x: Batch-first tensor of item indices
            hidden: Initial hidden state, e.g. from `init_hidden`

        Returns:
            Tuple of (scores for the last time step, final hidden state)
        """
        # Embedding
        embedded = self.embedding(x)

        # GRU
        output, hidden = self.gru(embedded, hidden)

        # Only the last time step is scored, giving one score vector per row.
        # NOTE(review): for rows shorter than the batch maximum the last step
        # is a padded position.
        output = self.fc(output[:, -1, :])

        return output, hidden

    def init_hidden(self, batch_size):
        """
        Initialize the hidden state of the GRU.

        The state is created on the same device and with the same dtype as the
        model's own weights, so it stays valid wherever the model is moved.

        Args:
            batch_size (int): The size of the batch

        Returns:
            Zero tensor of shape (num_layers, batch_size, hidden_size)
        """
        reference = next(self.parameters())
        return reference.new_zeros(self.num_layers, batch_size, self.hidden_size)

In [13]:
class TOP1MaxLoss(torch.nn.Module):
    """Pairwise ranking loss between each target item and all other items.

    For every non-padded target `t` of row `i`, the loss adds
    `-sigmoid(scores[i, t] - scores[i, k])` for every other item `k`.
    """

    def __init__(self):
        super(TOP1MaxLoss, self).__init__()

    def forward(self, scores, targets):
        """Compute the loss.

        Args:
            scores: Tensor of shape (batch, num_items) with one score per item.
            targets: Long tensor of shape (batch, seq_len); -1 marks padding.

        Returns:
            Scalar tensor: the summed pairwise terms divided by
            `batch * seq_len`. Padded positions contribute zero to the sum but
            are counted in the denominator. The result is always a tensor
            attached to `scores`, even when every target is padding.
        """
        denominator = scores.size(0) * targets.size(1)
        if denominator == 0:
            # Nothing to rank; keep the result differentiable w.r.t. `scores`.
            return scores.sum() * 0.0

        valid = targets != -1
        # Padding (-1) is clamped to a legal index so gather/scatter work;
        # those positions are masked out below.
        safe_targets = targets.clamp(min=0)

        # (batch, seq_len): score of each target item.
        pos_scores = scores.gather(1, safe_targets)

        # (batch, seq_len, num_items): target score versus every item score.
        pairwise = -torch.sigmoid(pos_scores.unsqueeze(2) - scores.unsqueeze(1))

        # Exclude the positive item itself from its own comparison set.
        is_positive = torch.zeros_like(pairwise, dtype=torch.bool)
        is_positive.scatter_(2, safe_targets.unsqueeze(2), True)
        pairwise = pairwise.masked_fill(is_positive, 0.0)

        per_target = pairwise.sum(dim=2).masked_fill(~valid, 0.0)

        return per_target.sum() / denominator

In [None]:
class Client(object):
  """
  Base class for clients in federated learning.
  """

  def __init__(self, model, id, malicious, **kwargs):
    """Create a client with a private copy of `model` and notebook-level settings.

    Args:
      model: Model to deep-copy; the copy is stored on `self.model`.
      id: Index of this client; used to pick its slice of the train/test data.
      malicious: Whether this client behaves as an attacker.
      **kwargs: Accepted for call-site compatibility; not used here.
    """
    # Identity and role.
    self.id = id
    self.malicious = malicious
    self.attack_type = attack_type

    # Private model copy and where it runs.
    self.model = copy.deepcopy(model)
    self.device = DEVICE

    # Settings taken from the notebook's configuration cells.
    self.dataset = dataset
    self.data_path = data_path
    self.num_classes = output_size
    self.batch_size = batch_size
    self.local_steps = local_steps
    self.learning_rate = local_learning_rate
    self.learning_rate_decay = learning_rate_decay
    self.future_test = future_test

    # Record whether any direct child layer is a BatchNorm2d.
    self.has_BatchNorm = any(
      isinstance(layer, nn.BatchNorm2d) for layer in self.model.children()
    )

    # Training machinery: ranking loss, plain SGD and an exponential LR schedule.
    self.loss = TOP1MaxLoss()
    self.optimizer = torch.optim.SGD(self.model.parameters(), lr=self.learning_rate)
    self.learning_rate_scheduler = torch.optim.lr_scheduler.ExponentialLR(
      optimizer=self.optimizer,
      gamma=learning_rate_decay_gamma
    )

  def load_train_data(self, batch_size=None):
    """Build the training DataLoader for this client.

    Args:
      batch_size: Sessions per batch; defaults to `self.batch_size`.

    Returns:
      DataLoader over `train_data[self.id]`, yielding padded
      `(sequences, targets)` batches.

    Side effects:
      Sets `self.train_samples` to the number of sessions in the dataset.
    """
    if batch_size is None:
      batch_size = self.batch_size

    # Use a distinct local name: assigning to `train_data` here would shadow the
    # notebook-level array and make the lookup below raise UnboundLocalError.
    loader = get_loader(train_data[self.id], itemmap=universal_item_map, batch_size=batch_size)

    # label poison attack
    if self.malicious and self.attack_type == 'A1':
      # A DataLoader cannot be indexed or edited in place, so the labels are
      # flipped while each batch is collated. Padding (-1) is left untouched.
      clean_collate = loader.collate_fn
      num_classes = self.num_classes

      def poisoned_collate(batch):
        sequences, targets = clean_collate(batch)
        flipped = num_classes - targets - 1
        return sequences, torch.where(targets == -1, targets, flipped)

      loader = DataLoader(
        loader.dataset,
        batch_size=batch_size,
        shuffle=True,
        collate_fn=poisoned_collate,
      )

    # Count sessions, not batches.
    self.train_samples = len(loader.dataset)
    return loader

  def load_test_data(self, batch_size=None):
    """Build the evaluation DataLoader for this client.

    Args:
      batch_size: Sessions per batch; defaults to `self.batch_size`.

    Returns:
      DataLoader over `valid_data[self.id]`, yielding padded
      `(sequences, targets)` batches in a fixed order. Shuffling is disabled
      so repeated evaluations see identical batches and do not consume
      random state.
    """
    if batch_size is None:
      batch_size = self.batch_size
    test_data = get_loader(
      valid_data[self.id],
      itemmap=universal_item_map,
      batch_size=batch_size,
      shuffle=False,
    )
    return test_data
  
  def set_parameters(self, model):
    for new_param, old_param in zip(model.parameters(), self.model.parameters()):
      old_param.data = new_param.data.clone()

  def fine_tuning(self):
    trainloader = self.load_train_data()
    self.model.train()

    for i, (x, y) in enumerate(trainloader):
      # if type(x) == type([]):
      #   x[0] = x[0].to(self.device)
      # else:
      #   x = x.to(self.device)
      x = x.to(self.device)
      y = y.to(self.device)
      self.optimizer.zero_grad()
      hidden = self.model.init_hidden(x.size(0))
      output, _ = self.model(x, hidden)
      # output = self.model(x)
      loss = self.loss(output, y)
      loss.backward()
      self.optimizer.step()

  def new_test_metrics(self):
    """
    evaluates the model's performance on test data, particularly its accuracy.
    """

    testloaderfull = self.load_test_data()
    self.model.eval()

    total_recall = 0.0
    total_mrr = 0.0
    test_num = 0
    y_prob = [] #model outputs or probabilities
    y_true = []

    with torch.no_grad():
      for x, y in testloaderfull:
        # if type(x) == type([]):
        #   x[0] = x[0].to(self.device)
        # else:
        #   x = x.to(self.device)
        x = x.to(self.device)
        y = y.to(self.device)
        hidden = self.model.init_hidden(x.size(0))
        # output = self.model(x)
        output, _ = self.model(x, hidden)

        # Select top-k items
        _, top_k_indices = torch.topk(output, K, dim=1)

        # test_acc += (torch.sum(torch.argmax(output, dim=1) == y)).item()
        # test_num += y.shape[0]

        # Calculate recall and MRR for each batch
        for i in range(x.size(0)):
          for y_item in y[i]:
            if y_item == -1:
              continue
            target_item_scalar = y_item.item()
            top_k_items = top_k_indices[i].tolist()

            # Calculate Recall@k
            if target_item_scalar in top_k_items:
              total_recall += 1

            # Calculate MRR@k
            if target_item_scalar in top_k_items:
              rank = top_k_items.index(target_item_scalar)
              total_mrr += 1 / (rank + 1)
          
          test_num += len(y[i][y[i] != -1])  # Count non-padding elements

        y_prob.append(output.detach().cpu().numpy())
        nc = self.num_classes
        if self.num_classes == 2:
          nc += 1
        lb = label_binarize(y.detach().cpu().numpy(), classes=np.arange(nc))
        if self.num_classes == 2:
          lb = lb[:, :2]
        y_true.append(lb)

    y_prob = np.concatenate(y_prob, axis=0)
    y_true = np.concatenate(y_true, axis=0)


    return total_recall, total_mrr, test_num

  def new_train_metrics(self):
    """
    evaluates the model's loss on the training data.
    """

    trainloader = self.load_train_data()
    self.model.eval()

    train_num = 0
    losses = 0.0
    with torch.no_grad():
      for x, y in trainloader:
        # if type(x) == type([]):
        #   x[0] = x[0].to(self.device)
        # else:
        #   x = x.to(self.device)
        x = x.to(self.device)
        y = y.to(self.device)
        hidden = self.model.init_hidden(x.size(0))
        output, _ = self.model(x, hidden)
        # output = self.model(x)
        # calculate losses
        loss = self.loss(output, y)
        train_num += y.shape[0]
        losses += loss * y.shape[0]
        # loss = self.loss(output, y)
        # train_num += y.shape[0]
        # losses += loss.item() * y.shape[0]

    return losses, train_num

  def test_metrics_personalized(self):
    testloaderfull = self.load_test_data()

    self.model.eval()

    test_acc = 0
    test_num = 0
    y_prob = []
    y_true = []

    with torch.no_grad():
      for x, y in testloaderfull:
        if type(x) == type([]):
          x[0] = x[0].to(self.device)
        else:
          x = x.to(self.device)
        y = y.to(self.device)
        output = self.model(x)

        test_acc += (torch.sum(torch.argmax(output, dim=1) == y)).item()
        test_num += y.shape[0]

        y_prob.append(output.detach().cpu().numpy())
        nc = self.num_classes
        if self.num_classes == 2:
          nc += 1
        lb = label_binarize(y.detach().cpu().numpy(), classes=np.arange(nc))
        if self.num_classes == 2:
          lb = lb[:, :2]
        y_true.append(lb)

    y_prob = np.concatenate(y_prob, axis=0)
    y_true = np.concatenate(y_true, axis=0)

    return test_acc, test_num

  def train_metrics_personalized(self):
    trainloader = self.load_train_data()

    self.model.eval()

    train_num = 0
    losses = 0
    with torch.no_grad():
      for x, y in trainloader:
        if type(x) == type([]):
          x[0] = x[0].to(self.device)
        else:
          x = x.to(self.device)
        y = y.to(self.device)
        output = self.model(x)
        loss = self.loss(output, y)
        train_num += y.shape[0]
        losses += loss.item() * y.shape[0]

    return losses, train_num