In [1]:
import copy
import torch
import torch.nn as nn
import os
import time
import warnings
import numpy as np
import sys
import torch.backends.cudnn as cudnn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.optim import Optimizer
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader, Dataset, random_split
from torch.nn import TransformerEncoder
from torch.nn import TransformerEncoderLayer, Module
import random
import pandas as pd
import pickle
import math

from sklearn.cluster import AgglomerativeClustering
from sklearn.preprocessing import label_binarize

In [2]:
# raw_tr_data = np.load(f'./train_data_diginetica.npy', allow_pickle=True)
# raw_val_data = np.load(f'./test_data_diginetica.npy', allow_pickle=True)

In [3]:
raw_tr_data = []
raw_val_data = []
list_valid_users = []

for i in range(45):
  if os.path.isfile(f'./SR_SAN_Diginetica/train_{i}.txt'):
    # print(i)
    list_valid_users.append(i)
    tr_data = pickle.load(open(f'./SR_SAN_Diginetica/train_{i}.txt', 'rb'))
    ts_data = pickle.load(open(f'./SR_SAN_Diginetica/test_{i}.txt', 'rb'))

    raw_tr_data.append(tr_data)
    raw_val_data.append(ts_data)

print(f"len train: {len(raw_tr_data)}")
print(f"len validation: {len(raw_val_data)}")

len train: 21
len validation: 21


In [4]:
print(list_valid_users)

[0, 3, 7, 8, 9, 16, 21, 23, 24, 25, 28, 31, 32, 35, 36, 37, 39, 41, 42, 43, 44]


In [5]:
dataset = 'diginetica'
attack_type = 'B' # A1: label_poison, A2: gaussian_attack, A3: scaling_attack, A4: reverse_attack
local_learning_rate = 0.01
local_steps= 1
data_path= f"."
learning_rate_decay_gamma= 0.99
learning_rate_decay= False
future_test= False
mu= 1
global_rounds= 50
num_clients= len(raw_val_data)
join_ratio= 1.0
attack_ratio= 0.0
algorithm= "FedCHAR"
future_ratio= 0.0
finetune_rounds= 0
eval_gap= 1
detailed_info= False
partition= "nature"
initial_rounds= 10
n_clusters= 3
metric= 'cosine'
linkage= 'complete'
output_size = 889
n_node = 889

In [6]:
print(num_clients)

21


In [7]:
seed = 42

cudnn.benchmark = False
cudnn.deterministic = True
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)

In [8]:
#parameter for recommender system
# input_size = 889
# hidden_size = 400
# num_layers = 3
# output_size = input_size
# batch_size = 10
K = 5

In [9]:
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Training on {DEVICE}")

Training on cuda:0


In [10]:
class Options:
    def __init__(self):
      self.dataset = 'diginetica'
      self.batchSize = 32
      self.hiddenSize = 200
      self.nhead = 2
      self.layer = 3
      self.feedforward = 4
      self.epoch = 12
      self.lr = 0.001
      self.lr_dc = 0.1
      self.lr_dc_step = 3
      self.l2 = 1e-5
      self.patience = 12

opt = Options()

# Now you can access parameters like this:
print(opt.dataset)

diginetica


In [11]:
def data_masks(all_usr_pois, item_tail):
    if not all_usr_pois or all(len(upois) == 0 for upois in all_usr_pois):
        raise ValueError("Input all_usr_pois is empty or contains only empty lists")

    us_lens = [len(upois) for upois in all_usr_pois]
    len_max = max(us_lens)
    us_pois = [upois + item_tail * (len_max - le) for upois, le in zip(all_usr_pois, us_lens)]
    us_msks = [[1] * le + [0] * (len_max - le) for le in us_lens]
    return us_pois, us_msks, len_max

In [12]:
class Data():
    def __init__(self, data, shuffle=False, graph=None):
      inputs = data[0]
      inputs, mask, len_max = data_masks(inputs, [0])
      self.inputs = np.asarray(inputs)
      self.mask = np.asarray(mask)
      self.len_max = len_max
      self.targets = np.asarray(data[1])
      self.length = len(inputs)
      self.shuffle = shuffle
      self.graph = graph

    def generate_batch(self, batch_size):
      if self.shuffle:
        shuffled_arg = np.arange(self.length)
        np.random.shuffle(shuffled_arg)
        self.inputs = self.inputs[shuffled_arg]
        self.mask = self.mask[shuffled_arg]
        self.targets = self.targets[shuffled_arg]
      n_batch = int(self.length / batch_size)
      if self.length % batch_size != 0:
        n_batch += 1
      slices = np.split(np.arange(n_batch * batch_size), n_batch)
      slices[-1] = slices[-1][:(self.length - batch_size * (n_batch - 1))]
      return slices

    def get_slice(self, i):
      inputs, mask, targets = self.inputs[i], self.mask[i], self.targets[i]
      items, n_node, A, alias_inputs = [], [], [], []
      for u_input in inputs:
        n_node.append(len(np.unique(u_input)))
      max_n_node = np.max(n_node)
      for u_input in inputs:
        node = np.unique(u_input)
        items.append(node.tolist() + (max_n_node - len(node)) * [0])
        u_A = np.zeros((max_n_node, max_n_node))
        for i in np.arange(len(u_input) - 1):
          if u_input[i + 1] == 0:
            break
          u = np.where(node == u_input[i])[0][0]
          v = np.where(node == u_input[i + 1])[0][0]
          u_A[u][v] = 1
        u_sum_in = np.sum(u_A, 0)
        u_sum_in[np.where(u_sum_in == 0)] = 1
        u_A_in = np.divide(u_A, u_sum_in)
        u_sum_out = np.sum(u_A, 1)
        u_sum_out[np.where(u_sum_out == 0)] = 1
        u_A_out = np.divide(u_A.transpose(), u_sum_out)
        u_A = np.concatenate([u_A_in, u_A_out]).transpose()
        A.append(u_A)
        alias_inputs.append([np.where(node == i)[0][0] for i in u_input])
      return alias_inputs, A, items, mask, targets

    def __len__(self):
      return self.length  # or return len(self.inputs)

In [13]:
class PerturbedGradientDescent(Optimizer):
  def __init__(self, params, lr=0.01, mu=0.0):
    default = dict(lr=lr, mu=mu)
    super().__init__(params, default)

  @torch.no_grad()
  def step(self, global_params, device):
    for group in self.param_groups:
      for p, g in zip(group['params'], global_params):
        g = g.to(device)
        # print(p.grad)
        d_p = p.grad.data + group['mu'] * (p.data - g.data)
        p.data.add_(d_p, alpha=-group['lr'])

In [14]:
class SelfAttentionNetwork(Module):
  def __init__(self, opt, n_node):
    super(SelfAttentionNetwork, self).__init__()
    self.hidden_size = opt.hiddenSize
    self.n_node = n_node
    self.batch_size = opt.batchSize
    self.embedding = nn.Embedding(self.n_node, self.hidden_size)
    self.transformerEncoderLayer = TransformerEncoderLayer(d_model=self.hidden_size, nhead=opt.nhead,dim_feedforward=self.hidden_size * opt.feedforward)
    self.transformerEncoder = TransformerEncoder(self.transformerEncoderLayer, opt.layer)
    self.loss_function = nn.CrossEntropyLoss()
    self.optimizer = torch.optim.Adam(self.parameters(), lr=opt.lr, weight_decay=opt.l2)
    self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=opt.lr_dc_step, gamma=opt.lr_dc)
    self.reset_parameters()

  def reset_parameters(self):
    stdv = 1.0 / math.sqrt(self.hidden_size)
    for weight in self.parameters():
      weight.data.uniform_(-stdv, stdv)

  def compute_scores(self, hidden, mask):
    ht = hidden[torch.arange(mask.shape[0]).long(), torch.sum(mask, 1) - 1]  # batch_size x latent_size
    b = self.embedding.weight[1:]  # n_nodes x latent_size
    scores = torch.matmul(ht, b.transpose(1, 0))
    return scores

  def forward(self, inputs, A):
    hidden = self.embedding(inputs)
    hidden = hidden.transpose(0,1).contiguous()
    hidden = self.transformerEncoder(hidden)
    hidden = hidden.transpose(0,1).contiguous()
    return hidden

In [15]:
def trans_to_cuda(variable):
  if torch.cuda.is_available():
    return variable.cuda()
  else:
    return variable

In [16]:
def trans_to_cpu(variable):
  if torch.cuda.is_available():
    return variable.cpu()
  else:
    return variable

In [17]:
def forward(model, i, data):
  alias_inputs, A, items, mask, targets = data.get_slice(i)
  alias_inputs = trans_to_cuda(torch.Tensor(alias_inputs).long())
  items = trans_to_cuda(torch.Tensor(items).long())
  A = trans_to_cuda(torch.Tensor(A).float())
  mask = trans_to_cuda(torch.Tensor(mask).long())
  hidden = model(items, A)
  get = lambda i: hidden[i][alias_inputs[i]]
  seq_hidden = torch.stack([get(i) for i in torch.arange(len(alias_inputs)).long()])
  return targets, model.compute_scores(seq_hidden, mask)

In [18]:
def calculate_recall_at_k(scores, targets, k=K):
    recall_at_k = []
    top_k_scores = scores.topk(k)[1]
    for score, target in zip(top_k_scores, targets):
        recall_at_k.append((target - 1) in score)
    return np.mean(recall_at_k)

In [19]:
class Client(object):
  """
  Base class for clients in federated learning.
  """

  def __init__(self, model, id, malicious, **kwargs):
    self.model = copy.deepcopy(model)
    self.dataset = dataset
    self.device = DEVICE
    self.id = id
    self.malicious = malicious
    self.attack_type = attack_type
    self.num_classes = output_size
    self.batch_size = opt.batchSize
    self.learning_rate = local_learning_rate
    self.local_steps = local_steps
    self.data_path = data_path
    self.learning_rate_decay = learning_rate_decay
    self.future_test = future_test


    # check BatchNorm
    self.has_BatchNorm = False
    for layer in self.model.children():
      if isinstance(layer, nn.BatchNorm2d):
        self.has_BatchNorm = True
        break

    self.loss = nn.CrossEntropyLoss()  # Replace with your loss function
    self.optimizer = torch.optim.SGD(self.model.parameters(), lr=self.learning_rate) # momentum=0.9, weight_decay=1e-4
    self.learning_rate_scheduler = torch.optim.lr_scheduler.ExponentialLR(
      optimizer=self.optimizer,
      gamma=learning_rate_decay_gamma
    )

  def load_train_data(self, batch_size=None):
    if batch_size == None:
      batch_size = self.batch_size
    train_data = Data(raw_tr_data[self.id], shuffle=True)

    # label poison attack
    if self.malicious and self.attack_type == 'A1':
      for idx in range(len(train_data)):
        train_data[idx][1] = self.num_classes - train_data[idx][1] - 1
    self.train_samples = len(train_data)
    return train_data

  def load_test_data(self, batch_size=None):
    """
    fine-tunes the model using the loaded training data
    """
    if batch_size == None:
      batch_size = self.batch_size
    test_data = Data(raw_val_data[self.id], shuffle=False)
    return test_data
  
  def set_parameters(self, model):
    for new_param, old_param in zip(model.parameters(), self.model.parameters()):
      old_param.data = new_param.data.clone()

  def fine_tuning(self):
    trainloader = self.load_train_data()
    self.model.train()
    slices = trainloader.generate_batch(self.model.batch_size)
    num_batches = len(slices)

    for i, j in zip(slices, np.arange(num_batches)):
      # if type(x) == type([]):
      #   x[0] = x[0].to(self.device)
      # else:
      #   x = x.to(self.device)
      self.optimizer.zero_grad()
      targets, scores = forward(self.model, i, trainloader)
      targets = trans_to_cuda(torch.Tensor(targets).long())
      # output = self.model(x)
      loss = self.loss(scores, targets - 1)
      loss.backward()
      self.optimizer.step()

  def new_test_metrics(self):
    """
    evaluates the model's performance on test data, particularly its accuracy.
    """

    testloaderfull = self.load_test_data()
    self.model.eval()

    total_recall = 0.0
    total_mrr = 0.0
    test_num = 0
    slices = testloaderfull.generate_batch(self.model.batch_size)

    with torch.no_grad():
      for i in slices:
        # if type(x) == type([]):
        #   x[0] = x[0].to(self.device)
        # else:
        #   x = x.to(self.device)
        targets, scores = forward(self.model, i, testloaderfull)
        targets = torch.Tensor(targets).long().to(DEVICE)

        sub_scores = scores.topk(K)[1] #top-k items
        sub_scores = sub_scores.cpu().detach().numpy()  # Move to CPU if necessary

        # Calculate recall and MRR for each batch
        targets_np = targets.cpu().numpy()
        for score, target_np, mask in zip(sub_scores, targets_np, testloaderfull.mask):
          mrr_index = np.where(score == target_np - 1)[0]
          # total_mrr.append(0 if len(mrr_index) == 0 else 1 / (mrr_index[0] + 1))
          total_mrr += 0 if len(mrr_index) == 0 else 1 / (mrr_index[0] + 1)

        recall = calculate_recall_at_k(scores, targets, k=5)
        # total_recall.append(recall)
        total_recall += recall          
        
    test_num = len(slices)
    # total_recall = np.mean(total_recall)
    # total_mrr = np.mean(total_mrr)


    return total_recall, total_mrr, test_num

  def new_train_metrics(self):
    """
    evaluates the model's loss on the training data.
    """

    trainloader = self.load_train_data()
    self.model.eval()

    train_num = 0
    losses = 0.0
    slices = trainloader.generate_batch(self.model.batch_size)
    train_num = len(slices)

    with torch.no_grad():
      for i in slices:
        # if type(x) == type([]):
        #   x[0] = x[0].to(self.device)
        # else:
        #   x = x.to(self.device)
        targets, scores = forward(self.model, i, trainloader)
        targets = torch.Tensor(targets).long().to(DEVICE)
        # output = self.model(x)
        # calculate losses
        loss = self.loss(scores, targets - 1)
        losses += loss.item()
        # loss = self.loss(output, y)
        # train_num += y.shape[0]
        # losses += loss.item() * y.shape[0]

    return losses, train_num

  def test_metrics_personalized(self):
    testloaderfull = self.load_test_data()

    self.model.eval()

    total_recall = 0.0
    total_mrr = 0.0
    test_num = 0
    # y_prob = []
    # y_true = []
    slices = testloaderfull.generate_batch(self.model.batch_size)

    with torch.no_grad():
      for i in slices:
        # if type(x) == type([]):
        #   x[0] = x[0].to(self.device)
        # else:
        #   x = x.to(self.device)
        targets, scores = forward(self.model, i, testloaderfull)
        targets = torch.Tensor(targets).long().to(DEVICE)
        #output = self.model(x)
        sub_scores = scores.topk(K)[1] #top-k items
        sub_scores = sub_scores.cpu().detach().numpy()

        # test_acc += (torch.sum(torch.argmax(output, dim=1) == y)).item()
        # test_num += y.shape[0]

        targets_np = targets.cpu().numpy()
        # Calculate recall and MRR for each batch
        for score, target_np, mask in zip(sub_scores, targets_np, testloaderfull.mask):
          mrr_index = np.where(score == target_np - 1)[0]
          # total_mrr.append(0 if len(mrr_index) == 0 else 1 / (mrr_index[0] + 1))
          total_mrr += 0 if len(mrr_index) == 0 else 1 / (mrr_index[0] + 1)

        recall = calculate_recall_at_k(scores, targets, k=5)
        # total_recall.append(recall)
        total_recall += recall

        # y_prob.append(output.detach().cpu().numpy())
        # nc = self.num_classes
        # if self.num_classes == 2:
        #   nc += 1
        # lb = label_binarize(y.detach().cpu().numpy(), classes=np.arange(nc))
        # if self.num_classes == 2:
        #   lb = lb[:, :2]
        # y_true.append(lb)

    # y_prob = np.concatenate(y_prob, axis=0)
    # y_true = np.concatenate(y_true, axis=0)
    test_num = len(slices)
    # total_recall = np.mean(total_recall)
    # total_mrr = np.mean(total_mrr)

    return total_recall, total_mrr, test_num

  def train_metrics_personalized(self):
    trainloader = self.load_train_data()

    self.model.eval()

    train_num = 0
    losses = 0.0
    slices = trainloader.generate_batch(self.model.batch_size)
    train_num = len(slices)
    with torch.no_grad():
      for i in slices:
        # if type(x) == type([]):
        #   x[0] = x[0].to(self.device)
        # else:
        #   x = x.to(self.device)
        targets, scores = forward(self.model, i, trainloader)
        targets = torch.Tensor(targets).long().to(DEVICE)

        # output = self.model(x)
        loss = self.loss(scores, targets - 1)
        losses += loss.item()

    return losses, train_num

In [20]:
class clientCHAR(Client):
  def __init__(self, model, id, malicious, **kwargs):
    super().__init__(model, id, malicious, **kwargs)
    self.mu = mu
    self.model_per = copy.deepcopy(self.model)
    self.optimizer_per = PerturbedGradientDescent(self.model_per.parameters(), lr=self.learning_rate, mu=self.mu)
    self.learning_rate_scheduler_per = torch.optim.lr_scheduler.ExponentialLR(
        optimizer=self.optimizer_per,
        gamma=learning_rate_decay_gamma
        )

  def dtrain(self):
    trainloader = self.load_train_data()
    model = copy.deepcopy(self.model)
    self.model.train()
    self.model_per.train()

    max_local_steps = self.local_steps
    slices = trainloader.generate_batch(model.batch_size)
    num_batches = len(slices)

    for step in range(max_local_steps):
      for i, j in zip(slices, np.arange(num_batches)):
        # if type(x) == type([]):
        #   x[0] = x[0].to(self.device)
        # else:
        #   x = x.to(self.device)
        print(j)
        targets_p, scores_p = forward(self.model_per, i, trainloader)
        targets_p = trans_to_cuda(torch.Tensor(targets_p).long())
        # out_p = self.model_per(x)
        loss = self.model_per.loss_function(scores_p, targets_p - 1)
        self.optimizer_per.zero_grad()
        loss.backward()
        print(f"loss: {loss}")
        for name, param in self.model_per.named_parameters():
          if param.grad is None:
            print(f"Parameter name: {name}")
            print(f"Parameter shape: {param.shape}")
            print(f"Gradient: {param.grad}")
            print("=" * 20)
        

        self.optimizer_per.step(model.parameters(), self.device)

        targets_g, scores_g = forward(self.model, i, trainloader)
        targets_g = torch.Tensor(targets_g).long().to(DEVICE)
        # out_g = self.model(x)
        loss = self.loss(scores_g, targets_g - 1)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

    if self.learning_rate_decay:
      self.learning_rate_scheduler.step()
      self.learning_rate_scheduler_per.step()

  def test_metrics_personalized(self):
    testloaderfull = self.load_test_data()
    self.model_per.eval()

    total_recall = 0.0
    total_mrr = 0.0
    test_num = 0
    # y_prob = []
    # y_true = []
    slices = testloaderfull.generate_batch(self.model_per.batch_size)
    test_num = len(slices)

    with torch.no_grad():
      for i in slices:
        # if type(x) == type([]):
        #   x[0] = x[0].to(self.device)
        # else:
        #   x = x.to(self.device)
        targets, scores = forward(self.model_per, i, testloaderfull)
        targets = torch.Tensor(targets).long().to(DEVICE)
        # output = self.model_per(x)

        # test_acc += (torch.sum(torch.argmax(output, dim=1) == y)).item()
        # test_num += y.shape[0]

        # Select top-k items
        sub_scores = scores.topk(K)[1] #top-k items
        sub_scores = sub_scores.cpu().detach().numpy()  # Move to CPU if necessary

        # Calculate recall and MRR for each batch
        targets_np = targets.cpu().numpy()
        for score, target_np, mask in zip(sub_scores, targets_np, testloaderfull.mask):
          mrr_index = np.where(score == target_np - 1)[0]
          # total_mrr.append(0 if len(mrr_index) == 0 else 1 / (mrr_index[0] + 1))
          total_mrr += 0 if len(mrr_index) == 0 else 1 / (mrr_index[0] + 1)

        recall = calculate_recall_at_k(scores, targets, k=K)
        # total_recall.append(recall)
        total_recall += recall

    #     y_prob.append(F.softmax(output).detach().cpu().numpy())
    #     y_true.append(label_binarize(y.detach().cpu().numpy(), classes=np.arange(self.num_classes)))

    # y_prob = np.concatenate(y_prob, axis=0)
    # y_true = np.concatenate(y_true, axis=0)
    # total_recall = np.mean(total_recall)
    # total_mrr = np.mean(total_mrr)

    return total_recall, total_mrr, test_num

  # need to investigate the loss calculation
  def train_metrics_personalized(self):
    trainloader = self.load_train_data()
    self.model_per.eval()

    train_num = 0
    losses = 0
    slices = trainloader.generate_batch(self.model_per.batch_size)
    train_num = len(slices)

    with torch.no_grad():
      for i in slices:
        # if type(x) == type([]):
        #   x[0] = x[0].to(self.device)
        # else:
        #   x = x.to(self.device)
        targets, scores = forward(self.model_per, i, trainloader)
        targets = torch.Tensor(targets).long().to(DEVICE)
        # output = self.model_per(x)
        loss = self.loss(scores, targets - 1)

        #add a regularization term to the loss
        # ensure that the personalized model doesn't deviate too far from the global model.
        # The strength of this regularization is controlled by the parameter self.mu
        gm = torch.cat([p.data.view(-1) for p in self.model.parameters()], dim=0)
        pm = torch.cat([p.data.view(-1) for p in self.model_per.parameters()], dim=0)
        loss += 0.5 * self.mu * torch.norm(pm-gm, p=2) #element-wise difference using L2 norm

        # losses += loss.item() * y.shape[0]
        losses += loss.item()

    return losses, train_num

  def get_update(self, global_model):
    trainloader = self.load_train_data()
    model = copy.deepcopy(self.model) #old model
    self.set_parameters(global_model)
    self.model.train()

    max_local_steps = self.local_steps
    slices = trainloader.generate_batch(self.model.batch_size)

    for step in range(max_local_steps):
      for i in slices:
        # if type(x) == type([]):
        #   x[0] = x[0].to(self.device)
        # else:
        #   x = x.to(self.device)
        targets, scores = forward(self.model, i, trainloader)
        targets = torch.Tensor(targets).long().to(DEVICE)
        # output = self.model(x)
        loss = self.loss(scores, targets - 1)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

    model_update = [c_param.data - s_param.data for c_param, s_param in zip(self.model.parameters(), global_model.parameters())]
    self.set_parameters(model)
    return model_update

In [21]:
class Server(object):
  def __init__(self, model):
    # Set up the main attributes
    self.device = DEVICE
    self.dataset = dataset
    self.num_classes = output_size
    self.global_rounds = global_rounds
    self.local_steps = local_steps
    self.batch_size = opt.batchSize
    self.learning_rate = local_learning_rate
    self.global_model = copy.deepcopy(model)
    self.num_clients = num_clients
    self.join_ratio = join_ratio
    self.attack_ratio = attack_ratio
    self.attack_type = attack_type
    self.seed = seed
    self.algorithm = algorithm
    self.current_round = -1
    self.future_test = future_test
    self.future_ratio = future_ratio
    self.num_training_clients = num_clients - int(num_clients * future_ratio)
    self.join_clients = int(self.num_training_clients * self.join_ratio)
    self.finetune_rounds = finetune_rounds
    self.eval_gap = eval_gap
    self.detailed_info = detailed_info
    self.partition = partition
    self.data_path = data_path

    self.clients = []
    self.training_clients = []
    self.malicious_ids = []
    self.selected_clients = []

    self.uploaded_weights = []
    self.uploaded_ids = []
    self.uploaded_models = []
    self.uploaded_updates = []

    self.rs_test_recall_g = []
    self.rs_test_mrr_g = []
    self.rs_train_loss_g = []
    self.rs_test_recalls_g = []
    self.rs_test_mrrs_g = []
    self.rs_test_recall_p = []
    self.rs_test_mrr_p = []
    self.rs_train_loss_p = []
    self.rs_test_recalls_p = []
    self.rs_test_mrrs_p = []
    self.ft_train_loss = []
    self.ft_test_recall = []
    self.ft_std_recall = []
    self.ft_test_mrr = []
    self.ft_std_mrr = []

  def set_clients(self, model, clientObj):

    if self.future_test == False:
      if self.attack_type == 'B':
        self.malicious_ids = []
        self.attack_ratio = 0.0
      else:
        self.malicious_ids = np.sort(np.random.choice(np.arange(self.num_clients), int(self.num_clients * self.attack_ratio), replace=False))


      for i in range(self.num_clients):
        client = clientObj(model=model, id=i,
                        malicious=True if i in self.malicious_ids else False)
        self.clients.append(client)

      self.training_clients = self.clients
      self.training_clients_ids = np.arange(self.num_clients)

    else:
      if self.algorithm != 'FedCHAR_DC':
        print('{} do not support future testing'.format(self.algorithm))
        raise NotImplementedError

      self.training_clients_ids = np.sort(np.random.choice(np.arange(self.num_clients), self.num_training_clients, replace=False))

      if self.attack_type == 'B':
        self.malicious_ids = []
        self.attack_ratio = 0.0
      else:
        self.malicious_ids = np.sort(np.random.choice(self.training_clients_ids, int(self.num_training_clients * self.attack_ratio),
                                                      replace=False))

      for i in range(self.num_clients):
        client = clientObj(model=model, id=i,
                        malicious=True if i in self.malicious_ids else False)
        self.clients.append(client)

        if i in self.training_clients_ids:
          self.training_clients.append(client)

    print('Malicious Clients: {}'.format(list(self.malicious_ids)))
    print('Future Clients: {}'.format(list(np.sort(np.setdiff1d(np.arange(self.num_clients), self.training_clients_ids)))))

  def select_clients(self):
    selected_clients = list(np.random.choice(self.training_clients, self.join_clients, replace=False))
    return selected_clients

  def send_models(self):
    for client in self.selected_clients:
      client.set_parameters(self.global_model)

  def send_models_to_future_clients(self):
    for client in self.selected_clients:
      client.set_parameters(self.global_model)

  def receive_models(self):
    self.uploaded_ids = []
    self.uploaded_weights = [] #weight based on the fraction of client's data
    self.uploaded_models = []

    tot_samples = 0
    for client in self.selected_clients:
      tot_samples += client.train_samples
      self.uploaded_ids.append(client.id)
      self.uploaded_weights.append(client.train_samples)
      self.uploaded_models.append(client.model)

    for i, w in enumerate(self.uploaded_weights):
      self.uploaded_weights[i] = w / tot_samples

  def load_model(self):
    model_path = os.path.join(f"./models", self.dataset)
    model_path = os.path.join(model_path, self.algorithm + "_server" + ".pt")
    assert (os.path.exists(model_path))
    self.global_model = torch.load(model_path)

  def model_exists(self):
    model_path = os.path.join(f"./models", self.dataset)
    model_path = os.path.join(model_path, self.algorithm + "_server" + ".pt")
    return os.path.exists(model_path)

  def save_results(self):
    filename = "{}_{}_{}_{}_{}_bz{}_lr{}_gr{}_ep{}_jr{}_nc{}_fur{}_ntc{}_ftr{}_seed{}".format(self.dataset, self.partition, self.algorithm,
                                                                                        self.attack_type, self.attack_ratio, self.batch_size,
                                                                                        self.learning_rate, self.global_rounds, self.local_steps,
                                                                                        self.join_ratio, self.num_clients, self.future_ratio,
                                                                                        self.num_training_clients, self.finetune_rounds,
                                                                                        self.seed)

    if self.algorithm == 'FedCHAR':
      filename = filename + '_ir{}_ng{}_mtrc{}_lkg{}'.format(self.initial_rounds, self.n_clusters, self.metric, self.linkage)

    elif self.algorithm == 'FedCHAR_DC':
      filename = filename + '_ir{}_ng{}_mtrc{}_lkg{}_rr{}'.format(self.initial_rounds, self.n_clusters, self.metric, self.linkage,
                                                                  self.recluster_rounds)

    result_path = f"./results/npz/"
    if not os.path.exists(result_path):
      os.makedirs(result_path)

    if len(self.rs_test_acc_g) or len(self.rs_test_acc_p):
      file_path = result_path + "{}.npz".format(filename)
      print("Result path: " + file_path)

      np.savez(file_path, test_acc_g=self.rs_test_acc_g,
              test_acc_p=self.rs_test_acc_p, test_accs_g=self.rs_test_accs_g,
              test_accs_p=self.rs_test_accs_p, train_loss_g=self.rs_train_loss_g,
              train_loss_p=self.rs_train_loss_p, ft_train_loss=self.ft_train_loss,
              ft_test_acc=self.ft_test_acc, ft_std_acc=self.ft_std_acc)

  # did not implemented the modification
  def test_metrics_for_future_clients(self):
    num_samples = []
    tot_correct = []

    for c in self.selected_clients:
      ct, ns = c.new_test_metrics()
      tot_correct.append(ct*1.0)
      num_samples.append(ns)

    ids = [c.id for c in self.selected_clients]
    return ids, num_samples, tot_correct

  # did not implemented the modification
  def train_metrics_for_future_clients(self):
    num_samples = []
    losses = []
    for c in self.selected_clients:
      cl, ns = c.new_train_metrics()
      num_samples.append(ns)
      losses.append(cl*1.0)

    ids = [c.id for c in self.selected_clients]
    return ids, num_samples, losses

  def evaluate_personalized(self, rec=None, loss=None, mrr=None):
    stats = self.test_metrics_personalized()
    stats_train = self.train_metrics_personalized()

    if self.malicious_ids != []: # skip this for now
      relative_malicious_ids = np.array([stats[0].index(i) for i in self.malicious_ids])

      stats_A = np.array(stats)[:, relative_malicious_ids].tolist()
      stats_train_A = np.array(stats_train)[:, relative_malicious_ids].tolist()

      test_acc_A = sum(stats_A[2])*1.0 / sum(stats_A[1])
      train_loss_A = sum(stats_train_A[2])*1.0 / sum(stats_train_A[1])
      accs_A = [a / n for a, n in zip(stats_A[2], stats_A[1])]
      losses_A = [a / n for a, n in zip(stats_train_A[2], stats_train_A[1])]

    else:
      test_acc_A = -1
      train_loss_A = -1
      accs_A = []
      losses_A = []

    benign_ids = np.sort(np.setdiff1d(self.training_clients_ids, self.malicious_ids))
    relative_benign_ids = np.array([stats[0].index(i) for i in benign_ids])

    stats_B = np.array(stats)[:, relative_benign_ids].tolist()
    stats_train_B = np.array(stats_train)[:, relative_benign_ids].tolist()

    stats = None
    stats_train = None

    # test_acc = sum(stats_B[2])*1.0 / sum(stats_B[1])
    # train_loss = sum(stats_train_B[2])*1.0 / sum(stats_train_B[1])
    # accs = [a / n for a, n in zip(stats_B[2], stats_B[1])]
    # losses = [a / n for a, n in zip(stats_train_B[2], stats_train_B[1])]

    test_recall = sum(stats_B[2])*1.0 / sum(stats_B[1])
    test_mrr = sum(stats_B[3])*1.0 / sum(stats_B[1])
    train_loss = sum(stats_train_B[2])*1.0 / sum(stats_train_B[1])
    recalls = [a / n for a, n in zip(stats_B[2], stats_B[1])]
    mrrs = [a / n for a, n in zip(stats_B[3], stats_B[1])]
    losses = [a / n for a, n in zip(stats_train_B[2], stats_train_B[1])]

    if rec == None:
      self.rs_test_recall_p.append(test_recall)
    else:
      rec.append(test_recall)

    if mrr == None:
      self.rs_test_mrr_p.append(test_mrr)
    else:
      mrr.append(test_mrr)

    if loss == None:
      self.rs_train_loss_p.append(train_loss)
    else:
      loss.append(train_loss)

    self.rs_test_recall_p.append(recalls)
    self.rs_test_mrr_p.append(mrrs)

    print("Benign Averaged Train Loss: {:.2f}".format(train_loss))
    # print("Benign Averaged Test Accurancy: {:.2f}%".format(test_acc*100))
    # print("Benign Std Test Accurancy: {:.2f}%".format(np.std(accs)*100))
    print("Benign Averaged Test Recall: {:.2f}%".format(test_recall*100))
    print("Benign Std Test Recall: {:.2f}%".format(np.std(recalls)*100))
    print("Benign Averaged Test MRR: {:.2f}%".format(test_mrr*100))
    print("Benign Std Test MRR: {:.2f}%".format(np.std(mrrs)*100))

    if self.malicious_ids != []:
      print("Malicious Averaged Train Loss: {:.2f}".format(train_loss_A))
      print("Malicious Averaged Test Accurancy: {:.2f}%".format(test_acc_A*100))

  # did not implemented the modification
  def evaluate_for_future_clients(self):
    stats = self.test_metrics_for_future_clients()
    stats_train = self.train_metrics_for_future_clients()
    stats = np.array(stats).tolist()
    stats_train = np.array(stats_train).tolist()
    test_acc = sum(stats[2])*1.0 / sum(stats[1])
    train_loss = sum(stats_train[2])*1.0 / sum(stats_train[1])
    accs = [a / n for a, n in zip(stats[2], stats[1])]
    losses = [a / n for a, n in zip(stats_train[2], stats_train[1])]

    print("Averaged Future Train Loss: {:.2f}".format(train_loss))
    print("Averaged Future Test Accurancy: {:.2f}%".format(test_acc*100))
    print("Std Future Test Accurancy: {:.2f}%".format(np.std(accs)*100))

    if self.detailed_info:
      print('Future Clients Train Loss:\n', [(int(stats[0][idx]), format(loss, '.2f')) for idx, loss in enumerate(losses)])
      print('Future Clients Test Accuracy:\n', [(int(stats[0][idx]), format(acc*100, '.2f')+'%') for idx, acc in enumerate(accs)])

    self.ft_train_loss.append(train_loss)
    self.ft_test_acc.append(test_acc)
    self.ft_std_acc.append(np.std(accs))

  def test_metrics_personalized(self):
    num_samples = []
    tot_recall = []
    tot_mrr = []

    for c in self.training_clients:
      rc, mrr, ns = c.test_metrics_personalized()
      tot_recall.append(rc)
      tot_mrr.append(mrr)
      num_samples.append(ns)

    ids = [c.id for c in self.training_clients]
    return ids, num_samples, tot_recall, tot_mrr

  def train_metrics_personalized(self):
    num_samples = []
    losses = []
    for c in self.training_clients:
      cl, ns = c.train_metrics_personalized()
      num_samples.append(ns)
      losses.append(cl*1.0)

    ids = [c.id for c in self.training_clients]
    return ids, num_samples, losses

In [22]:
class FedCHAR(Server):
  def __init__(self, model):
    super().__init__(model)

    self.set_clients(model, clientCHAR)

    print(f"\nJoin ratio / total clients: {self.join_ratio} / {self.num_training_clients}")
    print("Finished creating server and clients.")

    self.initial_rounds = initial_rounds
    self.n_clusters = n_clusters
    self.metric = metric
    self.linkage = linkage

  def train(self):
    # initial Stage
    for i in range(self.initial_rounds):
      self.selected_clients = self.select_clients()
      self.send_models()

      for client in self.selected_clients:
        client.dtrain()

      if i%self.eval_gap == 0:
        print(f"\n-------------Round number: {i}-------------")
        print("\nEvaluate personalized models for training clients.")
        self.evaluate_personalized()

      self.receive_models()
      self.aggregate_parameters()

    # Clustering Stage
    print(f"\n-------------Clustering-------------")
    clients_updates = self.collect()
    self.cluster_identity = self.cluster(clients_updates)
    cluster_info = [[('Malicious' if self.training_clients[idx].malicious else 'Benign', idx) for idx, g_id in enumerate(self.cluster_identity) if g_id == i] for i in range(max(self.cluster_identity)+1)]
    for idx, info in enumerate(cluster_info):
      print('Cluster {}: {}'.format(idx, info))

    self.group_models = [copy.deepcopy(self.global_model)] * (max(self.cluster_identity) + 1)

    # Remaining Stage
    for i in range(self.global_rounds - self.initial_rounds):
      self.selected_clients = self.select_clients()
      self.send_models_g()

      for client in self.selected_clients:
        client.dtrain()

      if i%self.eval_gap == 0:
        print(f"\n-------------Round number: {i+self.initial_rounds}-------------")
        print("\nEvaluate personalized models for training clients.")
        self.evaluate_personalized()

      self.receive_models_g()
      self.aggregate_parameters_g()

    print("\nFinal Average Personalized Recall: {}\n".format(self.rs_test_recall_p[-1]))
    print(f"Average Recall for All Users: {np.mean(self.rs_test_recall_p[-1])}")
    print("\nFinal Average Personalized Recall: {}\n".format(self.rs_test_mrr_p[-1]))
    print(f"Average MRR for All Users: {np.mean(self.rs_test_mrr_p[-1])}")

  def receive_models(self):
    self.uploaded_ids = []
    self.uploaded_weights = []
    self.uploaded_updates = []

    tot_samples = 0
    for client in self.selected_clients:
      tot_samples += client.train_samples
      self.uploaded_ids.append(client.id)
      self.uploaded_weights.append(client.train_samples)
      self.uploaded_updates.append([c_param.data - s_param.data for c_param, s_param in zip(client.model.parameters(), self.global_model.parameters())])

    if self.attack_type != 'B' and self.attack_type != 'A1':
      malicious_ids = [idx for idx, c_id in enumerate(self.uploaded_ids) if c_id in self.malicious_ids]
      self.uploaded_updates = eval(self.attack_type)(self.uploaded_updates, malicious_ids)

    for i, w in enumerate(self.uploaded_weights):
      self.uploaded_weights[i] = w / tot_samples

  def add_parameters(self, w, client_update):
    for server_param, client_param in zip(self.global_update, client_update):
      server_param.data += client_param.data.clone() * w

  def aggregate_parameters(self):
    self.global_update = copy.deepcopy(self.uploaded_updates[0])
    for param in self.global_update:
      param.data.zero_()

    for w, client_update in zip(self.uploaded_weights, self.uploaded_updates):
      self.add_parameters(w, client_update)

    for model_param, update_param in zip(self.global_model.parameters(), self.global_update):
      model_param.data += update_param.data.clone()

  def collect(self):
    clients_updates = []
    for client in self.training_clients:
      clients_updates.append(client.get_update(self.global_model))

    if self.attack_type != 'B' and self.attack_type != 'A1':
      malicious_ids = [idx for idx, c_id in enumerate(self.training_clients_ids) if c_id in self.malicious_ids]
      clients_updates = eval(self.attack_type)(clients_updates, malicious_ids, len(self.selected_clients))

    clients_updates = [torch.cat([uu.reshape(-1, 1) for uu in u], axis=0).detach().cpu().numpy().squeeze() for u in clients_updates]
    return clients_updates

  def cluster(self, clients_updates):
    clustering = AgglomerativeClustering(n_clusters=self.n_clusters, metric=self.metric, linkage=self.linkage).fit(clients_updates)
    return clustering.labels_

  def send_models_g(self):
    for client in self.selected_clients:
      c_idx = list(self.training_clients_ids).index(client.id)
      client.set_parameters(self.group_models[self.cluster_identity[c_idx]])

  def receive_models_g(self):
    self.uploaded_ids = []
    self.uploaded_weights = []
    self.uploaded_updates = []

    for client in self.selected_clients:
      self.uploaded_ids.append(client.id)
      self.uploaded_weights.append(client.train_samples)
      c_idx = list(self.training_clients_ids).index(client.id)
      self.uploaded_updates.append([c_param.data - s_param.data for c_param, s_param in zip(client.model.parameters(), self.group_models[self.cluster_identity[c_idx]].parameters())])

    if self.attack_type != 'B' and self.attack_type != 'A1':
      malicious_ids = [idx for idx, c_id in enumerate(self.uploaded_ids) if c_id in self.malicious_ids]
      self.uploaded_updates = eval(self.attack_type)(self.uploaded_updates, malicious_ids)

  def aggregate_parameters_g(self):
    for i in range(len(self.group_models)):
      self.global_update = copy.deepcopy(self.uploaded_updates[0])
      for param in self.global_update:
        param.data.zero_()

      user_idx_in_same_group = np.array([r_id for r_id, c_id in enumerate(self.uploaded_ids) if self.cluster_identity[list(self.training_clients_ids).index(c_id)] == i])
      uploaded_weights = [self.uploaded_weights[u_id] for u_id in range(len(self.uploaded_weights)) if u_id in user_idx_in_same_group]
      uploaded_weights = [weight / sum(uploaded_weights) for weight in uploaded_weights]
      uploaded_updates = [self.uploaded_updates[u_id] for u_id in range(len(self.uploaded_updates)) if u_id in user_idx_in_same_group]

      for w, client_update in zip(uploaded_weights, uploaded_updates):
        self.add_parameters(w, client_update)

      for model_param, update_param in zip(self.group_models[i].parameters(), self.global_update):
        model_param.data += update_param.data.clone()

In [23]:
warnings.simplefilter("ignore")
print("Creating server and clients ...")
start = time.time()
# model = HARCNN(in_channels=3, num_classes=num_classes, dim=3008).to(device)
model = trans_to_cuda(SelfAttentionNetwork(opt, n_node))

print(model)

server = FedCHAR(model)
server.train()
# server.save_results()
print(f"\nTime cost: {round((time.time()-start)/60, 2)}min.")

Creating server and clients ...


SelfAttentionNetwork(
  (embedding): Embedding(889, 200)
  (transformerEncoderLayer): TransformerEncoderLayer(
    (self_attn): MultiheadAttention(
      (out_proj): NonDynamicallyQuantizableLinear(in_features=200, out_features=200, bias=True)
    )
    (linear1): Linear(in_features=200, out_features=800, bias=True)
    (dropout): Dropout(p=0.1, inplace=False)
    (linear2): Linear(in_features=800, out_features=200, bias=True)
    (norm1): LayerNorm((200,), eps=1e-05, elementwise_affine=True)
    (norm2): LayerNorm((200,), eps=1e-05, elementwise_affine=True)
    (dropout1): Dropout(p=0.1, inplace=False)
    (dropout2): Dropout(p=0.1, inplace=False)
  )
  (transformerEncoder): TransformerEncoder(
    (layers): ModuleList(
      (0-2): 3 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=200, out_features=200, bias=True)
        )
        (linear1): Linear(in_features=200, out_features=800, bias=True)
    

AttributeError: 'NoneType' object has no attribute 'data'