In [None]:
# %%bash
# !pip3 install pandas
# !pip3 install gymnasium[classic_control]
# !pip3 install scikit-learn
# !pip3 install torchvision
# !pip3 install matplotlib

In [None]:
# Import DL and ML modules
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
import torch.optim as optim
import csv
# from torch.utils.tensorboard import SummaryWriter
from torch.optim import Adam
from torch.utils.data import ConcatDataset, DataLoader, Dataset, Subset, random_split
from torchvision.datasets import MNIST

## RL libraries
# import gymnasium as gym
import gym
from gym import spaces

## Other libraries and modules
import math
from collections import OrderedDict, namedtuple, deque
import copy
from datetime import datetime
from itertools import count
from typing import List, Optional, Tuple
from enum import Enum
import random
import time

## For parallelism and concurrency
import threading
import concurrent.futures
from concurrent.futures import ThreadPoolExecutor

import matplotlib
import matplotlib.pyplot as plt


# Anomaly mode makes autograd report the forward operation behind a failing
# backward pass; PyTorch documents it as a debugging aid that slows execution.
torch.autograd.set_detect_anomaly(True)
# Record type with five named fields; the field order is fixed by this declaration.
Transition = namedtuple('Transition', ['state', 'action', 'done', 'next_state', 'reward'])

In [None]:
import logging
import gc
import os
import shutil
import sys
import warnings

# Named logger used by the notebook; INFO and above are emitted.
logger = logging.getLogger('ddpg')
logger.setLevel(logging.INFO)
# logging.getLogger returns the same object on every call, so re-running this
# cell used to stack one more StreamHandler each time and duplicate every
# message. Attach the stream handler only when none is attached yet.
if not any(type(handler) is logging.StreamHandler for handler in logger.handlers):
  logger.addHandler(logging.StreamHandler())

def custom_exception_handler(exctype, value, traceback):
    """Print an uncaught exception in a compact, labelled form.

    The signature matches what ``sys.excepthook`` expects.

    Parameters
    ----------
    exctype : type
        Class of the exception.
    value : BaseException
        The exception instance.
    traceback : types.TracebackType
        Traceback object attached to the exception.
    """
    # The third parameter shadows the standard-library module of the same name,
    # so the module is imported under an alias. The parameter name itself is
    # kept so that keyword callers keep working.
    import traceback as traceback_module

    print(f"Exception Type: {exctype}")
    print(f"Exception Value: {value}")
    print("Exception Traceback:")
    # print_tb writes the stack entries to sys.stderr by default.
    traceback_module.print_tb(traceback)

# Install the custom handler as the interpreter-level hook for uncaught exceptions.
# NOTE(review): sys.excepthook is not the hook used for exceptions escaping
# threading.Thread.run (Python 3.8+ routes those through threading.excepthook),
# and notebook kernels may handle cell errors themselves — confirm whether
# thread coverage is actually needed here.
sys.excepthook = custom_exception_handler
# Suppress every warning for the rest of the session.
warnings.simplefilter("ignore")

In [None]:
# set up matplotlib
# True when the name of the active backend contains 'inline'.
is_ipython = 'inline' in matplotlib.get_backend()
if is_ipython:
    # Only imported for inline backends; `display` is undefined otherwise.
    from IPython import display

# Switch pyplot to interactive mode.
plt.ion()

In [None]:
# Set device to GPU or CPU
# Use the first CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Trailing expression: the cell output shows whether CUDA was detected.
torch.cuda.is_available()

In [None]:
# Create a folder for model weights
MODELS_PATH = 'models_weights'

# Start from an empty folder: delete whatever a previous run left behind
# (ignore_errors=True keeps this silent when the folder does not exist),
# then recreate the directory.
shutil.rmtree(MODELS_PATH, ignore_errors=True)
os.makedirs(MODELS_PATH, exist_ok=True)

### Helper Functions

In [None]:
def generate_clients(num_clients=10):
  """Sample random hardware and position attributes for ``num_clients`` clients.

  NOTE(review): in the visible code the sampled arrays are neither returned
  nor stored, so a call only advances NumPy's global random state and
  returns None — confirm whether the remainder of this function is missing.
  """
  # CPU cycles needed for one 28x28 image: an integer in [8, 11] times one million
  cycles_per_image = 1e6 * np.random.randint(low=8, high=12, size=num_clients)
  # CPU frequency in Hz, drawn from four discrete levels
  clock_rates_hz = np.random.choice([100e6, 200e6, 500e6, 1000e6], size=num_clients)
  # Integer x, y and z coordinates, each in [0, 1000], sampled in that order
  xs, ys, zs = (np.random.randint(0, 1001, size=num_clients) for _axis in range(3))



def modify_noise_model_weights(noise_model, noise_strength=0.1):
  """Perturb every parameter of ``noise_model`` in place with Gaussian noise.

  Parameters
  ----------
  noise_model : nn.Module
      Model whose parameters are modified in place.
  noise_strength : float
      Factor applied to the standard-normal noise before it is added.

  Returns
  -------
  nn.Module
      The same ``noise_model`` object that was passed in.
  """
  for weight in noise_model.parameters():
    # One standard-normal sample per element, with the same shape and dtype as the parameter
    perturbation = torch.randn_like(weight.data)
    weight.data += noise_strength * perturbation
  return noise_model



def delete_models(folder_path):
  """Delete the oldest saved model weights found in ``folder_path``.

  An entry is removed when the part of its name before the extension parses
  as an integer smaller than three quarters of ``len(MODEL_POOL)``. Entries
  whose name is not an integer (for example ``.ipynb_checkpoints``) are left
  untouched; they previously aborted the whole cleanup with a ValueError.

  Parameters
  ----------
  folder_path : str
      Directory that holds the saved weights.
  """
  global MODEL_POOL
  # Verify that the provided path is a directory
  if not os.path.isdir(folder_path):
    print(f"The provided path {folder_path} is not a directory.")
    return

  # Loop-invariant: models with an id below this value are considered old
  threshold = 3/4 * len(MODEL_POOL)
  for filename in os.listdir(folder_path):
    try:
      model_id = int(os.path.splitext(filename)[0])
    except ValueError:
      # Not a "<model_id>.<ext>" entry: leave it alone
      continue
    if model_id >= threshold:
      continue
    file_path = os.path.join(folder_path, filename)
    try:
      if os.path.isfile(file_path) or os.path.islink(file_path):
        os.unlink(file_path)
      elif os.path.isdir(file_path):
        shutil.rmtree(file_path)
    except Exception as e:
      # Best effort: report the failure and keep deleting the remaining entries
      print(f"Error while deleting {file_path}: {e}")
  print("Old model weights have been deleted.")

### Federated Learning Functions

In [None]:
def load_datasets(num_clients = 10, inter_iid = True, intra_iid = True, val_ratio = 0.1, batch_size = 32, seed = 42):
  """Creates the dataloaders for all clients.
  Parameters
  ----------
  num_clients : int
      Number of clients in the network.
  inter_iid : bool
      Whether data is IID between clients.
  intra_iid : bool
      Whether data is IID between classes.
  val_ratio : float
      Ratio of validation samples. A value of 0 yields empty validation sets.
  batch_size : int
      Size of a learning batch.
  seed : int
      Seed forwarded to the partitioning step and used for every
      train/validation split.

  Returns
  -------
  trainloaders : List[Dataloader]
      Training dataloader for all clients.
  valloaders : List[Dataloader]
      Validation dataloader for all clients.
  testloader : Dataloader
      Test dataloader to test the global model.
  """

  datasets, testset = _partition_data(num_clients, inter_iid, intra_iid, seed)
  # Split each partition into train/val and create DataLoader
  trainloaders = []
  valloaders = []
  for dataset in datasets:
    # The original expression divides by val_ratio, so val_ratio == 0 used to
    # raise ZeroDivisionError. The formula is kept unchanged for non-zero
    # ratios so existing split sizes do not move.
    len_val = int(len(dataset) / (1 / val_ratio)) if val_ratio else 0
    lengths = [len(dataset) - len_val, len_val]
    ds_train, ds_val = random_split(
        dataset, lengths, torch.Generator().manual_seed(seed)
    )
    trainloaders.append(DataLoader(ds_train, batch_size=batch_size, shuffle=True))
    valloaders.append(DataLoader(ds_val, batch_size=batch_size))
  testloader = DataLoader(testset, batch_size=batch_size)
  return trainloaders, valloaders, testloader


def _download_data():
  """Fetch MNIST (downloading into ``./dataset`` when needed).

  Each image is converted to a tensor and normalized with mean 0.1307 and
  standard deviation 0.3081.

  Returns
  -------
  trainset : Dataset
      Dataset for training.
  testset : Dataset
      Dataset for testing.
  """
  to_normalized_tensor = transforms.Compose([
      transforms.ToTensor(),
      transforms.Normalize((0.1307,), (0.3081,)),
  ])
  # Both splits share the download flag and the preprocessing pipeline
  shared_kwargs = dict(download=True, transform=to_normalized_tensor)
  trainset = MNIST("./dataset", train=True, **shared_kwargs)
  testset = MNIST("./dataset", train=False, **shared_kwargs)
  return trainset, testset


def _partition_data(num_clients=10, inter_iid=True, intra_iid=True, seed=42):
  """Split training set into IID or non-IID partitions to simulate different FL setting.
  Parameters
  ----------
  num_clients : int
      Number of clients in the network.
  inter_iid : bool
      Whether data is IID between clients.
  intra_iid : bool
      Whether data is IID between classes.

  Returns
  -------
  datasets : List[Subset]
      List of partioned data for all clients.
  testset : Dataset
      Testing dataset.
  """
  trainset, testset = _download_data()
  partition_size = int(len(trainset) / num_clients)
  lengths = [partition_size] * num_clients
  if inter_iid:
      datasets = random_split(trainset, lengths, torch.Generator().manual_seed(seed))
  else:
      if intra_iid:
          trainset = _balance_classes(trainset, seed)
          partition_size = int(len(trainset) / num_clients)
      shard_size = int(partition_size / 2)
      idxs = trainset.targets.argsort()
      sorted_data = Subset(trainset, idxs)
      tmp = []
      for idx in range(num_clients * 2):
          tmp.append(
              Subset(sorted_data, np.arange(shard_size * idx, shard_size * (idx + 1)))
          )
      idxs_list = torch.randperm(
          num_clients * 2, generator=torch.Generator().manual_seed(seed)
      )
      datasets = [
          ConcatDataset((tmp[idxs_list[2 * i]], tmp[idxs_list[2 * i + 1]]))
          for i in range(num_clients)
      ]

  return datasets, testset


def _balance_classes(trainset, seed=42):
  """Balance the classes of the trainset.
  Parameters
  ----------
  trainset : Dataset
      Training dataset.
  seed : int
      To initialize the random generator.

  Returns
  -------
  shuffled : Subset
      Balanced dataset.
  """
  class_counts = np.bincount(trainset.targets)
  smallest = np.min(class_counts)
  idxs = trainset.targets.argsort()
  tmp = [Subset(trainset, idxs[: int(smallest)])]
  tmp_targets = [trainset.targets[idxs[: int(smallest)]]]
  for count in class_counts:
      tmp.append(Subset(trainset, idxs[int(count) : int(count + smallest)]))
      tmp_targets.append(trainset.targets[idxs[int(count) : int(count + smallest)]])
  unshuffled = ConcatDataset(tmp)
  unshuffled_targets = torch.cat(tmp_targets)
  shuffled_idxs = torch.randperm(
      len(unshuffled), generator=torch.Generator().manual_seed(seed)
  )
  shuffled = Subset(unshuffled, shuffled_idxs)
  shuffled.targets = unshuffled_targets[shuffled_idxs]

  return shuffled


### CNN Classifier

In [None]:
# Deep Network Class

class Net(nn.Module):
  """CNN classifier following the architecture described in McMahan 2017:
  [Communication-Efficient Learning of Deep Networks from
  Decentralized Data] (https://arxiv.org/pdf/1602.05629.pdf)

  Two 5x5 convolutions (32 then 64 channels), each followed by ReLU and a
  padded 2x2 max-pool, then a 512-unit hidden layer and a 10-way output.
  """
  def __init__(self):
    super().__init__()
    self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5, padding=1)
    self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, padding=1)
    # One pooling layer shared by both convolution stages
    self.pool = nn.MaxPool2d(kernel_size=(2, 2), padding=1)
    self.fc1 = nn.Linear(in_features=64 * 7 * 7, out_features=512)
    self.fc2 = nn.Linear(in_features=512, out_features=10)

  def forward(self, input_tensor):
    """Forward pass of the CNN.
    Parameters
    ----------
    input_tensor : torch.Tensor
        Input Tensor that will pass through the network

    Returns
    -------
    torch.Tensor
        The resulting Tensor after it has passed through the network
    """
    features = self.pool(F.relu(self.conv1(input_tensor)))
    features = self.pool(F.relu(self.conv2(features)))
    # Collapse every dimension except the batch dimension
    flat = features.flatten(start_dim=1)
    hidden = F.relu(self.fc1(flat))
    return self.fc2(hidden)


def train(net, trainloader, device=device, epochs=5, learning_rate=0.01, display=False):
  """Train the network on the training set.
  Parameters
  ----------
  net : nn.Module
      The neural network to train.
  trainloader : DataLoader
      The DataLoader containing the data to train the network on.
  device : torch.device
      The device on which the model should be trained, either 'cpu' or 'cuda'.
  epochs : int
      The number of epochs the model should be trained for.
  learning_rate : float
      The learning rate for the SGD optimizer.
  display : bool
      Currently unused; kept so existing callers keep working.

  Returns
  -------
  Tuple[list, list]
      Per-epoch training loss (sample-weighted mean over all mini-batches of
      the epoch) and per-epoch accuracy of the model on the train data.

  Raises
  ------
  ValueError
      If the trainloader yields no samples.
  """
  # Performance metrics initialization
  train_loss, train_acc = [], []
  # Hyperparameters
  criterion = torch.nn.CrossEntropyLoss()
  optimizer = torch.optim.SGD(net.parameters(), lr=learning_rate)
  net.train()

  for _ in range(epochs):
    # Accumulated as a detached value so no device sync is forced per batch
    epoch_loss = 0.0
    seen_samples = 0
    for images, labels in trainloader:
      images, labels = images.to(device), labels.to(device)
      # Clear the gradients
      optimizer.zero_grad()
      # Forward pass - compute outputs on input data using the model
      outputs = net(images)
      loss = criterion(outputs, labels)
      # Backward pass
      loss.backward()
      optimizer.step()
      # criterion returns the batch mean, so weight it by the batch size.
      # The previous code reported only the last mini-batch's loss.
      epoch_loss = epoch_loss + loss.detach() * labels.size(0)
      seen_samples += labels.size(0)

    if seen_samples == 0:
      # Same style as test(): fail explicitly instead of with a
      # NameError / ZeroDivisionError further down
      raise ValueError("Trainloader can't be 0, exiting...")

    # Evaluate the training
    net.eval()  # Set the model to evaluation mode
    with torch.no_grad():
      total_correct = 0
      total_samples = 0
      for images, labels in trainloader:
          images, labels = images.to(device), labels.to(device)

          outputs = net(images)
          _, predicted = torch.max(outputs, 1)
          total_samples += labels.size(0)
          total_correct += (predicted == labels).sum().item()

      train_acc.append(total_correct / total_samples)
      train_loss.append(float(epoch_loss / seen_samples))
    net.train()
  return train_loss, train_acc



def test(net, testloader, device=device):
  """Evaluate the network on the entire test set.
  Parameters
  ----------
  net : nn.Module
      The neural network to test.
  testloader : DataLoader
      The DataLoader containing the data to test the network on.
  device : torch.device
      The device on which the model should be tested, either 'cpu' or 'cuda'.

  Returns
  -------
  Tuple[float, float]
      The loss and the accuracy of the input model on the given data. The
      loss is the sum of the per-batch criterion values divided by the number
      of samples in the dataset.

  Raises
  ------
  ValueError
      If the underlying dataset is empty.
  """
  loss_fn = torch.nn.CrossEntropyLoss()
  summed_loss = 0.0
  n_correct = 0
  n_seen = 0
  net.eval()
  with torch.no_grad():
      for images, labels in testloader:
          images = images.to(device)
          labels = labels.to(device)
          logits = net(images)
          summed_loss += loss_fn(logits, labels).item()
          # Index of the largest logit per sample
          predicted = torch.max(logits.data, 1)[1]
          n_seen += labels.size(0)
          n_correct += (predicted == labels).sum().item()
  n_dataset = len(testloader.dataset)
  if n_dataset == 0:
      raise ValueError("Testloader can't be 0, exiting...")
  return summed_loss / n_dataset, n_correct / n_seen



def get_model_size(model):
  """Get the neural network model weights size in bytes.

  The size of each parameter is its element count times the byte width of
  its own dtype, so float32 models give the same result as the previous
  fixed 4-bytes-per-value rule while other dtypes are now counted correctly.

  Parameters
  ----------
  model : nn.Module
      The neural network.

  Returns
  -------
  int
      Model weights size in bytes (0 for a model without parameters).
  """
  return sum(param.numel() * param.element_size() for param in model.parameters())


### Computation, Communication and Reputation Models

In [None]:

class MSG_TYPE(Enum):
  """Types of messages exchanged between clients."""
  # Offloading request
  OFF_REQ = 1
  # Auction bid
  BID = 2
  # Winner selection in the auction
  OFF_REP = 3


# Blockchain
def record_to_blockchain(new_record):
  """Append one record as a new row of the global ``BC`` DataFrame.

  Parameters
  ----------
  new_record : sequence
      Eight values, read by position in this order: round, owner, executer,
      task_type, processing_time, communication_time, processing_energy,
      communication_energy.
  """
  global BC
  global BC_LOCK
  column_order = (
      'round', 'owner', 'executer', 'task_type',
      'processing_time', 'communication_time',
      'processing_energy', 'communication_energy',
  )
  # The read-modify-write of BC happens entirely under BC_LOCK
  with BC_LOCK:
    new_row = pd.DataFrame(
        {column: [new_record[position]] for position, column in enumerate(column_order)}
    )
    BC = pd.concat([BC, new_row], ignore_index=True)



def euclidean_distance(coordinates_a, coordinates_b):
  """Straight-line distance between two 3-D points.

  Only the first three entries of each argument are used.
  """
  squared_gaps = [(coordinates_b[axis] - coordinates_a[axis])**2 for axis in range(3)]
  return np.sqrt(squared_gaps[0] + squared_gaps[1] + squared_gaps[2])


# Communication model
def channel_gain():
  """Draws a channel gain between two clients.

  The gain is sampled uniformly from [-95, -90] (in dB). Distance-dependent
  alternatives (140.7 + 36.7 * log10(distance), or 1 / distance**2) were
  considered by the author and are not used.
  """
  lowest_db, highest_db = -95, -90
  return np.random.uniform(lowest_db, highest_db)

def transmission_rate(power, coordinates_a, coordinates_b):
  """Calculates the transmission rate from a sender to a receiver.

  A fresh channel gain (in dB) is drawn with ``channel_gain()`` on every
  call; the two coordinate arguments are currently not used.

  Parameters
  ----------
  power: float
      Transmission power in (mWatt).
  coordinates_a, coordinates_b:
      Positions of the sender and the receiver (unused).
  Returns
  -------
  trans_rate: float
      Transmission rate (bits/s)
  """
  global B_w         # Bandwidth in (Hz)
  global N_0         # Spectral density noise in (dBm/Hz)

  h = channel_gain()
  # Both dB quantities are converted to linear scale before forming the ratio
  received_power = power * 10**(h/10)
  noise_power = 10**(N_0/10) * B_w
  return B_w * np.log2(1 + received_power / noise_power)


def communication_time(data_size, power, coordinates_a, coordinates_b):
  """Calculates the communication time.
  Parameters
  ----------
  data_size: int
      Size of transmitted data in bytes.
  power, coordinates_a, coordinates_b:
      Forwarded unchanged to ``transmission_rate``.
  Returns
  -------
  The transmission time between a and b (Second).
  """
  # Bytes to bits, since the rate is expressed in bits per second
  payload_bits = data_size*8
  rate = transmission_rate(power, coordinates_a, coordinates_b)
  return payload_bits / rate


def communication_energy(power, comm_time):
  """Calculates the communication energy.
  Parameters
  ----------
  power: float
      Transmit power in (mWatt).
  comm_time: float
      Communication (transmission) time in (Second).
  Returns
  -------
  The transmit energy in (Joule).
  """
  # mWatt x Second gives millijoules; dividing by 1000 converts to Joule
  energy_millijoule = power * comm_time
  return energy_millijoule / 1000


# Computation model
def training_time(data_size, cpu_cycles, cpu_frequency, epochs):
  """Calculates the time spent by a client to perform a training task.
  Parameters
  ----------
  data_size: int
      Trained data size in (Bytes).
  cpu_cycles: float
      Number of CPU cycles to process one unit of data (image of 28*28 Bytes).
  cpu_frequency: float
      CPU frequency in (Hz).
  epochs: int
      Number of local training epochs.

  Returns
  -------
  Local training time (Second).
  """
  # Total cycles over all epochs, then cycles / (cycles per second)
  total_cycles = data_size * cpu_cycles * epochs
  return total_cycles / cpu_frequency


def validation_time(data_size, cpu_cycles, cpu_frequency):
  """Calculates the time spent by a client to perform a validation task.
  Parameters
  ----------
  data_size: int
      Size of used dataset for validation.
  cpu_cycles: int
      Number of CPU cycles required to process one unit of data (image of 28*28 Bytes).
  cpu_frequency: float
      CPU frequency in (Hz).
  Returns
  -------
  Validation time (Second).
  """
  # A single pass over the data: cycles needed / cycles per second
  needed_cycles = data_size * cpu_cycles
  return needed_cycles / cpu_frequency


def aggregation_time(M, weights_size, cpu_cycles, cpu_frequency):
  """Calculates the time spent by a client to perform an aggregation task.
  Parameters
  ----------
  M: int
      Number of aggregated models.
  weights_size: int
      Size of model weights in (Bytes).
  cpu_cycles: float
      Client's training cycles attribute; the aggregation cost is taken as
      this value floor-divided by 1e5 (a simplification).
  cpu_frequency: float
      CPU frequency in (Hz).
  Returns
  -------
  Aggregation time.
  """
  cycles_per_unit = cpu_cycles // 1e5
  # (M - 1) factor: one fewer step than the number of models being aggregated
  total_cycles = (M - 1) * weights_size * cycles_per_unit
  return total_cycles / cpu_frequency


def computation_energy(capacitance, frequency, cpt_time):
  """Calculates the energy consumed to perform a computation task.
  Parameters
  ----------
  capacitance: float
      Client capacitance factor.
  frequency: float
      CPU frequency in (Hz)
  cpt_time: float
      Processing time in (Second)
  Returns
  -------
  The processing energy (Joule).
  """
  # capacitance * frequency^3 is multiplied by the processing time
  drawn_power = capacitance * frequency**3
  return drawn_power * cpt_time



# Reputation model
def update_data_quality(w_old, w_new, w_i, mu_old, lambd=0.25, eps=1e-12):
  ''' Updates the client's data quality
  Parameters
  ----------
  w_old: OrderedDict
      Old global model weights (previous round). Currently unused.
  w_new: OrderedDict
      New global model weights (current round).
  w_i: OrderedDict
      Local model weights (current round).
  mu_old: float
      Old value of data quality.
  lambd: float
      Quality data score discount factor.
  eps: float
      Lower bound applied to the distance between the local and the global
      weights, so that identical weights give a large finite score instead
      of infinity.

  Returns
  -------
  mu_new: float
      New data quality score.

  '''
  # reshape(-1) also handles non-contiguous tensors, which view(-1) rejects
  flat_w_new = torch.cat([p.reshape(-1) for p in w_new.values()])
  flat_w_i = torch.cat([p.reshape(-1) for p in w_i.values()])

  # The score is inversely proportional to the L2 distance between the local
  # and the new global weights
  distance = torch.norm(flat_w_new - flat_w_i, p=2)
  distance = torch.clamp(distance, min=eps)

  mu_new = (1 - lambd) / distance + (lambd * mu_old)
  return float(mu_new)


def update_resource_contribution(task_dict_list, eta_old, lambd=0.25, scaling_factor=1e4):
  ''' Updates the client's resource contribution.
  Parameters
  ----------
  task_dict_list: List[Dict]
      List of tasks performed by a given client. Each dict is read for the
      keys 'task_count', 'processing_time', 'processing_energy',
      'communication_time' and 'communication_energy'. None or an empty
      list means the client performed no task.
  eta_old: float
      Old value of resource contribution.
  lambd: float
      Resource contribution score discount factor.
  scaling_factor: float
      Factor to rescale the numerator of performance index.

  Returns
  -------
  eta_new: float
      New value of resource contribution.
  c_i: float
      Performance index of the current round (1e-10 when no task was
      performed).
  '''
  # No task at all: only the discounted history remains. An empty list is
  # treated like None so both "nothing done" cases report the same index.
  if task_dict_list is None or len(task_dict_list) == 0:
    return lambd * eta_old, 1e-10

  c_i = 0
  for task_dict in task_dict_list:
    # Cost of a task: time x energy, summed over processing and communication
    cost = (task_dict['processing_time'] * task_dict['processing_energy']
            + task_dict['communication_time'] * task_dict['communication_energy'])
    c_i += task_dict['task_count'] * scaling_factor / cost
  eta_new = (1 - lambd) * c_i + lambd * eta_old
  return eta_new, c_i


def update_offload_history(old_phi, lambd=0.9):
  ''' Updates the client's offload history score
  Parameters
  ----------
  old_phi: float
      Old value of offloading history score.
  lambd: float
      Offload history score discount factor.

  Returns
  -------
  new_phi: float
      New offload history score.

  '''
  # Discount the previous score, then add a tiny constant
  tiny_offset = 1e-10
  discounted = lambd * old_phi
  return discounted + tiny_offset


def update_validation_score(new_nu, old_nu, lambd=0.25):
  ''' Blends a new validation score with the previous one.
  Parameters
  ----------
  new_nu: float
      Validation score of the current round.
  old_nu: float
      Old value of the validation score.
  lambd: float
      Weight given to the old value; the new value is weighted (1 - lambd).

  Returns
  -------
  Updated validation score.
  '''
  current_part = (1-lambd) * new_nu
  history_part = lambd * old_nu
  return current_part + history_part




def get_reputation_score(mu, eta, phi, nu, a_1=0.27, a_2=0.27, a_3=0.2, a_4=0.26):
  ''' Calculates reputation score given components of reputation vector.
  Parameters
  ----------
  mu: float
      Data quality score.
  eta: float
      Resource contribution score.
  phi: float
      Offloading history score.
  nu: float
      Validation score.
  a_1, a_2, a_3, a_4: float
      Weighting factors, applied to mu, eta, phi and nu respectively.
  Returns
  -------
  Tuple of the reputation score (sum of the weighted components, as a float)
  and the list of the four weighted components.

  '''
  factors = (a_1, a_2, a_3, a_4)
  components = (mu, eta, phi, nu)
  sub_scores = [factor * component for factor, component in zip(factors, components)]
  return float(sum(sub_scores)), sub_scores

# def update_gratefulness()



def read_reputation(client_id, as_dict=False):
  ''' Extracts the reputation vector of one client for the previous round.

  The row is looked up in the global ``REPUTATION_DATA`` table where
  'round' equals ``CURRENT_ROUND - 1`` and 'client_id' equals ``client_id``;
  the first matching row is used.

  Parameters
  ----------
  client_id: int
      Client ID.
  as_dict: bool
      Whether reputation vector is returned as a dict or a list.

  Returns
  ------
  reputation vector: List/ Dict
      With ``as_dict`` the full row as a dict; otherwise the values of
      'data_quality_norm', 'resource_contribution_norm',
      'offloading_history' and 'validation_score_norm', in that order.
  '''
  global REPUTATION_DATA
  global CURRENT_ROUND
  previous_round = REPUTATION_DATA['round'] == CURRENT_ROUND - 1
  same_client = REPUTATION_DATA['client_id'] == client_id
  matching_rows = REPUTATION_DATA.loc[previous_round & same_client]
  if as_dict:
    return matching_rows.to_dict(orient='records')[0]
  score_columns = ['data_quality_norm', 'resource_contribution_norm', 'offloading_history', 'validation_score_norm']
  return matching_rows[score_columns].values[0]

### Client Class

In [None]:
# Client class
class Client(threading.Thread):
  '''Federated learning client
  '''

  def __init__(self, id, coordinates, trainloader, valloader, epochs, learning_rate, capacitance, cpu_cycles, power, frequency):
    self.id = id
    self.wallet = 0
    # Coordinates
    self.coordinates = coordinates
    # DL attributes
    self.net = Net().to(device=device)
    self.trainloader = trainloader
    self.valloader = valloader
    self.epochs = epochs
    self.learning_rate = learning_rate
    # Hardware specifications
    self.capacitance = capacitance
    self.cpu_cycles = cpu_cycles
    self.power = power
    self.frequency = frequency
    # DDPG agent
    global DRL_DISCOUNT_FACTOR, DRL_TAU, hidden_size, n_observations, env
    self.ddpg_agent = DDPG(
              DRL_DISCOUNT_FACTOR,
              DRL_TAU,
              hidden_size,
              n_observations,
              env.action_space,
              checkpoint_dir='./saved_models/BiddingEnv-v0'
              )
    self.replay_memory = ReplayMemory()
    self.ddpg_step = 0
    # Reset entity's flags
    self.reset()


  def reset(self):
    '''
    Reset flags at the begining of each round
    Parameters
    ----------
    -
    Returns
    -------
    -
    '''
    # Training metrics
    self.train_acc = [0]
    self.train_loss = [0]

    self.val_acc = 0
    self.val_loss = 0
    # State flags
    self.is_available = True                    # Flag for availability : A node can be active but not available for a given task
    self.is_active = True
    self.completed_train = False                # Flag for tracking local training task
    self.offload = False
    # Mutex variables
    self.notification_mutex = threading.Lock()  # To protect notification
    self.buffer_mutex = threading.Lock()        # To lock client's buffer
    # Critical resources
    self.notification = threading.Event()       # To receive notifications
    self.buffer = []                            # Client's buffer
    # DDPG flag
    self.start_ddpg = None
    self.finish_ddpg = None



  def run(self):
    '''
    Main loop of the client.

    While the task is running and the client is still selected, each
    communication round does the following: update and aggregate the DDPG
    agent (from round 2 on), reset the per-round state, load the
    highest-level model of the previous round, train locally, then serve
    incoming messages, validate other clients' models and aggregate
    validated models until END_ROUND is set.
    Parameters
    ----------
    -
    Returns
    -------
    -
    '''
    global END_ROUND_LOCK                             # Protect END_ROUND flag
    global END_ROUND                                  # END_ROUND flag
    global END_TASK                                   # END_TASK flag
    global selected_clients
    global MODEL_POOL_LOCK
    global MODEL_POOL
    global NUM_CLIENTS
    global MODELS_PATH
    global DRL_BATCH_SIZE

    # CURRENT_ROUND is only read below, so it resolves to the module-level name
    # without a `global` declaration.
    eliminated = False
    while not END_TASK and not eliminated:
      time.sleep(1)
      while not END_ROUND and self.is_active:         # Do during a communication round
        # A client that is no longer selected leaves both loops
        if self not in selected_clients:
          eliminated = True
        if eliminated:
          break
        if CURRENT_ROUND > 1:
          self.update_ddpg()
          self.aggregate_ddpg()
        # NOTE(review): reset() replaces buffer_mutex and notification with new
        # objects — confirm no other thread still holds the previous ones.
        self.reset()                                  # Reset local flags
        # Retrieve global model (the one with max level values from the last round)
        # The index label returned by idxmax() is used as the weights file name.
        model_id = MODEL_POOL[MODEL_POOL['round'] == CURRENT_ROUND-1]['level'].idxmax()

        self.net.load_state_dict(torch.load(f'{MODELS_PATH}/{model_id}.pth'))
        self.fit()                                    # Perform local training task (main task)
        time.sleep(2)
        # NOTE(review): `i` is not used anywhere in the rest of this method.
        i = 0
        while self.is_available:                      # Client is available for new tasks
          # Check for offloading requests
          with self.buffer_mutex:
            if len(self.buffer) != 0:                 # Read message if any
              self.is_available = False
              self.process_message(self.buffer.pop(0))     # Process received message
              # self.buffer = []
          # Check newly submitted models
          # For validation
          with MODEL_POOL_LOCK:
            round_condition = MODEL_POOL['round'] == CURRENT_ROUND
            # Never validate a model this client trained or owns
            id_condition    = (MODEL_POOL['trainer'] != self.id) & (MODEL_POOL['owner'] != self.id)
            lock_condition  = MODEL_POOL['lock'] == False
            # Check if local model (for simplicity only local models are validated) and not validated yet
            validated_condition = (MODEL_POOL['validators'].apply(lambda x: len(x) < 3)) & (MODEL_POOL['aggregators'].apply(lambda x: len(x) == 0))
            # Each client validates a given model at most once
            validator_condition = MODEL_POOL['validators'].apply(lambda x: not self.id in x)
            result_df = MODEL_POOL[round_condition & id_condition & lock_condition & validated_condition & validator_condition]
            if not result_df.empty:
              # Claim the first candidate by setting its row lock while holding MODEL_POOL_LOCK
              model_id = int(result_df.index[0])
              MODEL_POOL.at[model_id, 'lock'] = True
            # time.sleep(5)
          # Load model's weights
          if not result_df.empty:
            time.sleep(random.uniform(5, 20))
            weights = torch.load(f'{MODELS_PATH}/{model_id}.pth')
            # Evaluate model
            self.is_available = False
            owner_id = int(MODEL_POOL.at[model_id, 'owner'])
            validation_score = self.evaluate(owner_id, weights)
            print(f'  Client {self.id:>{3}}: Model {model_id:>{5}} validated')
            with MODEL_POOL_LOCK:
              MODEL_POOL.at[model_id, 'lock'] = False
              # Each validator contributes one third of its score (a model is validated by up to three clients)
              MODEL_POOL.at[model_id, 'validation_score'] = MODEL_POOL.at[model_id, 'validation_score'] + 1/3 * validation_score
              MODEL_POOL.at[model_id, 'validators'].append(self.id)
              # print(f'  Client {self.id}: Releasing MODEL_POOL_LOCK..')

          # For aggregation
          with MODEL_POOL_LOCK:
            round_condition = (MODEL_POOL['round'] == CURRENT_ROUND)
            lock_condition = (MODEL_POOL['lock'] == False)
            # validated_condition = (MODEL_POOL['validation_score'] == 3) | (MODEL_POOL['level'] > 1)
            # Eligible: already-aggregated models (level > 1) or local models with three validators
            validated_condition = (MODEL_POOL['level'] > 1) | (MODEL_POOL['validators'].apply(lambda x: len(x) == 3))
            aggregated_condition = (MODEL_POOL['aggregated'] == False)
            level_condition = MODEL_POOL['level'] < NUM_CLIENTS
            # Check if local model (for simplicity only local models are validated) and not validated yet
            result_df = MODEL_POOL[round_condition & lock_condition & validated_condition & aggregated_condition & level_condition]
            # At least two eligible models are needed for an aggregation
            if not result_df.empty and len(result_df) > 1:
              idx = result_df.index.to_list()
              for model_id in idx:
                MODEL_POOL.at[model_id, 'lock'] = True
              # NOTE(review): this sleep runs while MODEL_POOL_LOCK is held, so every
              # other client waiting on the pool is blocked for the full 5 seconds.
              time.sleep(5)
          if not result_df.empty and len(result_df) > 1:
            self.is_available = False
            self.aggregate(idx)

          # Set is_available flag to True again
          self.is_available = True

          if not END_ROUND:                             # If everyone has completed their learning tasks
            if all([client.completed_train for client in selected_clients]):
              # The round is over once a single non-aggregated model remains for it
              if len(MODEL_POOL[(MODEL_POOL['round'] == CURRENT_ROUND) & (MODEL_POOL['aggregated'] == False)]) == 1:
                # Set END ROUND
                with END_ROUND_LOCK:
                  # Re-check under the lock so only one client finalizes the round
                  if not END_ROUND:
                    # Rescale global model if any missing local models
                    round_models = MODEL_POOL[MODEL_POOL['round'] == CURRENT_ROUND]
                    # NOTE(review): selected_clients holds Client objects while the 'owner'
                    # column is compared against self.id above, so this set difference
                    # presumably never removes anything and missing_clients equals the
                    # number of selected clients (scale factor 1). If it did count only
                    # missing owners, a value of 0 would make the division below fail —
                    # confirm the intended formula.
                    missing_clients = len(set(selected_clients) - set(round_models['owner']))
                    global_model_id = MODEL_POOL[(MODEL_POOL['round'] == CURRENT_ROUND) & (MODEL_POOL['aggregated'] == False)].index[0]
                    weights = torch.load(f'{MODELS_PATH}/{global_model_id}.pth')
                    weights = self.scale_model_weights(weights, len(selected_clients)/missing_clients)
                    torch.save(weights, f'{MODELS_PATH}/{global_model_id}.pth')
                    time.sleep(2)
                    # Set flag
                    print(f'  Client {self.id:>{3}}: Setting END_ROUND flag')
                    END_ROUND = True
          # Once END_ROUND is set, unwind all three loops
          if END_ROUND:
            break
        if END_ROUND:
          break
      if END_ROUND:
        break
    if eliminated:
      print(f'    Client {self.id}: Client eliminated...')



  def update_ddpg(self):
    """Convert this client's bids of the previous round into replay transitions and train its DDPG agent.

    Rows of BID_POOL with round == CURRENT_ROUND-1 and agent_id == self.id are
    processed in order. For every row the observation is rebuilt through
    env.set_obs/env.get_obs, the reward is obtained from env.step_ and the
    action is the stored bid. Transitions are pushed to self.replay_memory and
    self.ddpg_step is advanced modulo STEPS_PER_EP.

    When the replay memory holds more than STEPS_PER_EP transitions, the last
    episode is used to update self.ddpg_agent and the returned loss is appended
    to ACTOR_LOSS[CURRENT_ROUND-1].

    Returns
    -------
    None
        Side effects only: self.replay_memory, self.ddpg_step, self.start_ddpg,
        self.finish_ddpg and ACTOR_LOSS are updated.
    """
    global env, CURRENT_ROUND, BID_POOL, ENV_LOCK
    global STEPS_PER_EP, METRICS_LOCK, ACTOR_LOSS, CRITIC_LOSS

    # Bids placed by this client during the previous round
    # NOTE(review): BID_POOL is read here without BID_POOL_LOCK — confirm no writer is active at this point
    condition = (BID_POOL['round'] == CURRENT_ROUND-1) & (BID_POOL['agent_id'] == self.id)
    bids = BID_POOL.loc[condition]
    i = -1      # positional counter over `bids` (the DataFrame index is not positional)
    if not bids.empty:
      for index, r in bids.iterrows():
        i += 1
        row = r.to_dict()
        self.ddpg_step = self.ddpg_step % STEPS_PER_EP
        # if first record and step != 0
        # An episode is still open from an earlier round: bridge the last bid of the
        # most recent earlier round to the first bid of this batch.
        if (i == 0) and (self.ddpg_step != 0) and (CURRENT_ROUND > 2):
          prev_round = CURRENT_ROUND - 2
          # Walk backwards until a round in which this client placed a bid is found
          while (BID_POOL.loc[(BID_POOL['round'] == prev_round) & (BID_POOL['agent_id'] == self.id)].empty) and (prev_round >= 1):
            prev_round -= 1
          if not BID_POOL.loc[(BID_POOL['round'] == prev_round) & (BID_POOL['agent_id'] == self.id)].empty:
            prev_bids = BID_POOL.loc[(BID_POOL['round'] == prev_round) & (BID_POOL['agent_id'] == self.id)]
            prev_obs = prev_bids.iloc[len(prev_bids)-1].to_dict()     # last bid of that round
            with ENV_LOCK:
              # The composed reputation is passed for both reputation arguments of set_obs
              env.set_obs(prev_obs['agent_id'], prev_obs['composed_reputation'], prev_obs['composed_reputation'], prev_obs['c_idx'], prev_obs['payment'])
              observation = env.get_obs(prev_obs['agent_id'])
            observation = torch.Tensor(observation).to(device)
            # NOTE(review): env.step_ is called outside ENV_LOCK — confirm it does not touch shared env state
            _, reward, _, info = env.step_(prev_obs['agent_id'], prev_obs['winner'], prev_obs['composed_reputation'], prev_obs['composed_reputation'], prev_obs['c_idx'], prev_obs['bid'], prev_obs['fair_payment'])
            reward = torch.Tensor([reward]).to(device)
            action = prev_obs['bid']
            action = torch.Tensor([float(action)]).to(device)
            done = True if self.ddpg_step == STEPS_PER_EP - 1 else False
            mask = torch.Tensor([done]).to(device)
            with ENV_LOCK:
              # Next state of the bridging transition is the first bid of the current batch
              env.set_obs(row['agent_id'], row['composed_reputation'], row['composed_reputation'], row['c_idx'], row['payment'])
              next_observation = env.get_obs(row['agent_id'])
            next_observation = torch.Tensor(next_observation).to(device)
            self.replay_memory.push(observation, action, mask, next_observation, reward)
            self.ddpg_step += 1
            self.ddpg_step = self.ddpg_step % STEPS_PER_EP

        # register current row to replay memory
        with ENV_LOCK:
          env.set_obs(row['agent_id'], row['composed_reputation'], row['composed_reputation'], row['c_idx'], row['payment'])
          observation = env.get_obs(row['agent_id'])
        observation = torch.Tensor(observation).to(device)
        # print(f' Client {self.id:>{3}}: {row["winner"]} {row["c_idx"]} {row["bid"]} {row["fair_payment"]}')
        _, reward, _, info = env.step_(row['agent_id'], row['winner'], row['composed_reputation'], row['composed_reputation'], row['c_idx'], row['bid'], row['fair_payment'])
        reward = torch.Tensor([reward]).to(device)
        action = row['bid']
        action = torch.Tensor([float(action)]).to(device)

        if self.ddpg_step == STEPS_PER_EP - 1:
          # Last step of the episode: terminal transition whose next state is the current observation
          next_observation = observation
          done = True
          mask = torch.Tensor([done]).to(device)
          next_observation = torch.Tensor(next_observation).to(device)
          self.replay_memory.push(observation, action, mask, next_observation, reward)
          self.ddpg_step += 1
          self.ddpg_step = self.ddpg_step % STEPS_PER_EP
        elif (i < len(bids) - 1):
          # A later bid exists in this batch: its observation is the next state
          next_row = bids.iloc[i + 1].to_dict()
          with ENV_LOCK:
            env.set_obs(next_row['agent_id'], next_row['composed_reputation'], next_row['composed_reputation'], next_row['c_idx'], next_row['payment'])
            # `bids` is filtered on agent_id == self.id, so row and next_row share the same agent id
            next_observation = env.get_obs(row['agent_id'])
          done = False
          mask = torch.Tensor([done]).to(device)
          next_observation = torch.Tensor(next_observation).to(device)
          self.replay_memory.push(observation, action, mask, next_observation, reward)
          self.ddpg_step += 1
          self.ddpg_step = self.ddpg_step % STEPS_PER_EP
        else:
          # Last bid of the batch in the middle of an episode: nothing is pushed here;
          # the bridging branch above handles it on a later call.
          pass
    # print(f'  Client {self.id:>{3}}: Replay Memory : {self.replay_memory.memory}...')
    # Train only once more than one episode worth of transitions is available
    if len(self.replay_memory) > STEPS_PER_EP:
      self.start_ddpg = True
      transitions = self.replay_memory.get_last_episode(STEPS_PER_EP)
      batch = Transition(*zip(*transitions))
      # NOTE(review): prints the full batch of tensors on every update — consider removing
      print(f'Client {self.id} Batch: {batch}')
      p_loss = self.ddpg_agent.update_params(batch, agent_id=self.id)
      with METRICS_LOCK:
        ACTOR_LOSS[CURRENT_ROUND-1].append(p_loss)
      self.finish_ddpg = True
    else:
      # No update this round; aggregate_ddpg checks start_ddpg/finish_ddpg to pick trained agents
      self.start_ddpg = False


  def aggregate_ddpg(self):
    """Average the policy networks of the clients that trained this round and load the result locally.

    Every client calls this method. Under AGENT_LOCK, the first caller that
    finds DDPG_CHECK False sets it and performs the aggregation: it waits for
    all selected clients to report their training status, averages the policy
    network weights of those that trained (only when more than one did) and
    loads the average into DDPG_AGENT.policy_network. Every caller then copies
    the global policy into its own agent if DDPG_AGGREGATED is set.

    Returns
    -------
    None
    """
    global selected_clients, AGENT_LOCK, DDPG_AGENT, DDPG_AGGREGATED, DDPG_CHECK
    # Random delay before competing for the lock
    time.sleep(random.uniform(1, 5))
    with AGENT_LOCK:
      # Only the first client entering the critical section aggregates
      if DDPG_CHECK == False:
        DDPG_CHECK = True
        # NOTE(review): both polling loops run while AGENT_LOCK is held, so other callers block here
        # Wait until every client has decided whether it trains (start_ddpg is True or False)
        while any([client.start_ddpg is None for client in selected_clients]):
          time.sleep(1)
        # Wait until every client that started training has finished
        while any([(client.start_ddpg) and (not client.finish_ddpg) for client in selected_clients]):
          time.sleep(1)

        trained_agents = [client.ddpg_agent for client in selected_clients if (client.start_ddpg and client.finish_ddpg)]
        # With zero or one trained agent there is nothing to average
        if(len(trained_agents) > 1):
          num_agents = len(trained_agents)

          # Scale each policy by 1/num_agents so that the sum computed by fedavg is the mean
          policy_network_list = [self.scale_model_weights(agent.policy_network.state_dict(), 1/num_agents) for agent in trained_agents]
          # Aggregate scaled
          global_policy = self.fedavg(policy_network_list)

          DDPG_AGENT.policy_network.load_state_dict(copy.deepcopy(global_policy), strict=True)

          DDPG_AGGREGATED = True
          time.sleep(2)

      # Every client (aggregator included) pulls the global policy once it exists
      if DDPG_AGGREGATED == True:
        self.ddpg_agent.policy_network.load_state_dict(copy.deepcopy(DDPG_AGENT.policy_network.state_dict()), strict=True)



  def process_message(self, message):
    '''
    Process one message taken from the client's buffer.

    Two message types are handled:
    - MSG_TYPE.OFF_REQ (offloading request): estimate the cost of the offered
      task, set the environment observation, place a bid, store the bid in
      BID_POOL and send a MSG_TYPE.BID message to the auctioneer.
    - MSG_TYPE.OFF_REP (auction response): mark the stored bid as winner or
      not in BID_POOL and, if this client won, run the offloaded task through
      fit_other.

    Parameters
    ----------
    message : dict
        Must contain the keys 'message_type' (a MSG_TYPE) and 'message_body' (dict).

    Returns
    -------
    None
    '''
    # Set client momentarily not available
    # It is made available again in the OFF_REP branch (directly, or at the end of fit_other)
    self.is_available = False
    global AUCTION_BID_KEYS
    global CURRENT_ROUND
    global selected_clients
    global env, REPLAY_MEMORY, AGENT_LOCK
    global MEAN_REWARD, METRICS_LOCK
    global BID_POOL, BID_POOL_LOCK

    if 'message_type' in message and  'message_body' in message:    # Check if message structure is correct

      ### If offloading request
      if message['message_type'] == MSG_TYPE.OFF_REQ:
        # Extract message information
        auction_ID = message['message_body']['auction_ID']
        auctioneer_ID = message['message_body']['auctioneer_ID']
        offer = message['message_body']['offer']
        print(f'  Client {self.id:>{3}}: received message OFF_REQ from client {auctioneer_ID:>{3}}...')
        # Send bid to the auctioneer
        auc_reputation = read_reputation(auctioneer_ID)
        own_reputation = read_reputation(self.id)
        # Mix of both reputations: entries 0 and 2 from the auctioneer, 1 and 3 from this bidder
        composed_reputation = [auc_reputation[0], own_reputation[1], auc_reputation[2], own_reputation[3]]

        # Locate the auctioneer object; `client` keeps the match after the loop
        # NOTE(review): if no id matches, `client` silently ends up being the last selected client
        for client in selected_clients:
          if client.id == auctioneer_ID:
            break

        # Estimate the cost of running the auctioneer's remaining epochs on this client's hardware
        remaining_epochs = offer['task_info']['remaining_epochs']
        processing_time = training_time(client.get_data_size() * 100, self.cpu_cycles, self.frequency, remaining_epochs)
        processing_energy = computation_energy(self.capacitance, self.frequency, processing_time)
        comm_time       = communication_time(client.get_data_size() * DATA_UNIT_SIZE + MODEL_WEIGHTS_SIZE, self.power, self.coordinates, client.coordinates)
        comm_energy = communication_energy(self.power, comm_time)
        task_summary = [{
            'task_count' : 1,
            'processing_time' : processing_time,
            'communication_time' : comm_time,
            'processing_energy' : processing_energy,
            'communication_energy' : comm_energy
          }]
        _, c_idx = update_resource_contribution(task_summary, 0)
        # print(f'    Estimated performance: {c_idx}...')

        # Publish the observation the DDPG agent reads in place_bid
        with ENV_LOCK:
          env.set_obs(self.id,
                    own_reputation,
                    auc_reputation,
                    c_idx,
                    offer['sharing_portion']
                  )

        time.sleep(2)
        # Place a bid
        # print(f'Set observation is {env.get_obs(self.id)}')
        bid, true_action = self.place_bid(offer)


        # Save observation in bid pool to avoid overwriting it by next auction information
        with BID_POOL_LOCK:

          BID_POOL = pd.concat([BID_POOL, pd.DataFrame({
              'round' : [CURRENT_ROUND],
              'agent_id' : [self.id],
              'auctioneer_id' : [auctioneer_ID],
              'composed_reputation' : [[auc_reputation[0], own_reputation[1], auc_reputation[2], own_reputation[3]]],
              'bid': [bid],
              'c_idx' : [c_idx],
              'payment': [offer['sharing_portion']]
          })], ignore_index=True)


        # Compose bid message
        auction_bid_values = [time.time(), auction_ID, self.id, bid, true_action]
        bid_message_body = {key: value for key, value in zip(AUCTION_BID_KEYS, auction_bid_values)}

        print(f'  Client {self.id:>{3}}: Placing bid {bid} to client {auctioneer_ID:>{3}}')
        self.send_message(client, MSG_TYPE.BID, bid_message_body)

      ### If offloading response
      elif message['message_type'] == MSG_TYPE.OFF_REP:
        # Message body structure: ['timestamp', 'auction_ID', 'round', 'auctioneer_id', 'composed_reputation', 'bid', 'payment', 'task_args']
        # NOTE(review): launch_auction builds responses with an extra 'true_action' field — confirm against AUCTION_REP_KEYS
        for auctioneer in selected_clients:
            if auctioneer.id == message['message_body']['auctioneer_id']:
              break
        # If task_args != None then this is the auction winner
        winner = (message['message_body']['task_args'] != None)
        if winner:
          # If auction winner, calculate costs
          processing_time = training_time(auctioneer.get_data_size() * 100, self.cpu_cycles, self.frequency, message['message_body']['task_args'][1])
          processing_energy = computation_energy(self.capacitance, self.frequency, processing_time)
          comm_time       = communication_time(auctioneer.get_data_size() * DATA_UNIT_SIZE + MODEL_WEIGHTS_SIZE, self.power, self.coordinates, auctioneer.coordinates)
          comm_energy = communication_energy(self.power, comm_time)
          task_summary = [{
              'task_count' : 1,
              'processing_time' : processing_time,
              'communication_time' : comm_time,
              'processing_energy' : processing_energy,
              'communication_energy' : comm_energy
            }]
          _, c_idx = update_resource_contribution(task_summary, 0)
        else:
          c_idx = 0.

        # NOTE(review): c_idx, auc_reputation and own_reputation are not used further in this branch
        auc_reputation = read_reputation(message['message_body']['auctioneer_id'])
        own_reputation = read_reputation(self.id)

        # Record the auction outcome on the bid stored during the OFF_REQ step
        with BID_POOL_LOCK:
          condition = (BID_POOL['round'] == CURRENT_ROUND) & (BID_POOL['agent_id'] == self.id) & (BID_POOL['auctioneer_id'] == message['message_body']['auctioneer_id'])
          BID_POOL.loc[condition, 'winner'] = winner

        task_args = message['message_body']['task_args']
        # Check if auction winner
        if winner:
          print(f'  Client {self.id:>{3}}:  is the auction winner!')
          # Perform the offloading task
          self.fit_other(task_args)
        else:
          self.is_available = True

    else:
      # NOTE(review): is_available stays False on this path — confirm this is intended
      print('ERRONEOUS MESSAGE STRUCTURE...', message, '\n')


  def get_parameters(self, as_list=False):
    """Return the parameters of the local model.

    Parameters
    ----------
    as_list : bool
        When True, return the state-dict values as a list of NumPy arrays
        (moved to CPU first); when False, return the state dict itself.

    Returns
    -------
    OrderedDict or List[numpy.ndarray]
        Local model parameters.
    """
    state = self.net.state_dict()
    if not as_list:
      return state
    # Keep the state-dict ordering; only the tensors are converted
    return [tensor.cpu().numpy() for tensor in state.values()]


  def set_parameters(self, parameters):
    """Load the given weights into the local model.

    Parameters
    ----------
    parameters : OrderedDict
        State dict to load; its keys must match the model's exactly
        (strict loading).

    Returns
    -------
    None
    """
    model = self.net
    model.load_state_dict(parameters, strict=True)


  def fit(self, parameters=None):
    """Performs local model training, optionally interrupted by a simulated offloading event.

    With probability OFFLOADING_PROB the client trains for only a random
    number of epochs and then launches an auction for the remaining ones.
    Otherwise it trains for self.epochs, records the task cost through
    record_to_blockchain, saves the trained weights under MODELS_PATH and
    registers them in MODEL_POOL.

    Parameters
    ----------
    parameters : OrderedDict, optional
        Model weights to load before starting local training. When None, the
        current weights of self.net are kept.

    Returns
    -------
    None
        Results are stored on the instance: train_loss, train_acc, val_loss
        and val_acc.
    """
    global OFFLOADING_PROB
    global TOTAL_OFFLOADING
    global DATA_UNIT_SIZE
    global MODEL_WEIGHTS_SIZE
    global GLOBAL_MODEL_HOLDER
    global MODEL_POOL, MODEL_POOL_LOCK, MODELS_PATH

    self.is_available = False           # The client is no longer available

    print(f'  Client {self.id:>{3}}: started local training...')

    # Will an offloading event occur during training
    offload_event = (random.Random().random() < OFFLOADING_PROB)
    epochs = random.randint(0, self.epochs-1) if offload_event else self.epochs
    # Identity check: `== None` would invoke __eq__ on the weights container
    if parameters is not None:
      self.set_parameters(parameters)

    # Simulate hardware heterogeneity
    time.sleep(random.uniform(10, 20))
    # Start training
    self.train_loss, self.train_acc = train(
        self.net,
        self.trainloader,
        epochs=epochs,
        learning_rate=self.learning_rate,
    )
    local_loss, local_acc = test(self.net, self.valloader)
    self.val_loss = local_loss
    self.val_acc = local_acc

    # If offloading event
    if offload_event:
      TOTAL_OFFLOADING  += 1
      print(f'  Client {self.id:>{3}}: Offloading event...')
      # Launch auction with remaining number of epochs
      self.launch_auction(self.epochs-epochs)
    else:
      # Record task cost into blockchain
      self.is_available = True
      self.completed_train = True
      process_time = training_time(self.get_data_size()*100, self.cpu_cycles, self.frequency, self.epochs)
      if self.id == GLOBAL_MODEL_HOLDER.id:
        # Placeholder for tests: pick a random other client as communication peer.
        # ALL COMM TIMES WILL BE CALCULATED DIFFERENTLY ONCE BC IS INTRODUCED.
        # The peer is chosen among the client objects themselves: drawing an integer
        # from range(1, NUM_CLIENTS) and using it as a list index mixed up ids and
        # positions and could overrun selected_clients once clients were eliminated.
        other_clients = [client for client in selected_clients if client.id != self.id]
        peer = random.choice(other_clients) if other_clients else self
        comm_time = communication_time(MODEL_WEIGHTS_SIZE, self.power, self.coordinates, peer.coordinates)
      else:
        comm_time = communication_time(MODEL_WEIGHTS_SIZE, self.power, self.coordinates, GLOBAL_MODEL_HOLDER.coordinates)
      # Store task details
      process_energy = computation_energy(self.capacitance, self.frequency, process_time)
      comm_energy = communication_energy(self.power, comm_time)
      record_to_blockchain([CURRENT_ROUND, self.id, self.id, 'train', process_time, comm_time, process_energy, comm_energy])

      # Store model details
      with MODEL_POOL_LOCK:
        # The new row's index in MODEL_POOL doubles as the weights file name
        model_id = len(MODEL_POOL.index.to_list())
        torch.save(self.get_parameters(), f'{MODELS_PATH}/{model_id}.pth')

        MODEL_POOL = pd.concat([MODEL_POOL, pd.DataFrame({
            'round': [CURRENT_ROUND],
            'level': [1],
            'owner' : [self.id],
            'trainer': [self.id],
            'submodels_id':[[]],
            'aggregators': [[]],
            'aggregated': [False],
            'validators': [[]],
            'validation_score': [0],
            'lock': [False]
        })], ignore_index=True)
      print(f'  Client {self.id:>{3}}: completed local training...')

      time.sleep(1)



  def evaluate(self, owner_id, weights):
    """Score another client's model on this client's validation data.

    The given weights are loaded into self.net (replacing its current
    weights) and tested on self.valloader. The score is 1 when the measured
    accuracy is at least the owner's recorded val_acc; otherwise it is reduced
    by the accuracy gap divided by 100. The validation cost is recorded
    through record_to_blockchain.

    Parameters
    ----------
    owner_id : identifier of the client that owns the model.
    weights : state dict of the model to validate.

    Returns
    -------
    Validation score.
    """
    global selected_clients
    # Measure accuracy of the owner's weights on the local validation set
    self.net.load_state_dict(weights)
    _, accuracy = test(self.net, self.valloader)
    # Locate the owner; the loop variable keeps the match after the loop
    for owner in selected_clients:
      if owner.id == owner_id:
        break
    reference_acc = owner.val_acc
    if accuracy >= reference_acc:
      validation_score = 1
    else:
      validation_score = 1 - (np.abs(accuracy - reference_acc) / 100)
    # Save task details
    val_time    = validation_time(self.get_data_size() * 100, self.cpu_cycles, self.frequency)
    link_time   = communication_time(MODEL_WEIGHTS_SIZE, self.power, self.coordinates, self.coordinates)
    val_energy  = computation_energy(self.capacitance, self.frequency, val_time)
    link_energy = communication_energy(self.power, link_time)
    record_to_blockchain([CURRENT_ROUND, self.id, self.id, 'val', val_time, link_time, val_energy, link_energy])
    return validation_score


  def scale_model_weights(self, weights, factor):
    """Return a deep copy of `weights` with every entry multiplied by `factor`.

    Parameters
    ----------
    weights : dict-like
        Mapping from parameter name to tensor; it is not modified.
    factor : float
        Multiplier applied to each entry.

    Returns
    -------
    Mapping of the same type and key order holding the scaled entries.
    """
    scaled = copy.deepcopy(weights)
    # Snapshot the items so entries can be reassigned while looping
    for name, tensor in list(scaled.items()):
      scaled[name] = factor * tensor
    return scaled


  def fedavg(self, scaled_weights_list):
    """Sum a list of weight mappings entry by entry.

    No division is performed: the inputs are expected to be scaled
    beforehand (see scale_model_weights), so that the sum is the weighted
    average. Accumulation is done in float32 and the result is copied into
    a deep copy of the first mapping.

    Parameters
    ----------
    scaled_weights_list : List[dict-like]
        Non-empty list of mappings sharing the same keys.

    Returns
    -------
    Mapping with the summed tensors.
    """
    first = scaled_weights_list[0]
    aggregated = copy.deepcopy(first)
    for name, target in aggregated.items():
      running = torch.zeros_like(first[name], dtype = torch.float32)
      for state in scaled_weights_list:
        running += state[name]
      # In-place copy keeps the tensor object (and dtype) of the copied entry
      target.copy_(running)
    return aggregated



  def aggregate(self, models_idxs):
    """Aggregate the listed models of MODEL_POOL into a new model.

    Each model is loaded from MODELS_PATH. Level-1 (local) models are scaled
    by 1/NUM_CLIENTS, already aggregated models are kept as they are, and all
    of them are summed with fedavg. The result is saved under a new id,
    registered in MODEL_POOL, the source models are flagged as aggregated and
    unlocked, and the task cost is recorded through record_to_blockchain.

    Parameters
    ----------
    models_idxs : List[int]
        MODEL_POOL indices of the models to aggregate.

    Returns
    -------
    None
    """
    global MODEL_POOL, MODEL_POOL_LOCK
    global MODELS_PATH
    global MODEL_WEIGHTS_SIZE
    loaded_weights, scale_factors = [], []
    merged_level = 0
    aggregator_ids = []
    # Get models and scale them
    for idx in models_idxs:
      time.sleep(random.uniform(10, 20))
      loaded_weights.append(torch.load(f'{MODELS_PATH}/{idx}.pth'))
      # Scale only local models weights
      is_local_model = MODEL_POOL.loc[idx, 'level'] == 1
      scale_factors.append(1/ NUM_CLIENTS if is_local_model else 1)
      merged_level += MODEL_POOL.loc[idx, 'level']
      # Carry over the aggregators of the sub-model, if any
      if MODEL_POOL.at[idx, 'aggregators'] != []:
        aggregator_ids = aggregator_ids + MODEL_POOL.at[idx, 'aggregators']
    scaled_weights_list = []
    for model_weights, scale in zip(loaded_weights, scale_factors):
      scaled_weights_list.append(self.scale_model_weights(model_weights, scale))
    # Aggregate scaled weights
    average_weights = self.fedavg(scaled_weights_list)
    aggregator_ids.append(self.id)
    # Save aggregated model information
    with MODEL_POOL_LOCK:
      # Save and share aggregated model
      new_model_id = len(MODEL_POOL.index)
      torch.save(average_weights, f'{MODELS_PATH}/{new_model_id}.pth')

      new_entry = pd.DataFrame({
          'round': [CURRENT_ROUND],
          'level': [merged_level],
          'owner': [None],
          'trainer': [None],
          'submodels_id': [models_idxs],
          'aggregators': [aggregator_ids],
          'aggregated': [False],
          'validators': [[]],
          'validation_score': [0],
          'lock': [False]
      })
      MODEL_POOL = pd.concat([MODEL_POOL, new_entry], ignore_index=True)

      print(f'  Client {self.id:>{3}}: Model {new_model_id:>{5}} aggregated..')
      # Flag the source models as consumed and release them
      for idx in models_idxs:
        MODEL_POOL.at[idx, 'aggregated'] = True
        MODEL_POOL.at[idx, 'lock'] = False
      # Save task details (still inside the lock, as before)
      agg_time    = aggregation_time(len(models_idxs), MODEL_WEIGHTS_SIZE, self.cpu_cycles, self.frequency)
      link_time   = communication_time(MODEL_WEIGHTS_SIZE, self.power, self.coordinates, self.coordinates)
      agg_energy  = computation_energy(self.capacitance, self.frequency, agg_time)
      link_energy = communication_energy(self.power, link_time)
      record_to_blockchain([CURRENT_ROUND, self.id, self.id, 'agg', agg_time, link_time, agg_energy, link_energy])


  def send_message(self, receiver, message_type, message_body):
    """Deliver a message to another client's buffer and notify it.

    Parameters
    ----------
    receiver : Client
        Client whose buffer receives the message.
    message_type : MSG_TYPE
        Type of the message to send.
    message_body : dict
        Message content; its structure depends on the message type.

    Returns
    -------
    None
    """
    envelope = {
        'message_type' : message_type,
        'message_body' : message_body
    }
    # Simulate hardware heterogeneity
    delay = random.uniform(0.1, 1)
    time.sleep(delay)
    # Exclusive access to the receiver's buffer while appending
    with receiver.buffer_mutex:
      receiver.buffer.append(envelope)
      time.sleep(1)
    # Wake the receiver up
    with receiver.notification_mutex:
      receiver.notification.set()
      time.sleep(1)


  def fit_other(self, task_args):
    """Performs other client's local training task (offloaded task).

    The sender's weights are loaded into self.net, trained on the sender's
    data for the remaining epochs and tested on the sender's validation data.
    The resulting metrics are written to the sender object, the task cost is
    recorded through record_to_blockchain and the trained weights are saved
    under MODELS_PATH and registered in MODEL_POOL with the sender as owner
    and this client as trainer.

    Parameters
    ----------
    task_args : Tuple[int, int, OrderedDict, DataLoader]
        Offloaded local task with sender id, remaining epochs, local weights and local dataset.

    Returns
    -------
    None
    """
    global MODEL_POOL_LOCK, MODEL_POOL
    # Extract task information
    sender_id, remaining_epochs, weights, data = task_args
    # Locate the sender object; the loop variable keeps the match after the loop
    for sender in selected_clients:
      if sender.id == sender_id:
        break
    print(f"  Client {self.id:>{3}}: starting local training of client {sender_id:>{3}}'s offloaded task.. ")
    # Perform training of other client's model
    self.set_parameters(copy.deepcopy(weights))
    time.sleep(random.uniform(1, 5))      # Simulate hardware heterogeneity
    sender.train_loss, sender.train_acc = train(
        self.net,                         # Created model with other client's local parameters
        data,                             # Pass other client's dataloader
        epochs = remaining_epochs,        # Remaining number of epochs
        learning_rate = sender.learning_rate
    )
    local_loss, local_acc = test(self.net, sender.valloader)
    sender.val_loss = local_loss
    sender.val_acc = local_acc
    # Client is now available
    self.is_available = True

    # Cost of the work actually performed: the sender's dataset trained for the
    # remaining epochs on this client's hardware. This matches the estimate made
    # when bidding in process_message.
    process_time = training_time(sender.get_data_size() * 100, self.cpu_cycles, self.frequency, remaining_epochs)
    comm_time = communication_time(sender.get_data_size() * DATA_UNIT_SIZE + 2 * MODEL_WEIGHTS_SIZE, self.power, self.coordinates, sender.coordinates)
    process_energy = computation_energy(self.capacitance, self.frequency, process_time)
    comm_energy = communication_energy(self.power, comm_time)
    record_to_blockchain([CURRENT_ROUND, sender.id, self.id, 'train', process_time, comm_time, process_energy, comm_energy])

    with MODEL_POOL_LOCK:
      # Save local models weights; the new row's index doubles as the file name
      torch.save(self.get_parameters(), f'{MODELS_PATH}/{len(MODEL_POOL.index)}.pth')

      MODEL_POOL = pd.concat([MODEL_POOL, pd.DataFrame({
          'round': [CURRENT_ROUND],
          'level': [1],
          'owner' : [sender.id],
          'trainer': [self.id],
          'submodels_id': [[]],
          'aggregators': [[]],
          'aggregated': [False],
          'validators': [[]],
          'validation_score': [0],
          'lock': [False]
      })], ignore_index=True)
    print(f"  Client {self.id:>{3}}: completing local training of client {sender_id:>{3}}'s offloaded task..")
    sender.completed_train = True
    time.sleep(1)



  def launch_auction(self, remaining_epochs, bid_upper_limit=0.2):
    """Launch auction game for task offloading.

    Under OFFLOADING_TOKEN (one auction at a time) the client waits for at
    least two available receivers, samples a subset of them, sends each an
    MSG_TYPE.OFF_REQ with the offered profit portion, collects one
    MSG_TYPE.BID per receiver, keeps the bids below a randomised limit,
    selects the bid with the highest utility (reputation score vs. rescaled
    bid) and answers every receiver with an MSG_TYPE.OFF_REP. Only the
    winner's response carries the task arguments.

    Parameters
    ----------
    remaining_epochs : int
        Remaining number of local epochs.
    bid_upper_limit : float
        Currently unused; kept for interface compatibility.

    Returns
    -------
    None
        Side effects: self.is_active is set to False, self.offload to True,
        and, when no bid is accepted, self.completed_train to True and
        TASK_DROP[CURRENT_ROUND-1] is incremented.
    """
    global OFFLOADING_TOKEN
    global CURRENT_ROUND, NUM_CLIENTS
    global AUCTION_OFFER_KEYS
    global AUCTION_REP_KEYS
    global LR_MODEL
    global REPUTATION_DATA
    global DATA_UNIT_SIZE
    global env
    global TASK_DROP

    receivers = []
    # Allows to perform only one auction at a time
    with OFFLOADING_TOKEN:
      # Collect available clients
      while(len(receivers) < 2):
        time.sleep(2)
        receivers = [client for client in selected_clients if (client.id != self.id and client.is_available and client.is_active)]
      # Select some of the receivers to reduce simulation time
      random_indices = random.sample([r.id for r in receivers], min(len(receivers), NUM_CLIENTS//2))
      receivers = [rec for rec in receivers if rec.id in random_indices]

      print(f'  Client {self.id:>{3}}: Auction receivers collected.... {[rec.id for rec in receivers]}')
      # Compose offloading offer/request
      task_info = {'dataset_size' : len(self.trainloader.dataset), 'remaining_epochs' : remaining_epochs}
      print(f'  Client {self.id:>{3}}: Preparing auction offer....')

      sharing_portion = 0
      if CURRENT_ROUND >= 2:
        # Mean historical executer share, selected by column label rather than
        # by positional indexing of the resulting Series
        sharing_portion = BC.loc[BC['round'] >= 1, 'executer_per'].mean()
        noise = random.uniform(0, sharing_portion/2)
        original_portion = sharing_portion
        sharing_portion -= noise
        print(f'  Client {self.id:>{3}}: Profit portion offered: {sharing_portion}')

      if CURRENT_ROUND == 1 or sharing_portion == 0:
        # No usable history: offer a small random portion
        sharing_portion = random.uniform(0, 1) /(NUM_CLIENTS*5)
        original_portion = sharing_portion
        print(f'  Client {self.id:>{3}}: Profit portion offered: {sharing_portion}')

      offloading_offer = {
          'task_info' : task_info,
          'sharing_portion' : sharing_portion
      }
      auction_offer_values = [time.time(), TOTAL_OFFLOADING+1, CURRENT_ROUND, self.id, offloading_offer]
      offloading_request = {key: value for key, value in zip(AUCTION_OFFER_KEYS, auction_offer_values)}
      print(f'  Client {self.id:>{3}}: Sending offloading requests....')
      # Send offloading request to available nodes
      for receiver in receivers:
        self.send_message(receiver, MSG_TYPE.OFF_REQ, offloading_request)
      time.sleep(2)
      auction_bids = []

      # Collect bids
      # TODO Add a waiting limit before the client drops off
      while len(auction_bids) < len(receivers):
        time.sleep(2)
        if not len(self.buffer) == 0:
          with self.buffer_mutex:
            for message in self.buffer:
              if message['message_type'] == MSG_TYPE.BID:
                auction_bids.append(message)
            # NOTE(review): messages of any other type are dropped here
            self.buffer = []

      print(f'  Client {self.id:>{3}}: Selecting auction winner...')
      # Select the auction winner (greedy selection)
      auc_reputation = read_reputation(self.id)
      all_reputation = [read_reputation(message['message_body']['bidder_ID']) for message in auction_bids]

      # Get the auctioneer's utility corresponding to each bid
      epsilon = 10 * random.uniform(0, original_portion)
      print(f'  Client {self.id:>{3}}: Bid limit is {sharing_portion+epsilon}...')
      # Filter bids
      filtered_bids = [(auction_bids[i]['message_body']['bid'] < sharing_portion+epsilon) for i in range(len(auction_bids))]
      if any(filtered_bids):
        scores = [get_reputation_score(auc_reputation[0], all_reputation[i][1], auc_reputation[2], all_reputation[i][3])[0] for i in range(len(auction_bids))]
        print(f'scores {scores}')
        # Rescale bids to the order of magnitude of the reputation scores
        b = np.log10(sum(scores)/len(scores))
        correct_bids = [auction_bids[i]['message_body']['bid'] for i in range(len(auction_bids)) if filtered_bids[i]]
        a = np.log10(sum(correct_bids)/len(correct_bids))
        print(f'a {a}')
        print(f'b {b}')
        rescaled_bids = [auction_bids[i]['message_body']['bid'] * 10**(b-a) for i in range(len(auction_bids))]
        print(f'rescaled_bids {rescaled_bids}')
        # Rejected bids get a very low utility so they can never win
        utilities     = [(5/6*scores[i]) - (1/6*rescaled_bids[i]) if filtered_bids[i] else -10000 for i in range(len(auction_bids))]
        print(f"Utilities: {[(auction_bids[i]['message_body']['bidder_ID'], utilities[i]) for i in range(len(auction_bids))]}")
        winner_idx = np.argmax(np.array(utilities))
        accepted_bid = auction_bids[int(winner_idx)]
        task_args = (self.id, remaining_epochs, self.get_parameters(), self.trainloader)
        winner_reputation = all_reputation[int(winner_idx)]
        winner_id = accepted_bid['message_body']['bidder_ID']
        composed_reputation = [auc_reputation[0], winner_reputation[1], auc_reputation[2], winner_reputation[3]]
        # Compose auction response messages content
        # TODO: if second price auction select the next higher bid for payment
        portion_to_pay = accepted_bid['message_body']['bid']
        print(f'  Client {self.id:>{3}}: Portion to pay the auction winner: {portion_to_pay}...')
        # Value order: timestamp, auction_ID, round, auctioneer_id, composed_reputation,
        # bid, true_action, payment, task_args
        auction_winner_values = [time.time(), TOTAL_OFFLOADING+1, CURRENT_ROUND, self.id, composed_reputation, accepted_bid['message_body']['bid'], accepted_bid['message_body']['true_action'], portion_to_pay, task_args]  #  for the auction winner
      else:
        winner_id = -1
        composed_reputation = [auc_reputation[0], 0, auc_reputation[2], 0]
        print(f'  Client {self.id:>{3}}: No winner in this auction game...')
        TASK_DROP[CURRENT_ROUND-1] = TASK_DROP[CURRENT_ROUND-1] + 1

      # Send auction responses
      # Bids arrive in arbitrary order, so each receiver's own bid is looked up by
      # bidder id instead of by the receiver's position in the list.
      bids_by_bidder = {message['message_body']['bidder_ID']: message['message_body'] for message in auction_bids}
      for client in receivers:
        if winner_id == client.id:
          self.send_message(client, MSG_TYPE.OFF_REP, {
              key: value for key, value in zip(AUCTION_REP_KEYS, auction_winner_values)
          })
        else:
          own_bid = bids_by_bidder.get(client.id, {})
          loser_values = [time.time(), TOTAL_OFFLOADING+1, CURRENT_ROUND, self.id, composed_reputation, own_bid.get('bid'), own_bid.get('true_action'), 0, None]
          self.send_message(client, MSG_TYPE.OFF_REP, {
              key: value for key, value in zip(AUCTION_REP_KEYS, loser_values)
          })

      # The client is disconnected
      self.is_active = False
      print(f'  Client {self.id:>{3}}: Auction completed..')
      self.offload = True
      # Without a winner nobody will finish the task on this client's behalf
      if not any(filtered_bids):
        self.completed_train = True
      time.sleep(1)


  def place_bid(self, offer):
    """Place a bid in a given offloading auction.

    The bid is the action computed by the client's DDPG agent from the
    observation currently stored in the environment for this client; the
    agent's exploration is decayed after each bid.

    Parameters
    ----------
    offer : dict
        Offloading offer received from the auctioneer (not read here).

    Returns
    -------
    bid : float
        The profit portion that this bidder is willing to receive from the auctioneer.
    action : float
        The agent's action; identical to `bid`.
    """
    global env
    global ou_noise

    state = torch.Tensor(env.get_obs(self.id)).to(device)
    raw_action = self.ddpg_agent.calc_action(state, ou_noise)
    self.ddpg_agent.decay_exploration()
    # First component of the action is used as the bid, without any rescaling
    action = raw_action.cpu().numpy()[0]
    return action, action


  def get_data_size(self):
    """Return the number of samples in the local training dataset.

    Note that this is len(trainloader.dataset), not len(trainloader).
    """
    local_dataset = self.trainloader.dataset
    return len(local_dataset)

### Network Class

In [None]:
# Network class
import traceback

class Network:
  """Federated Learning Network.

  Functions included in this class are to be executed in a completely distributed fashion through smart contracts.
  """

  def __init__(self, num_clients, batch_size, testloader):
    """Store the task configuration and create the model used for evaluation.

    Parameters
    ----------
    num_clients : int
        Number of clients; stored as-is.
    batch_size : int
        Batch size; stored as-is.
    testloader : object
        Test data loader. It is handed to the module-level ``test`` function and
        its ``.dataset`` length is reported by :meth:`test`.
    """
    # Model instance that `test` loads candidate weights into.
    self.net = Net().to(device)
    self.num_clients = num_clients
    self.batch_size = batch_size
    self.testloader = testloader


  def select_clients(self, comm_round, selected_clients, threshold=0):
    """Clients are selected according to their respective reputation scores.

    Parameters
    ----------
    comm_round : int
        Current communication round. Scores recorded for round ``comm_round-1`` are used.
    selected_clients : List
        Candidate clients; each must expose an ``id`` attribute.
    threshold : float
        Clients with reputation score under this threshold are eliminated from the next round.

    Returns
    -------
    selected_clients_idx : List[int]
        Positions (within ``selected_clients``) of the clients that are kept.

    Raises
    ------
    IndexError
        If a candidate has no row in ``REPUTATION_DATA`` for round ``comm_round-1``.
    """
    global REPUTATION_DATA
    selected_clients_idx = []
    for i, client in enumerate(selected_clients):
      previous_record = REPUTATION_DATA[(REPUTATION_DATA['round'] == comm_round-1) & (REPUTATION_DATA['client_id'] == client.id)]
      reputation_score = previous_record['reputation_score'].values[0]
      if reputation_score >= threshold:
        selected_clients_idx.append(i)

    return selected_clients_idx


  def launch_task(self, comm_rounds, learning_rate, epochs, portion = 1, verbose = 1, verbose_test = 1):
    """Launch of federated task (the whole FL training process).

    For every communication round the method resets the clients, runs each
    selected client's ``run`` in a thread pool, waits for the ``END_ROUND`` flag,
    then updates reputation records, computes payments, evaluates the newest
    global model and appends the round's records to the result CSV files.
    Any exception raised while post-processing a round is printed and the task
    moves on to the next round.

    Parameters
    ----------
    comm_rounds : int
        Number of communication rounds.
    learning_rate : float
        Not read inside this method.
    epochs : int
        Number of local epochs. Not read inside this method.
    portion : float
        Portion of clients to select for participating. Not read inside this method.
    verbose : int
        When equal to 1, a banner is printed at the start of every round.
    verbose_test : int
        When truthy, test accuracy and loss are printed after every round.

    Returns
    -------
    net.state_dict() : OrderedDict
        Parameters held by ``self.net`` when the task ends.
    accuracy : List[float]
        Global model accuracy on test data, one entry per processed round.
    loss : List[float]
        Global model loss on test data, one entry per processed round.
    """
    # Learning status flags
    global CURRENT_ROUND
    global NUM_CLIENTS
    global END_ROUND_LOCK
    global END_ROUND
    global END_TASK
    # List of clients
    global clients
    global selected_clients
    global GLOBAL_MODEL_HOLDER
    global MODEL_WEIGHTS_SIZE

    global REPUTATION_DATA
    global RHO_MAX
    global LR_MODEL
    global MODEL_POOL, MODELS_PATH

    global REPLAY_MEMORY, DRL_BATCH_SIZE, DDPG_AGENT
    global MEAN_REWARD, MEAN_REWARD_LOCK, BIDDER_UTILITY

    global env, ou_noise
    global TOTAL_REPUTATION
    global GLOBAL_ACCURACY
    global BID_POOL, DDPG_AGGREGATED, DDPG_CHECK
    global ACTOR_LOSS, CRITIC_LOSS
    global TASK_DROP, TASK_DROP_RATE

    accuracy = []
    loss = []

    # To normalize reputation data columns (using MinMax normalization)
    columns_to_normalize = ['data_quality', 'resource_contribution', 'validation_score']
    columns_normalized = ['data_quality_norm', 'resource_contribution_norm', 'validation_score_norm']

    # The initial global model is a freshly initialised Net, stored as model 0.
    net = Net().to(device)
    global_weights = net.state_dict()
    MODEL_WEIGHTS_SIZE = sum(p.numel() * p.element_size() for p in global_weights.values())


    MODEL_POOL = pd.concat([MODEL_POOL, pd.DataFrame({
        'round': [0],
        'level': [NUM_CLIENTS],
        'owner' : [None],
        'trainer': [None],
        'submodels_id': [[]],
        'aggregators': [[]],
        'aggregated': [False],
        'validators': [[]],
        'validation_score': [0],
        'lock': [True]
    })], ignore_index=True)
    torch.save(global_weights, f'{MODELS_PATH}/0.pth')


    print(f'Launch of FL task...')
    END_TASK = False
    time.sleep(2)
    # Selected clients
    selected_clients = clients
    # For each communication round
    with ThreadPoolExecutor(max_workers=len(clients)) as executor:
      # `round_idx` (not `round`) so the builtin round() is not shadowed.
      for round_idx in range(comm_rounds):
        CURRENT_ROUND = round_idx + 1
        ou_noise.reset()
        if(verbose == 1):
          print("*************************************************************************************************")
          print("Communication round : ", CURRENT_ROUND)

        ACTOR_LOSS.append([])
        # CRITIC_LOSS.append([])
        # Select a list of clients idx (reputation filtering starts after round 3)
        if CURRENT_ROUND > 3:
          selected_clients_idx = self.select_clients(CURRENT_ROUND, selected_clients)
          selected_clients = [selected_clients[i] for i in selected_clients_idx]
          NUM_CLIENTS = len(selected_clients)

        # TODO Remove this global model holder
        GLOBAL_MODEL_HOLDER = selected_clients[int(np.random.choice(np.arange(len(selected_clients))))]

        # One slot per round for the per-round metrics.
        MEAN_REWARD.append([])
        BIDDER_UTILITY.append([])
        TASK_DROP.append(0)

        for client in selected_clients:
          client.reset()

        if CURRENT_ROUND == 1:
          # Initialize reputation vectors randomly (stored as round 0)
          for i, client in enumerate(selected_clients):
            mu = random.uniform(0.5, 0.9)
            eta = random.uniform(0.5, 0.9)
            nu = random.uniform(0.5, 0.9)
            reputation_score, _ = get_reputation_score(mu, eta, 1, nu)

            try:
              REPUTATION_DATA = pd.concat([REPUTATION_DATA, pd.DataFrame({
                  'round' : [0],
                  'client_id' : [client.id],
                  'data_quality' : [mu],
                  'resource_contribution' : [eta],
                  'offloading_history' : [1.],
                  'validation_score' : [nu],
                  'performance_index' : [0.],
                  'reputation_score' : [reputation_score],
                  'profit' : [0.]
              })], ignore_index=True)
            except Exception as e:
              print(f"Exception in one of the threads: {e}")
              traceback.print_exc()
          # Normalize reputation (work on an explicit copy of the round-0 rows)
          REPUTATION_DATA_ROUND = REPUTATION_DATA.loc[(REPUTATION_DATA['round'] == 0)].copy()
          scaler = MinMaxScaler()
          REPUTATION_DATA_ROUND[columns_normalized] = scaler.fit_transform(REPUTATION_DATA_ROUND[columns_to_normalize])

          REPUTATION_DATA.loc[(REPUTATION_DATA['round'] == 0)] = REPUTATION_DATA_ROUND
        # Set round flag to start local training
        with END_ROUND_LOCK:
          DDPG_AGGREGATED = False
          DDPG_CHECK = False
          END_ROUND = False
        time.sleep(1)

        # Run clients threads
        futures = [executor.submit(client.run) for client in selected_clients]
        for future in concurrent.futures.as_completed(futures):
          try:
            # Only called to surface exceptions raised inside the client thread.
            future.result()
          except Exception as e:
            print(f"Exception in one of the threads: {e}")
            traceback.print_exc()
        # END_ROUND is set elsewhere; poll until the round is flagged as finished.
        while not END_ROUND:
          time.sleep(2)

        try:
          print("All client threads have completed local training...")
          time.sleep(2)
          # Get training results of this communication round
          dataset_size = [client.get_data_size() for client in selected_clients]
          local_loss   = [client.train_loss for client in selected_clients]
          local_acc    = [client.train_acc for client in selected_clients]

          for i in range(len(dataset_size)):
            if len(local_loss[i]) > 0:
              print(f'  Client: {selected_clients[i].id:>{3}}    Dataset size: {dataset_size[i]}   Train loss: {sum(local_loss[i])/ len(local_loss[i]): .4f}   Train accuracy:  {sum(local_acc[i]) * 100/ len(local_acc[i]): .4f}')
            else:
              print(f'  Client: {selected_clients[i].id:>{3}}    Dataset size: {dataset_size[i]}   Train loss: {0.0}   Train accuracy:  {0.0}')

          # Get global model weights
          # New model weights: the highest-level model registered for this round
          model_id = MODEL_POOL[MODEL_POOL['round'] == CURRENT_ROUND]['level'].idxmax()
          w_new = torch.load(f'{MODELS_PATH}/{model_id}.pth')
          # Old model weights: the highest-level model of the previous round
          model_id = MODEL_POOL[MODEL_POOL['round'] == CURRENT_ROUND-1]['level'].idxmax()
          w_old = torch.load(f'{MODELS_PATH}/{model_id}.pth')

          for i, client in enumerate(selected_clients):
            # Get reputation vector of the previous round
            reputation_vector = REPUTATION_DATA.loc[(REPUTATION_DATA['round'] == CURRENT_ROUND-1) & (REPUTATION_DATA['client_id'] == client.id)].to_dict(orient='records')[0]
            ##### UPDATE CONTRIBUTION
            client_records = BC[(BC['round'] == CURRENT_ROUND) & (BC['executer'] == client.id)]
            if not client_records.empty:
              # Get task summary of client for this round
              task_summary = client_records.groupby('task_type').agg(
                task_count=('task_type', 'count'),
                processing_time=('processing_time', 'sum'),
                communication_time=('communication_time', 'sum'),
                processing_energy=('processing_energy', 'sum'),
                communication_energy=('communication_energy', 'sum'),
                ).reset_index()
              # Convert to dictionary
              task_summary = task_summary.to_dict(orient='records')
              eta, c_i = update_resource_contribution(task_summary, reputation_vector['resource_contribution'])
            else:
              eta, c_i = update_resource_contribution(None, reputation_vector['resource_contribution'])

            ##### UPDATE DATA QUALITY
            if not MODEL_POOL[(MODEL_POOL['round'] == CURRENT_ROUND) & (MODEL_POOL['owner'] == client.id)].empty:
              local_model_id = MODEL_POOL[(MODEL_POOL['round'] == CURRENT_ROUND) & (MODEL_POOL['owner'] == client.id)].index[0]
              w_i = torch.load(f'{MODELS_PATH}/{local_model_id}.pth')
              mu = update_data_quality(w_old, w_new, w_i, reputation_vector['data_quality'])
            else:
              mu = reputation_vector['data_quality']

            ##### UPDATE OFFLOADING HISTORY
            phi      = reputation_vector['offloading_history']
            if client.offload:
              phi = update_offload_history(phi)

            ##### UPDATE VALIDATION SCORE
            # Get the validity score if the client is the model owner or if an executer of an offloaded task
            if not MODEL_POOL[(MODEL_POOL['round'] == CURRENT_ROUND) & (MODEL_POOL['trainer'] == client.id)].empty:
              new_val_score = MODEL_POOL[(MODEL_POOL['round'] == CURRENT_ROUND) & (MODEL_POOL['trainer'] == client.id)]['validation_score'].values.sum()
              nu = update_validation_score(new_val_score, reputation_vector['validation_score'])
            else:
              nu = reputation_vector['validation_score']

            REPUTATION_DATA = pd.concat([REPUTATION_DATA, pd.DataFrame({
                'round' : [CURRENT_ROUND],
                'client_id' : [client.id],
                'data_quality' : [mu],
                'resource_contribution' : [eta],
                'offloading_history' : [phi],
                'validation_score' : [nu],
                'performance_index' : [c_i],
                'profit' : [0]
            })], ignore_index=True)

          # Normalize reputation (work on an explicit copy of this round's rows)
          REPUTATION_DATA_ROUND = REPUTATION_DATA.loc[(REPUTATION_DATA['round'] == CURRENT_ROUND)].copy()
          scaler = MinMaxScaler()
          REPUTATION_DATA_ROUND[columns_normalized] = scaler.fit_transform(REPUTATION_DATA_ROUND[columns_to_normalize])
          REPUTATION_DATA.loc[(REPUTATION_DATA['round'] == CURRENT_ROUND)] = REPUTATION_DATA_ROUND

          print('Payment....')
          # Calculate total payments
          new_df = BC.loc[(BC['round'] == CURRENT_ROUND)].copy()
          for index, r in new_df.iterrows():
            row = r.to_dict()
            # Update task data quality
            mu_old = REPUTATION_DATA[(REPUTATION_DATA['round'] == CURRENT_ROUND-1) & (REPUTATION_DATA['client_id'] == row['owner'])].to_dict(orient='records')[0]['data_quality']
            local_model_id = MODEL_POOL[(MODEL_POOL['round'] == CURRENT_ROUND) & (MODEL_POOL['owner'] == row['owner'])].index[0]
            w_i = torch.load(f'{MODELS_PATH}/{local_model_id}.pth')
            new_df.at[index, 'data_quality'] = update_data_quality(w_old, w_new, w_i, mu_old)
            # Update resource contribution
            eta_old = REPUTATION_DATA[(REPUTATION_DATA['round'] == CURRENT_ROUND-1) & (REPUTATION_DATA['client_id'] == row['executer'])].to_dict(orient='records')[0]['resource_contribution']
            task_summary = {
              'task_count' : 1,
              'processing_time' : row['processing_time'],
              'communication_time' : row['communication_time'],
              'processing_energy' : row['processing_energy'],
              'communication_energy' : row['communication_energy']
            }
            new_df.at[index, 'resource_contribution'], new_df.at[index, 'performance_index'] = update_resource_contribution([task_summary], eta_old)
            # Update offloading history
            phi = REPUTATION_DATA[(REPUTATION_DATA['round'] == CURRENT_ROUND-1) & (REPUTATION_DATA['client_id'] == row['owner'])].to_dict(orient='records')[0]['offloading_history']
            if (row['task_type'] == 'train') and (row['executer'] != row['owner']):
              phi = update_offload_history(phi)
            new_df.at[index, 'offloading_history'] = phi
            # Update validation score
            nu_old = REPUTATION_DATA[(REPUTATION_DATA['round'] == CURRENT_ROUND-1) & (REPUTATION_DATA['client_id'] == row['executer'])].to_dict(orient='records')[0]['validation_score']
            new_val_score = MODEL_POOL[(MODEL_POOL['round'] == CURRENT_ROUND) & (MODEL_POOL['trainer'] == row['executer']) & (MODEL_POOL['owner'] == row['owner'])].to_dict(orient='records')[0]['validation_score']
            new_df.at[index, 'validation_score'] = update_validation_score(new_val_score, nu_old)

          BC.loc[(BC['round'] == CURRENT_ROUND)] = new_df
          # Normalize the per-task reputation components
          scaler = MinMaxScaler()
          new_df[columns_normalized] = scaler.fit_transform(new_df[columns_to_normalize])
          BC.loc[(BC['round'] == CURRENT_ROUND)] = new_df

          for index, r in new_df.iterrows():
            # Calculate the score of each task
            row = r.to_dict()
            reputation_score, sub_scores = get_reputation_score(row['data_quality_norm'], row['resource_contribution_norm'], row['offloading_history'], row['validation_score_norm'])
            new_df.at[index, 'reputation_score'] = reputation_score
            new_df.at[index, 'executer_score']   = sub_scores[1] + sub_scores[3]
          # Scalar column sum (avoids positional indexing into a labelled Series).
          total_score = new_df['reputation_score'].sum()

          # Calculate profit percentages
          for index, r in new_df.iterrows():
            row = r.to_dict()
            new_df.at[index, 'executer_per'] = (row['executer_score'] / total_score) + 1e-8

          BC.loc[(BC['round'] == CURRENT_ROUND)] = new_df

          for i, client in enumerate(selected_clients):
            # Check first if client has participated in this round tasks
            executer_df = BC.loc[(BC['round'] == CURRENT_ROUND) & (BC['executer'] == client.id)]
            owner_df    = BC.loc[(BC['round'] == CURRENT_ROUND) & (BC['owner'] == client.id)]
            executer_sum = 0
            owner_sum = 0
            if not executer_df.empty:
              executer_sum = executer_df['executer_score'].sum()
            if not owner_df.empty:
              # Subtract as Series: subtracting two single-column DataFrames whose
              # column labels differ aligns on the labels and yields only NaN,
              # which made the owner share always sum to 0.
              owner_sum = (owner_df['reputation_score'] - owner_df['executer_score']).sum()
            new_profit = RHO_MAX * (executer_sum + owner_sum)
            idx = REPUTATION_DATA[(REPUTATION_DATA['round'] == CURRENT_ROUND) & (REPUTATION_DATA['client_id'] == client.id)].index[0]
            REPUTATION_DATA.at[idx, 'profit'] = new_profit
            mu = REPUTATION_DATA.at[idx, 'data_quality_norm']
            eta = REPUTATION_DATA.at[idx, 'resource_contribution_norm']
            phi = REPUTATION_DATA.at[idx, 'offloading_history']
            nu = REPUTATION_DATA.at[idx, 'validation_score_norm']
            REPUTATION_DATA.at[idx, 'reputation_score'], _ = get_reputation_score(mu, eta, phi, nu)
            # Update recorded data
            client.wallet += new_profit

          # Despite its name this is the mean reputation score of the round.
          total_reputation_score = REPUTATION_DATA[REPUTATION_DATA['round'] == CURRENT_ROUND]['reputation_score'].sum() / len(REPUTATION_DATA[REPUTATION_DATA['round'] == CURRENT_ROUND]['reputation_score'])
          TOTAL_REPUTATION.append(total_reputation_score)
          TASK_DROP_RATE.append(TASK_DROP[CURRENT_ROUND-1]/NUM_CLIENTS)
          print(f'  Total reputation score: {total_reputation_score}')
          # Test global model performance on test dataset
          loss_test, _, acc_test = self.test(w_new)

          if (verbose_test):
            print("Communication round : ", CURRENT_ROUND, ", Test accuracy : ", acc_test, ", Test loss : ", loss_test)
            print("*****************************************************************************************************")
          GLOBAL_ACCURACY.append(acc_test)
          accuracy.append(acc_test)
          loss.append(loss_test)
          print("End round...")


          # Attach the fair payment to every bid placed during this round.
          temp = BID_POOL.loc[BID_POOL['round'] == CURRENT_ROUND]
          for bid_idx, r in temp.iterrows():
            row = r.to_dict()
            if row['winner'] == True:
              fair_payment = BC[(BC['round'] == CURRENT_ROUND) & (BC['owner'] == row['auctioneer_id']) & (BC['executer'] == row['agent_id']) & (BC['task_type'] == 'train')].to_dict(orient='records')[0]['executer_per']
            else:
              if not BC[(BC['round'] == CURRENT_ROUND) & (BC['owner'] == row['auctioneer_id']) & (BC['task_type'] == 'train')].empty:
                fair_payment = BC[(BC['round'] == CURRENT_ROUND) & (BC['owner'] == row['auctioneer_id']) & (BC['task_type'] == 'train')].to_dict(orient='records')[0]['executer_per']
              else:
                fair_payment = 0.

            BID_POOL.at[bid_idx, 'fair_payment'] = fair_payment

          # Save changes: start fresh files on the first round, append afterwards.
          # The mode must follow the round number, exactly like the `header` flag.
          mode = 'a' if CURRENT_ROUND > 1 else 'w'
          BC[BC['round']==CURRENT_ROUND].to_csv('results/reinforce/BC_50cli_50round_01offprob_8ep_withaucutility_noniid.csv', index=False, mode=mode, header=not (CURRENT_ROUND>1))
          REPUTATION_DATA[REPUTATION_DATA['round']==CURRENT_ROUND].to_csv('results/reinforce/REPUTATION_DATA_50cli_50round_01offprob_8ep_withaucutility_noniid.csv', index=False, mode=mode, header=not (CURRENT_ROUND>1))
          BID_POOL[BID_POOL['round']==CURRENT_ROUND].to_csv('results/reinforce/BID_POOL_50cli_50round_01offprob_8ep_withaucutility_noniid.csv', index=False, mode=mode, header=not (CURRENT_ROUND>1))
          with open('results/reinforce/metrics_50cli_50round_01offprob_8ep_withaucutility_noniid.csv', 'a', newline='') as file:
              writer = csv.writer(file)
              writer.writerow([CURRENT_ROUND, GLOBAL_ACCURACY[CURRENT_ROUND-1], ACTOR_LOSS[CURRENT_ROUND-1], TOTAL_REPUTATION[CURRENT_ROUND-1], TASK_DROP_RATE[CURRENT_ROUND-1]])

          if len(ACTOR_LOSS[CURRENT_ROUND-1]) > 0:
            print(f'Global actor loss : {sum(ACTOR_LOSS[CURRENT_ROUND-1])/len(ACTOR_LOSS[CURRENT_ROUND-1])}')
          print("End round and models training...")
          delete_models(MODELS_PATH)
        except Exception as e:
          # Post-processing of a round is best-effort: report and go on with the next round.
          print(f"An exception occurred: {e}")
          traceback.print_exc()

      # Collapse the per-round reward lists into one mean per round.
      # Rounds without any recorded reward are reported as 0.0.
      with MEAN_REWARD_LOCK:
        MEAN_REWARD = [sum(rewards) / len(rewards) if len(rewards) > 0 else 0.0 for rewards in MEAN_REWARD]
      END_TASK = True
      print('End of FL task')
    return self.net.state_dict(), accuracy, loss


  def test(self, weights):
    """Evaluates global model performance with test dataset.

    Loads ``weights`` into ``self.net`` and runs the module-level ``test``
    function on ``self.testloader``.

    Returns
    -------
    Tuple[float, int, float]
        Loss, number of samples in the test dataset, accuracy.
    """
    self.net.load_state_dict(weights)
    loss, accuracy = test(self.net, self.testloader)
    return float(loss), len(self.testloader.dataset), float(accuracy)




### DRL for Bidding Strategy

#### Environment Definition

In [None]:
# Define a Custom Gym Environment
class BiddingEnv(gym.Env):
  """Gym environment that stores one auction observation per bidding agent.

  Agents are addressed with 1-based ids: agent ``k`` owns ``current_state[k-1]``.
  Each stored observation is a dict with the keys ``composed_reputation``
  (4 values), ``performance_index`` and ``payment``.
  """

  def __init__(self, num_agents=10):
    """Build the spaces and the initial (all-zero) state for ``num_agents`` agents."""
    super(BiddingEnv, self).__init__()
    self.num_agents = num_agents

    # Define the components of the observation space
    self.observation_space = spaces.Dict({
      'composed_reputation': spaces.Box(low=0.0, high=1.0, shape=(4,), dtype=float),
      'performance_index': spaces.Box(low=0.0, high=100.0, shape=(1,), dtype=float),
      'payment': spaces.Box(low=0.0, high=1.0, shape=(1,), dtype=float)
    })
    # Define action space
    self.action_space = spaces.Box(low=0.0, high=1.0, shape=(1,), dtype=float)
    # Define state space
    self.state_space = spaces.Tuple([self.observation_space] * num_agents)
    # Initialize current state
    self.current_state = self.reset_()


  def get_n_observations(self):
    """Return the length of the flat observation vector produced by `get_obs`."""
    return (
      self.observation_space['composed_reputation'].shape[0] +
      self.observation_space['performance_index'].shape[0] +
      self.observation_space['payment'].shape[0]
    )


  def get_action_range(self):
    """Return ``high - low`` of the action space."""
    return self.action_space.high - self.action_space.low


  def reset_obs(self, agent_id):
    """Reset the stored observation of one agent to all zeros."""
    # set_obs takes both a bidder and an auctioneer reputation vector; passing
    # only one (as before) raised a TypeError for the missing argument.
    self.set_obs(agent_id, [0, 0, 0, 0], [0, 0, 0, 0], 0., 0.)


  def reset_(self):
    """Build and return a fresh all-zero state (one observation dict per agent).

    The new state is returned, not stored; the caller assigns it.
    """
    initial_state = []
    for i in range(self.num_agents):
      initial_state.append({
        'composed_reputation': [0, 0, 0, 0],
        'performance_index': 0.,
        'payment': 0.
      })
    return initial_state


  def set_obs(self, agent_id, bidder_reputation, auctioneer_reputation, c_idx, payment):
    """Store the observation of agent ``agent_id``.

    The composed reputation takes entries 0 and 2 from ``auctioneer_reputation``
    and entries 1 and 3 from ``bidder_reputation``.
    """
    self.current_state[agent_id-1] = {
        'composed_reputation': [auctioneer_reputation[0], bidder_reputation[1], auctioneer_reputation[2], bidder_reputation[3]],
        'performance_index':c_idx,
        'payment': payment
    }


  def get_obs(self, agent_id):
    """Return the stored observation of agent ``agent_id`` as a flat list.

    Layout: the 4 composed-reputation values, then performance index, then payment.
    """
    current_obs = self.current_state[agent_id-1]
    observation_to_list = list(current_obs['composed_reputation']) + [current_obs['performance_index']] + [current_obs['payment']]
    return observation_to_list


  def get_state(self):
    """Return the list holding every agent's stored observation."""
    return self.current_state


  def step_(self, agent_id, winner, bidder_reputation, auctioneer_reputation, c_idx, payment, fair_payment, scaling_factor=1):
    """
    Calculates the reward of an auction outcome and returns it with the agent's
    currently stored observation. The stored state itself is not modified here,
    and ``c_idx`` is not read.

    Returns
    -------
    observation: list
        Flat observation currently stored for this agent (see `get_obs`).
    reward: float
        Reward signal received according to auction result.
    done: bool
        Always False.
    info: Dict
        Dictionary containing additional information about the step.
    """
    global CURRENT_ROUND

    system_utility = self._system_utility(agent_id, bidder_reputation, auctioneer_reputation)
    if winner:
      if CURRENT_ROUND > 1:
        bidder_utility = self._bidder_utility(agent_id, payment, fair_payment)
        if bidder_utility == 0:
          reward = system_utility * scaling_factor
        else:
          # x + 1/x is smallest (= 2) when payment equals the fair payment, so
          # its inverse rewards bids close to the fair payment.
          p_f = bidder_utility
          f_p = 1/ bidder_utility
          reward = 1/(p_f + f_p) * scaling_factor * 10 + system_utility * scaling_factor

        info = {"agent_id": agent_id, "winner": True, "bidder_utility": bidder_utility, "system_utility": system_utility}
      else:
        # First round: only the system utility contributes to the reward.
        reward = system_utility * scaling_factor
        info = {"agent_id": agent_id, "winner": True, "bidder_utility": 1e-5, "system_utility": 1e-5}
      print(f'  Client {agent_id:>{3}}: Obtained reward: {reward}')

    else:
      # Losing bids are not recorded in BIDDER_UTILITY; the ratio is computed inline.
      bidder_utility = payment / (fair_payment+1e-8)
      if bidder_utility == 0:
        reward = - system_utility * scaling_factor
      else:
        p_f = bidder_utility
        f_p = 1/ bidder_utility
        reward = -((p_f + f_p) + 1 / (10*system_utility + 1e-6))
        # The penalty is clipped at -5.
        if reward < -5:
          reward = -5
      info = {"agent_id": agent_id, "winner": False, "bidder_utility": 0, "system_utility": 0}

    return self.get_obs(agent_id), reward, False, info



  def _bidder_utility(self, agent_id, payment, fair_payment):
    """Return ``payment / fair_payment`` and record it for the current round.

    The ratio is appended to ``BIDDER_UTILITY[CURRENT_ROUND-1]`` under ``METRICS_LOCK``.
    """
    global BIDDER_UTILITY, METRICS_LOCK
    # The small epsilon guards against a zero fair payment.
    profit_made    = payment / (fair_payment+1e-8)
    with METRICS_LOCK:
      BIDDER_UTILITY[CURRENT_ROUND-1].append(profit_made)
    bidder_utility = profit_made
    print(f'  Client {agent_id:>{3}}: Bidder utility is {bidder_utility} (payment: {payment} -- fair payment: {fair_payment})...')
    return bidder_utility


  def _system_utility(self, agent_id, bidder_reputation, auctioneer_reputation):
    """Return the reputation score of the composed (auctioneer/bidder) reputation.

    The composition matches `set_obs`: entries 0 and 2 come from the
    auctioneer, entries 1 and 3 from the bidder.
    """
    composed_score, _ = get_reputation_score(auctioneer_reputation[0], bidder_reputation[1], auctioneer_reputation[2], bidder_reputation[3])
    system_utility = composed_score
    return system_utility


  def render(self):
    """Rendering is not implemented."""
    pass


In [None]:
# Register the custom environment under the id 'BiddingEnv-v0'.
# The entry point names the BiddingEnv class inside the `__main__` module,
# i.e. the module this notebook/script is executed as.
# NOTE(review): presumably the entry point is only resolved when the
# environment is created through gym.make — confirm against the gym version in use.
from gym.envs.registration import register

register(
    id='BiddingEnv-v0',
    entry_point='__main__:BiddingEnv',
    max_episode_steps=300,
)

#### DRL Agent Elements

In [None]:
# Taken from : https://github.com/schneimo/ddpg-pytorch/tree/master 
# and modified to suit our application..


## Experience Replay Buffer Class
class ReplayMemory(object):
    """Fixed-capacity ring buffer of `Transition` tuples used for experience replay.

    Once `capacity` transitions are stored, new transitions overwrite the oldest
    ones; `position` is the index the next transition is written to.
    """

    def __init__(self, capacity=1e3):
        self.capacity = capacity
        self.memory = []
        self.position = 0

    def push(self, *args):
        """Saves a transition (``args`` are the fields of `Transition`)."""
        if len(self.memory) < self.capacity:
            self.memory.append(None)
        self.memory[self.position] = Transition(*args)
        self.position = int((self.position + 1) % self.capacity)

    def get_last_episode(self, episode_len):
        """Return a contiguous run of stored transitions.

        * Fewer than ``episode_len`` transitions stored: everything before the
          write position is returned.
        * Otherwise, when the write position is at least ``episode_len``: a
          window of ``episode_len`` transitions starting at a random index
          before the write position is returned.
        * Otherwise (the write position wrapped around and is smaller than
          ``episode_len``): ``None`` is returned.
          NOTE(review): callers must handle this ``None``; confirm whether a
          window taken from the whole buffer was intended instead.
        """
        if len(self.memory) < episode_len:
            return self.memory[:self.position]
        if self.position >= episode_len:
            start_index = random.randint(0, self.position - episode_len)
            return self.memory[start_index:start_index + episode_len]
        return None

    def sample(self, batch_size):
        """Sample a shuffled batch balanced between positive and non-positive rewards.

        Each half of the batch is drawn from one group. When a group holds fewer
        than ``batch_size // 2`` transitions, all of it is used and the other
        group fills the remainder. For an odd ``batch_size`` with both groups
        large enough, ``batch_size - 1`` transitions are returned.

        If the buffer holds fewer than ``batch_size`` transitions, the request
        is clamped to what is available instead of raising ``ValueError``.
        """
        # To avoid overfitting on non-positive-reward experiences, we rebalance
        positive = [t for t in self.memory if t.reward > 0]
        non_positive = [t for t in self.memory if t.reward <= 0]

        half = batch_size // 2
        if len(non_positive) < half:
            n_non_positive = len(non_positive)
            n_positive = batch_size - n_non_positive
        elif len(positive) < half:
            n_positive = len(positive)
            n_non_positive = batch_size - n_positive
        else:
            n_positive = n_non_positive = half

        # random.sample raises ValueError when asked for more items than exist.
        n_positive = min(n_positive, len(positive))
        n_non_positive = min(n_non_positive, len(non_positive))

        selected_transitions = random.sample(positive, n_positive) + random.sample(non_positive, n_non_positive)
        # Shuffle so the two groups are interleaved in the batch
        random.shuffle(selected_transitions)
        return selected_transitions

    def __len__(self):
        return len(self.memory)



# Lower bounds of the uniform ranges used to initialise the output layers of
# Actor and Critic: weights are drawn from [WEIGHTS_FINAL_INIT, 0] and biases
# from [BIAS_FINAL_INIT, 0].
# Previously tried values:
# WEIGHTS_FINAL_INIT = -40e-1
# BIAS_FINAL_INIT = -40e-2
WEIGHTS_FINAL_INIT = -5e-1
BIAS_FINAL_INIT = -5e-2


def fan_in_uniform_init(tensor, fan_in=None):
    """Fill `tensor` in place with values drawn uniformly from [-b, b], b = 1/sqrt(fan_in).

    When `fan_in` is not given, the size of the tensor's last dimension is used.
    Used to initialise the hidden layers of the actor and the critic.
    """
    effective_fan_in = tensor.size(-1) if fan_in is None else fan_in
    bound = 1. / np.sqrt(effective_fan_in)
    nn.init.uniform_(tensor, -bound, bound)

## Actor class
class Actor(nn.Module):
  """Policy network: two hidden layers (Linear -> LayerNorm -> ReLU) followed by
  a sigmoid output layer, so every action component lies in (0, 1)."""

  def __init__(self, hidden_size, num_inputs, action_space):
    """
    hidden_size  : sequence holding the widths of the two hidden layers.
    num_inputs   : length of the observation vector.
    action_space : object whose ``shape[0]`` gives the number of action components.
    """
    super().__init__()
    self.action_space = action_space
    num_outputs = action_space.shape[0]

    # Hidden layer 1
    self.linear1 = nn.Linear(num_inputs, hidden_size[0])
    self.ln1 = nn.LayerNorm(hidden_size[0])
    # Hidden layer 2
    self.linear2 = nn.Linear(hidden_size[0], hidden_size[1])
    self.ln2 = nn.LayerNorm(hidden_size[1])
    # Output layer
    self.mu = nn.Linear(hidden_size[1], num_outputs)

    # Hidden layers use the fan-in rule, the output layer a dedicated range.
    for hidden_layer in (self.linear1, self.linear2):
      fan_in_uniform_init(hidden_layer.weight)
      fan_in_uniform_init(hidden_layer.bias)
    nn.init.uniform_(self.mu.weight, WEIGHTS_FINAL_INIT, 0)
    nn.init.uniform_(self.mu.bias, BIAS_FINAL_INIT, 0)

  def forward(self, inputs):
    """Map a batch of observations to actions in (0, 1)."""
    hidden = F.relu(self.ln1(self.linear1(inputs)))
    hidden = F.relu(self.ln2(self.linear2(hidden)))
    return torch.sigmoid(self.mu(hidden))


## Critic Class
class Critic(nn.Module):
  """Feed-forward critic that scores an (inputs, actions) pair with a single value.

  The actions are concatenated to the first hidden layer's output, so the
  second linear layer is ``action_space.shape[0]`` units wider on its input.

  Parameters
  ----------
  hidden_size:
      Indexable holding the widths of the first and second hidden layers.
  num_inputs:
      Number of input (state) features.
  action_space:
      Object whose ``shape[0]`` gives the action dimension; it is also kept
      on the instance as ``self.action_space``.
  """

  def __init__(self, hidden_size, num_inputs, action_space):
    super().__init__()
    self.action_space = action_space
    action_dim = action_space.shape[0]
    first_width, second_width = hidden_size[0], hidden_size[1]

    # Creation order is preserved so parameter registration and
    # random-number consumption are unchanged.
    self.linear1 = nn.Linear(num_inputs, first_width)
    self.ln1 = nn.LayerNorm(first_width)
    # The second layer also receives the actions, hence the wider input.
    self.linear2 = nn.Linear(first_width + action_dim, second_width)
    self.ln2 = nn.LayerNorm(second_width)
    # Single-value output layer.
    self.V = nn.Linear(second_width, 1)

    # Hidden layers: fan-in uniform init, weight first and then bias.
    for hidden_layer in (self.linear1, self.linear2):
      fan_in_uniform_init(hidden_layer.weight)
      fan_in_uniform_init(hidden_layer.bias)
    # Output layer: U(WEIGHTS_FINAL_INIT, 0) for weights, U(BIAS_FINAL_INIT, 0) for biases.
    nn.init.uniform_(self.V.weight, WEIGHTS_FINAL_INIT, 0)
    nn.init.uniform_(self.V.bias, BIAS_FINAL_INIT, 0)

  def forward(self, inputs, actions):
    """Return the value estimate for ``inputs`` joined with ``actions`` along dim 1."""
    state_features = F.relu(self.ln1(self.linear1(inputs)))
    joint = torch.cat([state_features, actions], dim=1)  # insert the actions
    joint = F.relu(self.ln2(self.linear2(joint)))
    return self.V(joint)


## Target Networks Update Functions
def soft_update(target, source, tau):
  """Blend ``source`` parameters into ``target`` in place.

  Each target parameter becomes ``target * (1 - tau) + source * tau``.
  Parameters are paired by their position in ``parameters()``.
  """
  for tgt, src in zip(target.parameters(), source.parameters()):
    blended = tgt.data * (1.0 - tau) + src.data * tau
    tgt.data.copy_(blended)

def hard_update(target, source):
  """Overwrite every parameter of ``target`` with the matching one from ``source``.

  Parameters are paired by their position in ``parameters()``; the copy is
  done in place on ``target``.
  """
  pairs = zip(target.parameters(), source.parameters())
  for tgt, src in pairs:
    tgt.data.copy_(src.data)



class PolicyNetwork(nn.Module):
    """Two-headed network that outputs the mean and std of a Gaussian over actions.

    A shared trunk of two Linear+LayerNorm+ReLU stages feeds a sigmoid mean
    head and a softplus std head.

    Parameters
    ----------
    num_inputs:
        Number of state features.
    hidden_size:
        Indexable holding the widths of the first and second hidden layers.
    action_space:
        Object whose ``shape[0]`` gives the number of units in each head.
    """

    def __init__(self, num_inputs, hidden_size, action_space):
        super().__init__()
        action_dim = action_space.shape[0]
        first_width, second_width = hidden_size[0], hidden_size[1]

        # Creation order is preserved so parameter registration and
        # random-number consumption are unchanged.
        self.linear1 = nn.Linear(num_inputs, first_width)
        self.ln1 = nn.LayerNorm(first_width)
        self.linear2 = nn.Linear(first_width, second_width)
        self.ln2 = nn.LayerNorm(second_width)
        self.mean_linear = nn.Linear(second_width, action_dim)
        self.std_linear = nn.Linear(second_width, action_dim)

        # Trunk: fan-in uniform init, weight first and then bias.
        for trunk_layer in (self.linear1, self.linear2):
            fan_in_uniform_init(trunk_layer.weight)
            fan_in_uniform_init(trunk_layer.bias)
        # Heads: U(WEIGHTS_FINAL_INIT, 0) for weights, U(BIAS_FINAL_INIT, 0) for biases.
        for head in (self.mean_linear, self.std_linear):
            nn.init.uniform_(head.weight, WEIGHTS_FINAL_INIT, 0)
            nn.init.uniform_(head.bias, BIAS_FINAL_INIT, 0)

    def forward(self, state):
        """Return ``(mean, std)`` for ``state``: sigmoid mean and softplus std."""
        features = F.relu(self.ln1(self.linear1(state)))
        features = F.relu(self.ln2(self.linear2(features)))
        mean = torch.sigmoid(self.mean_linear(features))
        std = F.softplus(self.std_linear(features))
        return mean, std



## DDPG Agent Class
class DDPG(object):
  """Policy-gradient agent that wraps a single Gaussian ``PolicyNetwork``.

  Despite the class name, no actor/critic/target networks are created: the
  only trainable model is ``self.policy_network``, optimised with Adam on a
  log-probability-times-return loss (see ``update_params``). The checkpoint,
  train/eval and ``get_network`` helpers therefore operate on that network
  and its optimizer.

  Parameters
  ----------
  gamma:
      Discount factor used by ``_calculate_returns``.
  tau:
      Kept as ``self.tau``; no method of this class reads it.
  hidden_size:
      Hidden layer widths forwarded to ``PolicyNetwork``.
  num_inputs:
      Number of state features forwarded to ``PolicyNetwork``.
  action_space:
      Action space forwarded to ``PolicyNetwork`` and kept on the instance.
  exploration_prob:
      Probability with which ``calc_action`` adds ``action_noise`` to the
      sampled action.
  decay_rate:
      Multiplier applied to ``exploration_prob`` by ``decay_exploration``.
  checkpoint_dir:
      Directory used by the checkpoint methods. May be None when checkpoints
      are not used; the checkpoint methods then raise ``ValueError``.
  """

  def __init__(self, gamma, tau, hidden_size, num_inputs, action_space, exploration_prob=0.9, decay_rate=0.99, checkpoint_dir=None):

    self.gamma = gamma
    self.tau = tau
    self.action_space = action_space
    self.policy_network = PolicyNetwork(num_inputs, hidden_size, action_space).to(device)
    self.optimizer = Adam(self.policy_network.parameters(), lr=1e-3)
    self.exploration_prob = exploration_prob
    self.decay_rate = decay_rate
    # Previously accepted but never stored, which made every checkpoint
    # method fail with AttributeError.
    self.checkpoint_dir = checkpoint_dir

  def calc_action(self, state, action_noise=None):
    """
    Samples an action for 'state' from the policy's Gaussian distribution.

    With probability 'exploration_prob', noise from 'action_noise' (if given)
    is added. The result is made non-negative with abs() and clamped to [0, 1].

    Arguments:
        state:          Tensor fed to the policy network
        action_noise:   Optional object exposing a 'noise()' method

    Returns:
        The action tensor (on 'device')
    """

    self.policy_network.eval()
    x = state.to(device)
    with torch.no_grad():
      mean, std = self.policy_network(x)
      # Sample from Gaussian Distribution
      normal = torch.distributions.Normal(mean, std)
      action = normal.sample()
      # NOTE: .item() requires the sampled action to hold exactly one element.
      print(f'  Calculating bid... action(bid) wt noise: {action.item()}')
      if np.random.rand() < self.exploration_prob:
        if action_noise is not None:
          noise = torch.Tensor(action_noise.noise()).to(device)
          action += noise
      action = torch.abs(action)
      action = torch.clamp(action, 0, 1)
    return action


  def update_params(self, batch, epochs=1, agent_id=0):
    """
    Performs one gradient step on the policy network.

    The loss is the negative mean of log_prob(action) * return, where the
    returns come from '_calculate_returns'.

    Arguments:
        batch:      Object with 'state', 'action' and 'reward' attributes,
                    each a sequence of tensors that can be stacked
        epochs:     Unused; kept for interface compatibility
        agent_id:   Unused; kept for interface compatibility

    Returns:
        The loss value as a Python float
    """
    states  = torch.stack(batch.state).to(device)
    actions = torch.stack(batch.action).to(device)
    rewards = torch.stack(batch.reward).to(device)

    self.optimizer.zero_grad()
    mean, std = self.policy_network(states)
    normal = torch.distributions.Normal(mean, std)
    log_probs = normal.log_prob(actions)
    returns = self._calculate_returns(rewards)
    # NOTE(review): this is an elementwise product; confirm the stacked action
    # and reward shapes match so no unintended broadcasting takes place.
    loss = -(log_probs * returns).mean()
    loss.backward()
    self.optimizer.step()

    return loss.item()


  def _calculate_returns(self, rewards):
    """
    Computes discounted cumulative rewards along the first dimension.

    G[t] = rewards[t] + gamma * G[t + 1], evaluated from the last index back
    to the first, so the order of 'rewards' is treated as the time order.
    """
    G = torch.zeros_like(rewards)
    running_sum = 0
    for t in reversed(range(len(rewards))):
      running_sum = rewards[t] + self.gamma * running_sum
      G[t] = running_sum
    return G

  def decay_exploration(self):
    """
    Multiplies the exploration probability by 'decay_rate'
    """
    self.exploration_prob *= self.decay_rate

  def _require_checkpoint_dir(self):
    """
    Returns 'checkpoint_dir', raising ValueError when none was configured
    """
    if self.checkpoint_dir is None:
      raise ValueError('checkpoint_dir is not set')
    return self.checkpoint_dir

  def save_checkpoint(self, last_timestep, replay_buffer):
    """
    Saving the policy network, its optimizer and the replay buffer to a file
    in 'checkpoint_dir' (the directory is created if it does not exist)

    Parameters
    ----------
    last_timestep:
        Last timestep in training before saving
    replay_buffer:  ReplayMemory
        Current replay buffer
    """
    checkpoint_dir = self._require_checkpoint_dir()
    os.makedirs(checkpoint_dir, exist_ok=True)
    checkpoint_name = checkpoint_dir + '/ep_{}.pth.tar'.format(last_timestep)
    logger.info('Saving checkpoint...')
    checkpoint = {
        'last_timestep': last_timestep,
        'policy_network': self.policy_network.state_dict(),
        'optimizer': self.optimizer.state_dict(),
        'replay_buffer': replay_buffer,
    }
    logger.info('Saving model at timestep {}...'.format(last_timestep))
    torch.save(checkpoint, checkpoint_name)
    gc.collect()
    logger.info('Saved model at timestep {} to {}'.format(last_timestep, checkpoint_dir))

  def get_path_of_latest_file(self):
    """
    Returns the latest created file in 'checkpoint_dir'
    """
    checkpoint_dir = self._require_checkpoint_dir()
    files = [file for file in os.listdir(checkpoint_dir) if (file.endswith(".pt") or file.endswith(".tar"))]
    filepaths = [os.path.join(checkpoint_dir, file) for file in files]
    last_file = max(filepaths, key=os.path.getctime)
    return os.path.abspath(last_file)

  def load_checkpoint(self, checkpoint_path=None):
    """
    Loading the policy network and optimizer from a given path. If the given
    path is None then the latest saved file in 'checkpoint_dir' will be used.

    Arguments:
        checkpoint_path:    File to load the model from

    Returns:
        Tuple (start_timestep, replay_buffer) read from the checkpoint

    Raises:
        OSError if 'checkpoint_path' is not an existing file
    """

    if checkpoint_path is None:
      checkpoint_path = self.get_path_of_latest_file()

    if os.path.isfile(checkpoint_path):
      logger.info("Loading checkpoint...({})".format(checkpoint_path))
      key = 'cuda' if torch.cuda.is_available() else 'cpu'
      # NOTE: torch.load unpickles arbitrary objects (the replay buffer is
      # stored as one); only load checkpoints from trusted sources.
      checkpoint = torch.load(checkpoint_path, map_location=key)
      # start_timestep = checkpoint['last_timestep'] + 1
      start_timestep = checkpoint['last_timestep']
      self.policy_network.load_state_dict(checkpoint['policy_network'])
      self.optimizer.load_state_dict(checkpoint['optimizer'])
      replay_buffer = checkpoint['replay_buffer']
      gc.collect()
      logger.info('Loaded model at timestep {} from {}'.format(start_timestep, checkpoint_path))
      return start_timestep, replay_buffer
    else:
      raise OSError('Checkpoint not found')


  def set_eval(self):
    """
    Sets the model in evaluation mode
    """
    self.policy_network.eval()

  def set_train(self):
    """
    Sets the model in training mode
    """
    self.policy_network.train()

  def get_network(self, name):
    """
    Returns the network registered under 'name'.

    The policy network is the only model held by this agent; it is returned
    for both 'Actor' and 'Policy'. Any other name raises NameError.
    """
    if name in ('Actor', 'Policy'):
      return self.policy_network
    raise NameError('name \'{}\' is not defined as a network'.format(name))



class NormalizedActions(gym.ActionWrapper):
  """Action wrapper that rescales between [-1, 1] and [action_space.low, action_space.high]."""

  def action(self, action):
    """
    Normalizes the actions to be in between action_space.high and action_space.low.
    If action_space.low == -action_space.high, this is equals to action_space.high*action.

    Parameters
    ----------
    action
        Action expressed in [-1, 1]

    Return
    ------
    Normalized action
    """

    # The first operation builds a new object, so the in-place steps below
    # never touch the caller's array.
    action = (action + 1) / 2  # [-1, 1] => [0, 1]
    action *= (self.action_space.high - self.action_space.low)
    action += self.action_space.low
    return action

  def reverse_action(self, action):
    """
    Reverts the normalization

    The computation is done out of place: the previous in-place '-=' / '/='
    modified the caller's array and failed on integer arrays.

    Parameters
    ----------
    action
        Action expressed in [action_space.low, action_space.high]

    Return
    ------
    Reverted action
    """
    low = self.action_space.low
    span = self.action_space.high - low
    unit_action = (action - low) / span  # [low, high] => [0, 1]
    return unit_action * 2 - 1          # [0, 1] => [-1, 1]



# From OpenAI Baselines:
# https://github.com/openai/baselines/blob/master/baselines/ddpg/noise.py
class OrnsteinUhlenbeckActionNoise:
  """Stateful noise source following a discretised Ornstein-Uhlenbeck update.

  Parameters
  ----------
  mu:
      Array the process is pulled towards; its shape sets the noise shape.
  sigma:
      Scale of the random term.
  theta:
      Strength of the pull towards ``mu``.
  dt:
      Step size of the discretisation.
  x0:
      Optional starting value; zeros shaped like ``mu`` are used when None.
  """

  def __init__(self, mu, sigma, theta=.15, dt=1e-2, x0=None):
    self.mu = mu
    self.sigma = sigma
    self.theta = theta
    self.dt = dt
    self.x0 = x0
    self.reset()

  def noise(self):
    """Advance the process by one step and return the new value."""
    previous = self.x_prev
    drift = self.theta * (self.mu - previous) * self.dt
    diffusion = self.sigma * np.sqrt(self.dt) * np.random.normal(size=self.mu.shape)
    current = previous + drift + diffusion
    self.x_prev = current
    return current

  def reset(self):
    """Put the process back at ``x0``, or at zeros shaped like ``mu`` if ``x0`` is None."""
    if self.x0 is None:
      self.x_prev = np.zeros_like(self.mu)
    else:
      self.x_prev = self.x0

  def __repr__(self):
    template = 'OrnsteinUhlenbeckActionNoise(mu={}, sigma={})'
    return template.format(self.mu, self.sigma)


### Main Program

In [None]:
### MAIN PROGRAM
# Module-level shared state and hyper-parameters for the simulation.
# Each *_LOCK is created next to the table or flag it is named after.
# NOTE(review): presumably every access to those objects from worker threads
# is expected to hold the matching lock - verify against the callers.

## Dataframes Args
# Per-task record table (one row per executed task, judging by the columns).
BC_LOCK = threading.Lock()
columns = ['round', 'owner', 'executer', 'task_type', 'processing_time', 'communication_time', 'processing_energy', 'communication_energy', 'data_quality', 'resource_contribution', 'offloading_history', 'validation_score', 'data_quality_norm', 'resource_contribution_norm', 'validation_score_norm', 'performance_index', 'reputation_score', 'executer_score', 'executer_per']
BC = pd.DataFrame(columns = columns)

# Per-round, per-client reputation records.
reputation_cols = ['round', 'client_id', 'data_quality', 'resource_contribution', 'offloading_history', 'validation_score', 'performance_index', 'profit', 'data_quality_norm', 'resource_contribution_norm', 'validation_score_norm', 'reputation_score']
REPUTATION_DATA = pd.DataFrame(columns = reputation_cols)

# Bid records. The earlier schema (separate bidder/auctioneer reputations) is
# kept below for reference; the active one uses a single 'composed_reputation'.
# bid_cols = ['round', 'agent_id', 'auctioneer_id', 'bid', 'winner', 'bidder_reputation', 'auc_reputation', 'c_idx', 'payment', 'fair_payment']
BID_POOL_LOCK = threading.Lock()
bid_cols = ['round', 'agent_id', 'auctioneer_id', 'bid', 'winner', 'composed_reputation', 'c_idx', 'payment', 'fair_payment']
BID_POOL = pd.DataFrame(columns = bid_cols)

# Model records. Note that this frame, unlike the others, is created with
# dtype='float32'.
MODEL_POOL_LOCK = threading.Lock()
model_pool_cols = ['round', 'level', 'owner', 'trainer', 'submodels_id', 'aggregators', 'aggregated', 'validators', 'validation_score', 'lock']

MODEL_POOL = pd.DataFrame(columns = model_pool_cols, dtype='float32')

## Total payment
# Lower and upper bounds (units are not stated in this cell).
RHO_MIN = 0
RHO_MAX = 10000

## Shared variables
NUM_CLIENTS = 50
MODEL_WEIGHTS_SIZE = 0                          # Size of the model weights in bytes
GLOBAL_MODEL_HOLDER = None
DATA_UNIT_SIZE = 78400                          # The size of a single data unit (for example one image) in (Bytes)
B_w = 15e3                                      # Bandwidth in (Hz)
CAPACITANCE = 1e-28
N_0 = -174                                      # Noise power spectral density (dBm/Hz)
TRANSMIT_POWER = 400                            # Transmission power in (mWatt)


# State flags
END_ROUND_LOCK = threading.Lock()
END_ROUND = False
END_TASK = False

# Offloading variables
OFFLOADING_TOKEN = threading.Lock()
OFFLOADING_PROB = 0.1
TOTAL_OFFLOADING = 0
CURRENT_ROUND = 0

# Key sets of the auction message dictionaries (offer, bid, reply).
# NOTE(review): the offer/bid/reply reading is taken from the constant names - confirm.
AUCTION_OFFER_KEYS = ['timestamp', 'auction_ID', 'round', 'auctioneer_ID', 'offer']
AUCTION_BID_KEYS = ['timestamp', 'auction_ID', 'bidder_ID', 'bid', 'true_action']
AUCTION_REP_KEYS = ['timestamp', 'auction_ID', 'round', 'auctioneer_id', 'composed_reputation', 'bid', 'true_action', 'payment', 'task_args']

## Federated Learning Parameters
BATCH_SIZE = 32
NUM_EPOCHS = 3
LEARNING_RATE = 0.01
NUM_ROUNDS = 50
selected_clients = []

## Reinforcement Learning Parameters
DRL_BATCH_SIZE = 8              # Number of transitions sampled from replay memory
DRL_DISCOUNT_FACTOR = 0.7       # Discount factor of Bellman equation (passed to DDPG as gamma)
DRL_EPS_START = 0.9             # Exploitation-exploration tradeoff factor
DRL_EPS_END = 0.05
DRL_EPS_DECAY = 1000            # Decay to control the decay of epsilon
DRL_TAU = 0.005                 # Update rate of target network (passed to DDPG as tau)
# NOTE(review): DDPG.__init__ builds its Adam optimizer with a hard-coded
# lr=1e-3 and this constant is not passed to it in this cell.
DRL_LEARNING_RATE = 1e-3        # Learning rate of DRL network
BIDDER_UTILITY_THRESHOLD = 0.15
STEPS_PER_EP = 8

# Metrics
# Lists that start empty here; they are filled elsewhere in the notebook.
METRICS_LOCK = threading.Lock()
MEAN_REWARD = []
BIDDER_UTILITY = []
TOTAL_REPUTATION = []
GLOBAL_ACCURACY = []
ACTOR_LOSS = []
CRITIC_LOSS = []
TASK_DROP = []
TASK_DROP_RATE = []


# Linear regression model for future profit prediction
LR_MODEL = LinearRegression()

# Reinforcement Learning Environment
# "BiddingEnv-v0" is not a built-in gym id, so it has to be registered before
# this line runs (presumably in an earlier cell - verify).
ENV_LOCK = threading.Lock()
env = gym.make("BiddingEnv-v0", num_agents = NUM_CLIENTS)
# env = NormalizedActions(env)

# Set seed for random generators
# NOTE(review): the seeds are set after the environment has been created, so
# any randomness used while constructing it is not covered by them.
random.seed(42)
# env.seed(42)
torch.manual_seed(42)
np.random.seed(42)

# n_observations = env.get_n_observations()
# action_range = env.get_action_range()
n_observations = env.get_n_observations()
# n_actions is not read again in this cell; the action dimension used below
# (nb_actions) is taken from env.action_space instead.
n_actions = 1


# Widths of the two hidden layers handed to the agent's network.
# hidden_size = (400, 300)
hidden_size = (128, 64)

# Create Replay Memory
REPLAY_MEMORY = ReplayMemory(int(1e5))

# Initialize OU-Noise
# Zero-mean noise with sigma 0.005 per action dimension.
nb_actions = env.action_space.shape[-1]
ou_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(nb_actions), sigma=0.005 * np.ones(nb_actions))


# Create DDPG Agent
AGENT_LOCK = threading.Lock()
DDPG_AGGREGATED = False
DDPG_CHECK = False

# Positional arguments map to (gamma, tau, hidden_size, num_inputs, action_space);
# exploration_prob and decay_rate keep their defaults.
DDPG_AGENT = DDPG(
              DRL_DISCOUNT_FACTOR,
              DRL_TAU,
              hidden_size,
              n_observations,
              env.action_space,
              checkpoint_dir='./saved_models/BiddingEnv-v0'
              )



# Prepare and split dataset
# load_datasets is asked for NUM_CLIENTS*10 partitions, while the client list
# below only indexes the loaders by client_df's row index.
# NOTE(review): confirm the remaining partitions are consumed elsewhere.
trainloaders, valloaders, testloader=load_datasets(num_clients=NUM_CLIENTS*10, inter_iid=False, intra_iid=False)  #*100)
network = Network(num_clients=NUM_CLIENTS, batch_size=BATCH_SIZE, testloader=testloader)

# Read clients Details
# NOTE(review): generate_clients presumably writes 'client_data.csv', which is
# read back on the next line - verify.
generate_clients(NUM_CLIENTS)
client_df = pd.read_csv('client_data.csv')

# One Client per CSV row; `i` is the DataFrame index of the row and selects
# that client's train/validation loaders.
clients = [Client(
    id=int(row['id']),
    coordinates = (row['x_coordinate'], row['y_coordinate'], row['z_coordinate']),
    trainloader=trainloaders[i],
    valloader=valloaders[i],
    epochs=NUM_EPOCHS,
    learning_rate=LEARNING_RATE,
    capacitance=CAPACITANCE,
    cpu_cycles=row['cpu_cycles'],
    power=TRANSMIT_POWER,
    frequency=row['frequency']) for i, row in client_df.iterrows()]

# Run the federated task on a worker thread and wait for it to finish.
# The Future is kept and result() is called so that an exception raised inside
# launch_task is re-raised here; without it the error would be stored on the
# discarded Future and the cell would finish as if the run had succeeded.
with ThreadPoolExecutor(max_workers=50) as executor:
  launch_future = executor.submit(network.launch_task, NUM_ROUNDS, LEARNING_RATE, NUM_EPOCHS)
  launch_future.result()


print(f'Total offloading events : {TOTAL_OFFLOADING}')


**References**

Sources for the transmission-rate formula and the simulation parameter values:
- https://ieeexplore.ieee.org/document/9521696/
- https://ieeexplore.ieee.org/document/9372882
- https://www.sciencedirect.com/science/article/pii/S0167739X23001383

