<a href="https://colab.research.google.com/github/rupeshgyawali/federated-covid-xray-detection/blob/main/federated/sequential_simulation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive; later cells read the dataset archive
# from it and write logs/weights under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


### Download and unzip dataset

In [None]:
# Copy the dataset archive from the mounted Drive to the local /content directory.
!cp /content/drive/MyDrive/MajorProject/Dataset_NIID.zip /content/

In [None]:
!unzip -q Dataset_NIID.zip -d Dataset_NIID

### Install necessary libraries

In [None]:
!pip install flwr

Collecting flwr
  Downloading flwr-0.19.0-py3-none-any.whl (106 kB)
[?25l[K     |███                             | 10 kB 25.8 MB/s eta 0:00:01[K     |██████▏                         | 20 kB 32.1 MB/s eta 0:00:01[K     |█████████▎                      | 30 kB 20.8 MB/s eta 0:00:01[K     |████████████▎                   | 40 kB 13.3 MB/s eta 0:00:01[K     |███████████████▍                | 51 kB 10.7 MB/s eta 0:00:01[K     |██████████████████▌             | 61 kB 12.4 MB/s eta 0:00:01[K     |█████████████████████▌          | 71 kB 13.4 MB/s eta 0:00:01[K     |████████████████████████▋       | 81 kB 13.0 MB/s eta 0:00:01[K     |███████████████████████████▊    | 92 kB 14.2 MB/s eta 0:00:01[K     |██████████████████████████████▊ | 102 kB 15.0 MB/s eta 0:00:01[K     |████████████████████████████████| 106 kB 15.0 MB/s 
Collecting protobuf<4.0.0,>=3.19.0
  Downloading protobuf-3.20.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl (1.0 MB)
[K     |████████████████

### Necessary imports


In [None]:
import os

import flwr as fl
import numpy as np
import tensorflow as tf

In [None]:
# Number of images per batch when building the datasets.
batch_size = 32
# Every image is resized to img_height x img_width; the same values define
# the input shape of the VGG16 base model.
img_height = 224
img_width = 224

### Dataset preparation

In [None]:
def get_dataset(dataset_path):
    """Build the training and validation datasets for one image directory.

    Both datasets are created from the same directory with the same seed and a
    0.2 validation fraction; images are resized to (img_height, img_width) and
    grouped into batches of `batch_size`.

    Args:
        dataset_path: Directory passed to `image_dataset_from_directory`.

    Returns:
        A `(train_ds, test_ds)` tuple: the "training" subset followed by the
        "validation" subset.
    """
    def _load_subset(subset_name):
        # Only `subset` differs between the two calls; every other option is shared.
        return tf.keras.utils.image_dataset_from_directory(
            dataset_path,
            seed=123,
            validation_split=0.2,
            subset=subset_name,
            batch_size=batch_size,
            image_size=(img_height, img_width),
        )

    train_ds = _load_subset("training")
    test_ds = _load_subset("validation")

    # No cache/shuffle/prefetch tuning is applied here; the datasets are
    # returned exactly as built.
    return train_ds, test_ds

### Model Preparation

In [None]:
def get_compiled_model(num_classes=4):
    """Build and compile a classifier on top of a frozen VGG16 base.

    The ImageNet-initialised VGG16 convolutional base (without its top) is
    frozen, and a small dense head ending in a softmax over `num_classes`
    outputs is attached. The model is compiled with Adam, sparse categorical
    cross-entropy and an accuracy metric.

    Args:
        num_classes: Number of units in the final softmax layer. Defaults to 4,
            the value that was previously hard-coded.

    Returns:
        The compiled `tf.keras` model.
    """
    # NOTE(review): inputs reach the VGG16 base without any explicit
    # preprocessing step in this notebook — confirm whether
    # tf.keras.applications.vgg16.preprocess_input should be applied.
    vgg = tf.keras.applications.vgg16.VGG16(
        input_shape=[img_height, img_width] + [3],
        weights='imagenet',
        include_top=False)

    # Freeze the base so only the layers added below are trained.
    for layer in vgg.layers:
        layer.trainable = False

    x = tf.keras.layers.Flatten()(vgg.output)
    x = tf.keras.layers.Dense(64, activation='relu')(x)
    x = tf.keras.layers.Dropout(0.2)(x)
    x = tf.keras.layers.Dense(128, activation='relu')(x)
    # Use the parameter rather than a literal so the head follows `num_classes`.
    prediction = tf.keras.layers.Dense(num_classes, activation='softmax')(x)

    model = tf.keras.models.Model(inputs=vgg.input, outputs=prediction)

    model.compile(optimizer='adam',
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                  metrics=['accuracy'])

    return model

In [None]:
# Single model instance shared by the whole simulation: the training loop
# loads weights into it before each client's fit and before evaluation.
model = get_compiled_model()

### Federated learning


In [None]:
def evaluate_metrics_aggregation_fn(eval_metrics):
    """Aggregate per-client accuracies into one example-weighted accuracy.

    Args:
        eval_metrics: Iterable of `(num_examples, accuracy)` pairs, one per client.

    Returns:
        A dict `{'accuracy': value}` where `value` is the sum of
        `num_examples * accuracy` divided by the total number of examples.
        When there are no pairs, or the total number of examples is zero,
        the accuracy is reported as 0.0 instead of raising ZeroDivisionError.
    """
    # Materialise once so one-shot iterables (e.g. generators) can be read twice.
    pairs = list(eval_metrics)

    total_examples = sum(num_examples for num_examples, _ in pairs)
    if total_examples == 0:
        # Nothing to weigh by: avoid dividing by zero.
        return {'accuracy': 0.0}

    # Weigh accuracy of each client by number of examples used
    weighted_accuracy_sum = sum(accuracy * num_examples for num_examples, accuracy in pairs)
    return {'accuracy': weighted_accuracy_sum / total_examples}

In [None]:
# Number of simulated clients; client ids run from 1 to NUM_OF_CLIENTS.
NUM_OF_CLIENTS = 3
# Number of federated rounds (fit on every client, aggregate, evaluate).
NUM_OF_ROUNDS = 50
# Epochs each client trains per round.
LOCAL_EPOCHS = 3
# Prefix for per-client log files: the client id and the file name are appended
# directly to it (no separator is inserted).
CLIENT_BASE_LOG_DIR = '/content/drive/MyDrive/MajorProject/logs/experiment3/federated/client'
# Prefix for the server-side evaluation log and the saved aggregated weights.
SERVER_BASE_LOG_DIR = '/content/drive/MyDrive/MajorProject/logs/experiment3/federated/'

In [None]:
# One (train, test) dataset pair per client, stored in client-id order so that
# client k's pair sits at index k-1.
Datasets = [
    get_dataset('/content/Dataset_NIID/client' + str(client_number))
    for client_number in range(1, NUM_OF_CLIENTS+1)
]

Found 1982 files belonging to 4 classes.
Using 1586 files for training.
Found 1982 files belonging to 4 classes.
Using 396 files for validation.
Found 1716 files belonging to 4 classes.
Using 1373 files for training.
Found 1716 files belonging to 4 classes.
Using 343 files for validation.
Found 1847 files belonging to 4 classes.
Using 1478 files for training.
Found 1847 files belonging to 4 classes.
Using 369 files for validation.


In [None]:
# Starting point for round 1: the freshly built model's weights. The training
# loop replaces this with the aggregated weights after every round.
aggregated_weights = model.get_weights()

In [None]:
def _ensure_parent_dir(path):
    """Create the directory that will contain `path` if it does not exist yet."""
    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)


def _num_examples(dataset):
    """Return the number of individual examples (not batches) in `dataset`.

    NOTE(review): relies on the `file_paths` list that
    `image_dataset_from_directory` is expected to attach to the datasets it
    returns — confirm for the installed TensorFlow version. When the attribute
    is missing, the examples are counted with one full pass over the dataset.
    """
    file_paths = getattr(dataset, 'file_paths', None)
    if file_paths is not None:
        return len(file_paths)
    return sum(int(labels.shape[0]) for _, labels in dataset)


# Make sure every log/weights target can be opened before any training starts.
_ensure_parent_dir(SERVER_BASE_LOG_DIR + 'server_evaluation.log')
for client_id in range(1, NUM_OF_CLIENTS+1):
    _ensure_parent_dir(CLIENT_BASE_LOG_DIR + str(client_id) + 'training.log')

# Example counts per client, computed once. `len(dataset)` is the number of
# batches, whereas the aggregation helpers below weigh by number of examples.
example_counts = [
    (_num_examples(train_ds), _num_examples(test_ds))
    for train_ds, test_ds in Datasets
]

# `round_num` instead of `round` so the builtin round() is not shadowed.
for round_num in range(1, NUM_OF_ROUNDS+1):
    print(f'Round {round_num}')
    # Fit round: every client starts from the same aggregated weights.
    # NOTE(review): only the weights are reset between clients; the shared
    # model's optimizer object is reused — confirm this is intended.
    fit_results = []
    for client_id in range(1, NUM_OF_CLIENTS+1):
        # Used as a file-name prefix, e.g. '<...>/client1' + 'training.log'.
        # NOTE(review): no path separator is inserted — confirm the intended layout.
        log_dir = CLIENT_BASE_LOG_DIR + str(client_id)
        train_ds, test_ds = Datasets[client_id-1]
        num_train_examples, _ = example_counts[client_id-1]
        model.set_weights(aggregated_weights)
        csv_logger = tf.keras.callbacks.CSVLogger(
            log_dir + 'training.log',
            append=True)
        print(f'Client {client_id}: Fit round')
        model.fit(train_ds, validation_data=test_ds, epochs=LOCAL_EPOCHS, callbacks=[csv_logger])
        fit_results.append((model.get_weights(), num_train_examples))

    # Federated Aggregation: weighted average of the clients' weights.
    aggregated_weights = fl.server.strategy.aggregate.aggregate(fit_results)
    print('Weights aggreated')
    if round_num % 5 == 0:
        # Checkpoint the aggregated weights every fifth round.
        print(f"Saving round {round_num} aggregated_weights...")
        np.savez(SERVER_BASE_LOG_DIR + f"round-{round_num}-weights.npz", *aggregated_weights)

    # Evaluation round: the aggregated model is evaluated on each client's test split.
    evaluation_results = []
    model.set_weights(aggregated_weights)
    for client_id in range(1, NUM_OF_CLIENTS+1):
        log_dir = CLIENT_BASE_LOG_DIR + str(client_id)
        _, test_ds = Datasets[client_id-1]
        _, num_test_examples = example_counts[client_id-1]
        print(f'Client {client_id}: Evaluation round')
        loss, acc = model.evaluate(test_ds)
        # Log client's local evaluation loss and accuracy
        with open(log_dir + 'evaluation.log', 'a') as f:
            f.write(f'{round_num},{loss},{acc}\n')
        evaluation_results.append((num_test_examples, loss, acc))

    # Evaluation Aggregation: example-weighted loss and accuracy across clients.
    loss_aggregated = fl.server.strategy.aggregate.weighted_loss_avg(
        [(n_examples, l) for n_examples, l, _ in evaluation_results])
    metrics_aggregated = evaluate_metrics_aggregation_fn(
        [(n_examples, a) for n_examples, _, a in evaluation_results])
    # Log evaluation loss and accuracy
    with open(SERVER_BASE_LOG_DIR + 'server_evaluation.log', 'a') as f:
        f.write(f'{round_num},{loss_aggregated},{metrics_aggregated["accuracy"]}\n')
    print('Evaluation Aggregated')


Round 1
Client 1: Fit round
Epoch 1/3
Epoch 2/3
Epoch 3/3
Client 2: Fit round
Epoch 1/3
Epoch 2/3
Epoch 3/3
Client 3: Fit round
Epoch 1/3
Epoch 2/3
Epoch 3/3
Weights aggreated
Client 1: Evaluation round
Client 2: Evaluation round
Client 3: Evaluation round
Evaluation Aggregated
Round 2
Client 1: Fit round
Epoch 1/3
Epoch 2/3
Epoch 3/3
Client 2: Fit round
Epoch 1/3
Epoch 2/3
Epoch 3/3
Client 3: Fit round
Epoch 1/3
Epoch 2/3
Epoch 3/3
Weights aggreated
Client 1: Evaluation round
Client 2: Evaluation round
Client 3: Evaluation round
Evaluation Aggregated
Round 3
Client 1: Fit round
Epoch 1/3
Epoch 2/3
Epoch 3/3
Client 2: Fit round
Epoch 1/3
Epoch 2/3
Epoch 3/3
Client 3: Fit round
Epoch 1/3
Epoch 2/3
Epoch 3/3
Weights aggreated
Client 1: Evaluation round
Client 2: Evaluation round
Client 3: Evaluation round
Evaluation Aggregated
Round 4
Client 1: Fit round
Epoch 1/3
Epoch 2/3
Epoch 3/3
Client 2: Fit round
Epoch 1/3
Epoch 2/3
Epoch 3/3
Client 3: Fit round
Epoch 1/3
Epoch 2/3
Epoch 3/3
Weigh