<a href="https://colab.research.google.com/github/marioschuele/masterarbeit/blob/main/Federated.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install tensorflow
!pip install tensorflow-federated==0.39

In [None]:
import collections
import numpy as np
import tensorflow as tf
import tensorflow_federated as tff
import keras
import pandas as pd
import matplotlib.pyplot as plt
import os
from tensorflow.keras.callbacks import TensorBoard

%load_ext tensorboard

np.random.seed(0)

In [None]:
! pip install kaggle -q

#Download and unzip Kaggle dataset

! mkdir ~/.kaggle
! cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json

!kaggle datasets download yuweisunut/sidd-segmented-intrusion-detection-dataset
!unzip -q sidd-segmented-intrusion-detection-dataset

In [None]:
# Tunable settings for the tf.data input pipeline built in `preprocess`.
# NOTE(review): NUM_CLIENTS is not referenced by any cell visible here;
# the client list is derived from the data instead - confirm it is still needed.
NUM_CLIENTS = 15
# Number of times each client dataset is repeated per round.
NUM_EPOCHS = 1
# Number of examples per batch.
BATCH_SIZE = 32
# Size of the shuffle buffer.
SHUFFLE_BUFFER = 100
# Let tf.data pick the prefetch depth.
PREFETCH_BUFFER = tf.data.AUTOTUNE

In [None]:
# Walk SIDD/<client>/pcap/<subdir>/dataset/<benign|malicious>/<image> and build
# `imgs`, a dict keyed by a running `uid` with one record per image file.
# Only sub-directories whose name ends in '1' are indexed.
directory = 'SIDD'
client_id = 0
uid = 0
imgs = {}

# os.listdir returns entries in arbitrary order; sorting keeps the
# client_id / uid assignment identical from run to run.
for client in sorted(os.listdir(directory)):
  pcap_path = f'{directory}/{client}/pcap'
  if not os.path.isdir(pcap_path):
    # Stray entries (e.g. hidden files) are not clients; skip them without
    # consuming a client id.
    continue

  for subdir in sorted(os.listdir(pcap_path)):
    curr_type = subdir[-1:]
    if curr_type != str(1):
      continue

    dataset_path = f'{pcap_path}/{subdir}/dataset'
    if not os.path.isdir(dataset_path):
      continue

    for dayscen in sorted(os.listdir(dataset_path)):
      # Benign images get label '0', malicious ones the sub-directory's type.
      if dayscen == 'benign':
        label = str(0)
      elif dayscen == 'malicious':
        label = str(curr_type)
      else:
        # Unknown folder/file: nothing to index, and no uid is consumed.
        continue

      curr_path = f'{dataset_path}/{dayscen}'
      for img in sorted(os.listdir(curr_path)):
        imgs[uid] = {'id': uid, 'client_id': client_id, 'label': label, 'fn': img, 'path': curr_path + '/' + img}
        uid += 1
  client_id += 1

In [None]:
# One row per indexed image; the string labels are converted to integers.
img_df = (
    pd.DataFrame.from_dict(imgs, orient='index')
    .astype({'label': int})
)
#img_df.loc[img_df.index[(img_df['label']==3)],'label'] = 2

In [None]:
def _parse_function(filename, y):
    """Read a JPEG file and return it as a float32 image together with its label.

    Args:
        filename: path of the JPEG file to read.
        y: label, passed through unchanged.

    Returns:
        Tuple of (decoded 3-channel image cast to float32, y).
    """
    raw_bytes = tf.io.read_file(filename)
    pixels = tf.image.decode_jpeg(raw_bytes, channels=3)
    return tf.cast(pixels, tf.float32), y

In [None]:
# img_df is filled client by client, so its rows are ordered by client_id.
# Shuffle once with a fixed seed before splitting; otherwise take/skip would
# leave the last clients without training data and the first clients without
# test data.
shuffled_df = img_df.sample(frac=1, random_state=0).reset_index(drop=True)

file_paths = shuffled_df.path
file_labels = shuffled_df["label"]
client_ids = shuffled_df["client_id"]

X = file_paths
y = file_labels
# Each element is (image path, label, client id); images are loaded lazily later.
client_ds = tf.data.Dataset.from_tensor_slices((X, y, client_ids))
test_size = 0.2
train_size = 1 - test_size
ds_length = tf.data.experimental.cardinality(client_ds).numpy()
num_test_samples = int(test_size * ds_length)
# Explicit integer counts: take/skip previously received a float product.
num_train_samples = int(ds_length) - num_test_samples

train_ds = client_ds.take(num_train_samples)
test_ds = client_ds.skip(num_train_samples)

In [None]:
def preprocess(dataset):
  """Repeat, shuffle, batch and prefetch a client dataset.

  Uses the notebook-level NUM_EPOCHS, SHUFFLE_BUFFER, BATCH_SIZE and
  PREFETCH_BUFFER settings; shuffling uses a fixed seed of 1.
  """
  repeated = dataset.repeat(NUM_EPOCHS)
  shuffled = repeated.shuffle(SHUFFLE_BUFFER, seed=1)
  batched = shuffled.batch(BATCH_SIZE)
  return batched.prefetch(PREFETCH_BUFFER)

def make_federated_data(client_data, client_ids):
    """Build one preprocessed dataset per client id.

    Args:
        client_data: dataset passed to `create_tf_dataset_for_client`.
        client_ids: iterable of client ids to build datasets for.

    Returns:
        List of preprocessed datasets, in the order of `client_ids`.
    """
    federated = []
    for cid in client_ids:
        per_client = create_tf_dataset_for_client(client_data, cid)
        federated.append(preprocess(per_client))
    return federated

In [None]:
def create_tf_dataset_for_client(dataset, id_value):
    """Return the (image, label) dataset belonging to a single client.

    Args:
        dataset: dataset whose elements are (image path, label, client id).
        id_value: client id to select.

    Returns:
        Dataset of (float32 image, label) pairs for that client. Images are
        decoded lazily when the dataset is iterated.
    """
    # Keep only the elements of the requested client.
    client_only = dataset.filter(
        lambda path, label, client_id: tf.equal(client_id, id_value))

    # Drop the client id; it is not a model input.
    paths_and_labels = client_only.map(lambda path, label, client_id: (path, label))

    # Reuse the notebook's JPEG loader instead of a second, identical copy.
    return paths_and_labels.map(_parse_function)


In [None]:
# Distinct client ids present in the image index.
clients = img_df["client_id"].unique()
# One preprocessed dataset per client id, for the train and the test split.
federated_train_data = make_federated_data(train_ds, clients)
federated_test_data = make_federated_data(test_ds, clients)

# Quick sanity check of what was built.
print(f'Number of client datasets: {len(federated_train_data)}')
print(f'First dataset: {federated_train_data[0]}')

In [None]:
def model_fn():
  """Build a fresh, untrained EfficientNetB3 classifier wrapped for TFF.

  Returns:
    The result of `tff.learning.from_keras_model` for a two-class model,
    using the element spec of the first federated training dataset as input
    spec.
  """
  # We _must_ create a new model here, and _not_ capture it from an external
  # scope. TFF will call this within different graph contexts.
  keras_model = tf.keras.applications.EfficientNetB3(
    include_top=True,
    weights=None,
    input_tensor=None,
    # NOTE(review): with input_shape=None Keras uses the architecture's default
    # input size - confirm the dataset images match it.
    input_shape=None,
    pooling=None,
    classes=2,
    # SparseCategoricalCrossentropy (from_logits=False) expects a probability
    # distribution over the classes, which softmax produces; independent
    # sigmoid outputs do not sum to 1.
    classifier_activation="softmax"
    )

  return tff.learning.from_keras_model(
      keras_model,
      input_spec=federated_train_data[0].element_spec,
      loss=tf.keras.losses.SparseCategoricalCrossentropy(),
      metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])

In [None]:
def _client_optimizer():
    """Optimizer factory for the client side (SGD, learning rate 0.02)."""
    return tf.keras.optimizers.SGD(learning_rate=0.02)


def _server_optimizer():
    """Optimizer factory for the server side (SGD, learning rate 1.0)."""
    return tf.keras.optimizers.SGD(learning_rate=1.0)


# Unweighted federated averaging built from `model_fn` and the two factories.
training_process = tff.learning.algorithms.build_unweighted_fed_avg(
    model_fn,
    client_optimizer_fn=_client_optimizer,
    server_optimizer_fn=_server_optimizer)

# Federated evaluation computation for the same model definition.
eval_process = tff.learning.build_federated_evaluation(model_fn)

In [None]:
# Initial state of the federated training process.
train_state = training_process.initialize()
# Empty frames that the training cell fills with per-round metrics.
eval_results_df = pd.DataFrame()
train_results_df = pd.DataFrame()

In [None]:
# Run NUM_ROUNDS rounds of federated training, evaluating after every round.
# Per-round metrics go to TensorBoard (under `log_dir`) and are appended to
# `train_results_df` / `eval_results_df`.
log_dir = 'logs/fit_federated/'
summary_writer = tf.summary.create_file_writer(log_dir)
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

NUM_ROUNDS = 30

# Metrics are buffered in plain lists and converted to DataFrames once:
# DataFrame.append was removed in pandas 2.0 and copies the frame on every call.
train_records = []
eval_records = []

try:
    with summary_writer.as_default():

        for round_num in range(NUM_ROUNDS):
          step = round_num + 1

          # Training of actual model
          result = training_process.next(train_state, federated_train_data)
          train_state = result.state
          train_metrics = result.metrics

          accuracy = train_metrics['client_work']['train']['sparse_categorical_accuracy']
          loss = train_metrics['client_work']['train']['loss']

          train_records.append({'Round': step, 'Accuracy': accuracy, 'Loss': loss})
          # Actually write the scalars; without this the log directory stays empty.
          tf.summary.scalar('train/accuracy', accuracy, step=step)
          tf.summary.scalar('train/loss', loss, step=step)
          print('round {:2d}, metrics={}'.format(round_num, train_metrics))

          # Evaluate model
          model_weights = training_process.get_model_weights(train_state)
          test_metrics = eval_process(model_weights, federated_test_data)['eval']
          print(test_metrics)
          eval_records.append({'Round': step, **test_metrics})
          # NOTE(review): assumes every evaluation metric is a scalar - confirm.
          for metric_name, metric_value in test_metrics.items():
            tf.summary.scalar(f'eval/{metric_name}', metric_value, step=step)
finally:
    # Keep the rounds completed so far even if training is interrupted, and
    # extend (rather than replace) results from an earlier run of this cell.
    train_results_df = pd.concat(
        [train_results_df, pd.DataFrame(train_records)], ignore_index=True)
    eval_results_df = pd.concat(
        [eval_results_df, pd.DataFrame(eval_records)], ignore_index=True)


callbacks = [TensorBoard(log_dir=log_dir)]

In [None]:
%tensorboard --logdir logs/fit/

In [None]:
# Show the first five rounds of evaluation metrics, then of training metrics.
for results_frame in (eval_results_df, train_results_df):
    print(results_frame.head(5))

In [None]:
# Give the evaluation columns the same names as the training columns.
eval_results_df = eval_results_df.rename(columns={'sparse_categorical_accuracy': 'Accuracy', 'loss': 'Loss'})

# One figure per metric, train vs. test. The recorded 1-based 'Round' column is
# used for the x axis so that it matches the 'Round' axis label (the row index
# is 0-based).
for metric in ('Accuracy', 'Loss'):
    fig, ax = plt.subplots()
    ax.plot(train_results_df['Round'], train_results_df[metric])
    ax.plot(eval_results_df['Round'], eval_results_df[metric])
    ax.set_title(f'Model {metric}')
    ax.set_ylabel(metric)
    ax.set_xlabel('Round')
    ax.legend(['train', 'test'], loc='upper left')
    ax.set_xticks(train_results_df['Round'])
    plt.show()

In [None]:
%tensorboard --logdir {logdir}