# Federated Learning Experiments

## Imports

In [None]:
# std
from pathlib import Path
import datetime
import pickle
import math
import os


# libs
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import keras

#local
from data import *
from model import build_gru_model
from config import ExpConfig

SAVE_FIGURES = False

In [None]:
# Cap the amount of memory TensorFlow may allocate on the first visible GPU.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
  # Restrict TensorFlow to only allocate 2GB (memory_limit=2048, given in MB) of memory on the first GPU
  try:
    tf.config.set_logical_device_configuration(
        gpus[0],
        [tf.config.LogicalDeviceConfiguration(memory_limit=2048)])
    logical_gpus = tf.config.list_logical_devices('GPU')
    print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
  except RuntimeError as e:
    # Virtual devices must be set before GPUs have been initialized
    print(e)

## Retrieve Trained Models and Histories

In [None]:
def path_to_row(path):
    """Split a saved-artifact file name into the fields of one table row.

    The file name (last path component) is split on "_". The last token is
    further split on "."; its first piece closes the index tuple and its
    second piece is the file type. The client and round counts are read from
    fixed positions counted from the end of the name.

    Illustrative example (hypothetical name):
    ``fl_regular_c_3_r_10_noniid.weights.h5`` ->
    ``(("fl", "regular", "noniid"), "weights", path, "3", "10")``.

    Returns:
        tuple: ``(index_tuple, file_type, path, n_clients, n_rounds)``;
        the counts are returned as strings.
    """
    tokens = path.parts[-1].split("_")
    tail_pieces = tokens[-1].split(".")

    # Everything before the last five tokens, plus the stem of the last token.
    key = tuple(tokens[:-5] + [tail_pieces[0]])

    return (key, tail_pieces[1], path, tokens[-4], tokens[-2])

# One (index, file_type, path, n_clients, n_rounds) row per non-directory entry of SAVE_DIR whose name starts with "fl".
whole = [path_to_row(path) for path in ExpConfig.SAVE_DIR.iterdir() if not path.is_dir() and path.parts[-1].startswith("fl")]
# Unique index tuples, ordered by their second element.
multiindex = sorted(set([_[0] for _ in whole]), key=lambda a: a[1])

# One table row per index tuple; the cells are filled from `whole` below.
models_df = pd.DataFrame(index=pd.MultiIndex.from_tuples(multiindex), columns=["weights", "history", "n_clients", "n_rounds"])
for row in whole:
    # row[1] (the file type) names the column that receives the file path row[2].
    # NOTE(review): later cells read `.resources` and `.lclmetrics`, which are not listed in
    # `columns` above; presumably those columns get created by this assignment when files of
    # that type exist — confirm.
    models_df.loc[row[0], row[1]] = row[2]
    models_df.loc[row[0], "n_clients"] = row[3]
    models_df.loc[row[0], "n_rounds"] = row[4]

models_df.sort_index(inplace=True)
models_df

## Data Visualization

### Initial Data Format

In [None]:
# For every training column, draw the first 200 samples of the three frames
# (labelled "real", "cleaned" and "cleaned(vertical)") in one figure.
for col_name in df_train.columns:
    fig = plt.figure(figsize=(21, 4))
    first_samples = slice(None, 200)
    plt.plot(data_train_df[col_name][first_samples], 'k:', label=f'real - {col_name}')
    plt.plot(df_train[col_name][first_samples], label=f'cleaned - {col_name}')
    plt.plot(df_train_v[col_name][first_samples], label=f'cleaned(vertical) - {col_name}')
    plt.legend()
    plt.show()

### Data Partitions

#### Non-IID Partitions

In [None]:
def plot_hist_partition(prefix, orig, cols, c1, c2):
    """Plot each feature of a partition as a line chart and as a histogram.

    For every index ``i`` along the last axis of ``orig`` one row of two
    subplots is drawn: on the left the values ``orig[:, 0, i]`` as a line,
    on the right their histogram with a KDE overlay.

    Args:
        prefix: Text used as the figure title and as the prefix of every
            subplot title.
        orig: 3-D array; the size of its last axis determines the number of
            subplot rows.
        cols: Feature names; ``cols[i]`` titles row ``i``.
            NOTE(review): assumes ``cols`` is ordered like the last axis of
            ``orig`` — confirm against the data loader.
        c1: Line colour of the left-hand plots.
        c2: Colour of the right-hand histograms.
    """
    n_of_prop = orig.shape[2]

    # squeeze=False keeps `axs` two-dimensional even when there is one feature.
    fig, axs = plt.subplots(n_of_prop, 2, figsize=(18, 4 * n_of_prop), squeeze=False)
    fig.suptitle(prefix)

    for i in range(n_of_prop):
        feature_values = orig[:, :, i][:, 0]
        # Title with the i-th column name (was cols[i-1], which labelled the
        # first feature with the last column name).
        title = f"{prefix} - {cols[i]}"

        axs[i, 0].plot(feature_values, label='origin', color=c1)
        axs[i, 0].set_title(title)

        sns.histplot(feature_values, kde=True, ax=axs[i, 1], color=c2)
        axs[i, 1].set_title(title)

    plt.tight_layout()
    plt.show()

# Split the training data into 3 non-IID partitions and plot the first element of each one.
noniid_partitions = split_art_data_noniid(3, train_data=network_data_train)

for idx, partition in enumerate(noniid_partitions):
    plot_hist_partition(f'train - partition {idx}', partition[0], list(df_train.columns), 'C3', 'C0')

#### IID Partitions

In [None]:
# Split the training data into 3 IID partitions and plot the first element of each one.
iid_partitions = split_art_data_iid(3, train_data=network_data_train)

for idx, partition in enumerate(iid_partitions):
    plot_hist_partition(f'train - partition {idx}', partition[0], list(df_train.columns), 'C3', 'C0')

#### Vertical Partitions

In [None]:
def plot_vertical_partitions(partitions, columns):
    """Draw a line plot and a histogram for each vertical (per-feature) partition.

    Partitions and column names are paired positionally. For each pair the
    first 200 entries of the partition's second element are plotted.

    Args:
        partitions: Iterable of indexable partitions; element ``[1]`` of each
            one is the plotted data.
        columns: Names used for the figure and subplot titles.
    """
    fig_size = (18, 2 * 2)
    for feature_partition, name in zip(partitions, columns):
        head = feature_partition[1][:200]

        fig, (line_ax, hist_ax) = plt.subplots(1, 2, figsize=fig_size)
        fig.suptitle(name)

        line_ax.plot(head, label='origin', color='C3')
        line_ax.set_title(f"Feature Visualisation - {name}")

        sns.histplot(head, kde=True, ax=hist_ax, color='C0')
        hist_ax.set_title(f"Feature Distribution - {name}")


# The per-feature training data doubles as the set of vertical partitions.
vertical_partitions = network_feature_data_train
plot_vertical_partitions(vertical_partitions, data_train_df.columns)    

## Regular Trainings and Inferences
 - Showcase of regular training on each data partition type
 - visualize training
 - add results to the metrics table

In [None]:
# Shared hyper-parameters passed as `epochs` / `batch_size` to every regular `fit` call below.
cfg_num_epochs = 10
cfg_batch_size = 1

In [None]:
from mods_utils import mape, smape, r2, rmse, cosine

def compute_metrics(y_true, y_pred):
    """Evaluate the `mods_utils` error measures for a truth/prediction pair.

    pandas inputs are converted to their underlying arrays, and 1-D arrays
    obtained that way are reshaped into a single column.

    Args:
        y_true: Ground-truth values (pandas object or array-like).
        y_pred: Predicted values (pandas object or array-like).

    Returns:
        dict: Keys ``mods_mape``, ``mods_smape``, ``mods_r2``, ``mods_rmse``
        and ``mods_cosine``. The dict is empty when ``y_true`` has at most one
        entry or the two inputs differ in length.
    """
    if len(y_true) <= 1 or len(y_true) != len(y_pred):
        return {}

    def _as_column_values(data):
        # Only pandas containers are unwrapped; other inputs pass through untouched.
        if isinstance(data, (pd.DataFrame, pd.Series)):
            data = data.values
            if len(data.shape) == 1:
                data = data.reshape(-1, 1)
        return data

    y_true = _as_column_values(y_true)
    y_pred = _as_column_values(y_pred)

    metric_fns = (
        ('mods_mape', mape),
        ('mods_smape', smape),
        ('mods_r2', r2),
        ('mods_rmse', rmse),
        ('mods_cosine', cosine),
    )
    return {key: metric(y_true, y_pred) for key, metric in metric_fns}

def default_metrics(test_data, model):
    """Return the model's own evaluation metrics without the loss entry.

    Args:
        test_data: Indexable pair; ``test_data[0]`` are the inputs and
            ``test_data[1]`` the targets handed to ``model.evaluate``.
        model: Object exposing ``evaluate(x, y, batch_size=..., return_dict=...)``.

    Returns:
        dict: The dict returned by ``model.evaluate`` minus the ``"loss"`` key.
    """
    inputs, targets = test_data[0], test_data[1]
    metrics = model.evaluate(inputs, targets, batch_size=1, return_dict=True)
    del metrics["loss"]
    return metrics


def add_metrics(existing_df, new_metrics, model_name):
    """Append the metrics of one model to the metrics table.

    The new values are stored under the two index entries
    ``(model_name, "http")`` and ``(model_name, "ssl")``.

    Args:
        existing_df: Metrics table collected so far.
        new_metrics: Mapping of metric name to value.
        model_name: First level of the index for the appended rows.

    Returns:
        pandas.DataFrame: ``existing_df`` followed by the new rows.
    """
    row_index = pd.MultiIndex.from_product([[model_name], ["http", "ssl"]])
    appended_rows = pd.DataFrame(new_metrics, index=row_index)
    return pd.concat([existing_df, appended_rows])

### Regular Over Whole Dataset

In [None]:
# Baseline: one GRU model fitted on the complete (unpartitioned) training data.
model_whole_name = "regular_complete"
model_whole = build_gru_model(model_whole_name, ExpConfig.MULTIVARIATE, ExpConfig.SEQUENCE_LEN)

# The test data is passed as validation data, so `val_loss` below is computed on the test data.
history_whole = model_whole.fit(
    network_data_train[0],
    network_data_train[1],
    epochs=cfg_num_epochs,
    batch_size=cfg_batch_size,
    verbose=1,
    validation_data=network_data_test
)

# Plot Train/Val loss
plt.plot(history_whole.history['loss'])
plt.plot(history_whole.history['val_loss'])
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper right')


In [None]:
# Predict the test inputs with the model trained on the complete dataset.
pred = pd.DataFrame(
    model_whole.predict(network_data_test[0], batch_size=1),
    columns=data_train_df.columns,
)

# forecast steps: index the predictions with the test index from position SEQUENCE_LEN onward
pred.set_index(data_test_df.index[ExpConfig.SEQUENCE_LEN:], inplace=True)

# One subplot per predicted column, limited to a 200-sample window.
n_cols = len(pred.columns)
fig, axs = plt.subplots(n_cols, 1, figsize=(21, 4 * n_cols))
for i, col in enumerate(pred.columns):
    axs[i].plot(data_test_df[col][ExpConfig.SEQUENCE_LEN:200+ExpConfig.SEQUENCE_LEN], 'k:', label=f'real - {col}')
    axs[i].plot(df_test[col][ExpConfig.SEQUENCE_LEN:200+ExpConfig.SEQUENCE_LEN], label=f'cleaned - {col}')
    axs[i].plot(pred[col][:200], label=f'pred - {col}')
    axs[i].legend()
fig.tight_layout()
plt.show()

# metrics: Keras evaluation results merged with the custom error measures
all_metrics_whole = default_metrics(network_data_test, model_whole)
custom_com_metrics = compute_metrics(data_test_df[ExpConfig.SEQUENCE_LEN:], pred)
all_metrics_whole.update(custom_com_metrics)

# Construct Metrics Table
metrics_index = pd.MultiIndex.from_product([[model_whole_name], ["http", "ssl"]])
metrics_df = pd.DataFrame(all_metrics_whole, index=metrics_index)
metrics_df

### Regular over Non-IID Partition

In [None]:
# Regular training on the first non-IID partition only.
model_noniid_one_part_name = "regular_one_part_noniid"

model_noniid_op = build_gru_model(model_noniid_one_part_name, ExpConfig.MULTIVARIATE, ExpConfig.SEQUENCE_LEN)

# The test data is passed as validation data, so `val_loss` below is computed on the test data.
history_noniid_op = model_noniid_op.fit(
    noniid_partitions[0][0],
    noniid_partitions[0][1],
    epochs=cfg_num_epochs,
    batch_size=cfg_batch_size,
    verbose=1,
    validation_data=network_data_test
)
# Plot Train/Val loss
plt.plot(history_noniid_op.history['loss'])
plt.plot(history_noniid_op.history['val_loss'])
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper right')

In [None]:
# Predict the test inputs with the model trained on one non-IID partition.
pred = pd.DataFrame(
    model_noniid_op.predict(network_data_test[0], batch_size=1),
    columns=data_train_df.columns,
)

# forecast steps: index the predictions with the test index from position SEQUENCE_LEN onward
pred.set_index(data_test_df.index[ExpConfig.SEQUENCE_LEN:], inplace=True)

# One subplot per predicted column, limited to a 200-sample window.
n_cols = len(pred.columns)
fig, axs = plt.subplots(n_cols, 1, figsize=(21, 4 * n_cols))
for i, col in enumerate(pred.columns):
    axs[i].plot(data_test_df[col][ExpConfig.SEQUENCE_LEN:200+ExpConfig.SEQUENCE_LEN], 'k:', label=f'real - {col}')
    axs[i].plot(df_test[col][ExpConfig.SEQUENCE_LEN:200+ExpConfig.SEQUENCE_LEN], label=f'cleaned - {col}')
    axs[i].plot(pred[col][:200], label=f'pred - {col}')
    axs[i].legend()
fig.tight_layout()
plt.show()

# metrics: Keras evaluation results merged with the custom error measures
all_metrics_onep_noniid = default_metrics(network_data_test, model_noniid_op)
custom_com_metrics = compute_metrics(data_test_df[ExpConfig.SEQUENCE_LEN:], pred)
all_metrics_onep_noniid.update(custom_com_metrics)

# Construct Metrics Table
metrics_df = add_metrics(metrics_df, all_metrics_onep_noniid, model_noniid_one_part_name)

### Regular over IID Partition

In [None]:
# Regular training on the first IID partition only.
model_iid_one_part = "regular_one_part_iid"

model_iid_op = build_gru_model(model_iid_one_part, ExpConfig.MULTIVARIATE, ExpConfig.SEQUENCE_LEN)

# The test data is passed as validation data, so `val_loss` below is computed on the test data.
history_iid_op = model_iid_op.fit(
    iid_partitions[0][0],
    iid_partitions[0][1],
    epochs=cfg_num_epochs,
    batch_size=cfg_batch_size,
    verbose=1,
    validation_data=network_data_test
)
# Plot Train/Val loss
plt.plot(history_iid_op.history['loss'])
plt.plot(history_iid_op.history['val_loss'])
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper right')

In [None]:
# Predict the test inputs with the model trained on one IID partition.
pred = pd.DataFrame(
    model_iid_op.predict(network_data_test[0], batch_size=1),
    columns=data_train_df.columns,
)

# forecast steps: index the predictions with the test index from position SEQUENCE_LEN onward
pred.set_index(data_test_df.index[ExpConfig.SEQUENCE_LEN:], inplace=True)

# One subplot per predicted column, limited to a 200-sample window.
n_cols = len(pred.columns)
fig, axs = plt.subplots(n_cols, 1, figsize=(21, 4 * n_cols))
for i, col in enumerate(pred.columns):
    axs[i].plot(data_test_df[col][ExpConfig.SEQUENCE_LEN:200+ExpConfig.SEQUENCE_LEN], 'k:', label=f'real - {col}')
    axs[i].plot(df_test[col][ExpConfig.SEQUENCE_LEN:200+ExpConfig.SEQUENCE_LEN], label=f'cleaned - {col}')
    axs[i].plot(pred[col][:200], label=f'pred - {col}')
    axs[i].legend()
fig.tight_layout()
plt.show()

# metrics: Keras evaluation results merged with the custom error measures
all_metrics_onep_iid = default_metrics(network_data_test, model_iid_op)
custom_com_metrics = compute_metrics(data_test_df[ExpConfig.SEQUENCE_LEN:], pred)
all_metrics_onep_iid.update(custom_com_metrics)

metrics_df = add_metrics(metrics_df, all_metrics_onep_iid, model_iid_one_part)

### Regular over Vertical Partition
Train on HTTP, Evaluate on both (HTTP and SSL)

In [None]:
# Single-feature model fitted on the first vertical partition only.
model_vertical_http_name = "regular_one_part_only_http"
model_vertical_http = build_gru_model(model_vertical_http_name, 1, ExpConfig.SEQUENCE_LEN)

# Validation uses the first entry of the per-feature test data.
history_vertical_op = model_vertical_http.fit(
    vertical_partitions[0][0],
    vertical_partitions[0][1],
    epochs=cfg_num_epochs,
    batch_size=cfg_batch_size,
    verbose=1,
    validation_data=network_feature_data_test[0]
)

# Plot Train/Val loss
plt.plot(history_vertical_op.history['loss'])
plt.plot(history_vertical_op.history['val_loss'])
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper right')

In [None]:
# Run the single-feature model on each feature's test inputs separately and
# collect both predictions in one frame (one column per feature).
pred_http = model_vertical_http.predict(network_feature_data_test[0][0], batch_size=1)
pred_ssl = model_vertical_http.predict(network_feature_data_test[1][0], batch_size=1)
http_col, ssl_col = data_test_df.columns[0], data_test_df.columns[1]
pred = pd.DataFrame({http_col: pred_http[:, 0], ssl_col: pred_ssl[:, 0]})


# forecast steps: index the predictions with the test index from position SEQUENCE_LEN onward
pred.set_index(data_test_df.index[ExpConfig.SEQUENCE_LEN:], inplace=True)

fig, axs = plt.subplots(len(pred.columns), 1, figsize=(21, 4 * len(pred.columns)))
i = 0
for col in pred.columns:
    axs[i].plot(data_test_df[col][ExpConfig.SEQUENCE_LEN:200 + ExpConfig.SEQUENCE_LEN], 'k:', label='real - %s' % col)
    axs[i].plot(df_test_v[col][ExpConfig.SEQUENCE_LEN:200 + ExpConfig.SEQUENCE_LEN], label='cleaned - %s' % col)
    axs[i].plot(pred[col][:200], label='pred - %s' % col)
    axs[i].legend()
    i += 1
fig.tight_layout()
plt.show()

# metrics
# test on HTTP
all_metrics_onep_vert = default_metrics(network_feature_data_test[0], model_vertical_http)
# Compare the HTTP series with the HTTP prediction column only. Passing the whole
# two-column `pred` frame (as before) mixed the SSL predictions into the HTTP scores.
custom_com_metrics = compute_metrics(df_test_v["http_count_uid_in"][ExpConfig.SEQUENCE_LEN:], pred[http_col])  # dont forget to transform back
all_metrics_onep_vert.update(**custom_com_metrics)

metrics_df = pd.concat([metrics_df, pd.DataFrame(all_metrics_onep_vert, index=pd.MultiIndex.from_tuples(((model_vertical_http_name, "http"),)))])

# test on SSL
all_metrics_onep_vert = default_metrics(network_feature_data_test[1], model_vertical_http)
# Likewise, score the SSL series against the SSL prediction column only.
custom_com_metrics = compute_metrics(df_test_v["ssl_count_uid_in"][ExpConfig.SEQUENCE_LEN:], pred[ssl_col])  # dont forget to transform back
all_metrics_onep_vert.update(**custom_com_metrics)


metrics_df = pd.concat([metrics_df, pd.DataFrame(all_metrics_onep_vert, index=pd.MultiIndex.from_tuples(((model_vertical_http_name, "ssl"),)))])
metrics_df

## Federated Learning Training and Inferences 
 - Run the `run.sh` script with corresponding settings
 - Load the history from pickle
 - Load the model weights to model
 - Evaluate the results
 - Add to results metrics table

In [None]:
def visualize_fl_training(history, partition_style,  n_clients, n_rounds):
    """Plot centralized and distributed MSE/MAE of a federated run per round.

    Args:
        history: Object with ``metrics_centralized`` and
            ``metrics_distributed`` mappings; each used entry is a sequence
            of ``(round, value)`` pairs.
        partition_style: Label of the data partitioning, shown in the title.
        n_clients: Number of clients, shown in the title.
        n_rounds: Number of rounds, shown in the title.
    """
    mse = history.metrics_centralized["mean_squared_error"]
    mse_d = history.metrics_distributed["aggregated_mse"]
    mae = history.metrics_centralized["mean_absolute_error"]
    # NOTE(review): "aggregated_mea" presumably mirrors the key written by the
    # training run — confirm before renaming it to "aggregated_mae".
    mae_d = history.metrics_distributed["aggregated_mea"]

    rounds, mses = list(zip(*mse))
    _, mses_d = list(zip(*mse_d))
    _, maes = list(zip(*mae))
    _, maes_d = list(zip(*mae_d))

    # The first centralized entry is skipped; the distributed series are
    # plotted in full against the remaining rounds (assumes they hold one
    # entry fewer than the centralized ones — TODO confirm).
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(21, 5))
    ax1.plot(rounds[1:], mses[1:], color="blue")
    ax1.plot(rounds[1:], mses_d, color="orange")
    ax2.plot(rounds[1:], maes[1:], color="blue")
    ax2.plot(rounds[1:], maes_d, color="orange")
    ax1.grid()
    ax2.grid()
    ax1.set(xlabel='Round', ylabel='Mean Squared Error(MSE)')
    ax2.set(xlabel='Rounds', ylabel='Mean Absolute Error(MAE)')
    fig.suptitle(f"NAF - {partition_style}; {n_clients} clients; {n_rounds} rounds;")
    # Lay out before showing: calling tight_layout() after show() no longer
    # affects the figure that was just displayed.
    fig.tight_layout()
    plt.show()


def visualize_per_client_losses(metrics_path: Path, model_name: str):
    """Plot the per-client MSE and MAE curves stored in a CSV file.

    The CSV must contain a ``METRIC`` column (rows with ``"mse"`` / ``"mae"``
    are used) and a ``ROUND`` column; every remaining column is drawn as one
    curve labelled with its column name.

    Args:
        metrics_path: Path of the CSV file.
        model_name: Model name shown in the figure title.
    """
    metrics = pd.read_csv(metrics_path)
    metrics_mse = metrics[metrics["METRIC"] == "mse"]
    metrics_mae = metrics[metrics["METRIC"] == "mae"]
    # The rounds of the "mse" rows serve as x-axis for both subplots.
    rounds = metrics_mse["ROUND"]

    metrics_mse = metrics_mse.drop(["ROUND", "METRIC"], axis=1)
    metrics_mae = metrics_mae.drop(["ROUND", "METRIC"], axis=1)

    # plot per client losses
    fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(21, 5))

    for ax, frame, y_label in ((axes[0], metrics_mse, 'MSE'), (axes[1], metrics_mae, 'MAE')):
        for col_name in frame.columns:
            ax.plot(rounds, frame[col_name], label=col_name)
        ax.set_xlabel('Round')
        ax.set_ylabel(y_label)
        ax.set_xticks(rounds)
        ax.legend()

    fig.suptitle(f'Client Based Metrics for {model_name}', fontsize=14)
    # Lay out before showing: calling tight_layout() after show() no longer
    # affects the figure that was just displayed.
    fig.tight_layout()
    plt.show()


def predict_and_visualize(model, tsg_test, data_test_df, model_name, batch_size=1):
    """Predict with ``model`` and plot the predictions per column.

    Each column gets its own subplot showing a 200-sample window of
    ``data_test_df`` ("real"), of the notebook-level ``df_test`` frame
    ("cleaned") and of the predictions ("pred").

    Args:
        model: Object exposing ``predict(x, batch_size=...)``.
        tsg_test: Inputs handed to ``model.predict``.
        data_test_df: Test frame; provides the column names and, from
            position ``ExpConfig.SEQUENCE_LEN`` onward, the index of the
            predictions.
        model_name: Name shown in the figure title.
        batch_size: Batch size used for prediction.

    Returns:
        pandas.DataFrame: The predictions, indexed like the test data.
    """
    seq_len = ExpConfig.SEQUENCE_LEN

    # make predictions
    predictions = model.predict(tsg_test, batch_size=batch_size)
    pred_df = pd.DataFrame(predictions, columns=data_test_df.columns)

    # add forecast steps
    pred_df.set_index(data_test_df.index[seq_len:], inplace=True)

    # plot; squeeze=False keeps `axs` an array even for a single column,
    # where plain indexing of a bare Axes object would fail.
    n_cols = len(pred_df.columns)
    fig, axs = plt.subplots(n_cols, 1, figsize=(21, 5 * n_cols), squeeze=False)
    for ax, col in zip(axs[:, 0], pred_df.columns):
        ax.plot(data_test_df[col][seq_len:200 + seq_len], 'k:', label='real - %s' % col)
        # NOTE: `df_test` is a notebook-level global, not a parameter.
        ax.plot(df_test[col].iloc[seq_len:200 + seq_len], label='cleaned - %s' % col)
        ax.plot(pred_df[col][:200], label='pred - %s' % col)
        ax.legend()

    fig.suptitle(f'Predictions for the {model_name}', fontsize=14)
    fig.tight_layout()
    plt.show()
    return pred_df

def read_history_pickle(path: str):
    """Load a pickled history object from ``path``.

    Args:
        path: Location of the pickle file.

    Returns:
        The unpickled object, or ``None`` when unpickling hits the end of the
        file (``EOFError``), e.g. for an empty file.
    """
    # NOTE: unpickling can execute arbitrary code; only load files you trust.
    history = None
    with open(path, "rb") as openfile:
        try:
            history = pickle.load(openfile)
        except EOFError:
            # Deliberately best-effort: the caller receives None.
            history = None
    return history


### FL over Non-IID Partitions

In [None]:
noniid_model_name = "fl_noniid"

# Look up the stored artefact paths of the regular FL run on non-IID partitions.
noniid_model = models_df.loc[("fl", "regular", "noniid")]
noniid_model_weights = noniid_model.weights.iloc[0]
noniid_model_history = noniid_model.history.iloc[0]
noniid_model_resources = noniid_model.resources.iloc[0]
noniid_model_cmetrics = noniid_model.lclmetrics.iloc[0]

# Training curves; the client and round counts shown in the title are hard-coded here.
noniid_history = read_history_pickle(noniid_model_history)
visualize_fl_training(noniid_history, "NON-IID", 3, 10)


visualize_per_client_losses(noniid_model_cmetrics, noniid_model_name)

# From here on `noniid_model` is rebound from the table selection to the rebuilt model with loaded weights.
noniid_model = build_gru_model(noniid_model_name, 2, ExpConfig.SEQUENCE_LEN)
noniid_model.load_weights(noniid_model_weights)

pred_noniid_df = predict_and_visualize(noniid_model, network_data_test[0], data_test_df, model_name=noniid_model_name)  

# metrics
all_metrics_fl_noniid = default_metrics(network_data_test, noniid_model)
custom_metrics_noniid = compute_metrics(data_test_df[ExpConfig.SEQUENCE_LEN:] , pred_noniid_df) # dont forget to transform back
all_metrics_fl_noniid.update(**custom_metrics_noniid)

metrics_df = add_metrics(metrics_df, all_metrics_fl_noniid, noniid_model_name)

#### Visualization Resource Usage

In [None]:
def read_metrics_df(path):
    """Read a tab-separated metrics file and parse its ``Time`` column.

    Args:
        path: Location of the tab-separated file; it must contain a ``Time``
            column formatted as ``%Y-%m-%dT%H:%M:%S``.

    Returns:
        pandas.DataFrame: The file content with ``Time`` converted to datetimes.
    """
    time_format = "%Y-%m-%dT%H:%M:%S"
    resources = pd.read_csv(path, sep="\t")
    resources["Time"] = pd.to_datetime(resources["Time"], format=time_format)
    return resources

def plot_resource_usage(resources_df, model_name: str):
    """Plot CPU, memory and GPU load over time, one curve per process.

    Rows are grouped by the ``PID%`` column. Each group is drawn into three
    stacked subplots (``CPU``, ``MEM(MB)``, ``GPU_Load(%)``) and labelled
    with the last path component of its ``Command`` value plus the group key.

    Args:
        resources_df: Frame with the columns ``PID%``, ``Time``, ``Command``,
            ``CPU``, ``MEM(MB)`` and ``GPU_Load(%)``.
        model_name: Name shown in the figure title.
    """
    tracked_columns = ("CPU", "MEM(MB)", "GPU_Load(%)")

    fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(21, 15))
    for process_id, group in resources_df.groupby("PID%"):
        group = group.set_index("Time")
        process_name = f"{group.iloc[0].Command.split('/')[-1]}-{process_id}"

        for ax, column in zip(axes, tracked_columns):
            ax.plot(group[column], label=process_name)
            ax.set_xlabel('Time')
            ax.set_ylabel(column)
            ax.legend()

    fig.suptitle(f'Client Based Metrics for {model_name}', fontsize=14)
    # Lay out before showing: calling tight_layout() after show() no longer
    # affects the figure that was just displayed.
    fig.tight_layout()
    plt.show()

# Resource usage recorded for the non-IID FL run.
noniid_metrics = read_metrics_df(noniid_model_resources)
plot_resource_usage(noniid_metrics, noniid_model_name)

### FL over IID Partitions

In [None]:
iid_model_name = "fl_iid"
# Look up the stored artefact paths of the regular FL run on IID partitions.
iid_model = models_df.loc[("fl", "regular", "iid")]
iid_model_weights = iid_model.weights.iloc[0]
iid_model_history = iid_model.history.iloc[0]
iid_model_resources = iid_model.resources.iloc[0]
iid_model_cmetrics = iid_model.lclmetrics.iloc[0]

# Training curves; the client and round counts shown in the title are hard-coded here.
iid_history = read_history_pickle(iid_model_history)
visualize_fl_training(iid_history, "IID", 3, 10)

visualize_per_client_losses(iid_model_cmetrics, iid_model_name)

# From here on `iid_model` is rebound from the table selection to the rebuilt model with loaded weights.
iid_model = build_gru_model(iid_model_name, 2, ExpConfig.SEQUENCE_LEN)
iid_model.load_weights(iid_model_weights)

pred_iid_df = predict_and_visualize(iid_model, network_data_test[0], data_test_df, iid_model_name) 

# Resource usage recorded for this run.
iid_metrics = read_metrics_df(iid_model_resources)
plot_resource_usage(iid_metrics, iid_model_name)


# metrics
all_metrics_fl_iid = default_metrics( network_data_test, iid_model)
custom_metrics_iid = compute_metrics(data_test_df[ExpConfig.SEQUENCE_LEN:] , pred_iid_df) # dont forget to transform back
all_metrics_fl_iid.update(**custom_metrics_iid)

metrics_df = add_metrics(metrics_df, all_metrics_fl_iid, iid_model_name)

### FL over Vertical Partitions

In [None]:
def predict_and_visualize_v(model, tsg_test, real, decomposed, col_name, batch_size=1):
    """Predict a single feature and plot it against two reference series.

    Args:
        model: Object exposing ``predict(x, batch_size=...)``.
        tsg_test: Inputs handed to ``model.predict``.
        real: Series plotted as "real"; its index from position
            ``ExpConfig.SEQUENCE_LEN`` onward becomes the prediction index.
        decomposed: Series plotted as "cleaned".
        col_name: Feature name used in the legend and the title.
        batch_size: Batch size used for prediction.

    Returns:
        pandas.Series: The flattened predictions.
    """
    seq_len = ExpConfig.SEQUENCE_LEN
    window = slice(seq_len, 200 + seq_len)

    raw_pred = model.predict(tsg_test, batch_size=batch_size)
    pred_s = pd.Series(raw_pred.flatten(), index=real.index[seq_len:])

    # Show a 200-sample window of every series in one figure.
    plt.figure(figsize=(21, 4))
    plt.plot(real[window], 'k:', label=f'real - {col_name}')
    plt.plot(decomposed[window], label=f'cleaned - {col_name}')
    plt.plot(pred_s[:200], label=f'pred - {col_name}')
    plt.legend()
    plt.title(f"`Vertical` model on {col_name}")
    plt.show()

    return pred_s
    
vertical_model_name = "fl_vertical"
# Look up the stored artefact paths of the regular FL run on vertical partitions.
vertical_model = models_df.loc[("fl", "regular", "vertical")]
vertical_model_weights = vertical_model.weights.iloc[0]
vertical_model_history = vertical_model.history.iloc[0]
vertical_model_resources = vertical_model.resources.iloc[0]
vertical_model_cmetrics = vertical_model.lclmetrics.iloc[0]

# Training curves; the client and round counts shown in the title are hard-coded here.
vertical_history = read_history_pickle(vertical_model_history)
visualize_fl_training(vertical_history, "Vertical", 3, 10)

visualize_per_client_losses(vertical_model_cmetrics, vertical_model_name)


# From here on `vertical_model` is rebound from the table selection to the rebuilt single-feature model.
vertical_model = build_gru_model(vertical_model_name, 1, ExpConfig.SEQUENCE_LEN)
vertical_model.load_weights(vertical_model_weights)


# The same model is applied to each feature's test inputs in turn.
pred_fl_http_s = predict_and_visualize_v(
    vertical_model, 
    network_feature_data_test[0][0], 
    data_test_df["http_count_uid_in"], 
    df_test_v["http_count_uid_in"], 
    "http_count_uid_in"
)
pred_fl_ssl_s = predict_and_visualize_v(
    vertical_model, 
    network_feature_data_test[1][0], 
    data_test_df["ssl_count_uid_in"], 
    df_test_v["ssl_count_uid_in"], 
    "ssl_count_uid_in"
)

# metrics (HTTP): each feature gets its own single-row entry in the metrics table
all_metrics_fl_http = default_metrics(network_feature_data_test[0], vertical_model)
custom_metrics_fl_http = compute_metrics(df_test_v["http_count_uid_in"].iloc[ExpConfig.SEQUENCE_LEN:], pred_fl_http_s) # dont forget to transform back
all_metrics_fl_http.update(**custom_metrics_fl_http)


metrics_df = pd.concat([metrics_df, pd.DataFrame(all_metrics_fl_http, index=pd.MultiIndex.from_tuples(((vertical_model_name, "http"),)))])

# metrics (SSL)
all_metrics_fl_ssl = default_metrics(network_feature_data_test[1], vertical_model)
custom_metrics_fl_ssl = compute_metrics(df_test_v["ssl_count_uid_in"].iloc[ExpConfig.SEQUENCE_LEN:],pred_fl_ssl_s) # dont forget to transform back
all_metrics_fl_ssl.update(**custom_metrics_fl_ssl)

metrics_df = pd.concat([metrics_df, pd.DataFrame(all_metrics_fl_ssl, index=pd.MultiIndex.from_tuples(((vertical_model_name, "ssl"),)))])
metrics_df

# Resource usage recorded for this run.
vertical_metrics = read_metrics_df(vertical_model_resources)
plot_resource_usage(vertical_metrics, vertical_model_name)



## Federated Learning with SecAgg+ Protocol

### SecFL over Non-IID Partitions

In [None]:
sec_noniid_model_name = "sec_fl_noniid"

# Look up the stored artefact paths of the secure FL run on non-IID partitions.
sec_noniid_model = models_df.loc[("fl", "secure", "noniid")]
sec_noniid_model_weights = sec_noniid_model.weights.iloc[0]
sec_noniid_model_history = sec_noniid_model.history.iloc[0]
sec_noniid_model_resources = sec_noniid_model.resources.iloc[0]
sec_noniid_model_cmetrics = sec_noniid_model.lclmetrics.iloc[0]

# Training curves; the client and round counts shown in the title are hard-coded here.
sec_noniid_history = read_history_pickle(sec_noniid_model_history)
visualize_fl_training(sec_noniid_history, "SEC-NON-IID", 3, 10)

visualize_per_client_losses(sec_noniid_model_cmetrics, sec_noniid_model_name)

# From here on `sec_noniid_model` is rebound from the table selection to the rebuilt model with loaded weights.
sec_noniid_model = build_gru_model(sec_noniid_model_name, 2, ExpConfig.SEQUENCE_LEN)
sec_noniid_model.load_weights(sec_noniid_model_weights)

pred_sec_noniid_df = predict_and_visualize(sec_noniid_model, network_data_test[0], data_test_df, sec_noniid_model_name)  

# metrics
all_metrics_fl_sec_noniid = default_metrics(network_data_test, sec_noniid_model)
custom_metrics_sec_noniid = compute_metrics(data_test_df[ExpConfig.SEQUENCE_LEN:] , pred_sec_noniid_df) # dont forget to transform back
all_metrics_fl_sec_noniid.update(**custom_metrics_sec_noniid)

metrics_df = add_metrics(metrics_df, all_metrics_fl_sec_noniid, sec_noniid_model_name)

# Resource usage recorded for this run.
sec_noniid_metrics = read_metrics_df(sec_noniid_model_resources)
plot_resource_usage(sec_noniid_metrics, sec_noniid_model_name)


### SecFL over IID Partitions

In [None]:
sec_iid_model_name = "sec_fl_iid"

# Look up the stored artefact paths of the secure FL run on IID partitions.
sec_iid_model = models_df.loc[("fl", "secure", "iid")]
sec_iid_model_weights = sec_iid_model.weights.iloc[0]
sec_iid_model_history = sec_iid_model.history.iloc[0]
sec_iid_model_resources = sec_iid_model.resources.iloc[0]
sec_iid_model_cmetrics = sec_iid_model.lclmetrics.iloc[0]

# Training curves. The title is tagged "SEC-IID" (was "IID") so the plot can be
# told apart from the non-secure IID run, matching the "SEC-NON-IID" tag used
# for the secure non-IID run. Client and round counts are hard-coded here.
sec_iid_history = read_history_pickle(sec_iid_model_history)
visualize_fl_training(sec_iid_history, "SEC-IID", 3, 10)

visualize_per_client_losses(sec_iid_model_cmetrics, sec_iid_model_name)

# From here on `sec_iid_model` is rebound from the table selection to the rebuilt model with loaded weights.
sec_iid_model = build_gru_model(sec_iid_model_name, 2, ExpConfig.SEQUENCE_LEN)
sec_iid_model.load_weights(sec_iid_model_weights)

pred_sec_iid_df = predict_and_visualize(sec_iid_model, network_data_test[0], data_test_df, sec_iid_model_name)


# metrics
all_metrics_fl_sec_iid = default_metrics(network_data_test, sec_iid_model)
custom_metrics_sec_iid = compute_metrics(data_test_df[ExpConfig.SEQUENCE_LEN:], pred_sec_iid_df)  # dont forget to transform back
all_metrics_fl_sec_iid.update(**custom_metrics_sec_iid)

metrics_df = add_metrics(metrics_df, all_metrics_fl_sec_iid, sec_iid_model_name)

# Resource usage recorded for this run.
sec_iid_metrics = read_metrics_df(sec_iid_model_resources)
plot_resource_usage(sec_iid_metrics, sec_iid_model_name)

### SecFL over Vertical Partitions

In [None]:
sec_vertical_model_name = "sec_fl_vertical"
# Look up the stored artefact paths of the secure FL run on vertical partitions.
sec_vertical_model = models_df.loc[("fl", "secure", "vertical")]
sec_vertical_model_weights = sec_vertical_model.weights.iloc[0]
sec_vertical_model_history = sec_vertical_model.history.iloc[0]
sec_vertical_model_resources = sec_vertical_model.resources.iloc[0]
sec_vertical_model_cmetrics = sec_vertical_model.lclmetrics.iloc[0]

# Training curves. The title is tagged "SEC-Vertical" (was "Vertical") so the
# plot can be told apart from the non-secure vertical run, matching the
# "SEC-NON-IID" tag used for the secure non-IID run. Client and round counts
# are hard-coded here.
sec_vertical_history = read_history_pickle(sec_vertical_model_history)
visualize_fl_training(sec_vertical_history, "SEC-Vertical", 3, 10)

visualize_per_client_losses(sec_vertical_model_cmetrics, sec_vertical_model_name)

# From here on `sec_vertical_model` is rebound from the table selection to the rebuilt single-feature model.
sec_vertical_model = build_gru_model(sec_vertical_model_name, 1, ExpConfig.SEQUENCE_LEN)
sec_vertical_model.load_weights(sec_vertical_model_weights)


# The same model is applied to each feature's test inputs in turn.
pred_fl_http_sec = predict_and_visualize_v(
    sec_vertical_model,
    network_feature_data_test[0][0],
    data_test_df["http_count_uid_in"],
    df_test_v["http_count_uid_in"],
    "http_count_uid_in"
)
pred_fl_ssl_sec = predict_and_visualize_v(
    sec_vertical_model,
    network_feature_data_test[1][0],
    data_test_df["ssl_count_uid_in"],
    df_test_v["ssl_count_uid_in"],
    "ssl_count_uid_in"
)

# metrics (HTTP): each feature gets its own single-row entry in the metrics table
all_metrics_fl_http_sec = default_metrics(network_feature_data_test[0], sec_vertical_model)
custom_metrics_fl_http_sec = compute_metrics(df_test_v["http_count_uid_in"].iloc[ExpConfig.SEQUENCE_LEN:], pred_fl_http_sec)
all_metrics_fl_http_sec.update(**custom_metrics_fl_http_sec)

metrics_df = pd.concat([metrics_df, pd.DataFrame(all_metrics_fl_http_sec, index=pd.MultiIndex.from_tuples(((sec_vertical_model_name, "http"),)))])


# metrics (SSL)
all_metrics_fl_ssl_sec = default_metrics(network_feature_data_test[1], sec_vertical_model)
custom_metrics_fl_ssl_sec = compute_metrics(df_test_v["ssl_count_uid_in"].iloc[ExpConfig.SEQUENCE_LEN:], pred_fl_ssl_sec)
all_metrics_fl_ssl_sec.update(**custom_metrics_fl_ssl_sec)

metrics_df = pd.concat([metrics_df, pd.DataFrame(all_metrics_fl_ssl_sec, index=pd.MultiIndex.from_tuples(((sec_vertical_model_name, "ssl"),)))])


# Resource usage recorded for this run.
sec_vertical_metrics = read_metrics_df(sec_vertical_model_resources)
plot_resource_usage(sec_vertical_metrics, sec_vertical_model_name)

### Conclusion

In [None]:
# Display the metrics table accumulated by the cells above.
metrics_df

In [None]:
# print(metrics_df.to_latex())

## Other Visualizations

In [None]:
# Directory used when saving figures (see SAVE_FIGURES); built relative to the current working directory.
viz_path = Path().resolve().joinpath("../../src/visualization")

### Custom Combined Predictions

#### NonIID vs IID

In [None]:
import plotly.graph_objects as go


def custom_predict_and_visualize_plotly(models: list, tsg_test, data_test_df, batch_size=1):
    """Compare the predictions of several models in interactive Plotly figures.

    One figure is created and shown per column. Each figure contains the
    corresponding column of the notebook-level ``df_test`` frame (from
    position ``ExpConfig.SEQUENCE_LEN`` onward, drawn as a dashed grey
    "real" trace) plus one prediction trace per model.

    Args:
        models: Models exposing ``name`` and ``predict(x, batch_size=...)``.
        tsg_test: Inputs handed to every model's ``predict``.
        data_test_df: Test frame; provides the column names and, from
            position ``ExpConfig.SEQUENCE_LEN`` onward, the prediction index.
        batch_size: Batch size used for prediction.

    Returns:
        The figure of the last column, or ``None`` when there are no columns.
    """
    offset = ExpConfig.SEQUENCE_LEN
    forecast_index = data_test_df.index[offset:]

    raw_predictions = {model.name: model.predict(tsg_test, batch_size=batch_size) for model in models}
    pred_dfs = {
        model_name: pd.DataFrame(values, columns=data_test_df.columns, index=forecast_index)
        for model_name, values in raw_predictions.items()
    }
    f_model_name = models[0].name

    fig = None
    for col in pred_dfs[f_model_name].columns:
        # A fresh figure per column. This statement used to sit at the indent
        # of the `for` line, leaving the loop without a body (IndentationError).
        fig = go.Figure()
        to_plot = df_test[col].iloc[offset:]
        fig.add_trace(go.Scatter(y=to_plot.values, x=to_plot.index, name=f'real - {col}', line=dict(color='grey', width=2, dash='dash')))
        for model_name, model_preds in pred_dfs.items():
            to_plot = model_preds[col]
            fig.add_trace(go.Scatter(y=to_plot.values, x=to_plot.index, name=f'pred - {col} - {model_name} Model', line=dict(width=2)))

        fig.show()
    return fig


# Interactive comparison of the FL models trained on non-IID and on IID partitions.
fig_1 = custom_predict_and_visualize_plotly([noniid_model, iid_model], network_data_test[0], data_test_df) 

In [None]:
def custom_predict_and_visualize(models: list, tsg_test, data_test_df, batch_size=1):
    """Plot the predictions of several models side by side with matplotlib.

    Each column gets one subplot showing a 100-sample window that starts 200
    samples into the predictions: the matching slice of the notebook-level
    ``df_test`` frame (dotted, "real") and one curve per model.

    Args:
        models: Models exposing ``name`` and ``predict(x, batch_size=...)``.
        tsg_test: Inputs handed to every model's ``predict``.
        data_test_df: Test frame; provides the column names and, from
            position ``ExpConfig.SEQUENCE_LEN`` onward, the prediction index.
        batch_size: Batch size used for prediction.

    Returns:
        tuple: ``(fig, predictions)`` where ``predictions`` maps each model
        name to the raw output of its ``predict`` call.
    """
    predictions = {model.name: model.predict(tsg_test, batch_size=batch_size) for model in models}
    forecast_index = data_test_df.index[ExpConfig.SEQUENCE_LEN:]
    pred_dfs = {
        model_name: pd.DataFrame(values, columns=data_test_df.columns, index=forecast_index)
        for model_name, values in predictions.items()
    }
    columns = pred_dfs[models[0].name].columns
    n_rows = len(columns)

    interval_to_plot = 100
    offset = ExpConfig.SEQUENCE_LEN

    # plot; squeeze=False keeps `axs` an array even for a single column,
    # where plain indexing of a bare Axes object would fail.
    fig, axs = plt.subplots(n_rows, 1, figsize=(15, 5 * n_rows), squeeze=False)
    for ax, col in zip(axs[:, 0], columns):
        # `df_test` is offset by SEQUENCE_LEN so that it lines up with the predictions.
        ax.plot(df_test[col].iloc[200 + offset: 200 + interval_to_plot + offset], 'k:', label=f'real - {col}')
        for model_name, preds in pred_dfs.items():
            ax.plot(preds[col][200: 200 + interval_to_plot], label=f'pred - {col} - {model_name} Model')
        ax.legend(loc='upper right')
        ax.grid(which='both', color='lightgrey', alpha=0.5)

    fig.tight_layout()
    plt.show()
    return fig, predictions


# Compare the FL models trained on non-IID and on IID partitions; the figure is written to disk only when SAVE_FIGURES is set.
fig_1, noniid_vs_iid_predictions = custom_predict_and_visualize([noniid_model, iid_model], network_data_test[0], data_test_df) 
if SAVE_FIGURES:
    fig_1.savefig(viz_path.joinpath("predictions_noniid_vs_iid.png"))

#### Regular NonIID vs Sec NonIID

In [None]:
# Compare the regular and the secure FL model trained on non-IID partitions; saved only when SAVE_FIGURES is set.
fig_2, noniid_vs_secnoniid_predictions = custom_predict_and_visualize([noniid_model, sec_noniid_model], network_data_test[0], data_test_df) 
if SAVE_FIGURES:
    fig_2.savefig(viz_path.joinpath("predictions_noniid_vs_secnoniid_section_2_small.png"))



In [None]:
# Raw predictions of both models, looked up by model name.
noniid_pred = noniid_vs_secnoniid_predictions["fl_noniid"]
sec_noniid_pred = noniid_vs_secnoniid_predictions["sec_fl_noniid"]

def percentual_difference(init_seq, final_seq):
    """Return the element-wise percentage change from ``init_seq`` to ``final_seq``.

    Every entry is ``(final - init) / abs(init) * 100``. The two sequences are
    paired with ``zip``, so the result is as long as the shorter input.
    """
    return [
        (after - before) / abs(before) * 100
        for before, after in zip(init_seq, final_seq)
    ]

# Mean percentage change of the secure NonIID predictions relative to the
# regular NonIID ones (the label previously named the wrong pair of models).
perc_increase = percentual_difference(noniid_pred, sec_noniid_pred)
display(f"Predictions (prec change) NonIID vs Sec NonIID: {sum(perc_increase)/len(perc_increase)}")

#### Regular IID and Sec IID

In [None]:
# Compare the regular IID model with its secure-aggregation counterpart.
fig_3, iid_vs_seciid_predictions = custom_predict_and_visualize([iid_model, sec_iid_model], network_data_test[0], data_test_df)
if SAVE_FIGURES:
    fig_3.savefig(viz_path.joinpath("predictions_iid_vs_seciid_section_2_small.png"))

# Mean percentage change of the secure IID predictions relative to the
# regular IID ones (the label previously named the wrong pair of models).
perc_increase = percentual_difference(iid_vs_seciid_predictions["fl_iid"], iid_vs_seciid_predictions["sec_fl_iid"])
display(f"Predictions (prec change) IID vs Sec IID: {sum(perc_increase)/len(perc_increase)}")

#### NonIID vs Full vs one NonIID Partition

In [None]:
# Three-way comparison (per the section heading: federated NonIID, full model,
# single NonIID partition). Note that `fig_3` is rebound here.
fig_3, noniid_vs_full_vs_onenoniid_predictions = custom_predict_and_visualize([noniid_model, model_whole, model_noniid_op], network_data_test[0], data_test_df)
if SAVE_FIGURES:
    fig_3.savefig(viz_path.joinpath("predictions_flnoniid_vs_full_vs_onenoniid_section_2.png"))

In [None]:
# Raw predictions of each model, looked up by model name.
noniid_pred = noniid_vs_full_vs_onenoniid_predictions["fl_noniid"]
full_pred = noniid_vs_full_vs_onenoniid_predictions["regular_complete"]
onenoniid_pred = noniid_vs_full_vs_onenoniid_predictions["regular_one_part_noniid"]

# Mean percentage change of the federated NonIID predictions relative to each baseline.
pred_perc1 = percentual_difference(full_pred, noniid_pred)
display(f"Predictions (prec change) NonIID vs Full Model: {sum(pred_perc1)/len(pred_perc1)}")
pred_perc1 = percentual_difference(onenoniid_pred, noniid_pred)
display(f"Predictions (prec change) NonIID vs One NonIID Part: {sum(pred_perc1)/len(pred_perc1)}",)

# Mean signed difference (baseline minus federated NonIID).
diff1 = full_pred - noniid_pred
display(sum(diff1) / len(diff1))
diff1 = onenoniid_pred - noniid_pred
display(sum(diff1) / len(diff1))


#### IID vs Full vs IID Partition

In [None]:
# Three-way comparison (per the section heading: federated IID, single IID
# partition, full model). Note that `fig_3` is rebound here.
fig_3, iid_vs_full_vs_oneiid_predictions = custom_predict_and_visualize([iid_model, model_iid_op, model_whole,], network_data_test[0], data_test_df) 
if SAVE_FIGURES:
    fig_3.savefig(viz_path.joinpath("predictions_fliid_vs_oneiid_vs_full_section_2.png"))

In [None]:
# Raw predictions of each model, looked up by model name.
iid_pred = iid_vs_full_vs_oneiid_predictions["fl_iid"]
# Take the full-model predictions from this comparison's own results instead
# of relying on a `full_pred` left behind by the NonIID cell.
full_pred = iid_vs_full_vs_oneiid_predictions["regular_complete"]
oneiid_pred = iid_vs_full_vs_oneiid_predictions["regular_one_part_iid"]

# Mean percentage change of the federated IID predictions relative to each
# baseline (the labels previously said "NonIID").
pred_perc1 = percentual_difference(full_pred, iid_pred)
display(f"Predictions (prec change) IID vs Full Model: {sum(pred_perc1)/len(pred_perc1)}")
pred_perc1 = percentual_difference(oneiid_pred, iid_pred)
display(f"Predictions (prec change) IID vs One IID Part: {sum(pred_perc1)/len(pred_perc1)}",)


# Mean signed difference (baseline minus federated IID).
diff = full_pred - iid_pred
display(sum(diff) / len(diff))
diff = oneiid_pred - iid_pred
display(sum(diff) / len(diff))

#### Regular Whole vs NonIID Partition vs IID Partitions

In [None]:
# custom_predict_and_visualize returns (fig, predictions); unpack both so that
# `fig_3` is the figure and `savefig` works.
fig_3, whole_vs_partitions_predictions = custom_predict_and_visualize([model_whole, model_noniid_op, model_iid_op], network_data_test[0], data_test_df)
if SAVE_FIGURES:
    fig_3.savefig(viz_path.joinpath("predictions_whole_vs_oneiidop_vs_iidop_section_2.png"))

#### Regular Vert vs Sec Vert

In [None]:
def custom_predict_and_visualize_v(models: list, tsg_test, decomposed):
    """Predict on two test inputs per model and plot a window of the results.

    Args:
        models: Objects exposing ``.name`` and ``.predict``.
        tsg_test: Indexable input; ``tsg_test[0][0]`` and ``tsg_test[1][0]``
            are the two inputs predicted on (named http and ssl here).
        decomposed: DataFrame whose columns are plotted as the reference
            series and whose index (minus the first ``ExpConfig.SEQUENCE_LEN``
            entries) labels the predictions. The i-th column is plotted
            against the i-th prediction series, so the column order is assumed
            to match (http, ssl) - TODO confirm with the caller.

    Returns:
        ``(fig, predictions)``: the matplotlib figure and a dict mapping each
        model name to a two-element list of prediction Series.
    """
    tsg_test_http, tsg_test_ssl = tsg_test[0][0], tsg_test[1][0]
    pred_index = decomposed.index[ExpConfig.SEQUENCE_LEN:]

    predictions = {
        model.name: [
            pd.Series(model.predict(tsg, batch_size=1).flatten(), index=pred_index)
            for tsg in (tsg_test_http, tsg_test_ssl)
        ]
        for model in models
    }

    plot_start = 200
    interval_to_plot = 100
    # Predictions start SEQUENCE_LEN rows after the reference data.
    offset = ExpConfig.SEQUENCE_LEN

    n_rows = len(decomposed.columns)
    # squeeze=False keeps `axs` two-dimensional, so a single-column frame works too.
    fig, axs = plt.subplots(n_rows, 1, figsize=(8, 3 * n_rows), dpi=150, squeeze=False)
    for i, (ax, col) in enumerate(zip(axs[:, 0], decomposed.columns)):
        ax.plot(
            decomposed[col][plot_start + offset : plot_start + offset + interval_to_plot],
            'k:',
            label=f'cleaned - {col}',
        )
        for model_name, preds in predictions.items():
            ax.plot(
                preds[i][plot_start : plot_start + interval_to_plot],
                label=f'pred - {col} - {model_name} Model',
            )
        ax.legend()
    fig.tight_layout()
    plt.show()
    return fig, predictions



# Compare the regular and secure vertical models; `fl_pred` maps each model
# name to its list of prediction Series.
fig_4, fl_pred = custom_predict_and_visualize_v(
    [vertical_model, sec_vertical_model],
    network_feature_data_test, 
    df_test_v
)

if SAVE_FIGURES:
    fig_4.savefig(viz_path.joinpath("predictions_flvertsec_vs_flvert_section_2.png"))

In [None]:
# Mean percentage change of the secure vertical predictions relative to the
# regular ones, for the first prediction series ...
http_perc = percentual_difference(fl_pred["fl_vertical"][0], fl_pred["sec_fl_vertical"][0])
display(sum(http_perc)/len(http_perc))


# ... and for the second prediction series.
ssl_perc = percentual_difference(fl_pred["fl_vertical"][1], fl_pred["sec_fl_vertical"][1])
display(sum(ssl_perc)/len(ssl_perc))

In [None]:
# Shape check of the two operands subtracted in the next cell
# (`pred_http` is defined elsewhere in the notebook).
pred_http.shape,  fl_pred["fl_vertical"][0].to_numpy().reshape(-1, 1).shape

In [None]:
# Mean signed difference between `pred_http` / `pred_ssl` (defined elsewhere
# in the notebook) and the regular vertical predictions, which are reshaped to
# a column vector before subtracting.

# prec_diff = percentual_difference(pred_http, fl_pred["fl_vertical"][0])
# sum(prec_diff)/len(prec_diff)
diff = pred_http - fl_pred["fl_vertical"][0].to_numpy().reshape(-1, 1)
display(f"Diff HTTP {sum(diff)/len(diff)}")
diff = pred_ssl - fl_pred["fl_vertical"][1].to_numpy().reshape(-1, 1)
display(f"Diff SSL {sum(diff)/len(diff)}")


### Custom History

#### Baseline IID vs NonIID

In [None]:
# Training vs validation loss for `history_noniid_op` and `history_iid_op`:
# one colour per history, dashed lines for validation.
fig1, ax1 = plt.subplots(figsize=(6, 3))
ax1.plot(history_noniid_op.history['loss'],  label="NonIID Train Loss", color="blue")
ax1.plot(history_noniid_op.history['val_loss'], linestyle="--",label="NonIID Validation Loss", color="blue")
ax1.plot(history_iid_op.history['loss'], label="IID Train Loss", color="orange")
ax1.plot(history_iid_op.history['val_loss'],  linestyle="dashed", label="IID Validation Loss", color="orange" )
ax1.legend()
ax1.grid()
# Ticks are fixed to 0..9 regardless of how many entries the histories hold.
ax1.set_xticks(list(range(0, 10)))
ax1.set(xlabel='Epochs', ylabel='Mean Squared Error(MSE)')
fig1.tight_layout()
if SAVE_FIGURES:
    fig1.savefig(viz_path.joinpath("mse_iid_noniid_regular.png"))

#### NonIID vs IID

In [None]:
# NOTE: rebinding the flag here turns figure saving on for every cell run afterwards.
SAVE_FIGURES = True


# Each metric is a sequence of (round, value) pairs; zip(*...) splits it into
# a tuple of rounds and a tuple of values.
noniid_mse = noniid_history.metrics_centralized["mean_squared_error"]
noniid_mse_d = noniid_history.metrics_distributed["aggregated_mse"]
noniid_mae = noniid_history.metrics_centralized["mean_absolute_error"]
noniid_mae_d = noniid_history.metrics_distributed["aggregated_mea"]

rounds, noniid_mses = list(zip(*noniid_mse))
_, noniid_mses_d = list(zip(*noniid_mse_d))
_, noniid_maes = list(zip(*noniid_mae))
_, noniid_maes_d = list(zip(*noniid_mae_d))

iid_mse = iid_history.metrics_centralized["mean_squared_error"]
iid_mse_d = iid_history.metrics_distributed["aggregated_mse"]
iid_mae = iid_history.metrics_centralized["mean_absolute_error"]
iid_mae_d = iid_history.metrics_distributed["aggregated_mea"]

rounds, iid_mses = list(zip(*iid_mse))
_, iid_mses_d = list(zip(*iid_mse_d))
_, iid_maes = list(zip(*iid_mae))
_, iid_maes_d = list(zip(*iid_mae_d))


# Convention for both figures: blue = NonIID, orange = IID,
# solid = centralized, dashed = aggregated.
# The first centralized entry is skipped so both series share `rounds[1:]`.

# MSE
fig1, ax1 = plt.subplots(figsize=(6, 3))
ax1.plot(rounds[1:], noniid_mses[1:],  label="NonIID Centralized", color="blue")
ax1.plot(rounds[1:], noniid_mses_d, linestyle="--",label="NonIID Aggregated", color="blue")
ax1.plot(rounds[1:], iid_mses[1:], label="IID Centralized", color="orange")
ax1.plot(rounds[1:], iid_mses_d,  linestyle="--", label="IID Aggregated", color="orange" )
ax1.legend()
ax1.grid(which='both', color='lightgrey', alpha=0.5)
ax1.set(xlabel='Rounds', ylabel='Mean Squared Error(MSE)')
fig1.tight_layout()
if SAVE_FIGURES:
    fig1.savefig(viz_path.joinpath("mse_iid_noniid_cent_agg.png"))
# MAE
fig2, ax2 = plt.subplots(figsize=(6, 3))
ax2.plot(rounds[1:], noniid_maes[1:], label="NonIID Centralized", color="blue")
ax2.plot(rounds[1:], noniid_maes_d, linestyle="--", label="NonIID Aggregated", color="blue")
# The IID line styles were swapped here; they now follow the convention above.
ax2.plot(rounds[1:], iid_maes[1:], label="IID Centralized", color="orange")
ax2.plot(rounds[1:], iid_maes_d, linestyle="--", label="IID Aggregated", color="orange")
ax2.legend()
ax2.grid(which='both', color='lightgrey', alpha=0.5)
ax2.set(xlabel='Rounds', ylabel='Mean Absolute Error(MAE)')
fig2.tight_layout()
if SAVE_FIGURES:
    fig2.savefig(viz_path.joinpath("mae_iid_noniid_cent_agg.png"))

##### Percentual Increase

In [None]:
# The plots pair `rounds[1:]` with the distributed values, so the centralized
# series hold one extra leading entry. Drop it before zipping; otherwise every
# distributed value is compared with the centralized value one position earlier.

# MSE NonIID
mse_perc = percentual_difference(noniid_mses_d, noniid_mses[1:])
display(f"MSE NonIID perc diff: {sum(mse_perc)/len(mse_perc)}", )

# MSE IID
iid_mse_perc = percentual_difference(iid_mses_d, iid_mses[1:])
display(f"MSE IID perc diff: {sum(iid_mse_perc)/len(iid_mse_perc)}", )

# MAE NonIID
mae_perc = percentual_difference(noniid_maes_d, noniid_maes[1:])
display(f"MAE NonIID perc diff: {sum(mae_perc)/len(mae_perc)}")

# MAE IID
iid_mae_perc = percentual_difference(iid_maes_d, iid_maes[1:])
display(f"MAE IID perc diff: {sum(iid_mae_perc)/len(iid_mae_perc)}")

#### NonIID vs SecAgg NonIID

In [None]:
# NOTE: rebinding the flag here turns figure saving on for every cell run afterwards.
SAVE_FIGURES = True

# Each metric is a sequence of (round, value) pairs; zip(*...) splits it into
# a tuple of rounds and a tuple of values.
history = noniid_history
noniid_mse = history.metrics_centralized["mean_squared_error"]
noniid_mse_d = history.metrics_distributed["aggregated_mse"]
noniid_mae = history.metrics_centralized["mean_absolute_error"]
noniid_mae_d = history.metrics_distributed["aggregated_mea"]

rounds, noniid_mses = list(zip(*noniid_mse))
_, noniid_mses_d = list(zip(*noniid_mse_d))
_, noniid_maes = list(zip(*noniid_mae))
_, noniid_maes_d = list(zip(*noniid_mae_d))

sec_noniid_mse = sec_noniid_history.metrics_centralized["mean_squared_error"]
sec_noniid_mse_d = sec_noniid_history.metrics_distributed["aggregated_mse"]
sec_noniid_mae = sec_noniid_history.metrics_centralized["mean_absolute_error"]
sec_noniid_mae_d = sec_noniid_history.metrics_distributed["aggregated_mea"]

rounds, sec_noniid_mses = list(zip(*sec_noniid_mse))
_, sec_noniid_mses_d = list(zip(*sec_noniid_mse_d))
_, sec_noniid_maes = list(zip(*sec_noniid_mae))
_, sec_noniid_maes_d = list(zip(*sec_noniid_mae_d))


# Convention for both figures: blue = regular, orange = secure,
# solid = centralized, dashed = aggregated.
# The first centralized entry is skipped so both series share `rounds[1:]`.

# MSE
fig1, ax1 = plt.subplots(figsize=(6, 3))
ax1.plot(rounds[1:], noniid_mses[1:],  label="NonIID Centralized", color="blue")
ax1.plot(rounds[1:], noniid_mses_d, linestyle="--",label="NonIID Aggregated", color="blue")
ax1.plot(rounds[1:], sec_noniid_mses[1:], label="Sec NonIID Centralized", color="orange")
ax1.plot(rounds[1:], sec_noniid_mses_d,  linestyle="--", label="Sec NonIID Aggregated", color="orange" )
ax1.legend()
ax1.grid(which='both', color='lightgrey', alpha=0.5)
ax1.set(xlabel='Rounds', ylabel='Mean Squared Error(MSE)')
fig1.tight_layout()
if SAVE_FIGURES:
    fig1.savefig(viz_path.joinpath("mse_noniid_vs_secnoniid_cent_agg.png"))
# MAE
fig2, ax2 = plt.subplots(figsize=(6, 3))
ax2.plot(rounds[1:], noniid_maes[1:], label="NonIID Centralized", color="blue")
ax2.plot(rounds[1:], noniid_maes_d, linestyle="--", label="NonIID Aggregated", color="blue")
# The secure line styles were swapped here; they now follow the convention above.
ax2.plot(rounds[1:], sec_noniid_maes[1:], label="Sec NonIID Centralized", color="orange")
ax2.plot(rounds[1:], sec_noniid_maes_d, linestyle="--", label="Sec NonIID Aggregated", color="orange")
ax2.legend()
ax2.grid(which='both', color='lightgrey', alpha=0.5)
ax2.set(xlabel='Rounds', ylabel='Mean Absolute Error(MAE)')
fig2.tight_layout()
if SAVE_FIGURES:
    fig2.savefig(viz_path.joinpath("mae_noniid_vs_secnoniid_cent_agg.png"))

In [None]:
# NonIID vs Sec NonIID
# Absolute percentage difference of the centralized MSE per entry, taken
# relative to the secure run's value.
mse_reg = [_[1] for _ in noniid_mse]
mse_sec = [_[1] for _ in sec_noniid_mse]
mse_perc = []
for mse_r, mse_s in zip(mse_reg, mse_sec):
    mse_perc.append(
        abs((mse_s - mse_r)/mse_s) * 100
    )
display(mse_perc)

#### IID vs SecAgg IID

In [None]:
# NOTE: rebinding the flag here turns figure saving on for every cell run afterwards.
SAVE_FIGURES = True

# Each metric is a sequence of (round, value) pairs; zip(*...) splits it into
# a tuple of rounds and a tuple of values.
iid_mse = iid_history.metrics_centralized["mean_squared_error"]
iid_mse_d = iid_history.metrics_distributed["aggregated_mse"]
iid_mae = iid_history.metrics_centralized["mean_absolute_error"]
iid_mae_d = iid_history.metrics_distributed["aggregated_mea"]

rounds, iid_mses = list(zip(*iid_mse))
_, iid_mses_d = list(zip(*iid_mse_d))
_, iid_maes = list(zip(*iid_mae))
_, iid_maes_d = list(zip(*iid_mae_d))

sec_iid_mse = sec_iid_history.metrics_centralized["mean_squared_error"]
sec_iid_mse_d = sec_iid_history.metrics_distributed["aggregated_mse"]
sec_iid_mae = sec_iid_history.metrics_centralized["mean_absolute_error"]
sec_iid_mae_d = sec_iid_history.metrics_distributed["aggregated_mea"]

rounds, sec_iid_mses = list(zip(*sec_iid_mse))
_, sec_iid_mses_d = list(zip(*sec_iid_mse_d))
_, sec_iid_maes = list(zip(*sec_iid_mae))
_, sec_iid_maes_d = list(zip(*sec_iid_mae_d))


# Convention for both figures: blue = regular, orange = secure,
# solid = centralized, dashed = aggregated.
# The first centralized entry is skipped so both series share `rounds[1:]`.

# MSE
fig1, ax1 = plt.subplots(figsize=(6, 3))
ax1.plot(rounds[1:], iid_mses[1:],  label="IID Centralized", color="blue")
ax1.plot(rounds[1:], iid_mses_d, linestyle="--",label="IID Aggregated", color="blue")
ax1.plot(rounds[1:], sec_iid_mses[1:], label="Sec IID Centralized", color="orange")
ax1.plot(rounds[1:], sec_iid_mses_d,  linestyle="--", label="Sec IID Aggregated", color="orange" )
ax1.legend()
ax1.grid(which='both', color='lightgrey', alpha=0.5)
ax1.set(xlabel='Rounds', ylabel='Mean Squared Error(MSE)')
fig1.tight_layout()
if SAVE_FIGURES:
    fig1.savefig(viz_path.joinpath("mse_iid_vs_seciid_cent_agg.png"))
# MAE
fig2, ax2 = plt.subplots(figsize=(6, 3))
ax2.plot(rounds[1:], iid_maes[1:], label="IID Centralized", color="blue")
ax2.plot(rounds[1:], iid_maes_d, linestyle="--", label="IID Aggregated", color="blue")
# The secure line styles were swapped here; they now follow the convention above.
ax2.plot(rounds[1:], sec_iid_maes[1:], label="Sec IID Centralized", color="orange")
ax2.plot(rounds[1:], sec_iid_maes_d, linestyle="--", label="Sec IID Aggregated", color="orange")
ax2.legend()
ax2.grid(which='both', color='lightgrey', alpha=0.5)
ax2.set(xlabel='Rounds', ylabel='Mean Absolute Error(MAE)')
fig2.tight_layout()
if SAVE_FIGURES:
    fig2.savefig(viz_path.joinpath("mae_iid_vs_seciid_cent_agg.png"))

In [None]:
# IID vs Sec IID
# Absolute percentage difference of the centralized MSE per entry, taken
# relative to the secure run's value. This cell previously read the NonIID
# histories, so it repeated the NonIID result.
mse_reg = [_[1] for _ in iid_mse]
mse_sec = [_[1] for _ in sec_iid_mse]
mse_perc = []
for mse_r, mse_s in zip(mse_reg, mse_sec):
    mse_perc.append(
        abs((mse_s - mse_r)/mse_s) * 100
    )
display(mse_perc)

#### Regular IID/NonIID vs Secure IID/NonIID (Combo)

In [None]:
# Centralized MSE of all four horizontal runs in one figure.
noniid_mse = noniid_history.metrics_centralized["mean_squared_error"]
sec_noniid_mse = sec_noniid_history.metrics_centralized["mean_squared_error"]
iid_mse = iid_history.metrics_centralized["mean_squared_error"]
sec_iid_mse = sec_iid_history.metrics_centralized["mean_squared_error"]

# Split each sequence of (round, value) pairs into rounds and values.
rounds, noniid_mses = list(zip(*noniid_mse))
_, sec_noniid_mses = list(zip(*sec_noniid_mse))
_, iid_mses = list(zip(*iid_mse))
_, sec_iid_mses = list(zip(*sec_iid_mse))

# blue = NonIID, orange = IID; solid = regular, dashed = secure.
# The first entry of every series is left out of the plot.
fig2, ax2 = plt.subplots(figsize=(6, 3))
ax2.plot(rounds[1:], noniid_mses[1:], label="Regular NonIID", color="blue")
ax2.plot(rounds[1:], sec_noniid_mses[1:], linestyle="--", label="Secure NonIID", color="blue")
ax2.plot(rounds[1:], iid_mses[1:], label="Regular IID ", color="orange")
ax2.plot(rounds[1:], sec_iid_mses[1:], linestyle="--", label="Secure IID", color="orange")
ax2.legend()
ax2.grid(which='both', color='lightgrey', alpha=0.5)
ax2.set(xlabel='Rounds', ylabel='Mean Squared Error(MSE)')
fig2.tight_layout()

if SAVE_FIGURES:
    fig2.savefig(viz_path.joinpath("mse_combo_horizontal_regular_vs_secured_cent_agg.png"))


#### Vertical vs Sec Agg Vertical

In [None]:
# Centralized and distributed MSE of the regular vertical run, split from
# (round, value) pairs into rounds and values.
vert_mse = vertical_history.metrics_centralized["mean_squared_error"]
vert_mse_d = vertical_history.metrics_distributed["aggregated_mse"]

rounds, vert_mses = list(zip(*vert_mse))
_, vert_mses_d = list(zip(*vert_mse_d))


# Same for the secure vertical run.
sec_vert_mse = sec_vertical_history.metrics_centralized["mean_squared_error"]
sec_vert_mse_d = sec_vertical_history.metrics_distributed["aggregated_mse"]

rounds, sec_vert_mses = list(zip(*sec_vert_mse))
_, sec_vert_mses_d = list(zip(*sec_vert_mse_d))

# Only the centralized series are plotted; the first entry is left out.
fig1, ax1 = plt.subplots(figsize=(6, 3))
ax1.plot(rounds[1:], vert_mses[1:],  label="Regular Vertical", color="blue")
ax1.plot(rounds[1:], sec_vert_mses[1:], label="Secure Vertical", color="orange")
ax1.legend()
ax1.grid(which='both', color='lightgrey', alpha=0.5)
ax1.set(xlabel='Rounds', ylabel='Mean Squared Error(MSE)')
fig1.tight_layout()
if SAVE_FIGURES:
    fig1.savefig(viz_path.joinpath("mse_vertical_secvertical_cent_agg.png"))

# No MAE figure is drawn for the vertical runs: this cell extracts no MAE series.

In [None]:
# Vertical vs Sec Vertical
# `vert_mses` / `sec_vert_mses` already hold plain values (unzipped in the
# previous cell), so no `[1]` extraction is needed here.
mse_perc = []
for mse_r, mse_s in zip(vert_mses, sec_vert_mses):
    # Absolute percentage difference relative to the secure run's value.
    mse_perc.append(
        abs((mse_s - mse_r)/mse_s) * 100
    )
display(mse_perc)
# Mean over all entries; shown as the cell's output.
sum(mse_perc)/len(mse_perc)

### Per Client Metrics

In [None]:
# Create new indexes
# Start from the index of `models_df` without its last level, relabel level 0
# as "mse" and "mae", and stack both variants into one three-level row index.
old_index = models_df.index.droplevel(level=3)
old_index = old_index.set_levels(["mse"], level=0)
old_index2 = old_index.set_levels(["mae"], level=0)
new_index = pd.concat([old_index.to_frame(), old_index2.to_frame()])
new_index.columns = ["metric", "agg_type", "part_type"]
new_index = pd.MultiIndex.from_frame(new_index)
# Columns: one (client, round) pair for three clients and rounds 0..9 (hard-coded).
new_columns = pd.MultiIndex.from_product([["CLIENT_0","CLIENT_1", "CLIENT_2"],  list(range(0,10))])

client_metrics_df = pd.DataFrame(index=new_index, columns=new_columns)

# Populate table
for idx, row in models_df.iterrows():
    # Each row is expected to point at a CSV with METRIC and ROUND columns
    # plus the per-client value columns.
    metrics = pd.read_csv(row["lclmetrics"])
    metrics_mse = metrics[metrics["METRIC"] == "mse"]
    metrics_mae = metrics[metrics["METRIC"] == "mae"]
    # NOTE: this rebinds the notebook-level name `rounds`.
    rounds = metrics[metrics["METRIC"] == "mse"]["ROUND"]
    
    metrics_mse = metrics_mse.drop(["METRIC", "ROUND"], axis=1)
    metrics_mae = metrics_mae.drop(["METRIC", "ROUND"], axis=1)

    
    
    # Both frames are indexed by the rounds of the "mse" rows.
    metrics_mse.index = rounds
    metrics_mae.index = rounds

    agg_type, part_type = idx[1:3]
    # Transpose and stack so the values line up with the (client, round) columns.
    client_metrics_df.loc["mse", agg_type, part_type] = metrics_mse.T.stack()
    client_metrics_df.loc["mae", agg_type, part_type] = metrics_mae.T.stack()
    
client_metrics_df.T

In [None]:
def addlabels_clients(y, fig = None):
    """Annotate every point of ``y`` with its value formatted to three decimals.

    Args:
        y: Object whose ``.items()`` yields ``(x, value)`` pairs, e.g. a
            pandas Series.
        fig: Object exposing a matplotlib-style ``text(x, y, s, **kwargs)``
            method, such as an ``Axes``. Defaults to ``plt``.
    """
    # Compare with None explicitly so that a target which merely evaluates as
    # falsy is still used instead of silently falling back to `plt`.
    target = plt if fig is None else fig
    for idx, val in y.items():
        target.text(
            idx,
            val,
            f'{val:.3f}',
            ha='center',
            bbox=dict(facecolor='white', alpha=.5, linewidth=0.2),
        )

def visualize_per_client_mse(metrics_path: Path, model_name: str):
    """Plot the MSE series stored in a metrics CSV, one line per value column.

    Args:
        metrics_path: CSV file with a ``METRIC`` column, a ``ROUND`` column and
            one further column per plotted series.
        model_name: Currently unused; kept so existing callers keep working.

    Returns:
        The matplotlib figure.
    """
    metrics = pd.read_csv(metrics_path)
    # Build a new frame instead of calling set_index(inplace=True) on a
    # filtered slice of `metrics`.
    metrics_mse = (
        metrics[metrics["METRIC"] == "mse"]
        .set_index("ROUND")
        .drop(columns=["METRIC"])
    )

    # plot per client losses
    fig, axes = plt.subplots(1, 1, figsize=(10, 4), dpi=150)
    for col_name in metrics_mse.columns:
        axes.plot(metrics_mse[col_name], label=col_name)
        addlabels_clients(metrics_mse[col_name], fig=axes)
    axes.set_xlabel('Round')
    axes.set_ylabel('MSE')
    # Tick the rounds present in this file rather than the notebook-level
    # `rounds` variable left behind by whichever cell ran last.
    axes.set_xticks(metrics_mse.index)
    axes.grid(which='both', color='lightgrey', alpha=0.5)
    axes.legend()

    fig.tight_layout()
    plt.show()
    return fig


# Plot every notebook variable whose name ends in "_cmetrics". The matches are
# collected into a list before the loop body runs.
for name, value in [(name,value) for name, value  in vars().items() if name.endswith("_cmetrics")]:
    # Drop the last two underscore-separated parts of the variable name.
    model_name = "_".join(name.split("_")[:-2])
    fig = visualize_per_client_mse(value, model_name)
    if SAVE_FIGURES:
        fig.savefig(viz_path.joinpath(f"client_based_mse_fl_{model_name}.png"))
    
    
# Explicit per-model calls, replaced by the loop above:
# visualize_per_client_mse(noniid_model_cmetrics, noniid_model_name)
# visualize_per_client_mse(sec_noniid_model_cmetrics, sec_noniid_model_name)
# visualize_per_client_mse(iid_model_cmetrics, iid_model_name)
# visualize_per_client_mse(sec_iid_model_cmetrics, sec_iid_model_name)
# visualize_per_client_mse(vertical_model_cmetrics, vertical_model_name)
# visualize_per_client_mse(sec_vertical_model_cmetrics, sec_vertical_model_name)

### Times Tables

In [None]:
# Load the timing table saved next to the experiment outputs.
times_path = ExpConfig.SAVE_DIR.joinpath("times.csv")
times_df = pd.read_csv(times_path)
# Strip the last 36 characters of every experiment name
# (presumably a run identifier suffix - TODO confirm).
times_df['EXP_NAME'] = times_df['EXP_NAME'].apply(lambda x: x[:-36])
# Every remaining cell is a string whose last character is dropped before
# converting to float (presumably a unit suffix - TODO confirm).
times_df = times_df.set_index('EXP_NAME').map(lambda x: float(x[:-1]))
times_df = times_df.sort_index()
# Keep the total time separately; the remaining columns stay in `times_df`.
complete_times = times_df["COMPLETE_TIME"]
times_df = times_df.drop("COMPLETE_TIME", axis=1)
times_df

In [None]:
import plotly.express as px
# `go` is used below; importing it here keeps the cell runnable when the
# notebook is executed top to bottom (it was only imported in a later cell).
import plotly.graph_objects as go

# Grouped bars of the total run time. The slices assume that, after the index
# sort, the first three rows are the regular runs and the rest the secure runs,
# each in the order IID, NonIID, Vertical - TODO confirm against times.csv.
fig = go.Figure(data=[
    go.Bar(name='Regular Aggregation', x=["IID", "NonIID", "Vertical"], y=complete_times.values[:3], text=complete_times.values[:3], marker_color='rgb(107,174,214)'),
    go.Bar(name='Secure Aggregation', x=["IID", "NonIID", "Vertical"], y=complete_times.values[3:], text=complete_times.values[3:], marker_color='rgb(33,113,181)')
])
# Change the bar mode
fig.update_layout(
    barmode='group', 
    width=800, 
    height=500,
    margin=dict(l=20, r=20, t=20, b=20),
    legend=dict(
        yanchor="top",
        y=0.99,
        xanchor="left",
        x=0.01
    )
)
fig.show()

In [None]:
# Percentage increase of the secure NonIID run over the regular NonIID run,
# computed per column of `times_df` and then averaged.
perc = []
for reg_time, sec_time in zip( times_df.loc["fl_regular_noniid"] , times_df.loc["fl_secure_noniid"]):
    perc.append(
        (sec_time - reg_time)/reg_time  * 100
    )
sum(perc)/len(perc)

In [None]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# 3x2 grid of pie charts, one per row of `times_df`.
# `px` comes from the earlier bar-chart cell.
fig = make_subplots(rows=3, cols=2, specs=[[{"type": "domain"} for _ in range(2)] for _ in range(3)], horizontal_spacing=0.2)

for i, (exp_name, row) in enumerate(times_df.iterrows()):
    # The grid is filled column by column: rows 0-2 of the frame go to the
    # first column, rows 3-5 to the second.
    fig.add_pie(
        labels=row.index, values=row.values, 
        title=exp_name,
        textinfo='value',
        insidetextorientation='radial',
        marker={"colors":px.colors.sequential.Blues_r},
        sort=False,
        row=(i%3)+1,
        col=(i//3) + 1,
    )
    
# One shared horizontal legend, centred below the grid.
fig.update_layout(height=800, width=500, title_text="Per Round Times", margin=dict(l=20, r=20, t=35, b=35), legend=dict(
    orientation="h",
    yanchor="bottom",
    y=-0.17,
    xanchor = "center",
    x = 0.5
))
fig.show()
    

In [None]:
# Show the timing table (without the COMPLETE_TIME column) as the cell output.
times_df