In [None]:
import json
from typing import Tuple, List

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

from fed_algo import FedAvg, FedAdam, FedAdagrad, FedYogi, FedAlgo, ClientParam, Layers
from model import gru
from utils import read_chunk, select_features_name, read_raw_dataset, train_with_data

# Type alias for a triple of `Layers` values.
# NOTE(review): not referenced in the cells visible here, and "OTP" may be a
# typo for "OPT" -- confirm against fed_algo before renaming.
FED_OTP_RESULT = Tuple[Layers, Layers, Layers]

# Pokemon dataset


In [None]:
# Load the raw table; 'MOS' is the prediction target and 'user_id' is dropped.
raw_dataset_options = {
    'index_col': 'id',
    'target_name': 'MOS',
    'drop': ['user_id'],
}
X_raw, y = read_raw_dataset("datasets/pokemon.csv", **raw_dataset_options)

# Feature names are selected once on the raw data and reused for every chunk.
features = select_features_name(X_raw, y)

# Chunk 0 is read as the evaluation split.
X_test, y_test = read_chunk('pokemon', 0, features)

# Collected metrics: one list per series, one entry per client count below.
result = dict(num_clients=[5.0, 20.0, 50.0, 100.0, 150.0])

## 5 clients


Initialize the centralized model and the federated aggregation algorithms.


In [None]:
central_model = gru()

# Shared all-zero starting point: one zero array per layer weight, each with
# that layer's own shape and dtype.
# `get_weights()` returns one array per layer; calling `np.zeros_like` on the
# whole list fails when the layer shapes differ (NumPy >= 1.24 refuses to
# build a ragged array; older versions produce scalar zeros instead of
# per-layer tensors). The 1-D object array keeps the container an ndarray,
# as the original expression produced, while holding correctly shaped entries.
layer_weights = central_model.get_weights()
init_weights = np.empty(len(layer_weights), dtype=object)
for layer_idx, layer_weight in enumerate(layer_weights):
    init_weights[layer_idx] = np.zeros_like(layer_weight)

# Every federated algorithm starts from the same weights and is given the
# same held-out data.
fed_algos: List[FedAlgo] = [
    FedAvg(init_weights, X_test, y_test),
    FedAdam(init_weights, X_test, y_test),
    FedAdagrad(init_weights, X_test, y_test),
    FedYogi(init_weights, X_test, y_test)
]

In [None]:
# Chunks 1..5 hold the data of the first five clients.
datasets = [
    read_chunk('pokemon', client_chunk, features)
    for client_chunk in range(1, 6)
]

# Pool all client data for the centralized baseline.
client_feature_frames = [client_X for client_X, _ in datasets]
client_targets = [client_y for _, client_y in datasets]
X_central_raw = pd.concat(client_feature_frames, axis=0)
y_central = pd.concat(client_targets, axis=0)
X_central = X_central_raw[features]

In [None]:
# Centralized training on the pooled data of the first 5 clients.
# Reset to the shared starting weights first, exactly as the later rounds do,
# so the 5-client baseline is comparable with them and re-running this cell
# does not continue training from a previous run.
central_model.set_weights(init_weights)
central_model.fit(X_central, y_central)

# Federated training: one local model per client, each started from the
# shared initial weights; every algorithm aggregates the same local models.
local_models = [
    train_with_data(gru(), init_weights, dataset) for dataset in datasets
]
for algo in fed_algos:
    algo.aggregate(local_models)

# Evaluate both approaches on the held-out data. `evaluate` returns a
# sequence whose first entry is recorded as the centralized MSE.
central_mse = central_model.evaluate(X_test, y_test)[0]
fed_mses = [algo.predict() for algo in fed_algos]


In [None]:
# Start one result series per approach with the 5-client measurement.
result['central'] = [central_mse]
result.update({
    fed_algo.name(): [fed_mse]
    for fed_algo, fed_mse in zip(fed_algos, fed_mses)
})

In [None]:
# Total number of participating clients after each additional round.
num_clients = [20, 50, 100, 150]
for i, num_client in enumerate(num_clients):

    # Number of clients joining in this round. The first round tops up the
    # 5 clients already used to 20; later rounds add the difference between
    # consecutive totals.
    if i == 0:
        num_client_this_round = 15
    else:
        num_client_this_round = num_client - num_clients[i - 1]

    # Chunk ids of the newcomers continue right after those already consumed.
    first_chunk_id = num_client - num_client_this_round + 1
    datasets = []
    for offset in range(num_client_this_round):
        datasets.append(
            read_chunk('pokemon', first_chunk_id + offset, features))

    # Extend the pooled (centralized) data with the newcomers' data.
    new_X = [client_X for client_X, _ in datasets]
    new_y = [client_y for _, client_y in datasets]
    X_central_raw = pd.concat([X_central_raw, *new_X], axis=0)
    y_central = pd.concat([y_central, *new_y], axis=0)
    X_central = X_central_raw[features]

    # centralized training: restart from the initial weights and fit on
    # everything pooled so far
    central_model.set_weights(init_weights)
    central_model.fit(X_central, y_central)

    # federated training: each algorithm trains fresh local models on the
    # new clients, starting from its own current weights, then aggregates
    for algo in fed_algos:
        local_models = []
        for dataset in datasets:
            local_models.append(
                train_with_data(gru(), algo.get_weights(), dataset))
        algo.aggregate(local_models)

    # Evaluate on the held-out data and record one entry per series.
    central_mse = central_model.evaluate(X_test, y_test)[0]
    fed_mses = [fed_algo.predict() for fed_algo in fed_algos]

    result['central'].append(central_mse)
    for algo, mse in zip(fed_algos, fed_mses):
        result[algo.name()].append(mse)

In [None]:
# Serialize the collected metrics as indented JSON. `print` is used on
# purpose: displaying the bare string would show its repr with escaped
# newlines. (`json` is imported in the notebook's top import cell.)
json_result = json.dumps(result, indent=4)
print(json_result)