In [None]:
import json
from typing import Tuple, List

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

from fed_algo import FedAvg, FedAdam, FedAdagrad, FedYogi, FedAlgo, ClientParam, Layers
from model import gru
from utils import read_chunk, select_features_name, read_raw_dataset, train_with_data

# Pokemon dataset: centralized vs. federated (FedAvg) training


In [None]:
# Load the complete pokemon table; in the cells shown here it is only used to
# pick the feature names (the selection logic lives in utils).
X_raw, y = read_raw_dataset(
    "datasets/pokemon.csv",
    drop=['user_id'],
    index_col='id',
    target_name='MOS',
)
features = select_features_name(X_raw, y)

# '-test' is the chunk the models are evaluated on; '-init' is the chunk the
# initial model is pre-trained on.
# NOTE(review): presumably read_chunk keeps only the `features` columns — confirm in utils.
X_test, y_test = read_chunk('pokemon', '-test', features)
X_init, y_init = read_chunk('pokemon', '-init', features)

# Collected metrics, one entry per experiment size; the score series are
# added under their own keys by later cells.
result = {'num_clients': [5.0, 20.0, 50.0, 100.0, 150.0]}

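Initialize the models: pre-train a central GRU on the init chunk and use its weights as the starting point for FedAvg.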


In [None]:
# Pre-train a fresh GRU on the init chunk, holding out 5% for validation.
central_model = gru()
central_model.fit(
    X_init, y_init,
    batch_size=128,
    epochs=50,
    shuffle=True,
    validation_split=0.05,
)

# Keep the pre-trained weights: the centralized resets and the first
# federated round below all start from them.
init_weights = central_model.get_weights()

# Federated counterpart, built on a second GRU instance and the same weights.
# NOTE(review): X_test / y_test are presumably what fed_avg.predict() scores
# against — confirm in fed_algo.
fed_avg = FedAvg(gru(), init_weights, X_test, y_test)


In [None]:
# Chunks 1..5 are the first five clients; each entry is an (X, y) pair.
datasets = [read_chunk('pokemon', chunk_no, features) for chunk_no in range(1, 6)]

# Pool the clients' data row-wise for the centralized baseline.
X_central = pd.concat([client[0] for client in datasets], axis=0)
y_central = pd.concat([client[1] for client in datasets], axis=0)

In [None]:
# Centralized baseline on the pooled data of the first 5 clients.
# Start explicitly from the pre-trained weights, exactly as the multi-round
# loop does: on a fresh run this is a no-op, but it keeps the cell idempotent,
# so re-running it no longer continues training an already fitted model.
central_model.set_weights(init_weights)
central_model.fit(X_central,
                  y_central,
                  validation_split=0.05,
                  batch_size=128,
                  epochs=50,
                  shuffle=True)

# Federated round: one local model per client dataset, each built from a new
# GRU and the pre-trained weights, then aggregated by FedAvg.
# NOTE(review): presumably train_with_data loads the weights and fits on the
# client's data — confirm in utils.
local_models = [
    train_with_data(gru(), init_weights, dataset) for dataset in datasets
]
fed_avg.aggregate(local_models)

# Score both approaches. The first entry of evaluate()'s result is recorded as
# the MSE (presumably the loss gru() compiles with — confirm in model).
# NOTE(review): despite its name, fed_avg.predict() is stored as an MSE value.
central_mse = central_model.evaluate(X_test, y_test)[0]
fed_mse = fed_avg.predict()


In [None]:
# Start both score series with the 5-client results; later rounds append.
result.update({
    'central': [central_mse],
    fed_avg.name(): [fed_mse],
})

In [None]:
# Cumulative client counts to evaluate after the initial 5-client round.
num_clients = [20, 50, 100, 150]
for i, num_client in enumerate(num_clients):
    # Number of clients that join in this round: 15 in the first round (5 were
    # already used), afterwards the difference to the previous total.
    if i == 0:
        num_client_this_round = 15
    else:
        num_client_this_round = num_client - num_clients[i - 1]

    # The new clients own the chunks right after the ones already loaded,
    # i.e. chunk numbers first_chunk..num_client inclusive.
    first_chunk = num_client - num_client_this_round + 1
    datasets = [
        read_chunk('pokemon', chunk_no, features)
        for chunk_no in range(first_chunk, num_client + 1)
    ]

    # Grow the pooled centralized data with the new clients' rows.
    X_central = pd.concat([X_central, *(client[0] for client in datasets)], axis=0)
    y_central = pd.concat([y_central, *(client[1] for client in datasets)], axis=0)

    # Centralized: retrain from the pre-trained weights on all pooled data.
    central_model.set_weights(init_weights)
    central_model.fit(
        X_central, y_central,
        batch_size=128,
        epochs=50,
        shuffle=True,
        validation_split=0.05,
    )

    # Federated: only this round's new clients train, starting from the
    # current FedAvg weights; their local models are then aggregated.
    starting_weights_source = fed_avg
    local_models = []
    for dataset in datasets:
        local_models.append(
            train_with_data(gru(), starting_weights_source.get_weights(), dataset))
    fed_avg.aggregate(local_models)

    # Score both approaches and extend the result series.
    central_mse = central_model.evaluate(X_test, y_test)[0]
    fed_mse = fed_avg.predict()
    result['central'].append(central_mse)
    result[fed_avg.name()].append(fed_mse)

In [None]:
# Pretty-print the collected metrics as JSON. `json` is imported with the other
# dependencies in the first cell, so no mid-notebook import is needed here.
json_result = json.dumps(result, indent=4)
print(json_result)