In [185]:
from sklearn.linear_model import SGDClassifier
import numpy as np
from simulation_util import client_update
import warnings

# hide the warning message temporarily
warnings.simplefilter("ignore")

# auto-reload the modules everytime a cell is run
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Client Update Example

In [186]:
# Toy inputs for the demo; in the real flow this data is provided by the server.
features = [
    [1, 4, 3],
    [0, 2, 2],
    [1, 4, 0],
    [0, 5, 3],
    [1, 2, 1],
    [0, 2, 9],
]
labels = [1, 0, 1, 0, 1, 0]

# Training settings handed to client_update.
epochs, batch_size = 3, 3

# Starting model, packed as [coefficients, intercepts].
# The coefficients should be of size num_classes * num_features.
intercepts = np.array([-9])
coefs = np.array([29.0, 0.0, 0.0])
weights = [coefs, intercepts]

# Run the client-side update and show the weights it returns.
new_weights = client_update(weights, epochs, batch_size, features, labels)
print(new_weights)

[array([[28.48292577,  0.        ,  0.        ]]), array([-9.])]


## Server Update Example

In [187]:
import numpy as np
from simulation_util import server_update

# Seed NumPy's global generator so the synthetic data below is identical on
# every run (Restart & Run All reproduces the same inputs).
# NOTE(review): if server_update draws from np.random internally (e.g. to pick
# clients), this seed makes that reproducible too -- confirm against its source.
np.random.seed(0)

# Shape of the synthetic federated data set.
num_client = 100
samples_per_client = 100
num_features = 3

# Starting model, packed as [coefficients, intercepts]. The coefficient vector is
# sized from num_features so the two cannot drift apart; integer zeros are kept
# to match the original np.array([0, 0, 0]) exactly.
init_weights = [np.zeros(num_features, dtype=int), np.array([0])]

# Settings passed to server_update.
client_fraction = 0.5
num_rounds = 10
epoch = 10
batch_size = 25
display_weight_per_round = True

# Random integer features in [0, 10) and binary labels, one slab per client.
features = np.random.randint(10, size=(num_client, samples_per_client, num_features))
labels = np.random.randint(2, size=(num_client, samples_per_client))

new_clf = server_update(
    init_weights,
    client_fraction,
    num_rounds,
    features,
    labels,
    epoch,
    batch_size,
    display_weight_per_round,
)

Updated Weights:  [[-1.88998529 -7.77928383  0.1247547 ]] [8.06948321]
Updated Weights:  [[-3.14658285  2.60053666  5.43189638]] [8.60357767]
Updated Weights:  [[-1.28140522  5.42823545  1.89898726]] [8.43339149]
Updated Weights:  [[-3.74997627 10.91294467  1.0971158 ]] [5.33885579]
Updated Weights:  [[4.42919342 6.61835419 8.26757996]] [9.09195967]
Updated Weights:  [[-8.58616465  1.53515246 -1.22986693]] [6.34834551]
Updated Weights:  [[ 6.09787979 -2.36450218 -2.20511742]] [16.86760162]
Updated Weights:  [[-5.79978775 -4.49429409  1.88039804]] [7.42129726]
Updated Weights:  [[7.07518744 6.76691657 3.90897526]] [9.50596262]
Updated Weights:  [[-3.3246793   7.09700542  5.48971927]] [5.91119309]


## Simulation Runner

In [198]:
from sklearn.model_selection import ParameterGrid, train_test_split
from simulation_util import server_update
import numpy as np
import random_data_gen as rdata_gen
import pandas as pd

# Load the data
NUM_SAMPLES = 20000
NUM_LABELS = 3
NUM_FEATURES = 4
NUM_CLIENTS = 100
g_prms = rdata_gen.InputGenParams(NUM_SAMPLES, NUM_LABELS, NUM_FEATURES, NUM_CLIENTS)
df = pd.read_csv("datasets/blob_S20000_L3_F4_U100.csv")

# Convert the flat CSV rows into the layout the simulator consumes.
sim_labels, sim_features = rdata_gen.transform_data_for_simulator_format(df, g_prms)
features = np.array(sim_features)
labels = np.array(sim_labels)

# Split the data into training and testing sets (fixed random_state for repeatability)
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.4, random_state=0)

# Need to remove the client dimension from our data for testing
# ex: [[[1, 1], [2, 2]], [[3, 3], [4, 4]]] needs to become [[1, 1], [2, 2], [3, 3], [4, 4]] for features
# and [[1, 2], [3, 4]] needs to become [1, 2, 3, 4] for labels
# The test split does not depend on the grid parameters, so it is flattened
# once here instead of on every loop iteration.
reshaped_X_test = X_test.reshape(-1, X_test.shape[2])
reshaped_y_test = y_test.reshape(-1)

# Every grid run starts from an all-zero model.
init_weights = np.zeros((NUM_LABELS, NUM_FEATURES), dtype=np.float64, order="C")
init_intercept = np.zeros(NUM_LABELS, dtype=np.float64, order="C")

# Enumerate all the combinations of the parameters
param_grid = {"client_fraction": [1, 0.1],
              "epoch": [1, 5],
              "batch_size": [40], # TODO: need to implement an infinite batch size
              "init_weight": [[init_weights, init_intercept]],
              "num_rounds": [10]}

# Run training/testing over all parameter combinations; the score of each run is
# printed so the best combination can be picked out.
for params in ParameterGrid(param_grid):
    print("Training...")
    print("Params: ", params)
    classifier = server_update(
        params["init_weight"],
        params["client_fraction"],
        params["num_rounds"],
        X_train,
        y_train,
        params["epoch"],
        params["batch_size"],
        False,
    )
    weights = [classifier.coef_, classifier.intercept_]

    # Score the classifier returned by server_update on the flattened test set.
    score = classifier.score(reshaped_X_test, reshaped_y_test)

    print('Weights: {}\nScore: {:f}\n\n'.format(weights, score))

Training...
Params:  {'batch_size': 40, 'client_fraction': 1, 'epoch': 1, 'init_weight': [array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]]), array([0., 0., 0.])], 'num_rounds': 10}
Weights: [array([[  8.77069867,  -2.63840591,  31.31750977,  -9.57683456],
       [ 22.80217526,  10.64899234,  16.78286   ,  -7.34525358],
       [ -5.04143886,   7.95329319,  15.32743645, -16.63465208]]), array([-238.35135925, -256.52792777, -265.14371203])]
Score: 0.343625


Training...
Params:  {'batch_size': 40, 'client_fraction': 1, 'epoch': 5, 'init_weight': [array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]]), array([0., 0., 0.])], 'num_rounds': 10}
Weights: [array([[ 10.47819007,  10.47905525,  26.27432185, -12.67122868],
       [  5.68969177,  17.33458831,  13.31253131, -12.84603441],
       [  5.86914394,  27.51810474,   2.03090646,  -4.10592691]]), array([-287.22822838, -274.64129043, -268.71344802])]
Score: 0.323625


Training...
Params:  {'batch