In [1]:
from sklearn.linear_model import SGDClassifier
import numpy as np
from mozfldp.simulation_util import client_update
import warnings

# hide the warning message temporarily
warnings.simplefilter("ignore")

# auto-reload the modules every time a cell is run
%load_ext autoreload
%autoreload 2

## Client Update Example

In [4]:
# Toy inputs for a single client update; in the real flow this data will be
# provided by the server.
features = [
    [1, 4, 3],
    [0, 2, 2],
    [1, 4, 0],
    [0, 5, 3],
    [1, 2, 1],
    [0, 2, 9],
]
labels = [1, 0, 1, 0, 1, 0]

# Local-training hyper-parameters handed to client_update.
epochs, batch_size = 3, 3

# Starting model as a [coefficients, intercepts] pair.
# coefs should be of size num_classes * num_features.
coefs = np.array([29.0, 0.0, 0.0])
intercepts = np.array([-9])
weights = [coefs, intercepts]

# Run the client-side update and show the weights it returns.
new_weights = client_update(weights, epochs, batch_size, features, labels)
print(new_weights)

[array([[28.48672566,  0.        ,  0.        ]]), array([-9.])]


## Server Update Example

In [5]:
import numpy as np
from mozfldp.simulation_util import server_update

# Seed NumPy's global RNG so the synthetic data generated below is identical on
# every "Restart & Run All".
# NOTE(review): this presumably also pins any np.random-based client sampling
# inside server_update -- confirm against its implementation.
RANDOM_SEED = 0
np.random.seed(RANDOM_SEED)

# Shape of the synthetic federated data set.
num_client = 100
samples_per_client = 100
num_features = 3

# Starting model as a [coefficients, intercept] pair. Float zeros (rather than
# integer arrays) keep the dtype consistent with the float64 weights used in
# the simulation runner, and the length follows num_features instead of being
# hard-coded.
init_weights = [np.zeros(num_features), np.zeros(1)]

# Federated-training settings passed straight through to server_update.
client_fraction = 0.5
num_rounds = 10
epoch = 10
batch_size = 25
display_weight_per_round = True

# Random integer features in [0, 10) and binary labels, laid out as
# (client, sample, feature) and (client, sample) respectively.
features = np.random.randint(10, size=(num_client, samples_per_client, num_features))
labels = np.random.randint(2, size=(num_client, samples_per_client))

new_clf = server_update(
    init_weights,
    client_fraction,
    num_rounds,
    features,
    labels,
    epoch,
    batch_size,
    display_weight_per_round,
)

Updated Weights:  [[ 0.80400241 -3.12282041 -3.92707568]] [-8.0286598]
Updated Weights:  [[-7.28915493 -3.03298239 -1.92202299]] [40.74921299]
Updated Weights:  [[-2.45964505  2.7924037  -1.22915393]] [25.20736215]
Updated Weights:  [[13.6692742   1.48275331 -1.84787059]] [-9.19809708]
Updated Weights:  [[-4.91304623  8.41655083  4.96001876]] [-0.88102296]
Updated Weights:  [[ 6.52044623 -3.09614447  0.01755993]] [19.97969872]
Updated Weights:  [[  6.22102608   3.24009844 -10.68396063]] [-19.16314781]
Updated Weights:  [[ 3.0790037  -2.42007241  2.84207234]] [19.33979612]
Updated Weights:  [[-6.99879558  3.1977462   5.9107004 ]] [13.83269944]
Updated Weights:  [[0.98751793 4.26986506 6.28814264]] [5.99900549]


## Simulation Runner

In [6]:
from sklearn.model_selection import ParameterGrid, train_test_split
from mozfldp.simulation_util import server_update
import numpy as np
import mozfldp.random_data_gen as rdata_gen
import pandas as pd

# Load the data
NUM_SAMPLES = 20000
NUM_LABELS = 3
NUM_FEATURES = 4
NUM_CLIENTS = 100
g_prms = rdata_gen.InputGenParams(NUM_SAMPLES, NUM_LABELS, NUM_FEATURES, NUM_CLIENTS)
df = pd.read_csv("datasets/blob_S20000_L3_F4_U100.csv")

# Convert the flat frame into the per-client layout the simulator expects.
sim_labels, sim_features = rdata_gen.transform_data_for_simulator_format(df, g_prms)
features = np.array(sim_features)
labels = np.array(sim_labels)

# Split the data into training and testing sets (fixed random_state for a
# repeatable split). The split is taken along the first axis of the arrays.
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.4, random_state=0)

# The classifier is scored on flat 2-D features / 1-D labels, so the client
# dimension has to be removed from the test data.
# ex: [[[1, 1], [2, 2]], [[3, 3], [4, 4]]] needs to become [[1, 1], [2, 2], [3, 3], [4, 4]] for features
# and [[1, 2], [3, 4]] needs to become [1, 2, 3, 4] for labels
# This does not depend on the training parameters, so it is done once here
# instead of on every iteration of the loop below.
reshaped_X_test = X_test.reshape(-1, X_test.shape[-1])
reshaped_y_test = y_test.reshape(-1)

# Zero-initialised model: one coefficient row and one intercept per label.
init_weights = np.zeros((NUM_LABELS, NUM_FEATURES), dtype=np.float64, order="C")
init_intercept = np.zeros(NUM_LABELS, dtype=np.float64, order="C")

# Find all the combinations of the parameters
param_grid = {"client_fraction": [1, 0.1],
              "epoch": [1, 5],
              "batch_size": [40], # TODO: need to implement an infinite batch size
              "init_weight": [[init_weights, init_intercept]],
              "num_rounds": [10]}

# run training/testing over all parameter combinations to get the best combination
# NOTE(review): the recorded scores (~0.34 with three labels) are close to
# chance level -- worth checking before trusting this comparison.
for params in ParameterGrid(param_grid):
    print("Training...")
    print("Params: ", params)
    classifier = server_update(
        params["init_weight"],
        params["client_fraction"],
        params["num_rounds"],
        X_train,
        y_train,
        params["epoch"],
        params["batch_size"],
        False,  # do not print the weights after every round
    )
    weights = [classifier.coef_, classifier.intercept_]

    # mean accuracy on the flattened held-out data
    score = classifier.score(reshaped_X_test, reshaped_y_test)

    print('Weights: {}\nScore: {:f}\n\n'.format(weights, score))

Training...
Params:  {'batch_size': 40, 'client_fraction': 1, 'epoch': 1, 'init_weight': [array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]]), array([0., 0., 0.])], 'num_rounds': 10}
Weights: [array([[ 16.34326005,  -0.50580066,  30.56774855,  -4.35001733],
       [ 10.85435969,  21.46259506,   4.04081311, -12.78816112],
       [-13.84453399,  27.72390721,   4.06205591, -11.13830017]]), array([-202.26483295, -223.30841626, -214.60556442])]
Score: 0.342000


Training...
Params:  {'batch_size': 40, 'client_fraction': 1, 'epoch': 5, 'init_weight': [array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]]), array([0., 0., 0.])], 'num_rounds': 10}
Weights: [array([[  3.179614  ,   1.99292087,  32.9183223 ,  -9.58484057],
       [  6.30625111,  16.53126575,   3.2083592 ,  -8.19112142],
       [-17.55541852,  22.99300796,  -1.90997027,  -4.803823  ]]), array([-209.62041684, -218.74500102, -217.03806768])]
Score: 0.340750


Training...
Params:  {'batch