In [1]:
from sklearn.linear_model import SGDClassifier
import numpy as np
from mozfldp.simulation_util import client_update
import warnings

# Suppress all warnings so they do not clutter the cell outputs below.
# NOTE: simplefilter("ignore") installs a process-wide filter; it stays in
# effect for every later cell until the warning filters are changed again.
warnings.simplefilter("ignore")

# Load IPython's autoreload extension and select mode 2, which reloads all
# imported modules before each cell is executed (so edits to the project's
# modules are picked up without restarting the kernel).
%load_ext autoreload
%autoreload 2

## Client Update Example

In [2]:
# Example inputs for one client update. In the real flow this data is
# provided by the server.
features = [
    [1, 4, 3],
    [0, 2, 2],
    [1, 4, 0],
    [0, 5, 3],
    [1, 2, 1],
    [0, 2, 9],
]
labels = [1, 0, 1, 0, 1, 0]

# Starting model parameters: the coefficient vector should have
# num_classes * num_features entries; the intercept is passed alongside it.
coefs = np.array([29.0, 0.0, 0.0])
intercepts = np.array([-9])
weights = [coefs, intercepts]

# Local training schedule handed to the client.
epochs, batch_size = 3, 3

# Run the update on this client's data and display the weights it returns.
new_weights = client_update(weights, epochs, batch_size, features, labels)
print(new_weights)

[array([[28.48292577,  0.        ,  0.        ]]), array([-9.])]


## Server Update Example

In [4]:
import numpy as np
from mozfldp.simulation_util import server_update

# Seed NumPy's legacy global generator so the synthetic data generated below
# is identical on every run (the cell then survives Restart & Run All with
# the same inputs).
# NOTE(review): this makes server_update itself deterministic only if it
# draws from np.random's global state -- confirm in mozfldp.simulation_util.
SEED = 0
np.random.seed(SEED)

# Size of the synthetic federated data set.
num_client = 100
samples_per_client = 100
num_features = 3

# Federated training settings passed to server_update.
client_fraction = 0.5
num_rounds = 10
epoch = 10
batch_size = 25
display_weight_per_round = True

# All-zero starting coefficients and intercept. They are created as float64
# arrays sized from num_features (instead of hard-coded integer arrays) so the
# length cannot drift from the data and the dtype matches the float64 initial
# weights used by the simulation runner further down.
init_weights = [
    np.zeros(num_features, dtype=np.float64),
    np.zeros(1, dtype=np.float64),
]

# Random integer features in [0, 10) with shape
# (num_client, samples_per_client, num_features) and random binary labels
# with shape (num_client, samples_per_client).
features = np.random.randint(10, size=(num_client, samples_per_client, num_features))
labels = np.random.randint(2, size=(num_client, samples_per_client))

# Run the server-side training loop; with display_weight_per_round=True the
# "Updated Weights" lines are printed during the run.
new_clf = server_update(
    init_weights,
    client_fraction,
    num_rounds,
    features,
    labels,
    epoch,
    batch_size,
    display_weight_per_round,
)

Updated Weights:  [[ 9.21039435  3.42478697 10.5526031 ]] [-0.50832144]
Updated Weights:  [[-12.39387656  -2.63282931  10.92549293]] [-30.23488422]
Updated Weights:  [[-0.39884221  3.05007116  1.63993618]] [6.87589235]
Updated Weights:  [[-10.58692427   7.21644198   4.20351571]] [-3.3782652]
Updated Weights:  [[1.68096538 4.17314952 3.80210084]] [-27.61753893]
Updated Weights:  [[1.86070607 2.39630708 5.12672219]] [-3.18491918]
Updated Weights:  [[ -3.83699408 -14.42908596  -3.41825447]] [-13.95188863]
Updated Weights:  [[ 7.77314372 -6.5005065  -4.40597068]] [-38.14942825]
Updated Weights:  [[10.63308565 -1.27519038  3.92540565]] [-14.12513733]
Updated Weights:  [[0.15083477 1.50987645 6.76575981]] [-18.83791491]


## Simulation Runner

In [6]:
from sklearn.model_selection import ParameterGrid, train_test_split
from mozfldp.simulation_util import server_update
import numpy as np
import mozfldp.random_data_gen as rdata_gen
import pandas as pd

# Load the data
NUM_SAMPLES = 20000
NUM_LABELS = 3
NUM_FEATURES = 4
NUM_CLIENTS = 100
g_prms = rdata_gen.InputGenParams(NUM_SAMPLES, NUM_LABELS, NUM_FEATURES, NUM_CLIENTS)
df = pd.read_csv("datasets/blob_S20000_L3_F4_U100.csv")

# Convert the flat table into the per-client arrays the simulator expects.
sim_labels, sim_features = rdata_gen.transform_data_for_simulator_format(df, g_prms)
features = np.array(sim_features)
labels = np.array(sim_labels)

# Split the data into training and testing sets (40% held out, fixed seed so
# the split is the same on every run).
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.4, random_state=0)

# The test data still carries the client dimension, which has to be removed
# before scoring.
# ex: [[[1, 1], [2, 2]], [[3, 3], [4, 4]]] needs to become [[1, 1], [2, 2], [3, 3], [4, 4]] for features
# and [[1, 2], [3, 4]] needs to become [1, 2, 3, 4] for labels
# This does not depend on the hyper-parameters, so it is done once here
# rather than on every iteration of the grid search below.
reshaped_X_test = X_test.reshape(-1, X_test.shape[-1])
reshaped_y_test = y_test.reshape(-1)

# Initial model: all-zero coefficients (one row per label) and intercepts.
init_weights = np.zeros((NUM_LABELS, NUM_FEATURES), dtype=np.float64, order="C")
init_intercept = np.zeros(NUM_LABELS, dtype=np.float64, order="C")

# Hyper-parameter grid; ParameterGrid expands it into every combination of
# the listed values.
param_grid = {"client_fraction": [1, 0.1],
              "epoch": [1, 5],
              "batch_size": [40], # TODO: need to implement an infinite batch size
              "init_weight": [[init_weights, init_intercept]],
              "num_rounds": [10]}

# Run training/testing over all parameter combinations and remember the one
# with the highest test score.
best_score = None
best_params = None
for params in ParameterGrid(param_grid):
    print("Training...")
    print("Params: ", params)
    classifier = server_update(
        params["init_weight"],
        params["client_fraction"],
        params["num_rounds"],
        X_train,
        y_train,
        params["epoch"],
        params["batch_size"],
        False,
    )
    weights = [classifier.coef_, classifier.intercept_]

    score = classifier.score(reshaped_X_test, reshaped_y_test)

    print('Weights: {}\nScore: {:f}\n\n'.format(weights, score))

    # Keep the first combination that reaches the highest score seen so far.
    if best_score is None or score > best_score:
        best_score, best_params = score, params

# Report the winning combination (skipped if the grid produced no runs).
if best_params is not None:
    print("Best score: {:f}\nBest params: {}".format(best_score, best_params))

Training...
Params:  {'batch_size': 40, 'client_fraction': 1, 'epoch': 1, 'init_weight': [array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]]), array([0., 0., 0.])], 'num_rounds': 10}
Weights: [array([[ 14.57954214,   5.17006556,  31.56363127,  -9.96949121],
       [ -4.69985766,   1.515726  ,  11.90157783, -16.37977076],
       [  9.99352147,  22.11192303,  13.81040962, -16.5691806 ]]), array([-259.50851682, -251.06849701, -276.58063764])]
Score: 0.328250


Training...
Params:  {'batch_size': 40, 'client_fraction': 1, 'epoch': 5, 'init_weight': [array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]]), array([0., 0., 0.])], 'num_rounds': 10}
Weights: [array([[ 14.93429043,   6.54932159,  27.45016802, -12.41496433],
       [  3.38199928,  18.17709668,  23.4173894 ,  -9.74048276],
       [  1.66314514,  22.51783106,  18.47015954, -14.83775359]]), array([-263.28207476, -281.25648769, -282.83460913])]
Score: 0.338250


Training...
Params:  {'batch