In [62]:
from sklearn.linear_model import SGDClassifier
import numpy as np
from simulation_util import client_update
import warnings

# Temporarily silence all Python warnings for the rest of this kernel session.
# Note: this filter is global, so it hides warnings from every library.
warnings.simplefilter("ignore")

# Load IPython's autoreload extension and use mode 2, so that imported modules
# (e.g. simulation_util) are reloaded every time a cell is run.
%load_ext autoreload
%autoreload 2

## Client Update Example

In [63]:
# Inputs for one client update; in the real flow the server provides this data.
features = [
    [1, 4, 3],
    [0, 2, 2],
    [1, 4, 0],
    [0, 5, 3],
    [1, 2, 1],
    [0, 2, 9],
]
labels = [1, 0, 1, 0, 1, 0]
weights = [29.0, 0.0, 0.0, -9]
epochs, batch_size = 3, 3

# Run the client-side update on the sample data and show the returned weights.
new_weights = client_update(weights, epochs, batch_size, features, labels)
print(new_weights)

[28.48292577  0.          0.         -9.        ]


## Server Update Example

In [None]:
import numpy as np
from simulation_util import server_update

# Arguments passed to server_update below.
# NOTE(review): the "Simulation Runner" cell passes the initial weights as
# [coef_array, intercept_array]; this flat list may be a stale format --
# confirm against server_update's current signature.
init_weights = [0, 0, 0, 0]
client_fraction = 0.5
num_rounds = 10
epoch = 10
batch_size = 25
display_weight_per_round = True

# Dimensions of the synthetic example data.
num_client = 100
samples_per_client = 100
num_features = 3

# Use a locally seeded generator so the example data is identical on every run
# (Restart Kernel -> Run All) without touching NumPy's global random state.
rng = np.random.RandomState(0)

# features has shape (num_client, samples_per_client, num_features) with
# integer values in [0, 10); labels has shape (num_client, samples_per_client)
# with values in {0, 1}.
features = rng.randint(10, size=(num_client, samples_per_client, num_features))
labels = rng.randint(2, size=(num_client, samples_per_client))

new_clf = server_update(
    init_weights,
    client_fraction,
    num_rounds,
    features,
    labels,
    epoch,
    batch_size,
    display_weight_per_round,
)

## Simulation Runner

In [167]:
from sklearn.model_selection import ParameterGrid, train_test_split
from simulation_util import server_update
import numpy as np
import random_data_gen as rdata_gen
import pandas as pd

# --- Placeholder random data -------------------------------------------------
# Only used for the shape comparison printed below; both arrays are replaced by
# the CSV-backed data further down.
num_client = 100
samples_per_client = 30
num_features = 3
features = np.random.randint(10, size=(num_client, samples_per_client, num_features))
labels = np.random.randint(2, size=(num_client, samples_per_client))

# --- Load the generated blob dataset -----------------------------------------
# The constants mirror the values encoded in the CSV file name
# (S20000 / L3 / F4 / U100).
NUM_SAMPLES = 20000
NUM_LABELS = 3
NUM_FEATURES = 4
NUM_CLIENTS = 100
g_prms = rdata_gen.InputGenParams(NUM_SAMPLES, NUM_LABELS, NUM_FEATURES, NUM_CLIENTS)
df = pd.read_csv("datasets/blob_S20000_L3_F4_U100.csv")

sim_labels, sim_features = rdata_gen.transform_data_for_simulator_format(df, g_prms)

# Compare the shapes of the loaded data with the placeholder random data.
# (Previously the second print referenced an undefined name `sim_feats`, which
# raised NameError on a fresh kernel.)
print(np.array(sim_labels).shape, labels.shape)
print(np.array(sim_features).shape, features.shape)

labels = np.array(sim_labels)
features = np.array(sim_features)

# --- Train/test split ---------------------------------------------------------
# The split is done along the first axis of features/labels; 40% is held out
# for testing.
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.4, random_state=0)

print(X_train.shape, y_train.shape)
print(X_train[1][0], y_train[1][0])

# --- Initial model parameters -------------------------------------------------
# NOTE(review): init_weights is (1, 3) while NUM_FEATURES is 4 and NUM_LABELS
# is 3 -- confirm the coefficient/intercept shapes server_update expects for
# this dataset (see the TODO on "init_weight" below).
init_weights = np.zeros((1, 3), dtype=np.float64, order="C")
init_intercept = np.zeros(1, dtype=np.float64, order="C")

print(init_weights.shape, init_intercept.shape)

# --- Parameter grid -----------------------------------------------------------
# Every combination of the values below is trained and scored.
param_grid = {"client_fraction": [1, 0.1],
              "epoch": [1, 5],
              "batch_size": [40], # TODO: need to implement an infinite batch size
              "init_weight": [[init_weights, init_intercept]], # TODO: need to generate this based on the input data
              "num_rounds": [10]}

# The test data does not change between parameter combinations, so flatten it
# once instead of on every loop iteration. The leading (client) dimension is
# removed for scoring:
# ex: [[[1, 1], [2, 2]], [[3, 3], [4, 4]]] becomes [[1, 1], [2, 2], [3, 3], [4, 4]] for features
# and [[1, 2], [3, 4]] becomes [1, 2, 3, 4] for labels
reshaped_X_test = np.reshape(X_test, (X_test.shape[0] * X_test.shape[1], X_test.shape[2]))
reshaped_y_test = np.reshape(y_test, y_test.size)

# Run training/testing over all parameter combinations to find the best one.
for params in ParameterGrid(param_grid):
    print("Training...")
    print(params)
    classifier = server_update(
        params["init_weight"],
        params["client_fraction"],
        params["num_rounds"],
        X_train,
        y_train,
        params["epoch"],
        params["batch_size"],
        False,
    )
    weights = [classifier.coef_, classifier.intercept_]

    score = classifier.score(reshaped_X_test, reshaped_y_test)

    print('Params: {}\nWeights: {}\nScore: {:f}\n\n'.format(params, weights, score))

(100, 200) (100, 30)
(100, 200, 4) (100, 30, 3)
(60, 30, 3) (60, 30)
[2 6 3] 0
(1, 3) (1,)
Training...
{'batch_size': 40, 'client_fraction': 1, 'epoch': 1, 'init_weight': [array([[0., 0., 0.]]), array([0.])], 'num_rounds': 10}
Old:  [array([0., 0., 0.])] [0.0]
New:  [[0.80984775 9.07029478 4.85908649]] [16.88919077]
New:  [[-10.55163145  -0.58837918   6.98499586]] [17.30944317]
New:  [[ 14.21339829 -11.4231851    0.14060013]] [28.46350781]
New:  [[ 6.18644467 -3.1536397  -1.4831945 ]] [25.51813433]
New:  [[-10.83875132  -8.24472244  -8.40464332]] [33.64306133]
New:  [[-9.71290499 -6.06071693  2.2064412 ]] [26.20185113]
New:  [[ 0.93632126 -7.17977604 -3.36485122]] [27.84422792]
New:  [[-7.83733242  6.47301951 -3.2667506 ]] [9.90248156]
New:  [[11.01765946 -2.46205395 10.75787121]] [16.3295543]
New:  [[-5.33854053 -5.62966495 -5.91469387]] [-11.93394531]
Params: {'batch_size': 40, 'client_fraction': 1, 'epoch': 1, 'init_weight': [array([[0., 0., 0.]]), array([0.])], 'num_rounds': 10}
We