In [1]:
import tensorflow as tf
print(tf.__version__)
import sys
print('Python version')
print(sys.version)


2.2.0
Python version
3.6.9 (default, Oct  8 2020, 12:12:24) 
[GCC 8.4.0]


In [2]:

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import cifar100
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing import image

from tensorflow.keras.models import Sequential
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, AveragePooling2D
from tensorflow.keras.layers import Activation, Dense, Flatten, Dropout
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers import SGD

from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split


from matplotlib import pyplot

import numpy as np
import pandas as pd

In [3]:
# load dataset
(trainX, trainy), (testX, testy) = cifar100.load_data(label_mode="fine")

In [4]:
trainX.shape, trainy.shape,testX.shape,testy.shape

((50000, 32, 32, 3), (50000, 1), (10000, 32, 32, 3), (10000, 1))

In [5]:
# Info on y labels. 
trainy_label=pd.Series(trainy.ravel())
trainy_label.unique(),trainy_label.value_counts()

(array([19, 29,  0, 11,  1, 86, 90, 28, 23, 31, 39, 96, 82, 17, 71,  8, 97,
        80, 74, 59, 70, 87, 84, 64, 52, 42, 47, 65, 21, 22, 81, 24, 78, 45,
        49, 56, 76, 89, 73, 14,  9,  6, 20, 98, 36, 55, 72, 43, 51, 35, 83,
        33, 27, 53, 92, 50, 15, 18, 46, 75, 38, 66, 77, 69, 95, 99, 93,  4,
        61, 94, 68, 34, 32, 88, 67, 30, 62, 63, 40, 26, 48, 79, 85, 54, 44,
         7, 12,  2, 41, 37, 13, 25, 10, 57,  5, 60, 91,  3, 58, 16]),
 95    500
 74    500
 39    500
 71    500
 8     500
      ... 
 85    500
 22    500
 54    500
 86    500
 0     500
 Length: 100, dtype: int64)

In [6]:
# Separate out three classes out of 100
def sep_class(trainx,trainy,y):
    class_x=[]
    for i in range(len(trainx)):
        if trainy[i]==y:
           #Normalise by 255
            class_x.append(trainx[i]/255)
        y_label=y*np.ones((len(class_x),1))
    return (np.array(class_x),np.array(y_label))

In [7]:
#Separate out three classes out of 100   
trainX0,trainy0=sep_class(trainX,trainy,0)
testX0,testy0=sep_class(testX,testy,0)

#Separate out one classes out of 100   
trainX1,trainy1=sep_class(trainX,trainy,1)
testX1,testy1=sep_class(testX,testy,1)

#Separate out 2 class out of 100   
trainX2,trainy2=sep_class(trainX,trainy,2)
testX2,testy2=sep_class(testX,testy,2)

# ----combining training and test data together---
dataX=np.concatenate((trainX0,testX0,trainX1,testX1,trainX2,testX2)) # concatenating both train and test togther
datay=np.concatenate((trainy0,testy0,trainy1,testy1,trainy2,testy2))

# training samples
dataXshuffle=list(zip(dataX,datay))
np.random.shuffle(dataXshuffle)

dataXn,datayn=zip(*dataXshuffle)
dataXn=np.array(dataXn)
datayn=np.array(datayn)
dataXn.shape, datayn.shape

((1800, 32, 32, 3), (1800, 1))

In [8]:
#split data into training and test set
trainXt,testXt,trainy,testy = train_test_split(dataXn,datayn,test_size=0.1,random_state=42)
print(trainXt.shape,testXt.shape)
trainX=trainXt.reshape(-1,32*32*3)
testX=testXt.reshape(-1,32*32*3)
print(trainX.shape,testX.shape, trainy.shape, testy.shape)

(1620, 32, 32, 3) (180, 32, 32, 3)
(1620, 3072) (180, 3072) (1620, 1) (180, 1)


In [9]:
trainy=keras.utils.to_categorical(trainy,3)
testy=keras.utils.to_categorical(testy,3)


In [10]:

test_batched = tf.data.Dataset.from_tensor_slices((list(testX),list(testy))).batch(len(testy))


In [11]:
def create_clients(image_list, label_list, num_clients=3, initial='clients'):
    ''' return: a dictionary with keys clients' names and value as 
                data shards - tuple of images and label lists.
        args: 
            image_list: a list of numpy arrays of training images
            label_list:a list of binarized labels for each image
            num_client: number of fedrated members (clients)
            initials: the clients'name prefix, e.g, clients_1 
            
    '''

    #create a list of client names
    client_names = ['{}_{}'.format(initial, i+1) for i in range(num_clients)]

    #randomize the data
    data = list(zip(image_list, label_list))
    np.random.shuffle(data)

    #shard data and place at each client
    size = len(data)//num_clients
    shards = [data[i:i + size] for i in range(0, size*num_clients, size)]

    #number of clients must equal number of shards
    assert(len(shards) == len(client_names))
    #clientN={'client_names[0]': shards[0]}
    #return clientN
    return {client_names[i] : shards[i] for i in range(len(client_names))} 

In [12]:
#create clients
clients = create_clients(trainX,trainy, num_clients=3, initial='client')
clients.keys()

dict_keys(['client_1', 'client_2', 'client_3'])

In [13]:
for key, value in clients.items():
    print('key:',key,len(value))

key: client_1 540
key: client_2 540
key: client_3 540


In [14]:
def batch_data(data_shard, bs=24):
    '''Takes in a clients data shard and create a tfds object off it
    args:
        shard: a data, label constituting a client's data shard
        bs:batch size
    return:
        tfds object'''
    #seperate shard into data and labels lists
    data, label = zip(*data_shard)
    dataset = tf.data.Dataset.from_tensor_slices((list(data), list(label)))
    return dataset.shuffle(len(label)).batch(bs)




In [15]:
clients_batched = dict()
for (client_name, data) in clients.items():
    clients_batched[client_name] = batch_data(data)

In [16]:
clients_batched.items()

dict_items([('client_1', <BatchDataset shapes: ((None, 3072), (None, 3)), types: (tf.float64, tf.float32)>), ('client_2', <BatchDataset shapes: ((None, 3072), (None, 3)), types: (tf.float64, tf.float32)>), ('client_3', <BatchDataset shapes: ((None, 3072), (None, 3)), types: (tf.float64, tf.float32)>)])

In [17]:
clients_batched['client_1'].element_spec


(TensorSpec(shape=(None, 3072), dtype=tf.float64, name=None),
 TensorSpec(shape=(None, 3), dtype=tf.float32, name=None))

In [18]:
from tensorflow.keras.optimizers import SGD
class SimpleMLP:
    @staticmethod
    def build(shape, classes):
        model = Sequential()
        model.add(Dense(500, input_shape=(shape,)))
        model.add(Activation("relu"))
        model.add(Dense(300))
        model.add(Activation("relu"))
        model.add(Dense(classes))
        model.add(Activation("softmax"))
        return model

In [19]:
lr = 0.01 
comms_round = 25
loss='categorical_crossentropy'
metrics = ['accuracy']
optimizer = SGD(lr=lr, 
                decay=lr / comms_round, 
                momentum=0.9
               )        

In [20]:

def weight_scalling_factor(clients_trn_data, client_name):
    client_names = list(clients_trn_data.keys())
    #get the bs
    bs = list(clients_trn_data[client_name])[0][0].shape[0]
    #first calculate the total training data points across clinets
    global_count = sum([tf.data.experimental.cardinality(clients_trn_data[client_name]).numpy() for client_name in client_names])*bs
    # get the total number of data points held by a client
    local_count = tf.data.experimental.cardinality(clients_trn_data[client_name]).numpy()*bs
    return local_count/global_count


In [21]:
def scale_model_weights(weight, scalar):
    '''function for scaling a models weights'''
    weight_final = []
    steps = len(weight)
    for i in range(steps):
        weight_final.append(scalar * weight[i])
    return weight_final

In [22]:
def sum_scaled_weights(scaled_weight_list):
    '''Return the sum of the listed scaled weights. The is equivalent to scaled avg of the weights'''
    avg_grad = list()
    #get the average grad accross all client gradients
    for grad_list_tuple in zip(*scaled_weight_list):
        layer_mean = tf.math.reduce_sum(grad_list_tuple, axis=0)        
        avg_grad.append(layer_mean)
        
    return avg_grad


In [23]:

def test_model(X_test, Y_test,  model, comm_round):
    cce = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
    #logits = model.predict(X_test, batch_size=100)
    logits = model.predict(X_test)
    loss = cce(Y_test, logits)
    acc = accuracy_score(tf.argmax(logits, axis=1), tf.argmax(Y_test, axis=1))
    print('comm_round: {} | global_acc: {:.3%} | global_loss: {}'.format(comm_round, acc, loss))
    return acc, loss

In [24]:
#initialize global model
smlp_global = SimpleMLP()
global_model = smlp_global.build(32*32*3, 3)
        
#commence global training loop
for comm_round in range(comms_round):
            
    # get the global model's weights - will serve as the initial weights for all local models
    global_weights = global_model.get_weights()
    #print(len(global_weights))
    
    #initial list to collect local model weights after scalling
    scaled_local_weight_list = list()

    #randomize client data - using keys
    client_names= list(clients_batched.keys())
    #print(client_names)
    np.random.shuffle(client_names)
    
    #loop through each client and create new local model
    for client in client_names:
        smlp_local = SimpleMLP()
        local_model = smlp_local.build(32*32*3, 3)
        local_model.compile(loss=loss, 
                      optimizer=optimizer, 
                      metrics=metrics)
        
        #set local model weight to the weight of the global model
        local_model.set_weights(global_weights)
        
        #fit local model with client's data
        local_model.fit(clients_batched[client], epochs=1, verbose=0)
        
        #scale the model weights and add to list
        scaling_factor = weight_scalling_factor(clients_batched, client)
        scaled_weights = scale_model_weights(local_model.get_weights(), scaling_factor)
        scaled_local_weight_list.append(scaled_weights)
        from tensorflow.keras import backend as K
        #clear session to free memory after each communication round
        K.clear_session()
        
    #to get the average over all the local model, we simply take the sum of the scaled weights
    average_weights = sum_scaled_weights(scaled_local_weight_list)
    
    #update global model 
    global_model.set_weights(average_weights)

    #test global model and print out metrics after each communications round
    for(X_test, Y_test) in test_batched:
        global_acc, global_loss = test_model(X_test, Y_test, global_model, comm_round)

comm_round: 0 | global_acc: 65.000% | global_loss: 0.9423385858535767
comm_round: 1 | global_acc: 70.556% | global_loss: 0.8979196548461914
comm_round: 2 | global_acc: 70.000% | global_loss: 0.8777601718902588
comm_round: 3 | global_acc: 70.000% | global_loss: 0.8953971862792969
comm_round: 4 | global_acc: 72.778% | global_loss: 0.8568248152732849
comm_round: 5 | global_acc: 66.667% | global_loss: 0.8861738443374634
comm_round: 6 | global_acc: 71.111% | global_loss: 0.8562011122703552
comm_round: 7 | global_acc: 75.000% | global_loss: 0.8331377506256104
comm_round: 8 | global_acc: 74.444% | global_loss: 0.8322610855102539
comm_round: 9 | global_acc: 76.667% | global_loss: 0.8255817890167236
comm_round: 10 | global_acc: 73.889% | global_loss: 0.83018559217453
comm_round: 11 | global_acc: 75.000% | global_loss: 0.819553792476654
comm_round: 12 | global_acc: 75.000% | global_loss: 0.8259878158569336
comm_round: 13 | global_acc: 78.889% | global_loss: 0.8108406662940979
comm_round: 14 | gl