<a href="https://colab.research.google.com/github/ssvakil/Machine-Learning-in-Python-Workshop/blob/master/Federated_Learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from keras.datasets import mnist
(train_X,train_y) , (test_X,test_Y) = mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


## The accuracy of training on  centerilized learning

In [2]:
import numpy as np
from keras.utils import to_categorical
def prepare_data(X,Y):
    X = X.astype("float32") / 255
    X = np.expand_dims(X, -1)
    print("X:", X.shape)

    # convert class vectors to binary class matrices
    Y = to_categorical(Y, 10)
    return X,Y
from tensorflow.keras import Input,Sequential
from tensorflow.keras import layers
def create_model():
    model = Sequential(
    [
        Input(shape=(28,28,1)),
        layers.Conv2D(32, kernel_size=(3, 3), activation="relu"),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Flatten(),
        layers.Dropout(0.5),
        layers.Dense(10, activation="softmax"),
    ]
    )
    model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
    return model


In [3]:
model = create_model()
model.fit(*prepare_data(train_X,train_y))
model.evaluate(*prepare_data(test_X,test_Y))

X: (60000, 28, 28, 1)
X: (10000, 28, 28, 1)


[0.05634487420320511, 0.9828000068664551]

# What if data is not gathered in  single location?

In [4]:
from sklearn.model_selection import train_test_split
def split_data(X,Y,split):
    parts = []
    part_len = len(X)//split
    for i in range(split):
        parts.append((X[i*part_len:(i+1)*part_len],Y[i*part_len:(i+1)*part_len]))
    return parts
splitted_data = split_data(train_X,train_y,3)

In [5]:
len(splitted_data[0][0])

20000

In [6]:
from matplotlib import pyplot
def showIMG(x):
    pyplot.imshow(x, cmap=pyplot.get_cmap('gray'))


In [7]:
models = []
for i in range(3):
    models.append(create_model())


In [8]:
for i in range(len(models)):
    models[i].fit(*prepare_data(splitted_data[i][0],splitted_data[i][1]))


X: (20000, 28, 28, 1)
X: (20000, 28, 28, 1)
X: (20000, 28, 28, 1)


In [9]:
for i in range(len(models)):
    models[i].evaluate(*prepare_data(test_X,test_Y))

X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)


## Even worse if data parts are none Independent and identically distributed

In [10]:
def split_data_nonIID(X,Y,split):
    parts = []
    numbers = [i for i in range(10)]
    part_len = len(numbers)//split
    for i in range(split):
        current_numbers = numbers[i*part_len:(i+1)*part_len]
        print(current_numbers)
        parts.append((np.array([x for x,y in zip(X,Y) if y in current_numbers]),
                      np.array([y for x,y in zip(X,Y) if y in current_numbers])))
    return parts
splitted_data = split_data_nonIID(train_X,train_y,3)

[0, 1, 2]
[3, 4, 5]
[6, 7, 8]


In [11]:
models = []
for i in range(3):
    models.append(create_model())


In [12]:
for i in range(len(models)):
    models[i].fit(*prepare_data(splitted_data[i][0],splitted_data[i][1]))


X: (18623, 28, 28, 1)
X: (17394, 28, 28, 1)
X: (18034, 28, 28, 1)


In [13]:
for i in range(len(models)):
    models[i].evaluate(*prepare_data(test_X,test_Y))

X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)


# Federated Learning

In [14]:
class Fed_Agent:
    def __init__(self,X,Y):
        self.local_X = X
        self.local_Y = Y
        self.create_local_model()
    def create_local_model(self):
        self.model = create_model()
    def local_train(self):
        self.model.fit(*prepare_data(self.local_X,self.local_Y))
    def evaluate(self,test_X,test_Y):
        self.model.evaluate(*prepare_data(test_X,test_Y))
    def get_local_weights(self):
        return self.model.get_weights()
    def set_local_weights(self,global_model):
        self.model.set_weights(global_model.get_weights())

class Aggregator:
    def Aggregate(self,agents):
        #Fed_AVG
        new_weights = []
        for layer in range(len(agents[0].get_local_weights())):
            layer_weights = []
            for agent in agents:
                layer_weights.append(agent.get_local_weights()[layer])
            layer_avg = np.average(layer_weights,axis=0)

            new_weights.append(layer_avg)
        global_model = create_model()
        global_model.set_weights(new_weights)
        return global_model


In [15]:
no = 3
aggregator = Aggregator()
splitted_data = split_data(train_X,train_y,no)
agents = [Fed_Agent(*data_part) for data_part in splitted_data]
for epoch in range(10):
    ### TRAIN
    print('*'*10,'TRAINING')
    for agent in agents:
        agent.local_train()
    print('*'*10,'EVALUATING BEFORE AGREGATION')
    for agent in agents:
        agent.evaluate(test_X,test_Y)
    global_model = aggregator.Aggregate(agents)
    for agent in agents:
        agent.set_local_weights(global_model)
    print('*'*10,'EVALUATING AFTER AGREGATION')
    for agent in agents:
        agent.evaluate(test_X,test_Y)
    

********** TRAINING
X: (20000, 28, 28, 1)
X: (20000, 28, 28, 1)
X: (20000, 28, 28, 1)
********** EVALUATING BEFORE AGREGATION
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
********** EVALUATING AFTER AGREGATION
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
********** TRAINING
X: (20000, 28, 28, 1)
X: (20000, 28, 28, 1)
X: (20000, 28, 28, 1)
********** EVALUATING BEFORE AGREGATION
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
********** EVALUATING AFTER AGREGATION
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
********** TRAINING
X: (20000, 28, 28, 1)
X: (20000, 28, 28, 1)
X: (20000, 28, 28, 1)
********** EVALUATING BEFORE AGREGATION
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
********** EVALUATING AFTER AGREGATION
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
********** TRAINING
X: (20000, 28, 28, 1)
X: (20000, 28, 28, 1)
X: (20000, 28, 28, 1)
********** EVALUATING B

# Federated learning on NonIID

In [16]:
no=5
splitted_data = split_data_nonIID(train_X,train_y,no)
aggregator = Aggregator()
agents = [Fed_Agent(*data_part) for data_part in splitted_data]
for epoch in range(10):
    ### TRAIN
    print('*'*10,'TRAINING')
    for agent in agents:
        agent.local_train()
    print('*'*10,'EVALUATING BEFORE AGREGATION')
    for agent in agents:
        agent.evaluate(test_X,test_Y)
    global_model = aggregator.Aggregate(agents)
    for agent in agents:
        agent.set_local_weights(global_model)
    print('*'*10,'EVALUATING AFTER AGREGATION')
    for agent in agents:
        agent.evaluate(test_X,test_Y)
    

[0, 1]
[2, 3]
[4, 5]
[6, 7]
[8, 9]
********** TRAINING
X: (12665, 28, 28, 1)
X: (12089, 28, 28, 1)
X: (11263, 28, 28, 1)
X: (12183, 28, 28, 1)
X: (11800, 28, 28, 1)
********** EVALUATING BEFORE AGREGATION
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
********** EVALUATING AFTER AGREGATION
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
********** TRAINING
X: (12665, 28, 28, 1)
X: (12089, 28, 28, 1)
X: (11263, 28, 28, 1)
X: (12183, 28, 28, 1)
X: (11800, 28, 28, 1)
********** EVALUATING BEFORE AGREGATION
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
********** EVALUATING AFTER AGREGATION
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
********** TRAINING
X: (12665, 28, 28, 1)
X: (12089, 28, 28, 1)
X: (11263, 28, 28, 1)
X: (12183, 28, 28, 1)

# Attacking federated learning

## Data Poisoing

In [17]:
class Attacker(Fed_Agent):
    def __init__(self,X,Y):
        self.local_X = X
        self.local_Y = np.array([(y+1)%10 for y in Y])
        self.create_local_model()

In [18]:
no=3
splitted_data = split_data(train_X,train_y,no)
aggregator = Aggregator()
agents = [Fed_Agent(*data_part) for data_part in splitted_data]
agents[-1] = Attacker(*splitted_data[-1])
for epoch in range(10):
    ### TRAIN
    print('*'*10,'TRAINING')
    for agent in agents:
        agent.local_train()
    print('*'*10,'EVALUATING BEFORE AGREGATION')
    for agent in agents:
        agent.evaluate(test_X,test_Y)
    global_model = aggregator.Aggregate(agents)
    for agent in agents:
        agent.set_local_weights(global_model)
    print('*'*10,'EVALUATING AFTER AGREGATION')
    for agent in agents:
        agent.evaluate(test_X,test_Y)
    

********** TRAINING
X: (20000, 28, 28, 1)
X: (20000, 28, 28, 1)
X: (20000, 28, 28, 1)
********** EVALUATING BEFORE AGREGATION
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
********** EVALUATING AFTER AGREGATION
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
********** TRAINING
X: (20000, 28, 28, 1)
X: (20000, 28, 28, 1)
X: (20000, 28, 28, 1)
********** EVALUATING BEFORE AGREGATION
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
********** EVALUATING AFTER AGREGATION
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
********** TRAINING
X: (20000, 28, 28, 1)
X: (20000, 28, 28, 1)
X: (20000, 28, 28, 1)
********** EVALUATING BEFORE AGREGATION
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
********** EVALUATING AFTER AGREGATION
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
********** TRAINING
X: (20000, 28, 28, 1)
X: (20000, 28, 28, 1)
X: (20000, 28, 28, 1)
********** EVALUATING B

## Model Poisoning

In [None]:
# aslan deta nadere har bar ye model tasadofi dorost mikone

In [19]:

class Attacker(Fed_Agent):
    def local_train(self):
        print('HAHA. I do nothing for traning')
    def get_local_weights(self):
        weights = super().get_local_weights()
        new_weights = [np.random.rand(*w.shape) for w in weights]
        return new_weights


In [20]:
no=3
splitted_data = split_data(train_X,train_y,no)
aggregator = Aggregator()
agents = [Fed_Agent(*data_part) for data_part in splitted_data]
agents[-1] = Attacker(*splitted_data[-1])
for epoch in range(10):
    ### TRAIN
    print('*'*10,'TRAINING')
    for agent in agents:
        agent.local_train()
    print('*'*10,'EVALUATING BEFORE AGREGATION')
    for agent in agents:
        agent.evaluate(test_X,test_Y)
    global_model = aggregator.Aggregate(agents)
    for agent in agents:
        agent.set_local_weights(global_model)
    print('*'*10,'EVALUATING AFTER AGREGATION')
    for agent in agents:
        agent.evaluate(test_X,test_Y)
    

********** TRAINING
X: (20000, 28, 28, 1)
X: (20000, 28, 28, 1)
HAHA. I do nothing for traning
********** EVALUATING BEFORE AGREGATION
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
********** EVALUATING AFTER AGREGATION
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
********** TRAINING
X: (20000, 28, 28, 1)
X: (20000, 28, 28, 1)
HAHA. I do nothing for traning
********** EVALUATING BEFORE AGREGATION
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
********** EVALUATING AFTER AGREGATION
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
********** TRAINING
X: (20000, 28, 28, 1)
X: (20000, 28, 28, 1)
HAHA. I do nothing for traning
********** EVALUATING BEFORE AGREGATION
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
********** EVALUATING AFTER AGREGATION
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
********** TRAINING
X: (20000, 28, 28, 1)
X: (20000, 28, 28, 1)
HAHA. I do nothing

## selfish attacker 

In [None]:
#hich data dar ekhtiar nadaram faghat tazahor mikonam

In [21]:


class Attacker(Fed_Agent):
    def __init__(self):
        self.create_local_model()
    def create_local_model(self):
        self.model = create_model()
    def local_train(self):
        print('.....IM TRAINING. wait.........')
    def evaluate(self,test_X,test_Y):
        self.model.evaluate(*prepare_data(test_X,test_Y))
    def get_local_weights(self):
        return self.model.get_weights()
    def set_local_weights(self,global_model):
        self.model.set_weights(global_model.get_weights())


In [22]:
no=3
splitted_data = split_data(train_X,train_y,no)
aggregator = Aggregator()
agents = [Fed_Agent(*data_part) for data_part in splitted_data]
agents.append(Attacker())
for epoch in range(10):
    ### TRAIN
    print('*'*10,'TRAINING')
    for agent in agents:
        agent.local_train()
    print('*'*10,'EVALUATING BEFORE AGREGATION')
    for agent in agents:
        agent.evaluate(test_X,test_Y)
    global_model = aggregator.Aggregate(agents)
    for agent in agents:
        agent.set_local_weights(global_model)
    print('*'*10,'EVALUATING AFTER AGREGATION')
    for agent in agents:
        agent.evaluate(test_X,test_Y)
    

********** TRAINING
X: (20000, 28, 28, 1)
X: (20000, 28, 28, 1)
X: (20000, 28, 28, 1)
.....IM TRAINING. wait.........
********** EVALUATING BEFORE AGREGATION
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
********** EVALUATING AFTER AGREGATION
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
********** TRAINING
X: (20000, 28, 28, 1)
X: (20000, 28, 28, 1)
X: (20000, 28, 28, 1)
.....IM TRAINING. wait.........
********** EVALUATING BEFORE AGREGATION
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
********** EVALUATING AFTER AGREGATION
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
********** TRAINING
X: (20000, 28, 28, 1)
X: (20000, 28, 28, 1)
X: (20000, 28, 28, 1)
.....IM TRAINING. wait.........
********** EVALUATING BEFORE AGREGATION
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
X: (10000, 28, 28, 1)
********