# Federated MNIST Experiment

In [1]:
#Install dependencies if not already installed
!pip install tensorflow mnist

You should consider upgrading via the '/home/pfoley1/fl_venv/bin/python3 -m pip install --upgrade pip' command.


In [2]:
import numpy as np
import mnist
import tensorflow      as tf
import tensorflow.keras as keras
from tensorflow.keras import backend as K
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv2D, Flatten, Dense
from tensorflow.keras.utils import to_categorical

import fledge.native as fx
from fledge.federated import FederatedModel,FederatedDataSet


After importing the required packages, the next step is setting up our fledge workspace. To do this, simply run the `fx.init()` command as follows:

In [3]:
#Setup default workspace, logging, etc.
#fx.init() is given the name 'keras_cnn_mnist'; in the recorded run it created the
#workspace directories, certifications and templates, and installed the workspace requirements.
fx.init('keras_cnn_mnist')

Creating Workspace Directories
Creating Workspace Certifications
Creating Workspace Templates
Successfully installed packages from /home/pfoley1/.local/workspace/requirements.txt.

New workspace directory structure:
workspace
├── plan
│   ├── plan.yaml
│   ├── cols.yaml
│   ├── data.yaml
│   ├── plans
│   │   └── default
│   └── defaults
│       ├── collaborator.yaml
│       ├── tasks_torch.yaml
│       ├── tasks_fast_estimator.yaml
│       ├── tasks_keras.yaml
│       ├── assigner.yaml
│       ├── task_runner.yaml
│       ├── tasks_tensorflow.yaml
│       ├── network.yaml
│       ├── aggregator.yaml
│       ├── defaults
│       └── data_loader.yaml
├── data
│   ├── MNIST
│   │   ├── processed
│   │   └── raw
│   ├── cifar-10-batches-py
│   │   ├── data_batch_1
│   │   ├── test_batch
│   │   ├── data_batch_3
│   │   ├── batches.meta
│   │   ├── data_batch_2
│   │   ├── data_batch_5
│   │   ├── data_batch_4
│   │   └── readme.html
│   └── cifar-10-python.tar.gz
├── save
│   ├── last.pbu

We can see the current plan settings by running the `fx.get_plan()` command:

In [None]:
#Show the active plan as pretty-printed JSON with sorted keys.
#Any of these values can be overridden when the experiment is run.
import json
print(json.dumps(fx.get_plan(), sort_keys=True, indent=4))

Now we are ready to define our dataset and model to perform federated learning on. The dataset should be composed of numpy arrays. We start with a simple fully connected model that is trained on the MNIST dataset.

In [4]:
#Load the MNIST training and validation splits; labels are passed through to_categorical
train_images, train_labels = mnist.train_images(), to_categorical(mnist.train_labels())
valid_images, valid_labels = mnist.test_images(), to_categorical(mnist.test_labels())

def preprocess(images):
    """Scale pixel values and flatten every image into a 784-element row.

    Args:
        images: Array of pixel intensities whose total size is a multiple of 784.

    Returns:
        A new 2-D array of shape (-1, 784) holding `images / 255 - 0.5`.
    """
    #Map intensities from [0, 255] to [-0.5, 0.5]
    centered = images / 255 - 0.5
    #One row of 784 features per image
    return centered.reshape((-1, 784))

# Normalize and flatten both image sets.
train_images, valid_images = preprocess(train_images), preprocess(valid_images)

# Number of features per (flattened) sample and number of label classes.
feature_shape = train_images.shape[-1]
classes = 10

# Wrap the training/validation arrays in a FederatedDataSet.
fl_data = FederatedDataSet(
    train_images,
    train_labels,
    valid_images,
    valid_labels,
    batch_size=32,
    num_classes=classes,
)

def build_model(feature_shape,classes):
    """Build and compile the fully connected MNIST classifier.

    Args:
        feature_shape: Shape of a single input sample. May be a tuple/list
            such as ``(784,)`` or a bare feature count such as ``784``.
        classes: Number of output classes (width of the final softmax layer).

    Returns:
        A compiled Keras ``Sequential`` model with two 64-unit ReLU hidden
        layers and a softmax output, using the Adam optimizer, categorical
        cross-entropy loss and the accuracy metric.
    """
    #Keras documents `input_shape` as a tuple; accept a plain feature count as
    #well so that e.g. `train_images.shape[1]` can be passed directly.
    if isinstance(feature_shape, (int, np.integer)):
        feature_shape = (int(feature_shape),)

    #Defines the MNIST model
    model = Sequential()
    model.add(Dense(64, input_shape=feature_shape, activation='relu'))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(classes, activation='softmax'))

    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [5]:

#Create a federated model using the build model function and dataset.
#build_model is passed as a function (not called here); fl_data is supplied as the data loader.
fl_model = FederatedModel(build_model,data_loader=fl_data)

The `FederatedModel` object is a wrapper around your Keras, TensorFlow or PyTorch model that makes it compatible with fledge. It provides built-in federated training and validation functions that we will see used below. Using its `setup` function, collaborator models and datasets can be automatically defined for the experiment. 

In [6]:
#Create one model (with its own data) per collaborator, then give each a name
collaborator_models = fl_model.setup(num_collaborators=2)
collaborators = {
    'one': collaborator_models[0],
    'two': collaborator_models[1],
}

In [7]:
#Original MNIST dataset
print(f'Original training data size: {len(train_images)}')
print(f'Original validation data size: {len(valid_images)}\n')

#Report the data held by every collaborator. Iterating over the `collaborators`
#dict keeps this cell correct when collaborators are added or removed, instead
#of needing one hand-copied pair of print statements per collaborator.
for collaborator_name, collaborator_model in collaborators.items():
    loader = collaborator_model.data_loader
    print(f'Collaborator {collaborator_name}\'s training data size: {len(loader.X_train)}')
    print(f'Collaborator {collaborator_name}\'s validation data size: {len(loader.X_valid)}\n')

Original training data size: 60000
Original validation data size: 10000

Collaborator one's training data size: 30000
Collaborator one's validation data size: 5000

Collaborator two's training data size: 30000
Collaborator two's validation data size: 5000



Any of the fields returned by `fx.get_plan()` can be overridden when running the experiment. Simply set the `override_config` parameter to the dictionary of fields you wish to change.

In [8]:
#Run the experiment with the plan's rounds_to_train overridden to 5;
#the return value is the trained FederatedModel
final_fl_model = fx.run_experiment(
    collaborators,
    override_config={"aggregator.settings.rounds_to_train": 5},
)

In [9]:
#Save final model (the FederatedModel returned by fx.run_experiment) under the name 'final_model'
#NOTE(review): presumably written in the underlying framework's native format, relative to the current working directory - confirm
final_fl_model.save_native('final_model')