# Federated MXNet Landmarks Tutorial


In [None]:
# Install dependencies if not already installed
!pip install -r requirements.txt

## Connect to the Federation

In [None]:
# Create a federation
from openfl.interface.interactive_api.federation import Federation

# Please use the same identifier that was used in the signed certificate
client_id = "api"
cert_dir = "cert"
director_node_fqdn = "localhost"
# 1) Run with API layer - Director mTLS
# To enable mTLS, the user must provide the CA root chain and a signed key pair to the federation interface
# cert_chain = f'{cert_dir}/root_ca.crt'
# api_certificate = f'{cert_dir}/{client_id}.crt'
# api_private_key = f'{cert_dir}/{client_id}.key'

# federation = Federation(client_id=client_id,
#                         director_node_fqdn=director_node_fqdn,
#                         director_port='50051',
#                         cert_chain=cert_chain,
#                         api_cert=api_certificate,
#                         api_private_key=api_private_key)

# --------------------------------------------------------------------------------------------------------------------

# 2) Run with TLS disabled (trusted environment)
# Federation can also determine local fqdn automatically
# This is the variant that is actually executed; cert_dir is only used by the commented-out mTLS variant above.
federation = Federation(
    client_id=client_id,
    director_node_fqdn=director_node_fqdn,
    director_port="50051",
    tls=False,
)

In [None]:
# Fetch the shard registry from the federation; the bare expression displays it as the cell output
shard_registry = federation.get_shard_registry()
shard_registry

In [None]:
# Display the target shape exposed by the federation object
federation.target_shape

In [None]:
# First, request a dummy_shard_desc that holds information about the federated dataset
dummy_shard_desc = federation.get_dummy_shard_descriptor(size=10)
# Take the "train" split of the dummy descriptor and unpack its first (sample, target) pair
dummy_shard_dataset = dummy_shard_desc.get_dataset("train")
sample, target = dummy_shard_dataset[0]
# Display the shapes of that sample and target as the cell output
f"Sample shape: {sample.shape}, target shape: {target.shape}"

## Describing FL experiment

In [None]:
from openfl.interface.interactive_api.experiment import (
    DataInterface,
    FLExperiment,
    ModelInterface,
    TaskInterface,
)

In [None]:
import mxnet as mx
import numpy as np
import pandas as pd
import tqdm
from matplotlib import pyplot as plt
from mxnet.gluon import data as gdata
from mxnet.gluon import loss as gloss
from mxnet.gluon import nn

### Describe a model and optimizer

In [None]:
"""
MXNet model definition
"""
# Three Conv2D -> BatchNorm -> MaxPool2D stages (64, 128, 256 channels) followed by
# Flatten and a Dense(64) -> ReLU -> Dropout -> Dense(30) head.
model = nn.Sequential()
model.add(
    nn.Conv2D(channels=64, kernel_size=3, padding=1, activation="relu"),
    nn.BatchNorm(),
    nn.MaxPool2D(),
    nn.Conv2D(channels=128, kernel_size=3, padding=1, activation="relu"),
    nn.BatchNorm(),
    nn.MaxPool2D(),
    nn.Conv2D(channels=256, kernel_size=3, padding=1, activation="relu"),
    nn.BatchNorm(),
    nn.MaxPool2D(),
    nn.Flatten(),
    nn.Dense(64),
    nn.Activation("relu"),
    nn.Dropout(rate=0.005),
    # 30 outputs; presumably 15 (x, y) keypoint pairs, since predictions are later
    # reshaped to (-1, 2) for plotting -- TODO confirm against the shard descriptor
    nn.Dense(30),
)

# (Re)initialize all parameters with the Xavier initializer
model.initialize(force_reinit=True, ctx=None, init=mx.init.Xavier())
# Dummy input of shape (1, 1, 96, 96), filled with ones
model(
    mx.nd.ones((1, 1, 96, 96), ctx=None)
)  # first forward pass for weight initialization

In [None]:
# optimizer: Adam wrapped in a Gluon Trainer that is given all parameters of the model
optimizer = mx.optimizer.Adam(learning_rate=0.001)
trainer = mx.gluon.Trainer(model.collect_params(), optimizer=optimizer)
# loss function (also the default loss_fn of the train task defined later)
loss_fn = gloss.L2Loss()

### Register model

In [None]:
# Framework plugin, given to ModelInterface as a dotted-path string
framework_adapter = "mxnet_adapter.FrameworkAdapterPlugin"

# Note: the Gluon Trainer (not the raw Adam optimizer object) is what gets registered as the optimizer
MI = ModelInterface(model=model, optimizer=trainer, framework_plugin=framework_adapter)

### Register dataset

In [None]:
class LandmarkShardDataset(gdata.Dataset):
    """Gluon ``Dataset`` adapter around a shard dataset object.

    Length queries and item lookups are forwarded to the wrapped dataset.
    """

    def __init__(self, dataset):
        """Keep a reference to the wrapped ``dataset``."""
        self._dataset = dataset

    def __len__(self):
        """Return the wrapped dataset's length, also storing it on ``self.filelength``."""
        length = len(self._dataset)
        self.filelength = length
        return length

    def __getitem__(self, idx):
        """Return whatever the wrapped dataset holds at ``idx``."""
        item = self._dataset[idx]
        return item


class LandmarkShardDescriptor(DataInterface):
    """Data interface that splits a shard's "train" dataset into train and validation parts.

    The split is computed when ``shard_descriptor`` is assigned: the trailing
    ``validation_size`` samples are reserved for validation and the samples
    before them are used for training.
    """

    def __init__(self, validation_fraction=1 / 5, **kwargs):
        """Store ``validation_fraction``; all other keyword arguments go to ``DataInterface``."""
        super().__init__(**kwargs)
        self.validation_fraction = validation_fraction

    @property
    def shard_descriptor(self):
        """Shard descriptor most recently assigned to this interface."""
        return self._shard_descriptor

    @shard_descriptor.setter
    def shard_descriptor(self, shard_descriptor):
        """
        Describe per-collaborator procedures or sharding.

        This method will be called during a collaborator initialization.
        Local shard_descriptor will be set by Envoy.
        """
        self._shard_descriptor = shard_descriptor
        train_split = shard_descriptor.get_dataset("train")
        self._shard_dataset = LandmarkShardDataset(train_split)

        total_samples = len(self._shard_dataset)
        # Always hold out at least one sample for validation.
        held_out = int(total_samples * self.validation_fraction)
        self.validation_size = max(1, held_out)

        # Despite the name, this is a count: the number of leading samples used for training.
        self.train_indexes = total_samples - self.validation_size
        # Passed positionally to SequentialSampler in get_valid_loader
        # (length, start per the MXNet API -- confirm for the installed version).
        self.val_indexes = [self.validation_size, self.train_indexes]

    def get_train_loader(self):
        """
        Output of this method will be provided to tasks with optimizer in contract
        """
        shuffled_train = gdata.RandomSampler(self.train_indexes)
        batch_size = self.kwargs["train_bs"]
        return gdata.DataLoader(
            self._shard_dataset,
            batch_size=batch_size,
            sampler=shuffled_train,
            last_batch="keep",
        )

    def get_valid_loader(self):
        """
        Output of this method will be provided to tasks without optimizer in contract
        """
        ordered_valid = gdata.SequentialSampler(*self.val_indexes)
        batch_size = self.kwargs["valid_bs"]
        return gdata.DataLoader(
            self._shard_dataset,
            batch_size=batch_size,
            sampler=ordered_valid,
            last_batch="keep",
        )

    def get_train_data_size(self):
        """
        Information for aggregation
        """
        train_count = self.train_indexes
        return train_count

    def get_valid_data_size(self):
        """
        Information for aggregation
        """
        valid_count = self.validation_size
        return valid_count

### Create Landmarks federated dataset

In [None]:
# Batch sizes are passed as keyword arguments; the descriptor's loaders read them back
# through self.kwargs["train_bs"] and self.kwargs["valid_bs"]
train_bs, valid_bs = 64, 64
fed_dataset = LandmarkShardDescriptor(train_bs=train_bs, valid_bs=valid_bs)

## Define and register FL tasks

In [None]:
# Task interface; the train and validate functions below are registered on it via register_fl_task
TI = TaskInterface()


@TI.register_fl_task(
    model="model",
    data_loader="train_dataset",
    device="device",
    optimizer="optimizer",
    round_num="round_num",
)
def train(model, train_dataset, optimizer, round_num, device, loss_fn=loss_fn):
    """Run one pass over ``train_dataset`` and report the average batch loss.

    Args:
        model: Network that is called on each input batch.
        train_dataset: Iterable of ``(X, y)`` batches that also supports ``len()``.
        optimizer: Object whose ``step`` is called after every backward pass
            (the Gluon ``Trainer`` registered with the model interface).
        round_num: Current round number; on round 0 the optimizer's contexts
            are pointed at the selected device.
        device: Device string. Anything starting with ``"cpu"`` selects the
            CPU; otherwise the integer after ``":"`` is used as the GPU index.
        loss_fn: Loss callable applied to ``(prediction, target)``.

    Returns:
        dict: ``{"train_mse": <sum of per-batch mean losses / number of batches>}``.
    """
    device = (
        mx.cpu()
        if device.startswith("cpu")
        else mx.gpu(int(device.split(":")[1].strip()))
    )

    print("train on:", device)

    if round_num == 0:
        # NOTE(review): relies on a private Trainer attribute; presumably needed
        # because the trainer was created before the device was known.
        optimizer._contexts = [device]

    train_dataset = tqdm.tqdm(train_dataset, desc="train")
    train_sum_l = 0
    for X, y in train_dataset:
        # Insert a channel axis at position 1 before feeding the network.
        X, y = X.expand_dims(axis=1).as_in_context(device), y.as_in_context(device)
        with mx.autograd.record():
            pred = model(X)
            loss = loss_fn(pred, y).mean()
        loss.backward()
        # The loss is already averaged over the batch, so the trainer must not
        # normalize by the batch size a second time. This also removes the
        # dependency on the notebook-global ``train_bs`` and stays correct for
        # a smaller final batch (the loader uses last_batch="keep").
        optimizer.step(1)
        train_sum_l += loss.asscalar()
    train_loss = train_sum_l / len(train_dataset)
    return {
        "train_mse": train_loss,
    }


@TI.register_fl_task(model="model", data_loader="val_dataset", device="device")
def validate(model, val_dataset, device):
    """Evaluate ``model`` on ``val_dataset`` and report the average batch loss.

    ``device`` is a string: anything starting with ``"cpu"`` selects the CPU,
    otherwise the integer after ``":"`` is used as the GPU index. The loss is
    computed with the notebook-level ``loss_fn``.

    Returns a dict with the single key ``"val_mse"``.
    """
    if device.startswith("cpu"):
        ctx = mx.cpu()
    else:
        gpu_index = int(device.split(":")[1].strip())
        ctx = mx.gpu(gpu_index)

    # Accumulate the mean loss of every validation batch.
    running_loss = 0
    for features, targets in val_dataset:
        # Insert a channel axis at position 1 before feeding the network.
        features = features.expand_dims(axis=1).as_in_context(ctx)
        targets = targets.as_in_context(ctx)
        batch_loss = loss_fn(model(features), targets)
        running_loss += batch_loss.mean().asscalar()

    return {"val_mse": running_loss / len(val_dataset)}

## Time to start a federated learning experiment

In [None]:
# Create an experiment in the federation
experiment_name = "landmark_experiment"
fl_experiment = FLExperiment(federation=federation, experiment_name=experiment_name)

In [None]:
# The following command zips the workspace and python requirements to be transferred to collaborator nodes
from openfl.utilities.enum_types import DevicePolicy
from openfl.utilities.enum_types import OptTreatment

# Start the experiment with the model, task and data interfaces defined above
fl_experiment.start(
    model_provider=MI,
    task_keeper=TI,
    data_loader=fed_dataset,
    rounds_to_train=10,
    opt_treatment=OptTreatment.CONTINUE_GLOBAL,
    device_assignment_policy=DevicePolicy.CUDA_PREFERRED,
)

In [None]:
# Presumably follows the metrics reported by the running experiment -- see the OpenFL docs
fl_experiment.stream_metrics()

## Let's have a look at the results

In [None]:
import os
from zipfile import ZipFile

from kaggle.api.kaggle_api_extended import KaggleApi

In [None]:
# Download and unpack the competition test archive only when ./test does not exist yet
if not os.path.exists("./test"):
    api = KaggleApi()
    # NOTE(review): presumably requires Kaggle credentials to be configured -- confirm
    api.authenticate()
    api.competition_download_file("facial-keypoints-detection", "test.zip")
    with ZipFile("test.zip", "r") as zipobj:
        zipobj.extractall("./test")
    # The archive is removed once its contents have been extracted
    os.remove("test.zip")

In [None]:
# Fetch the last model from the experiment; it is used for the predictions plotted below
last_model = fl_experiment.get_last_model()

In [None]:
# Path of the test CSV file (a file path, despite the name), inside the ./test extraction directory
Test_Dir = "./test/test.csv"

In [None]:
def get_data(path_to_csv_file, image_shape=(96, 96)):
    """Load images and the remaining numeric columns from a CSV file.

    The CSV must contain an ``Image`` column whose cells are space-separated
    pixel values. Missing cells are forward-filled from the previous row.

    Args:
        path_to_csv_file: Path (or anything ``pandas.read_csv`` accepts).
        image_shape: ``(height, width)`` each image is reshaped to.
            Defaults to ``(96, 96)``.

    Returns:
        tuple: ``(X, y)`` where ``X`` is a float array of shape
        ``(n_rows, *image_shape)`` and ``y`` is a float array holding every
        column except ``Image``, one row per image.
    """
    # DataFrame.ffill() replaces fillna(method="ffill"), which is deprecated
    # in recent pandas releases.
    data_df = pd.read_csv(path_to_csv_file).ffill()
    labels = data_df.drop("Image", axis=1)

    # Consecutive spaces yield empty tokens; treat those as zero-valued pixels.
    images = [
        [pixel if pixel else "0" for pixel in row.split(" ")]
        for row in data_df["Image"]
    ]

    X = np.array(images, dtype="float").reshape(-1, *image_shape)
    y = labels.to_numpy(dtype="float")

    return X, y

In [None]:
test_imgs, _ = get_data(Test_Dir)  # prepare test dataset; the second return value is not used here

In [None]:
# Plot the last model's predictions for nine test images in a 3x3 grid
fig = plt.figure(figsize=(10, 10))
for i in range(9):
    ax = fig.add_subplot(3, 3, i + 1)
    # Panel i shows test_imgs[i + 1], so test_imgs[0] is skipped.
    # The image is wrapped into a one-element batch and given an extra axis at position 1.
    in_for_net = (
        mx.nd.array([test_imgs[i + 1]]).expand_dims(axis=1).as_in_context(mx.cpu())
    )
    # Reshape the flat prediction into rows of two values, used as x and y coordinates below
    pred = last_model(in_for_net)[0].asnumpy().reshape(-1, 2)
    ax.imshow(test_imgs[i + 1], cmap="gray")
    x_cords = pred[:, 0]
    y_cords = pred[:, 1]
    # plt.scatter draws on the current axes, i.e. the subplot created above
    plt.scatter(x_cords, y_cords, label='Predicted keypoints')
# The legend is attached to the last subplot and positioned via bbox_to_anchor
plt.legend(bbox_to_anchor=(2.1, 3.4), prop={'size': 12})

plt.show()