In [16]:
import numpy as np
import pandas as pd

# Tensorflow
import tensorflow as tf
import tensorflow_federated as tff

# Tensorflow Keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Conv1D
from tensorflow.keras.layers import MaxPooling1D
from tensorflow.keras.utils import to_categorical

# Asynchronous, needed for federated learning 
import nest_asyncio
nest_asyncio.apply()

# Others
import collections
import attr
import functools

np.random.seed(0)

## 1. Human activity recognition (HAR) using convolutional neural network (CNN)

Code reference : [CNN models for human activity recognition time-series classification](https://machinelearningmastery.com/cnn-models-for-human-activity-recognition-time-series-classification/)

CNN algorithm is selected based on [this blog's information](https://machinelearningmastery.com/deep-learning-models-for-human-activity-recognition/), quoted:

    "When applied to time series classification like HAR, CNN has two advantages over other models: local dependency and scale invariance. Local dependency means the nearby signals in HAR are likely to be correlated, while scale invariance refers to the scale-invariant for different paces or frequencies."
   ([Original source](https://arxiv.org/pdf/1707.03502.pdf))

In [19]:
# load the dataset from a client, returns train dataset (X) and label (y) elements
def load_dataset(subject_num=1):
    # import csv
    df = pd.read_csv('dataset/mHealth_subject' + str(subject_num+1) + '.csv', header=None)
    
    # exclude 0
    df = df[df[21] != 0]
    
    # split to dataset and elements
    trainy = df[21]
    df_X = df.drop([21], axis=1)
    
    # stack group so that features are the 3rd dimension
    loaded = list()
    loaded.append(df_X.values)
    trainX = np.dstack(loaded)
    
    # one hot encode y
    trainy = to_categorical(trainy)
                            
    return trainX, trainy

# Fit and evaluate a model
def evaluate_model(trainX, trainy):
    # set epochs and batch_size to 1 each due to its purpose solely as example and limiting resource
    # set verbose to 1 to see training progress
    verbose, epochs, batch_size = 1, 1, 1
    
    n_timesteps, n_features, n_outputs = trainX.shape[1], trainX.shape[2], trainy.shape[1]
    model = Sequential()
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(n_timesteps,n_features)))
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
    model.add(Dropout(0.5))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(100, activation='relu'))
    model.add(Dense(n_outputs, activation='softmax'))
    
    # Set precision and recall to calculate F1 score
    precision = tf.keras.metrics.Precision(name='precision')
    recall = tf.keras.metrics.Recall(name='recall')
    
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy', precision, recall])
    
    # fit network
    history = model.fit(trainX, trainy, epochs=epochs, batch_size=batch_size, verbose=verbose)
    
    # get evaluation metrics
    accuracy = history.history['accuracy'][epochs-1]
    precision = history.history['precision'][epochs-1]
    recall = history.history['recall'][epochs-1]
    
    return accuracy, precision, recall

# summarize scores
def summarize_results(scores, f1):
    m, s = np.mean(scores), np.std(scores)
    m2, s2 = np.mean(f1), np.std(f1)
    print('Accuracy: %.3f%% (+/-%.3f), F1 score: %.3f%% (+/-%.3f)' % (m, s, m2, s2))

# run an experiment
def run_experiment(repeats=1):
    # load data
    trainX, trainy = load_dataset()
    # repeat experiment
    accuracies = list()
    f1s = list()
    for r in range(repeats):
        accuracy, precision, recall = evaluate_model(trainX, trainy)
        accuracy = accuracy * 100.0
        f1_score = (2.0*((precision * recall)/(precision + recall))) * 100.0
        print('> Iteration #%d: %.3f, F1: %.3f' % (r+1, accuracy, f1_score))
        accuracies.append(accuracy)
        f1s.append(f1_score)
    # summarize results
    summarize_results(accuracies, f1s)

In [20]:
# run the experiment
run_experiment()

> Iteration #1: 91.916, F1: 91.867
Accuracy: 91.916% (+/-0.000), F1 score: 91.867% (+/-0.000)


## 2. Federated learning for HAR using CNN

Code references :
1. Previous section
2. [Tensorflow federated learning homepage and API](https://www.tensorflow.org/federated)
3. [Google Workshop on Federated Learning and Analytics](https://events.withgoogle.com/demostutorials-workshop-on-federated-learning-and-analytics-2020/)
4. Our beloved [StackOverflow](https://stackoverflow.com/)

In [29]:
# similar to load_dataset() function, but returning tensorflow dataset in batch form
def load_client_dataset(client_id=1):
    # import csv
    df = pd.read_csv('dataset/mHealth_subject' + str(client_id) + '.csv', header=None)
    
    # exclude 0
    df = df[df[21] != 0]
    
    # split to features (X) and label (y)
    y = df[21]
    X = df.drop([21], axis=1)
    X = np.expand_dims(X, axis=2)
    
    # one hot encode y
    y = to_categorical(y)
    
    # create tensorflow dataset
    dataset = tf.data.Dataset.from_tensor_slices((X,y))
    dataset = dataset.batch(20) # 20 rows for each batch
    return dataset

In [30]:
# Create federated train data from 8 clients (not randomized for now)
train_data = [load_client_dataset(x) for x in range(1,9)]

In [23]:
def create_keras_model():
    model = Sequential()
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(21,1)))
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
    model.add(Dropout(0.5))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(100, activation='relu'))
    model.add(Dense(7, activation='softmax'))
    
    return model

In [24]:
# Wrap a Keras model for use with TFF
def model_fn():
  keras_model = create_keras_model()
  return tff.learning.from_keras_model(
      keras_model,
      input_spec=train_data[0].element_spec,
      loss=tf.keras.losses.CategoricalCrossentropy(),
      metrics=[tf.keras.metrics.Accuracy(name='acc'),\
               tf.keras.metrics.Precision(name='pr'),\
               tf.keras.metrics.Recall(name='rc')\
              ])

In [31]:
# Simulate federated learning with federated averaging as model aggregation
trainer = tff.learning.build_federated_averaging_process(
    model_fn,
    client_optimizer_fn=lambda: tf.keras.optimizers.Adam(0.001))

# Init
state = trainer.initialize()

# Simulate "batch" learning
for i in range(25):
    state, metrics = trainer.next(state, train_data)
    print('> Iteration #%d | Accuracy: %.3f%%, F1 score: %.3f%%' % ( \
            i+1, \
            metrics['train']['acc'] * 100.0, \
            2.0*((metrics['train']['pr'] * metrics['train']['rc'])/(metrics['train']['pr'] + metrics['train']['rc'])) * 100.0 \
         )
    )

> Iteration #1 | Accuracy: 17.799%, F1 score: 97.592%
> Iteration #2 | Accuracy: 18.008%, F1 score: 95.590%
> Iteration #3 | Accuracy: 16.959%, F1 score: 94.505%
> Iteration #4 | Accuracy: 16.880%, F1 score: 94.805%
> Iteration #5 | Accuracy: 19.727%, F1 score: 95.391%
> Iteration #6 | Accuracy: 18.955%, F1 score: 95.798%
> Iteration #7 | Accuracy: 18.974%, F1 score: 96.125%
> Iteration #8 | Accuracy: 20.421%, F1 score: 96.430%
> Iteration #9 | Accuracy: 20.831%, F1 score: 96.762%
> Iteration #10 | Accuracy: 22.724%, F1 score: 96.914%
> Iteration #11 | Accuracy: 20.605%, F1 score: 96.746%
> Iteration #12 | Accuracy: 22.978%, F1 score: 96.784%
> Iteration #13 | Accuracy: 23.973%, F1 score: 97.355%
> Iteration #14 | Accuracy: 24.577%, F1 score: 97.267%
> Iteration #15 | Accuracy: 24.577%, F1 score: 97.631%
> Iteration #16 | Accuracy: 25.802%, F1 score: 97.568%
> Iteration #17 | Accuracy: 24.385%, F1 score: 97.385%
> Iteration #18 | Accuracy: 25.064%, F1 score: 97.744%
> Iteration #19 | A

KeyboardInterrupt: 