First, let's import the modules we need.

In [1]:
import os
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import networkx as nx
import matplotlib.pyplot as plt
import collections
import tensorflow as tf
import numpy as np

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
NUM_CLIENTS = 51
NUM_CLASSES = 18
ALPHA = 0.05
LOCAL_EPOCHS = 4
FL_ENABLED = True
NUM_ROUNDS = 10




Let's define some helper functions to segment the timeseries data to segments of 25 datapoints, with each having a 10-datapoint overlap with the previous and next segments. All the segments have purely the same activity label.

In [2]:
import numpy as np

def segment_data(df, window_size=50, step_size=25):
    segments = []
    labels = []
    for start in range(0, len(df) - window_size, step_size):
        end = start + window_size
        segment = df.iloc[start:end]
        if len(segment) == window_size:
            segment_data = segment[["x", "y", "z"]].values
            segments.append(segment_data)
            label = segment["activity"].mode()[0]
            labels.append(label)
    return np.array(segments), np.array(labels)

def segment_each_activity(df, window_size=25, step_size=15):
    all_segments = []
    all_labels = []
    for activity in df["activity"].unique():
        activity_data = df[df["activity"] == activity]
        
        segments, labels = segment_data(activity_data, window_size, step_size)
        
        all_segments.append(segments)
        all_labels.append(labels)

    all_segments = np.concatenate(all_segments, axis=0)
    all_labels = np.concatenate(all_labels, axis=0)

    return all_segments, all_labels


Read the data and encode the activity label, do this same for each user and make a big array consisting of these local datasets. Also, let's create a dictionary to create human-readable labels for the activities according to the [dataset description](https://archive.ics.uci.edu/ml/machine-learning-databases/00507/WISDM-dataset-description.pdf).

In [3]:
activity_dict = {1: "Walking", 2: "Jogging", 3: "Stairs", 4: "Sitting", 5: "Standing",
    6: "Typing", 7: "Brushing Teeth", 8: "Eating Soup", 9: "Eating Chips",
    10: "Eating Pasta", 11: "Drinking from Cup", 12: "Eating Sandwich",
    13: "Kicking (Soccer Ball)", 14: "Playing Catch w/Tennis Ball",
    15: "Dribbling (Basketball)", 16: "Writing", 17: "Clapping", 18: "Folding Clothes"
}

datasets = []
lencoder = LabelEncoder()

for uid in range(1600, 1600 + NUM_CLIENTS):
    ds = pd.read_csv(f"./raw/phone/accel/data_{uid}_accel_phone.txt", header=None, names=["user", "activity", "timestamp", "x", "y", "z"])
    ds["activity"] = lencoder.fit_transform(ds["activity"])
    ds["z"] = ds["z"].astype(str).str.replace(";", "", regex=False).astype(float)
    datasets.append(segment_each_activity(ds))

Let's create some helper functions for the next steps.

In [4]:
# Create and return a basic nx graph
def create_fully_connected_graph():
    G = nx.complete_graph(NUM_CLIENTS)
    for i, (X, y) in enumerate(datasets):
        G.nodes[i]['X'] = X
        G.nodes[i]['y'] = y
    return G

# Create a basic CNN mode with number of our classes
def create_CNN(input_shape):
    model = tf.keras.Sequential([
        tf.keras.layers.Conv1D(64, 3, activation='relu', input_shape=input_shape),
        tf.keras.layers.Conv1D(32, 3, activation='relu'),
        tf.keras.layers.Conv1D(16, 3, activation='relu'),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(NUM_CLASSES, activation='softmax')
    ])
    return model

# Function for calculating a GTV loss - we need the current node model + its idx, and graph
def calculate_gtv_loss(model, graph, node_idx):
    gtv_loss = 0
    for j in graph.neighbors(node_idx):
        neigh = graph.nodes[j]
        if 'model' in neigh:
            neighbor_weights = neigh['model'].get_weights()
            local_weights = model.get_weights()
            gtv_loss += sum(np.linalg.norm(w1 - w2) for w1, w2 in zip(local_weights, neighbor_weights))
    return gtv_loss

# Function for training the model with GTV-Min
def train_model_with_gtv(model, X_train, y_train, graph, node_idx, epochs=LOCAL_EPOCHS, alpha=ALPHA):
    optimizer = tf.keras.optimizers.Adam()
    ce_loss_fn = tf.keras.losses.CategoricalCrossentropy()
    dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train)).batch(32)

    for epoch in range(epochs):
        for x_batch, y_batch in dataset:
            with tf.GradientTape() as tape:
                
                y_pred = model(x_batch, training=True)
                ce_loss = ce_loss_fn(y_batch, y_pred)
                gtv = calculate_gtv_loss(model, graph, node_idx)
                total_loss = ce_loss + alpha * gtv

            grads = tape.gradient(total_loss, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))

In [None]:
devices = tf.config.list_physical_devices()
print("\nDevices: ", devices)

gpus = tf.config.list_physical_devices('GPU')
if gpus:
  details = tf.config.experimental.get_device_details(gpus[0])
  print("GPU details: ", details)
G = create_fully_connected_graph()

for r in range(NUM_ROUNDS):
    print(f"Round {r+1} starting.")
    
    for i in G:
        node = G.nodes[i]
        X, y = node['X'], node['y']
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
        y_train = tf.keras.utils.to_categorical(y_train, NUM_CLASSES)
        y_test = tf.keras.utils.to_categorical(y_test, NUM_CLASSES)
        
        if r == 0:
            model = create_CNN(input_shape=X_train.shape[1:])
            model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])  # Only for evaluation

            model.fit(X_train, y_train, epochs=LOCAL_EPOCHS, batch_size=32, verbose=0)
            node['model'] = model
        else:
            model = node['model']
            train_model_with_gtv(model, X_train, y_train, G, i, alpha=0.3)
            node['model'] = model
            
        model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])  # Only for evaluation
        ce, acc = model.evaluate(X_test, y_test)





Devices:  [PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
GPU details:  {'device_name': 'METAL'}
Round 1 starting.


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.8836 - loss: 0.3272
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.8521 - loss: 0.3678
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.8101 - loss: 0.5930
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.8812 - loss: 0.2863
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.7943 - loss: 0.5591
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.7566 - loss: 0.6839
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.8528 - loss: 0.5148
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.9442 - loss: 0.2417
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.9125 - loss: 0.2327
[1m26/26[0m [32m━━━━━━━━━━━━━━━━