In [434]:
# First, let's import some modules.
import collections
import os

import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
import torch
import torch.nn as nn
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Number of per-user recordings that are loaded; each one becomes a client.
NUM_CLIENTS = 51
# Size of the classifier's output layer (one logit per activity class).
NUM_CLASSES = 18

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


Let's define some helper functions that split the time-series data into segments of 25 data points, where each segment overlaps its previous and next segment by 10 data points (i.e. the window advances 15 data points at a time). Segmentation is done separately for each activity, so every segment carries exactly one activity label.

In [435]:
import numpy as np

def segment_data(df, window_size=25, step_size=15):
    """Cut a recording into fixed-length, overlapping windows.

    Args:
        df: DataFrame with the columns "x", "y", "z" and "activity".
        window_size: Number of consecutive rows in each window.
        step_size: Number of rows the window start advances between
            consecutive windows (overlap = window_size - step_size).

    Returns:
        A tuple ``(segments, labels)`` where ``segments`` has shape
        ``(n_windows, window_size, 3)`` and ``labels`` has shape
        ``(n_windows,)``. Each label is the most frequent "activity" value
        inside its window. When ``df`` has fewer than ``window_size`` rows
        both arrays are empty but keep these shapes.

    Raises:
        ValueError: If ``window_size`` or ``step_size`` is not positive.
    """
    if window_size <= 0 or step_size <= 0:
        raise ValueError("window_size and step_size must be positive")

    xyz = df[["x", "y", "z"]].to_numpy()
    activity = df["activity"]

    segments = []
    labels = []
    # The "+ 1" keeps the last full window, i.e. the one that starts exactly
    # at len(df) - window_size; without it that window is silently dropped.
    for start in range(0, len(df) - window_size + 1, step_size):
        end = start + window_size
        segments.append(xyz[start:end])
        labels.append(activity.iloc[start:end].mode()[0])

    if not segments:
        # Keep the 3-D shape so callers can still concatenate the result.
        return np.empty((0, window_size, xyz.shape[1])), activity.to_numpy()[:0]

    return np.array(segments), np.array(labels)

def segment_each_activity(df, window_size=25, step_size=15):
    """Window a recording one activity at a time.

    All rows sharing an "activity" value are selected together and passed to
    ``segment_data``, so no window mixes rows of different activities.

    Args:
        df: DataFrame with the columns "x", "y", "z" and "activity".
        window_size: Number of rows per window.
        step_size: Number of rows between consecutive window starts.

    Returns:
        A tuple ``(segments, labels)`` with the windows of all activities
        concatenated along the first axis. If no activity yields a window,
        ``segments`` has shape ``(0, window_size, 3)`` and ``labels`` has
        shape ``(0,)``.
    """
    all_segments = []
    all_labels = []
    for activity in df["activity"].unique():
        activity_data = df[df["activity"] == activity]
        segments, labels = segment_data(activity_data, window_size, step_size)

        # An activity with too few rows yields no window; skipping it keeps
        # np.concatenate from failing on arrays of mismatched dimensions.
        if len(segments) == 0:
            continue

        all_segments.append(segments)
        all_labels.append(labels)

    if not all_segments:
        # np.concatenate rejects an empty list, so build the empty result here.
        return np.empty((0, window_size, 3)), df["activity"].to_numpy()[:0]

    return np.concatenate(all_segments, axis=0), np.concatenate(all_labels, axis=0)


Read the data and encode the activity labels. We do the same for each user and collect the resulting local datasets into one list. We also create a dictionary that maps activity numbers to human-readable labels, following the [dataset description](https://archive.ics.uci.edu/ml/machine-learning-databases/00507/WISDM-dataset-description.pdf).

In [436]:
# Human-readable activity names keyed by activity number (1..18).
# NOTE(review): keys start at 1, whereas `LabelEncoder` produces codes that
# start at 0 - confirm an offset is applied wherever this mapping is used to
# look up encoded labels.
activity_dict = dict(enumerate(
    (
        "Walking",
        "Jogging",
        "Stairs",
        "Sitting",
        "Standing",
        "Typing",
        "Brushing Teeth",
        "Eating Soup",
        "Eating Chips",
        "Eating Pasta",
        "Drinking from Cup",
        "Eating Sandwich",
        "Kicking (Soccer Ball)",
        "Playing Catch w/Tennis Ball",
        "Dribbling (Basketball)",
        "Writing",
        "Clapping",
        "Folding Clothes",
    ),
    start=1,
))

# Load every client's raw recording first. The label encoder must be fitted
# on the labels of ALL clients: refitting it per client would give the same
# activity different integer codes on clients that lack some activity.
raw_frames = []
for uid in range(1600, 1600 + NUM_CLIENTS):
    ds = pd.read_csv(
        f"./raw/phone/accel/data_{uid}_accel_phone.txt",
        header=None,
        names=["user", "activity", "timestamp", "x", "y", "z"],
    )
    # Strip any ";" from the z column before converting it to float.
    ds["z"] = ds["z"].astype(str).str.replace(";", "", regex=False).astype(float)
    raw_frames.append(ds)

# Fit once on the union of the activity labels seen across all clients.
lencoder = LabelEncoder()
lencoder.fit(sorted(set().union(*(frame["activity"].unique() for frame in raw_frames))))

# One (segments, labels) pair per client, in client order.
datasets = []
for ds in raw_frames:
    ds["activity"] = lencoder.transform(ds["activity"])
    datasets.append(segment_each_activity(ds))

# The raw frames are no longer needed once they have been segmented.
del raw_frames

In [489]:
# Hyper-parameters of the networked training loop.
ALPHA = 0.05         # weight of the GTV (neighbour-disagreement) term in the local loss
LOCAL_EPOCHS = 300   # full-batch optimisation steps per node in every round
FL_ENABLED = True    # when False the GTV term is never added (purely local training)

# NOTE(review): NUM_ROUNDS is read by the training loop below but is not
# defined in any cell of this notebook, so a fresh kernel raises NameError.
# The value here is a placeholder - confirm the intended number of rounds.
NUM_ROUNDS = 5

class MLP(nn.Module):
    """Feed-forward classifier with one hidden layer.

    The network is ``Linear -> ReLU -> Linear`` and returns raw logits
    (no softmax is applied).
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        """Create the layers.

        Args:
            input_dim: Number of features of each input row.
            hidden_dim: Width of the hidden layer.
            output_dim: Number of output logits.
        """
        super().__init__()
        self.fc1 = nn.Linear(in_features=input_dim, out_features=hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, x):
        """Map a batch ``x`` with ``input_dim`` features per row to logits."""
        hidden = self.fc1(x)
        activated = self.relu(hidden)
        return self.fc2(activated)


# Settings of the local models and of the per-node evaluation split.
HIDDEN_DIM = 64
LEARNING_RATE = 0.001
TEST_SIZE = 0.2

# Every client is a node; in a complete graph each node neighbours all others.
G = nx.complete_graph(NUM_CLIENTS)

for i, (X, y) in enumerate(datasets):
    node = G.nodes[i]
    node['X'] = X
    node['y'] = y
    # Flatten each window to one feature vector and split ONCE per node.
    # Re-splitting in every round would move earlier training samples into
    # the test set of a model that is carried over between rounds.
    X_flat = X.reshape(X.shape[0], -1)
    node['split'] = train_test_split(X_flat, y, test_size=TEST_SIZE)

criterion = nn.CrossEntropyLoss()

for r in range(NUM_ROUNDS):
    print(f"Round {r+1} starting.")

    for i in G:
        node = G.nodes[i]
        X_train, X_test, y_train, y_test = node['split']

        X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
        y_train_tensor = torch.tensor(y_train, dtype=torch.long).to(device)
        X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)

        model = MLP(input_dim=X_train.shape[1], hidden_dim=HIDDEN_DIM, output_dim=NUM_CLASSES).to(device)
        # Continue from this node's weights of the previous round instead of
        # restarting from a random initialisation in every round.
        if 'model' in node:
            model.load_state_dict(node['model'])

        # Snapshot the neighbours' stored weights once per node visit. They do
        # not change while this node trains, so they are stacked per parameter
        # here rather than being rebuilt in every epoch.
        neighbor_weights = {}
        if FL_ENABLED and r != 0:
            neighbor_states = [
                G.nodes[j]['model'] for j in G.neighbors(i) if 'model' in G.nodes[j]
            ]
            if neighbor_states:
                neighbor_weights = {
                    name: torch.stack([state[name] for state in neighbor_states]).to(device)
                    for name, _ in model.named_parameters()
                }

        optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

        model.train()
        for epoch in range(LOCAL_EPOCHS):
            optimizer.zero_grad()

            loss = criterion(model(X_train_tensor), y_train_tensor)

            if neighbor_weights:
                # GTV term: sum over neighbours and parameter tensors of the
                # L2 norm of (own parameter - neighbour's parameter).
                gtv_loss = sum(
                    (param.unsqueeze(0) - neighbor_weights[name])
                    .flatten(start_dim=1)
                    .norm(p=2, dim=1)
                    .sum()
                    for name, param in model.named_parameters()
                )
                loss = loss + ALPHA * gtv_loss

            # Exactly one backward pass per step; a second call would add the
            # same gradients on top of the first.
            loss.backward()
            optimizer.step()

        # Store a detached copy of the trained weights for the neighbours
        # (and for this node's next round).
        node['model'] = {
            name: tensor.detach().clone() for name, tensor in model.state_dict().items()
        }

        # Evaluate on this node's held-out split.
        model.eval()
        with torch.no_grad():
            predicted = model(X_test_tensor).argmax(dim=1)
        acc = accuracy_score(y_test, predicted.cpu().numpy())
        print(f"Node {i} Test Accuracy: {acc:.4f}")




Round 1 starting.
Node 0 Test Accuracy: 0.9110
Node 1 Test Accuracy: 0.8735
Node 2 Test Accuracy: 0.8245
Node 3 Test Accuracy: 0.9090
Node 4 Test Accuracy: 0.8220
Node 5 Test Accuracy: 0.8035
Node 6 Test Accuracy: 0.8466
Node 7 Test Accuracy: 0.9431
Node 8 Test Accuracy: 0.9236
Node 9 Test Accuracy: 0.8747
Node 10 Test Accuracy: 0.8277
Node 11 Test Accuracy: 0.9297
Node 12 Test Accuracy: 0.9649
Node 13 Test Accuracy: 0.8018
Node 14 Test Accuracy: 0.8779
Node 15 Test Accuracy: 0.8899
Node 16 Test Accuracy: 0.5594
Node 17 Test Accuracy: 0.8993
Node 18 Test Accuracy: 0.8910
Node 19 Test Accuracy: 0.8560
Node 20 Test Accuracy: 0.7100
Node 21 Test Accuracy: 0.7917
Node 22 Test Accuracy: 0.9719
Node 23 Test Accuracy: 0.8441
Node 24 Test Accuracy: 0.7307
Node 25 Test Accuracy: 0.8625
Node 26 Test Accuracy: 0.9549
Node 27 Test Accuracy: 0.8135
Node 28 Test Accuracy: 0.8341
Node 29 Test Accuracy: 0.9520
Node 30 Test Accuracy: 0.7986
Node 31 Test Accuracy: 0.8806
Node 32 Test Accuracy: 0.8817
No

KeyboardInterrupt: 