In [368]:
# First, let's import some modules.
import os

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from torch.utils.data import DataLoader, TensorDataset

# Use the GPU when PyTorch reports CUDA as available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Number of per-user files to load; user ids are taken from 1600 upwards.
NUM_CLIENTS = 51
# Number of federated averaging rounds.
NUM_ROUNDS = 5
# Number of passes each client makes over its own training data per round.
LOCAL_EPOCHS = 5
# Size of the classifier's output layer (number of activity classes).
NUM_CLASSES = 18

Let's define some helper functions that split the time-series data into segments of 25 datapoints, each overlapping the previous and the next segment by 10 datapoints (window size 25, step size 15). Segmentation is done separately for each activity, so every segment carries exactly one activity label.

In [352]:
import numpy as np

def segment_data(df, window_size=25, step_size=15):
    """Cut a frame of accelerometer rows into fixed-size sliding windows.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns "x", "y", "z" and "activity". Rows are
        taken in their existing (positional) order.
    window_size : int
        Number of consecutive rows per window.
    step_size : int
        Offset between the starts of two consecutive windows.

    Returns
    -------
    (segments, labels) : tuple of numpy.ndarray
        ``segments`` has shape ``(n_windows, window_size, 3)``; ``labels`` has
        shape ``(n_windows,)`` and holds the most frequent activity value of
        each window. When ``df`` has fewer than ``window_size`` rows the
        arrays are empty but keep those shapes, so they can still be
        concatenated with non-empty results.

    Raises
    ------
    ValueError
        If ``window_size`` or ``step_size`` is not positive.
    """
    if window_size <= 0 or step_size <= 0:
        raise ValueError("window_size and step_size must be positive integers")

    features = df[["x", "y", "z"]].to_numpy()
    activities = df["activity"]

    segments = []
    labels = []
    # "+ 1" so that the last full window (start == len(df) - window_size) is
    # included; every start in this range yields exactly window_size rows.
    for start in range(0, len(df) - window_size + 1, step_size):
        end = start + window_size
        segments.append(features[start:end])
        labels.append(activities.iloc[start:end].mode()[0])

    if not segments:
        empty_segments = np.empty((0, window_size, features.shape[1]), dtype=features.dtype)
        empty_labels = np.empty((0,), dtype=activities.dtype)
        return empty_segments, empty_labels

    return np.array(segments), np.array(labels)

def segment_each_activity(df, window_size=25, step_size=15):
    """Window each activity's rows separately and stack the results.

    The frame is split by the value of its "activity" column and each part is
    passed to ``segment_data``, so no window mixes rows from two activities.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns "x", "y", "z" and "activity".
    window_size, step_size : int
        Forwarded unchanged to ``segment_data``.

    Returns
    -------
    (segments, labels) : tuple of numpy.ndarray
        Windows of all activities concatenated along the first axis, with the
        matching label array. Activities that are too short to produce a
        single window contribute nothing. If no activity produces a window,
        empty arrays of shape ``(0, window_size, 3)`` and ``(0,)`` are returned.
    """
    all_segments = []
    all_labels = []
    # sort=False keeps the activities in order of first appearance.
    for _, activity_data in df.groupby("activity", sort=False):
        segments, labels = segment_data(activity_data, window_size, step_size)
        # An empty result may not have the 3-D shape of the others; skipping
        # it keeps np.concatenate from failing on a dimension mismatch.
        if len(segments) == 0:
            continue
        all_segments.append(segments)
        all_labels.append(labels)

    if not all_segments:
        return (
            np.empty((0, window_size, 3)),
            np.empty((0,), dtype=df["activity"].dtype),
        )

    return np.concatenate(all_segments, axis=0), np.concatenate(all_labels, axis=0)


Read the data and encode the activity labels; do the same for each user and collect the resulting per-user (local) datasets in one list. Also, let's create a dictionary that maps the activities to human-readable labels according to the [dataset description](https://archive.ics.uci.edu/ml/machine-learning-databases/00507/WISDM-dataset-description.pdf).

In [353]:
# Human-readable names for the 18 activities.
# NOTE(review): the keys here run 1-18, while the encoded activity labels
# printed further down in this notebook run 0-17 -- confirm whether lookups
# need a +1 offset (or whether the keys should be 0-based).
activity_dict = {1: "Walking", 2: "Jogging", 3: "Stairs", 4: "Sitting", 5: "Standing",
                 6: "Typing", 7: "Brushing Teeth", 8: "Eating Soup", 9: "Eating Chips",
                 10: "Eating Pasta", 11: "Drinking from Cup", 12: "Eating Sandwich",
                 13: "Kicking (Soccer Ball)", 14: "Playing Catch w/Tennis Ball",
                 15: "Dribbling (Basketball)", 16: "Writing", 17: "Clapping", 18: "Folding Clothes"
                 }

# Load one accelerometer file per client (user ids 1600, 1601, ...).
datasets = []
for uid in range(1600, 1600 + NUM_CLIENTS):
    ds = pd.read_csv(f"./raw/phone/accel/data_{uid}_accel_phone.txt", header=None, names=["user", "activity", "timestamp", "x", "y", "z"])
    # Each raw line ends with ';', which lands in the last column; strip it
    # before converting to float.
    ds["z"] = ds["z"].astype(str).str.replace(";", "", regex=False).astype(float)
    datasets.append(ds)

# Fit ONE encoder on the union of all clients' activity codes. Refitting per
# client would assign different integers to the same activity whenever a
# client lacks one of the activities, which silently corrupts the labels that
# the federated model is trained on.
lencoder = LabelEncoder()
lencoder.fit(pd.concat([ds["activity"] for ds in datasets], ignore_index=True))
for ds in datasets:
    ds["activity"] = lencoder.transform(ds["activity"])


Next, let's check how many datapoints there are for each activity in the first user's data.

In [354]:
# Row counts per activity for the first client (user 1600). `phone_accel` was
# previously never defined in this notebook, so bind it explicitly here.
phone_accel = datasets[0]
for activity in phone_accel["activity"].unique():
    activity_data = phone_accel[phone_accel["activity"] == activity]
    print(f"Activity {activity} shape: {activity_data.shape}")

Activity 0 shape: (3604, 6)
Activity 1 shape: (3604, 6)
Activity 2 shape: (3603, 6)
Activity 3 shape: (3603, 6)
Activity 4 shape: (3603, 6)
Activity 5 shape: (3603, 6)
Activity 6 shape: (3603, 6)
Activity 7 shape: (3603, 6)
Activity 8 shape: (3603, 6)
Activity 9 shape: (3603, 6)
Activity 10 shape: (3603, 6)
Activity 11 shape: (3603, 6)
Activity 12 shape: (3914, 6)
Activity 13 shape: (3601, 6)
Activity 14 shape: (3604, 6)
Activity 15 shape: (3603, 6)
Activity 16 shape: (3603, 6)
Activity 17 shape: (3603, 6)


In [355]:
# Build one (train_loader, test_loader) pair per client; loaders[i] belongs to
# datasets[i].
loaders = []
for idx, ds in enumerate(datasets):
    # Window the client's data per activity, then split the windows randomly
    # into train and test parts (fixed random_state for repeatability).
    # NOTE(review): windows overlap (step_size < window_size), so a random
    # split can put overlapping windows into both train and test -- consider
    # splitting the raw rows before segmenting.
    X, y = segment_each_activity(ds)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
    X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
    y_train_tensor = torch.tensor(y_train, dtype=torch.long)
    X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
    y_test_tensor = torch.tensor(y_test, dtype=torch.long)

    train_ds = TensorDataset(X_train_tensor, y_train_tensor)
    test_ds = TensorDataset(X_test_tensor, y_test_tensor)

    # Only the training batches are shuffled; test order is kept as is.
    train_loader = DataLoader(train_ds, batch_size=64, shuffle=True)
    test_loader = DataLoader(test_ds, batch_size=64)
    loaders.append((train_loader, test_loader))
    # Progress indicator: print the client index on one line.
    print(idx, end=" ")
    # The loop variables (X, y, train_loader, test_loader, ...) stay bound at
    # notebook scope after the loop and hold the last client's values; later
    # cells may read them.
print(f"Number of laoders: {len(loaders)}")

0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 Number of laoders: 51


In [356]:
class CNNModel(nn.Module):
    """Small 1-D CNN classifier for fixed-length sensor windows.

    Architecture: Conv1d(kernel 5, 64 filters) -> ReLU -> MaxPool1d(2) ->
    flatten -> Linear.

    Parameters
    ----------
    input_dim : int
        Number of channels per time step (3 for x/y/z).
    output_dim : int
        Number of output classes (size of the final linear layer).
    seq_len : int
        Number of time steps per input window. The default of 25 matches the
        default ``window_size`` of the segmentation helpers; it replaces the
        previous lookup of a notebook-level variable ``X``, which made the
        class unusable unless that variable happened to exist.

    Raises
    ------
    ValueError
        If ``seq_len`` is too short to survive the convolution and pooling.
    """

    def __init__(self, input_dim=3, output_dim=6, seq_len=25):
        super().__init__()
        kernel_size = 5
        pool_size = 2
        self.conv1 = nn.Conv1d(in_channels=input_dim, out_channels=64, kernel_size=kernel_size)
        self.pool = nn.MaxPool1d(pool_size)

        # Unpadded stride-1 convolution shortens the sequence by
        # kernel_size - 1; max-pooling then floors the division by pool_size.
        pooled_len = (seq_len - kernel_size + 1) // pool_size
        if pooled_len < 1:
            raise ValueError(
                f"seq_len={seq_len} is too short for kernel_size={kernel_size} "
                f"and pool_size={pool_size}"
            )
        self.fc1 = nn.Linear(64 * pooled_len, output_dim)

    def forward(self, x):
        """Map a batch of shape (batch, seq_len, input_dim) to class logits."""
        x = x.permute(0, 2, 1)  # [batch, channels, seq_len] as Conv1d expects
        x = self.pool(torch.relu(self.conv1(x)))
        x = torch.flatten(x, start_dim=1)
        return self.fc1(x)


In [370]:

def train(model, dataloader, criterion, optimizer, device):
    """Run one pass over ``dataloader``, updating ``model`` after each batch.

    The model is switched to training mode first. For every (inputs, labels)
    batch the gradients are reset, the loss is back-propagated and the
    optimizer takes one step. Nothing is returned.
    """
    model.train()
    for batch_inputs, batch_labels in dataloader:
        batch_inputs = batch_inputs.to(device)
        batch_labels = batch_labels.to(device)

        optimizer.zero_grad()
        batch_loss = criterion(model(batch_inputs), batch_labels)
        batch_loss.backward()
        optimizer.step()


In [358]:
def evaluate(model, dataloader, device):
    """Return the fraction of samples in ``dataloader`` classified correctly.

    The model is put into evaluation mode and run without gradient tracking.
    The predicted class of a sample is the index of its largest output value.
    """
    model.eval()
    n_correct = 0
    n_seen = 0
    with torch.no_grad():
        for batch_inputs, batch_labels in dataloader:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)
            predictions = torch.max(model(batch_inputs), 1)[1]
            n_seen += batch_labels.size(0)
            n_correct += predictions.eq(batch_labels).sum().item()
    return n_correct / n_seen


In [359]:
def get_model_weights(model):
    """Return a CPU copy of every entry in ``model.state_dict()``.

    The tensors are cloned, so later changes to the model do not affect the
    returned dictionary.
    """
    snapshot = {}
    for name, tensor in model.state_dict().items():
        snapshot[name] = tensor.cpu().clone()
    return snapshot

In [360]:
def set_model_weights(model, weights):
    """Load ``weights`` (a state-dict style mapping) into ``model`` in place.

    The result of ``load_state_dict`` is discarded; the function returns None.
    """
    model.load_state_dict(state_dict=weights)

In [361]:
def average_weights(weight_list):
    """Average a list of weight dictionaries entry by entry.

    Every dictionary is expected to have the keys of the first one. Each
    client contributes equally (plain mean, not weighted by sample count).
    """
    n_models = len(weight_list)
    reference = weight_list[0]
    return {
        name: sum(state[name] for state in weight_list) / n_models
        for name in reference.keys()
    }

In [362]:
import numpy as np
import torch
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset

# Build a small shared ("global") dataset by drawing a few random segments
# from every client.
NUM_SAMPLES_PER_CLIENT = 10
GLOBAL_SAMPLE_SEED = 42

# Dedicated, seeded generator so that a fresh Restart & Run All draws the
# same segments every time.
rng = np.random.default_rng(GLOBAL_SAMPLE_SEED)

all_X = []
all_y = []

# Iterate over the loaded datasets directly instead of redefining
# NUM_CLIENTS here, so the loop can never run past len(datasets).
for client_df in datasets:
    X, y = segment_each_activity(client_df)
    # Sampling without replacement cannot draw more items than exist.
    n_take = min(NUM_SAMPLES_PER_CLIENT, len(X))
    if n_take == 0:
        continue
    random_indices = rng.choice(len(X), size=n_take, replace=False)

    X_random = X[random_indices]
    y_random = y[random_indices]

    all_X.append(X_random)
    all_y.append(y_random)

all_X = np.concatenate(all_X, axis=0)
all_y = np.concatenate(all_y, axis=0)

X_tensor = torch.tensor(all_X, dtype=torch.float32)
y_tensor = torch.tensor(all_y, dtype=torch.long)

global_dataset = TensorDataset(X_tensor, y_tensor)

global_loader = DataLoader(global_dataset, batch_size=2, shuffle=True)



In [None]:
# Federated averaging: every round, each client starts from the current
# global weights, trains locally, and the resulting weights are averaged into
# the next global model.
global_model = CNNModel(output_dim=NUM_CLASSES).to(device)
# The loss function holds no state, so one instance serves all clients.
criterion = nn.CrossEntropyLoss()

# `round_idx` rather than `round`, to avoid shadowing the builtin round().
for round_idx in range(NUM_ROUNDS):
    local_weights = []
    # Snapshot once per round; load_state_dict copies the values into each
    # local model, so sharing the snapshot between clients is safe.
    global_weights = get_model_weights(global_model)

    for i in range(NUM_CLIENTS):
        local_model = CNNModel(output_dim=NUM_CLASSES).to(device)
        set_model_weights(local_model, global_weights)

        # Fresh optimizer per client and round: no optimizer state is carried over.
        optimizer = torch.optim.Adam(local_model.parameters(), lr=0.001)

        train_loader, test_loader = loaders[i]

        for _ in range(LOCAL_EPOCHS):
            train(local_model, train_loader, criterion, optimizer, device)
            # Accuracy on the client's own test split after every local epoch.
            acc = evaluate(local_model, test_loader, device)
            print(f"Client {i} Round {round_idx+1} Accuracy: {acc:.2f}")

        local_weights.append(get_model_weights(local_model))

    # Federated Averaging step
    new_global_weights = average_weights(local_weights)
    set_model_weights(global_model, new_global_weights)

    # Evaluate the global model on the first client's TEST loader
    # (index 1 of the pair; index 0 is the training loader).
    global_acc = evaluate(global_model, loaders[0][1], device)
    print(f"Round {round_idx+1}, Global Accuracy: {global_acc:.2f}")


Client 0 Round 1 Accuracy: 0.72
Client 0 Round 1 Accuracy: 0.79
Client 0 Round 1 Accuracy: 0.84
Client 0 Round 1 Accuracy: 0.88
Client 0 Round 1 Accuracy: 0.87
Client 1 Round 1 Accuracy: 0.70
Client 1 Round 1 Accuracy: 0.81
Client 1 Round 1 Accuracy: 0.86
Client 1 Round 1 Accuracy: 0.88
Client 1 Round 1 Accuracy: 0.90


KeyboardInterrupt: 

In [219]:

# Baseline: train one model on a single client's data, without federation.
# The loaders are taken explicitly from the last client instead of relying on
# whatever `train_loader` / `test_loader` were left over from an earlier cell.
train_loader, test_loader = loaders[-1]

# Size the output layer with NUM_CLASSES rather than the number of distinct
# labels in some leftover `y`, which is too small if that array lacks a class.
model = CNNModel(output_dim=NUM_CLASSES).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

for epoch in range(10):
    train(model, train_loader, criterion, optimizer, device)
    acc = evaluate(model, test_loader, device)
    print(f"Epoch {epoch+1}, Accuracy: {acc:.2f}")


Epoch 1, Accuracy: 0.68
Epoch 2, Accuracy: 0.77
Epoch 3, Accuracy: 0.78
Epoch 4, Accuracy: 0.81
Epoch 5, Accuracy: 0.79
Epoch 6, Accuracy: 0.82
Epoch 7, Accuracy: 0.81
Epoch 8, Accuracy: 0.84
Epoch 9, Accuracy: 0.84
Epoch 10, Accuracy: 0.85


In [220]:
# Classify one segment (index 1000 of the current X / y arrays) with the
# trained model.
test_seg_X = X[1000]
test_seg_y = y[1000]
# The model expects a batch, so add a leading batch dimension of size 1.
segment_tensor = torch.unsqueeze(torch.tensor(test_seg_X, dtype=torch.float32), 0)

model.eval()
with torch.no_grad():
    output = model(segment_tensor.to(device))  # shape: (1, num_classes)
    predicted_label = output.argmax(dim=1).item()
print("Predicted activity:", predicted_label)


Predicted activity: 4
