v2 - UCI HAR dataset

# Setup

In [1]:
# import libraries
import os
import sys
import time
import pandas as pd
import numpy as np
from scipy import stats
import copy
from scipy.interpolate import CubicSpline
import torch.optim as optim
import torch.nn as nn
import torch
from torch.optim import Adam
from scipy.fftpack import fft, ifft
from scipy.stats import mode
from torch.utils.data import DataLoader, TensorDataset
import datetime
from sklearn.metrics import f1_score

## Hyperparameters

In [2]:
num_epochs = 200
batch_size = 32  # Set your batch size
learning_rate_client = 0.01
local_epochs = 5
subject_dir = 'FL_Data/windowed_data_filtered_w_UCI2/subject_'  # Set your directory to the subject data
numclients = 75
num_classes = 8

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

#current timestamp
current_time = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

In [3]:
# set the seed
torch.manual_seed(420)
np.random.seed(420)

# Load Data

In [4]:
def load_data_client(id, batch_size=batch_size, type='labelled_train'):
    # Load the data
    data = np.load(subject_dir + str(id) + '/windowed_' + type + '_x.npy')
    labels = np.load(subject_dir + str(id) + '/windowed_' + type + '_y.npy')

    # print shape of data
    # print(data.shape)
    # print(labels.shape)

    # Convert to torch tensor
    data = torch.from_numpy(data).float()
    labels = torch.from_numpy(labels).long()

    # Create a dataset
    dataset = torch.utils.data.TensorDataset(data, labels)

    # Create a dataloader
    if type == 'labelled_train' or type == 'unlabelled_train':
        dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)
    else:
        dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=False)
    
    return dataloader

In [5]:
# for i in range(54):
#     data_label_train = load_data_client(i, batch_size, 'labelled_train')
#     data_unlabel_train = load_data_client(i, batch_size, 'unlabelled_train')
#     data_test = load_data_client(i, batch_size, 'test')

# Model Architecture

In [6]:
import torch
import torch.nn as nn

class PoolingAutoEncoder(nn.Module):
    def __init__(self, input_channels=3, sequence_length=128):
        super(PoolingAutoEncoder, self).__init__()
        
        # Encoder
        self.encoder = nn.Sequential(
            nn.Conv1d(in_channels=input_channels, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2, stride=2),  # Adding pooling layer
            nn.Conv1d(32, 64, 5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool1d(2, 2),  # Adding pooling layer
            nn.Conv1d(64, 128, 5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool1d(2, 2),  # Adding pooling layer
        )
        
        # Adjusted for pooling layers
        self.flat_features = self._get_flat_features(input_channels, sequence_length)
        
        # Latent space
        self.latent_space = nn.Linear(self.flat_features, 128)  # Project to a 128-dimensional latent space

        # Decoder
        self.decoder = nn.Sequential(
            nn.ConvTranspose1d(128, 128, 5, stride=2, padding=2, output_padding=1),  # Adjust stride for upsampling
            nn.ReLU(),
            nn.ConvTranspose1d(128, 64, 5, stride=2, padding=2, output_padding=1),
            nn.ReLU(),
            nn.ConvTranspose1d(64, 32, 5, stride=2, padding=2, output_padding=1),
            nn.ReLU(),
            nn.ConvTranspose1d(32, input_channels, 5, stride=1, padding=2),  # Output channels = input channels
            nn.Sigmoid(),  # Assuming the input data is normalized between 0 and 1
        )
        
    def _get_flat_features(self, input_channels, sequence_length):
        # Temporarily override self.encoder with self.encoder[:3] to include only up to the first pooling layer
        tmp_encoder = nn.Sequential(*list(self.encoder.children())[:3])
        x = torch.randn(1, input_channels, sequence_length)
        x = tmp_encoder(x)
        return x.numel()

    def forward(self, x):
        x = self.encoder(x)
        x = x.view(x.size(0), -1)  # Flatten for linear layer
        x = self.latent_space(x)
        x = x.view(x.size(0), 128, -1)  # Reshape for decoder, adjust based on the actual size after flattening
        x = self.decoder(x)
        return x

# Example instantiation of the model
# model = PoolingAutoEncoder(input_channels=3, sequence_length=128)
# print(model)

In [7]:
class HARAutoEncoder(nn.Module):
    def __init__(self, input_channels=3, sequence_length=128):
        super(HARAutoEncoder, self).__init__()
        # Encoder
        self.encoder = nn.Sequential(
            nn.Conv1d(in_channels=input_channels, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.Conv1d(32, 32, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.Conv1d(32, 32, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.Conv1d(32, 32, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
        )
        flattened_size = 32 * sequence_length  # Adjust based on the actual architecture
        self.fc_encoder = nn.Linear(flattened_size, 128)

        # Decoder
        self.fc_decoder = nn.Linear(128, flattened_size)
        self.decoder = nn.Sequential(
            nn.ConvTranspose1d(32, 32, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.ConvTranspose1d(32, 32, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.ConvTranspose1d(32, 32, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.ConvTranspose1d(32, input_channels, kernel_size=5, stride=1, padding=2),
            nn.Sigmoid(),
        )

    def forward(self, x):
        x = self.encoder(x)
        x = x.view(-1, 32 * 128)  # Adjust based on the actual architecture
        x = self.fc_encoder(x)
        x = self.fc_decoder(x)
        x = x.view(-1, 32, 128)  # Adjust based on the actual architecture
        x = self.decoder(x)
        return x


# Train and Test Function

In [8]:
def train_autoencoder(model, train_loader, device, learning_rate=0.01, epochs=5):
    model.to(device)
    criterion = nn.MSELoss()
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)
    
    model.train()
    total_loss = 0
    for epoch in range(epochs):
        for data, target in train_loader:
            data, target = data.to(device), target.to(device)
            # print(data.shape)
            data = data.permute(0, 2, 1)
            optimizer.zero_grad()
            output = model(data)
            # print(output.shape)
            loss = criterion(output, data)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        
        epoch_loss = total_loss / len(train_loader)
        # print(f'Epoch {epoch+1}, Loss: {epoch_loss}')
        total_loss = 0  # Reset total loss for the next epoch

    results = {
        'train_loss': epoch_loss
    }
    
    return results  # Returns the average loss of the last epoch

In [9]:
def test_autoencoder(model, test_loader, device):
    model.to(device)
    model.eval()
    
    criterion = nn.MSELoss()
    total_loss = 0
    
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            data = data.permute(0, 2, 1)
            output = model(data)
            loss = criterion(output, data)
            total_loss += loss.item()
    
    avg_loss = total_loss / len(test_loader)
    # print(f'Test Loss: {avg_loss}')
    
    return avg_loss  # Returns the average loss for the test data

# Client

In [10]:
class Client():
  def __init__(self, client_config:dict):
    # client config as dict to make configuration dynamic
    self.id = client_config["id"]
    self.config = client_config
    self.__model = None

    self.labelled_loader = self.config["labelled"]
    self.unlabelled_loader = self.config["unlabelled"]
    self.test_loader = self.config["test"]

  @property
  def model(self):
    return self.__model

  @model.setter
  def model(self, model):
    self.__model = model

  def __len__(self):
    """Return a total size of the client's local data."""
    return len(self.unlabelled_loader.sampler)

  def train_ssl(self):
    results = train_autoencoder(model = self.model,
                    train_loader = self.unlabelled_loader,
                    device=device,
                    learning_rate=learning_rate_client,
                    epochs=local_epochs)
    print(f"Train result client {self.id}: {results}")

  def test_ssl(self):
    loss = test_autoencoder(model = self.model,
                    test_loader = self.test_loader,
                    device=device)
    print(f"Test result client {self.id}: {loss}")
    return loss

# Server

In [11]:
class FedAvg():
  def __init__(self):
    self.globalmodel = HARAutoEncoder(input_channels=3, sequence_length=128)
    self.rounds = 0
    self.params = {}

  def aggregate(self, round):
    #v1:update the aggregate to save the model with round and date indicator
    modelparams = []
    for i in self.params.keys():
      modelparams.append(self.params[i])

    avg_weights = {}
    for name in modelparams[0].keys():
      avg_weights[name] = torch.mean(torch.stack([w[name] for w in modelparams]), dim = 0)

    self.globalmodel.load_state_dict(avg_weights)
    
    #save the model
    name_path = f'AutoEncoder_FL/Model_Global_Filtered_v3/{current_time}'
    if not os.path.exists(name_path):
      os.makedirs(name_path)

    torch.save(self.globalmodel.state_dict(), f"{name_path}/global_model_round_{round}.pth")
    
    # filename = f"{path_glob_m}/global_model_round_{round}_{current_time}.pth"
    # torch.save(self.globalmodel.state_dict(), filename)

  def clientstrain(self, clientconfig):
    clients = clientconfig
    for i in clients.keys():
      test_client = Client(clients[i])
      test_client.model = copy.deepcopy(self.globalmodel)
      test_client.train_ssl()
      test_client.test_ssl()
      self.params[i] = test_client.model.state_dict()

  def initiate_FL(self, clientconfig, serverdata):
    clients = clientconfig
    print("Round: {}".format(self.rounds))

    print("Obtaining Weights!!")
    self.clientstrain(clients)

    #### Aggregate model
    print("Aggregating Model!!")
    self.aggregate(self.rounds)

    #### Replace parameters with global model parameters
    for i in self.params.keys():
        self.params[i] = self.globalmodel.state_dict()


    servertest = serverdata
    loss = test_autoencoder(model = self.globalmodel,
                    test_loader = servertest,
                    device=device)
    print("Round {} metrics:".format(self.rounds))
    print("Server Loss = {}".format(loss))
    print("Round {} finished!".format(self.rounds))
    self.rounds += 1
    return clients, loss

# Main

In [12]:
clients = {}

for i in range(numclients):
    clients[i] = {"id": i, "batch_size": batch_size, "local_epoch": 5}
    clients[i]['labelled'] = load_data_client(i, batch_size, 'labelled_train')
    clients[i]['unlabelled'] = load_data_client(i, batch_size, 'unlabelled_train')
    clients[i]['test'] = load_data_client(i, batch_size, 'test')

    print(f"client: {i}")
    print(f"labelled: {len(clients[i]['labelled'])}")
    print(f"unlabelled: {len(clients[i]['unlabelled'])}")
    print(f"test: {len(clients[i]['test'])}")

# combine all client test data into one
combined_test_data = []
combined_test_labels = []
for i in range(numclients):
    for data, labels in clients[i]['test']:
        combined_test_data.append(data)
        combined_test_labels.append(labels)
combined_test_data = torch.cat(combined_test_data, dim=0)
combined_test_labels = torch.cat(combined_test_labels, dim=0)
# create dataset and dataloader
combined_test_dataset = torch.utils.data.TensorDataset(combined_test_data, combined_test_labels)
combined_test_dataloader = torch.utils.data.DataLoader(combined_test_dataset, batch_size=batch_size, shuffle=False)

print(f"combined test: {len(combined_test_dataloader)}")

# server test_data
server_test_data = combined_test_dataloader

server = FedAvg()

start = time.time()
loss_rounds = []
for i in range(num_epochs):
    clients, loss = server.initiate_FL(clientconfig=clients, serverdata=server_test_data)
    loss_rounds.append(loss)

print("\n")
print("-" * 50)
print("Loss values all rounds: ", loss_rounds)

print(f"\nTime cost: {round((time.time()-start)/60, 2)}min.")

client: 0
labelled: 2
unlabelled: 8
test: 3
client: 1
labelled: 2
unlabelled: 7
test: 3
client: 2
labelled: 2
unlabelled: 8
test: 3
client: 3
labelled: 2
unlabelled: 8
test: 3
client: 4
labelled: 2
unlabelled: 7
test: 3
client: 5
labelled: 2
unlabelled: 8
test: 3
client: 6
labelled: 2
unlabelled: 7
test: 3
client: 7
labelled: 2
unlabelled: 7
test: 2
client: 8
labelled: 2
unlabelled: 7
test: 3
client: 9
labelled: 2
unlabelled: 7
test: 3
client: 10
labelled: 2
unlabelled: 8
test: 3
client: 11
labelled: 2
unlabelled: 8
test: 3
client: 12
labelled: 2
unlabelled: 8
test: 3
client: 13
labelled: 2
unlabelled: 8
test: 3
client: 14
labelled: 2
unlabelled: 8
test: 3
client: 15
labelled: 2
unlabelled: 9
test: 3
client: 16
labelled: 3
unlabelled: 9
test: 3
client: 17
labelled: 3
unlabelled: 9
test: 3
client: 18
labelled: 2
unlabelled: 9
test: 3
client: 19
labelled: 2
unlabelled: 8
test: 3
client: 20
labelled: 3
unlabelled: 10
test: 3
client: 21
labelled: 2
unlabelled: 8
test: 3
client: 22
labelled

Round: 0
Obtaining Weights!!
Train result client 0: {'train_loss': 0.2372329067438841}
Test result client 0: 0.23400460183620453
Train result client 1: {'train_loss': 0.2309308158499854}
Test result client 1: 0.22964919110139212
Train result client 2: {'train_loss': 0.24157635495066643}
Test result client 2: 0.2387211173772812
Train result client 3: {'train_loss': 0.2248433493077755}
Test result client 3: 0.2221470276514689
Train result client 4: {'train_loss': 0.239158890077046}
Test result client 4: 0.23713673651218414
Train result client 5: {'train_loss': 0.2425880115479231}
Test result client 5: 0.23804126679897308
Train result client 6: {'train_loss': 0.2497550504548209}
Test result client 6: 0.24915127456188202
Train result client 7: {'train_loss': 0.24279314066682542}
Test result client 7: 0.23983832448720932
Train result client 8: {'train_loss': 0.23593700996467046}
Test result client 8: 0.23495484391848245
Train result client 9: {'train_loss': 0.24202402787549154}
Test result 

# Fine-Tuned Phase

## Model Fine-tuned

In [37]:
class HARClassifier(nn.Module):
    def __init__(self, input_channels=3, sequence_length=128, num_classes=9):  # Adjust num_classes to 13
        super(HARClassifier, self).__init__()
        
        # Assuming the encoder from the AutoEncoder
        self.autoencoder = HARAutoEncoder(input_channels=input_channels, sequence_length=sequence_length)
        
        # Freeze the autoencoder's parameters
        for param in self.autoencoder.parameters():
            param.requires_grad = False
        
        # Classifier part as described
        self.classifier = nn.Sequential(
            nn.Linear(4096, 32),  # Adjust the first linear layer to have 32 outputs
            nn.ReLU(),
            nn.Linear(32, num_classes),  # Final layer for 13 classes
            nn.LogSoftmax(dim=1)
        )

    def forward(self, x):
        # Use the encoder to get the features
        x = self.autoencoder.encoder(x)
        x = x.view(x.size(0), -1)
        # Classify using the extracted features
        x = self.classifier(x)
        return x

In [38]:
# load model
model = HARClassifier(input_channels=3, sequence_length=128, num_classes=num_classes)
model_pretained = HARAutoEncoder(input_channels=3, sequence_length=128)

# load the global model
model_pretained.load_state_dict(torch.load(f'AutoEncoder_FL/Model_Global_Filtered_v3/{current_time}/global_model_round_199.pth'))

# transfer encoder wieghts to classifier
model.autoencoder.load_state_dict(model_pretained.state_dict())
print(model)

HARClassifier(
  (autoencoder): HARAutoEncoder(
    (encoder): Sequential(
      (0): Conv1d(3, 32, kernel_size=(5,), stride=(1,), padding=(2,))
      (1): ReLU()
      (2): Conv1d(32, 32, kernel_size=(5,), stride=(1,), padding=(2,))
      (3): ReLU()
      (4): Conv1d(32, 32, kernel_size=(5,), stride=(1,), padding=(2,))
      (5): ReLU()
      (6): Conv1d(32, 32, kernel_size=(5,), stride=(1,), padding=(2,))
      (7): ReLU()
    )
    (fc_encoder): Linear(in_features=4096, out_features=128, bias=True)
    (fc_decoder): Linear(in_features=128, out_features=4096, bias=True)
    (decoder): Sequential(
      (0): ConvTranspose1d(32, 32, kernel_size=(5,), stride=(1,), padding=(2,))
      (1): ReLU()
      (2): ConvTranspose1d(32, 32, kernel_size=(5,), stride=(1,), padding=(2,))
      (3): ReLU()
      (4): ConvTranspose1d(32, 32, kernel_size=(5,), stride=(1,), padding=(2,))
      (5): ReLU()
      (6): ConvTranspose1d(32, 3, kernel_size=(5,), stride=(1,), padding=(2,))
      (7): Sigmoid()

In [39]:
num_classes

8

## Data Fine-Tuned

In [40]:
# combine all client labelled data into one
combined_labelled_data = []
combined_labelled_labels = []
for i in range(numclients):
    for data, labels in clients[i]['labelled']:
        combined_labelled_data.append(data)
        combined_labelled_labels.append(labels)
combined_labelled_data = torch.cat(combined_labelled_data, dim=0)
combined_labelled_labels = torch.cat(combined_labelled_labels, dim=0)
# create dataset and dataloader
combined_labelled_dataset = torch.utils.data.TensorDataset(combined_labelled_data, combined_labelled_labels)
combined_labelled_dataloader = torch.utils.data.DataLoader(combined_labelled_dataset, batch_size=batch_size, shuffle=True)

print(f"combined labelled: {len(combined_labelled_dataloader)}")

combined labelled: 409


### Class Weight

In [41]:
# Count the frequency of each class
class_counts = torch.zeros(num_classes)  # num_classes should be defined based on your dataset
for _, target in combined_labelled_dataloader:
    class_counts += torch.bincount(target, minlength=num_classes)

# Calculate class weights
class_counts += 1  # Add 1 to each class count to avoid division by zero
c_weight = 1. / class_counts
c_weight = c_weight / c_weight.sum() * num_classes
c_weight = c_weight.to(device)

In [42]:
class_counts

tensor([2.7210e+03, 2.0800e+03, 2.4950e+03, 1.9440e+03, 3.3400e+02, 1.2290e+03,
        2.2650e+03, 1.0000e+00])

## Fine-Tuning

In [43]:
def fine_tune_model(model, train_loader, num_epochs=100, learning_rate=5e-5):
    # Assuming class weights are calculated and provided as `class_weights`
    class_weights = torch.tensor(c_weight).to(device)
    criterion = torch.nn.CrossEntropyLoss()
    
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    
    model.train()
    for epoch in range(num_epochs):
        for data, target in train_loader:
            data, target = data.to(device), target.to(device)
            data = data.permute(0, 2, 1)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}')


In [44]:
fine_tune_model(model.to(device), combined_labelled_dataloader, num_epochs=200, learning_rate=0.00005)

  class_weights = torch.tensor(c_weight).to(device)


Epoch 1/200, Loss: 1.8132460117340088
Epoch 2/200, Loss: 1.8108965158462524
Epoch 3/200, Loss: 1.7976248264312744
Epoch 4/200, Loss: 1.733414888381958
Epoch 5/200, Loss: 1.579136848449707
Epoch 6/200, Loss: 1.5801359415054321
Epoch 7/200, Loss: 2.2409896850585938
Epoch 8/200, Loss: 1.538578748703003
Epoch 9/200, Loss: 1.9014718532562256
Epoch 10/200, Loss: 1.9450279474258423
Epoch 11/200, Loss: 2.020195484161377
Epoch 12/200, Loss: 1.9509551525115967
Epoch 13/200, Loss: 1.6792662143707275
Epoch 14/200, Loss: 1.7883745431900024
Epoch 15/200, Loss: 2.0542685985565186
Epoch 16/200, Loss: 1.7374002933502197
Epoch 17/200, Loss: 1.767801284790039
Epoch 18/200, Loss: 2.0961222648620605
Epoch 19/200, Loss: 2.188673257827759
Epoch 20/200, Loss: 1.9989677667617798
Epoch 21/200, Loss: 1.4641225337982178
Epoch 22/200, Loss: 1.5918306112289429
Epoch 23/200, Loss: 1.4099600315093994
Epoch 24/200, Loss: 1.9277660846710205
Epoch 25/200, Loss: 2.115424633026123
Epoch 26/200, Loss: 1.4659702777862549
Ep

In [45]:
# method to test the model and get the accuracy and f1 score
def test_model(model, test_loader):
    model.eval()
    correct = 0
    total = 0
    y_true = []
    y_pred = []
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            data = data.permute(0, 2, 1)
            outputs = model(data)
            _, predicted = torch.max(outputs.data, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()
            y_true.extend(target.cpu().numpy())
            y_pred.extend(predicted.cpu().numpy())
    accuracy = correct / total
    f1 = f1_score(y_true, y_pred, average='weighted')
    print(f'Accuracy: {accuracy}, F1 Score: {f1}')
    return accuracy, f1

In [46]:
test_model(model, combined_test_dataloader)

Accuracy: 0.3177535788572128, F1 Score: 0.27874838709888367


(0.3177535788572128, 0.27874838709888367)

weight - 30 | 27.99