In [1]:
# import libraries
import os
import sys
import time
import pandas as pd
import numpy as np
from scipy import stats
import copy
from scipy.interpolate import CubicSpline
import torch.optim as optim
import torch.nn as nn
import torch
from torch.optim import Adam
from scipy.fftpack import fft, ifft
from scipy.stats import mode
from torch.utils.data import DataLoader, TensorDataset
import datetime
from sklearn.metrics import f1_score

In [2]:
num_epochs = 200
batch_size = 32  # Set your batch size
learning_rate_client = 0.001
local_epochs = 1
subject_dir = 'FL_Data/windowed_data_refused/subject_'  # Set your directory to the subject data
numclients = 54
num_classes = 9

#current timestamp
current_time = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [3]:
device

device(type='cuda', index=0)

In [4]:
# set the seed
torch.manual_seed(420)
np.random.seed(420)

In [5]:
def load_data_client(id, batch_size=batch_size, type='labelled_train'):
    # Load the data
    data = np.load(subject_dir + str(id) + '/windowed_' + type + '_x.npy')
    labels = np.load(subject_dir + str(id) + '/windowed_' + type + '_y.npy')

    # print shape of data
    # print(data.shape)
    # print(labels.shape)

    # Convert to torch tensor
    data = torch.from_numpy(data).float()
    labels = torch.from_numpy(labels).long()

    # Create a dataset
    dataset = torch.utils.data.TensorDataset(data, labels)

    # Create a dataloader
    if type == 'labelled_train' or type == 'unlabelled_train':
        dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)
    else:
        dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=False)
    
    return dataloader

In [6]:
labelled_data = []

for i in range(numclients):
    data_label = load_data_client(id= i, batch_size=batch_size, type='labelled_train')
    print(f"subject id: {i}, len: {len(data_label)}")
    labelled_data.append(data_label)

subject id: 0, len: 12
subject id: 1, len: 11
subject id: 2, len: 12
subject id: 3, len: 10
subject id: 4, len: 13
subject id: 5, len: 12
subject id: 6, len: 3
subject id: 7, len: 4
subject id: 8, len: 2
subject id: 9, len: 11
subject id: 10, len: 9
subject id: 11, len: 12
subject id: 12, len: 11
subject id: 13, len: 13
subject id: 14, len: 11
subject id: 15, len: 10
subject id: 16, len: 13
subject id: 17, len: 12
subject id: 18, len: 11
subject id: 19, len: 11
subject id: 20, len: 11
subject id: 21, len: 11
subject id: 22, len: 9
subject id: 23, len: 12
subject id: 24, len: 2
subject id: 25, len: 2
subject id: 26, len: 2
subject id: 27, len: 2
subject id: 28, len: 2
subject id: 29, len: 2
subject id: 30, len: 2
subject id: 31, len: 2
subject id: 32, len: 2
subject id: 33, len: 2
subject id: 34, len: 2
subject id: 35, len: 2
subject id: 36, len: 2
subject id: 37, len: 2
subject id: 38, len: 2
subject id: 39, len: 2
subject id: 40, len: 2
subject id: 41, len: 2
subject id: 42, len: 2
su

In [7]:
# combine all client labelled data into one
combined_labelled_data = []
combined_labelled_labels = []
for i in range(numclients):
    for data, labels in labelled_data[i]:
        combined_labelled_data.append(data)
        combined_labelled_labels.append(labels)
combined_labelled_data = torch.cat(combined_labelled_data, dim=0)
combined_labelled_labels = torch.cat(combined_labelled_labels, dim=0)
# create dataset and dataloader
combined_labelled_dataset = torch.utils.data.TensorDataset(combined_labelled_data, combined_labelled_labels)
combined_labelled_dataloader = torch.utils.data.DataLoader(combined_labelled_dataset, batch_size=batch_size, shuffle=True)

print(f"combined labelled: {len(combined_labelled_dataloader)}")

combined labelled: 273


In [8]:
# combine all unlabelled data into one
unlabelled_data = []
for i in range(numclients):
    data = load_data_client(id= i, batch_size=batch_size, type='unlabelled_train')
    unlabelled_data.append(data)

combined_unlabelled_data = []
combined_unlabelled_labels = []
for i in range(numclients):
    for data, labels in unlabelled_data[i]:
        combined_unlabelled_data.append(data)
        combined_unlabelled_labels.append(labels)
combined_unlabelled_data = torch.cat(combined_unlabelled_data, dim=0)
combined_unlabelled_labels = torch.cat(combined_unlabelled_labels, dim=0)
# create dataset and dataloader
combined_unlabelled_dataset = torch.utils.data.TensorDataset(combined_unlabelled_data, combined_unlabelled_labels)
combined_unlabelled_dataloader = torch.utils.data.DataLoader(combined_unlabelled_dataset, batch_size=batch_size, shuffle=True)

print(f"combined unlabelled: {len(combined_unlabelled_dataloader)}")

combined unlabelled: 1092


In [9]:
# combine all test data into one
test_data = []
for i in range(numclients):
    data = load_data_client(id= i, batch_size=batch_size, type='test')
    test_data.append(data)

combined_test_data = []
combined_test_labels = []
for i in range(numclients):
    for data, labels in test_data[i]:
        combined_test_data.append(data)
        combined_test_labels.append(labels)
combined_test_data = torch.cat(combined_test_data, dim=0)
combined_test_labels = torch.cat(combined_test_labels, dim=0)
# create dataset and dataloader
combined_test_dataset = torch.utils.data.TensorDataset(combined_test_data, combined_test_labels)
combined_test_dataloader = torch.utils.data.DataLoader(combined_test_dataset, batch_size=batch_size, shuffle=False)

print(f"combined test: {len(combined_test_dataloader)}")

combined test: 341


In [10]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class CNNFeatureExtractor(nn.Module):
    def __init__(self, num_classes=4):
        super(CNNFeatureExtractor, self).__init__()

        self.conv1 = nn.Conv1d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv1d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv1d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool1d(kernel_size=2, stride=2)
        
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(256 * 12, 128)  # Adjust the input features according to your final conv layer output
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))

        x = self.flatten(x)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [11]:
# create training function
def train(model, train_loader, criterion, optimizer, device):
    model.train()
    running_loss = 0.0
    for i, data in enumerate(train_loader):
        inputs, labels = data[0].to(device), data[1].to(device)
        inputs = inputs.transpose(1, 2)
        # zero the parameter gradients
        optimizer.zero_grad()
        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # print statistics
        running_loss += loss.item()
    return running_loss / len(train_loader)

In [12]:
# create testing function
def test(model, test_loader, criterion, device):
    model.eval()
    running_loss = 0.0
    #calculate accuracy
    correct = 0
    with torch.no_grad():
        for i, data in enumerate(test_loader):
            inputs, labels = data[0].to(device), data[1].to(device)
            inputs = inputs.transpose(1, 2)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            running_loss += loss.item()
            #calculate accuracy
            _, predicted = torch.max(outputs.data, 1)
            correct += (predicted == labels).sum().item()
    accuracy = correct / len(test_loader.dataset)
    return running_loss / len(test_loader), accuracy

In [13]:
# create function to train and test model
def train_and_test(model, train_loader, test_loader, criterion, optimizer, device, num_epochs):
    train_losses = []
    test_losses = []
    test_accuracies = []
    for epoch in range(num_epochs):
        train_loss = train(model, train_loader, criterion, optimizer, device)
        test_loss, test_accuracy = test(model, test_loader, criterion, device)
        train_losses.append(train_loss)
        test_losses.append(test_loss)
        test_accuracies.append(test_accuracy)
        print(f"Epoch: {epoch + 1}/{num_epochs}.. Train Loss: {train_loss:.3f}.. Test Loss: {test_loss:.3f}.. Test Accuracy: {test_accuracy:.3f}")
    return train_losses, test_losses, test_accuracies

In [14]:
num_classes = 4

model = CNNFeatureExtractor(num_classes=num_classes)

# move model to GPU if available
model.to(device)

# define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)

# train and test model
num_epochs = 100
train_losses, test_losses, test_accuracies = train_and_test(model, combined_unlabelled_dataloader, combined_unlabelled_dataloader, criterion, optimizer, device, num_epochs)

Epoch: 1/100.. Train Loss: 1.389.. Test Loss: 1.387.. Test Accuracy: 0.255
Epoch: 2/100.. Train Loss: 1.387.. Test Loss: 1.387.. Test Accuracy: 0.256
Epoch: 3/100.. Train Loss: 1.386.. Test Loss: 1.385.. Test Accuracy: 0.263
Epoch: 4/100.. Train Loss: 1.386.. Test Loss: 1.385.. Test Accuracy: 0.268
Epoch: 5/100.. Train Loss: 1.385.. Test Loss: 1.386.. Test Accuracy: 0.256
Epoch: 6/100.. Train Loss: 1.384.. Test Loss: 1.385.. Test Accuracy: 0.261
Epoch: 7/100.. Train Loss: 1.384.. Test Loss: 1.383.. Test Accuracy: 0.269
Epoch: 8/100.. Train Loss: 1.383.. Test Loss: 1.382.. Test Accuracy: 0.286
Epoch: 9/100.. Train Loss: 1.383.. Test Loss: 1.381.. Test Accuracy: 0.281
Epoch: 10/100.. Train Loss: 1.382.. Test Loss: 1.381.. Test Accuracy: 0.287
Epoch: 11/100.. Train Loss: 1.381.. Test Loss: 1.381.. Test Accuracy: 0.271
Epoch: 12/100.. Train Loss: 1.381.. Test Loss: 1.382.. Test Accuracy: 0.259
Epoch: 13/100.. Train Loss: 1.380.. Test Loss: 1.382.. Test Accuracy: 0.278
Epoch: 14/100.. Train

In [15]:
# method to test the model and get the accuracy and f1 score
def test_model(model, test_loader):
    model.eval()
    correct = 0
    total = 0
    y_true = []
    y_pred = []
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            data = data.permute(0, 2, 1)
            outputs = model(data)
            _, predicted = torch.max(outputs.data, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()
            y_true.extend(target.cpu().numpy())
            y_pred.extend(predicted.cpu().numpy())
    accuracy = correct / total
    f1 = f1_score(y_true, y_pred, average='weighted')
    print(f'Accuracy: {accuracy}, F1 Score: {f1}')
    return accuracy, f1

In [16]:
def fine_tune_model(model, train_loader, test_loader, num_epochs=200):
    # Assuming class weights are calculated and provided as `class_weights`
    # class_weights = torch.tensor(c_weight).to(device)
    criterion = torch.nn.CrossEntropyLoss()
    
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    # optimizer = torch.optim.Adam(model.fc2.parameters(), lr=0.001)
    
    model.train()
    for epoch in range(num_epochs):
        for data, target in train_loader:
            data, target = data.to(device), target.to(device)
            data = data.permute(0, 2, 1)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
        acc, f1 = test_model(model, test_loader)
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}, Accuracy: {acc}, F1 Score: {f1}')

In [17]:
# create model fine tuning
model_tuned = copy.deepcopy(model)

 # Freezing layers up to conv3
for name, param in model_tuned.named_parameters():
    if 'conv3' in name:
        break
    param.requires_grad = False

# Unfreeze layers from conv3 onwards
unfreeze = False
for name, param in model_tuned.named_parameters():
    if 'conv3' in name:
        unfreeze = True
    if unfreeze:
        param.requires_grad = True

model_tuned.fc2 = nn.Linear(in_features=model_tuned.fc2.in_features, out_features=9)
model_tuned.to(device)
print(model_tuned)

fine_tune_model(model_tuned.to(device), combined_labelled_dataloader, combined_test_dataloader,num_epochs=50)



CNNFeatureExtractor(
  (conv1): Conv1d(3, 64, kernel_size=(3,), stride=(1,), padding=(1,))
  (conv2): Conv1d(64, 128, kernel_size=(3,), stride=(1,), padding=(1,))
  (conv3): Conv1d(128, 256, kernel_size=(3,), stride=(1,), padding=(1,))
  (pool): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=3072, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=9, bias=True)
)
Accuracy: 0.34355771879878777, F1 Score: 0.2850249148646848
Epoch 1/50, Loss: 1.8627986907958984, Accuracy: 0.34355771879878777, F1 Score: 0.2850249148646848
Accuracy: 0.35981265497290843, F1 Score: 0.3075872788934069
Epoch 2/50, Loss: 2.348477840423584, Accuracy: 0.35981265497290843, F1 Score: 0.3075872788934069
Accuracy: 0.36816971255395353, F1 Score: 0.3201810610845825
Epoch 3/50, Loss: 1.1873211860656738, Accuracy: 0.36816971255395353, F1 Score: 0.3201810610845825
Accuracy: 0.3825879327761962, F1 Sc