In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import numpy as np
import torch

In [2]:
# Load the binary-classification CSV; the file's first column becomes the DataFrame index.
df = pd.read_csv("../../../datasets/Machine Predictive Maintenance Classification/binary_classification.csv", index_col=[0])

In [26]:
# Features are every column except 'Target'; 'Target' itself is the label column.
X = df.drop(columns='Target')
Y = df['Target']

In [29]:
# Hold out 20% of the rows for testing; the fixed random_state makes the split repeatable.
# NOTE(review): the split is not stratified on Y — confirm that is acceptable if the classes are imbalanced.
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

In [6]:
def get_device():
    """Pick the torch device to run on and print which one was chosen.

    Returns:
        torch.device: the CUDA device when CUDA is available, otherwise the
        CPU device.
    """
    if not torch.cuda.is_available():
        print("Using CPU")
        return torch.device("cpu")

    chosen = torch.device("cuda")
    gpu_name = torch.cuda.get_device_name(0)
    print(f"Using GPU: {gpu_name}")
    return chosen

# NOTE(review): `device` is not referenced by any other code visible in this notebook section,
# so the model and the batches are never moved to it — confirm whether that is intended.
device = get_device()

Using CPU


In [30]:
class CustomDataset(Dataset):
    """Map-style dataset over a pandas feature table and its label column.

    Each item is a ``(features, label)`` pair: the feature row as a
    ``torch.float32`` tensor and the label as a ``torch.long`` tensor.

    Args:
        X: pandas object holding the features, one row per sample.
        y: pandas object holding the labels, aligned row-for-row with ``X``.

    Raises:
        ValueError: if ``X`` and ``y`` do not contain the same number of rows.
    """

    def __init__(self, X, y):
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same number of rows, got {len(X)} and {len(y)}."
            )
        # Reset to a 0..n-1 index so lookups never depend on the index labels
        # the inputs carried before (e.g. after a shuffled train/test split).
        self.X = X.reset_index(drop=True)
        self.y = y.reset_index(drop=True)

    def __len__(self):
        """Return the number of samples (rows of ``y``)."""
        return len(self.y)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` tensors for the sample at position ``idx``.

        Raises:
            IndexError: if ``idx`` is out of range, or if the tensor conversion
                fails with ``TypeError`` for a list/tuple/``pd.Index`` index.
            TypeError: if the tensor conversion fails for any other kind of index.
        """
        try:
            X_tensor = torch.tensor(self.X.iloc[idx].values, dtype=torch.float32)
            y_tensor = torch.tensor(self.y.iloc[idx], dtype=torch.long)
        except TypeError:
            self._check_indexing_error(idx)
            # Safety net: if a subclass overrides the helper without raising,
            # still surface the original TypeError instead of continuing.
            raise
        # Every other exception (notably IndexError for an out-of-range
        # position) propagates to the caller. Swallowing it and returning None
        # would break the sequence protocol: plain iteration over the dataset
        # relies on IndexError to know where the data ends.
        return X_tensor, y_tensor

    def _check_indexing_error(self, idx):
        """Translate a failed lookup into a clearer error.

        Must be called while an exception is being handled: the bare ``raise``
        re-raises that active exception when ``idx`` is not a list, tuple or
        ``pd.Index``.
        """
        if isinstance(idx, (list, tuple, pd.Index)):
            raise IndexError("Invalid index provided. Index should be an integer.")
        raise

In [31]:
# Wrap the train and test splits as datasets.
train_dataset = CustomDataset(x_train, y_train)
test_dataset = CustomDataset(x_test, y_test)

# Both loaders use mini-batches of 2 samples; only the training loader shuffles.
train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=2, shuffle=False)

In [9]:
class SimpleNN(nn.Module):
    """Two-layer fully connected classifier: Linear -> ReLU -> Linear.

    Args:
        input_size: number of input features per sample.
        hidden_size: width of the hidden layer.
        num_classes: number of output scores produced per sample.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        # The attribute names double as state_dict key prefixes, so they are fixed.
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return the raw class scores for the batch ``x``."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)
    
# One input unit per feature column of X, a 10-unit hidden layer, and 2 output scores.
input_size = X.shape[1]
hidden_size = 10
num_classes = 2

model = SimpleNN(input_size, hidden_size, num_classes)

In [10]:
# Total number of scalar values across every tensor in the model's state_dict.
num_parameters = sum(value.numel() for value in model.state_dict().values())
print(f"{num_parameters = }")

num_parameters = 82


In [43]:
def train(net, trainloader, optimizer, epochs):
    """Fit ``net`` on ``trainloader`` for ``epochs`` full passes.

    Each batch is scored with cross-entropy loss and followed by a single
    optimizer step. The network is switched to training mode, updated in
    place, and returned.
    """
    loss_fn = torch.nn.CrossEntropyLoss()
    net.train()
    for _epoch in range(epochs):
        for features, targets in trainloader:
            # Clear gradients left over from the previous step before backprop.
            optimizer.zero_grad()
            predictions = net(features)
            batch_loss = loss_fn(predictions, targets)
            batch_loss.backward()
            optimizer.step()
    return net


def test(net, testloader):
    """Validate the network on the entire test set."""
    criterion = torch.nn.CrossEntropyLoss()
    correct, loss = 0, 0.0
    net.eval()
    with torch.no_grad():
        for images, labels in testloader:
            outputs = net(images)
            loss += criterion(outputs, labels).item()
            _, predicted = torch.max(outputs.data, 1)
            correct += (predicted == labels).sum().item()
    accuracy = correct / len(testloader.dataset)
    return loss, accuracy


def run_centralised(epochs: int, lr: float, model: SimpleNN, train_dataset: CustomDataset, test_dataset: CustomDataset, momentum: float = 0.9, batch_size: int = 2):
    """Train ``model`` centrally on ``train_dataset`` and report test metrics.

    Args:
        epochs: number of full passes over the training data.
        lr: learning rate for the SGD optimiser.
        model: network to train; it is updated in place.
        train_dataset: dataset used for training.
        test_dataset: dataset used for the final evaluation.
        momentum: SGD momentum.
        batch_size: mini-batch size used by both loaders.

    Prints the test loss and accuracy returned by ``test``; returns ``None``.
    """
    # Named `optimizer` rather than `optim` so it does not shadow the
    # `torch.optim as optim` alias imported at the top of the notebook.
    optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum)

    # Use the datasets supplied by the caller; rebuilding them here from the
    # global x_train/y_train/x_test/y_test would silently ignore the arguments.
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # train for the specified number of epochs
    trained_model = train(model, train_loader, optimizer, epochs)

    # training is completed, then evaluate model on the test set
    loss, accuracy = test(trained_model, test_loader)
    print(f"{loss = }")
    print(f"{accuracy = }")

In [44]:
# Train the shared `model` instance for 5 epochs, then print the test loss and accuracy.
# NOTE: `model` is updated in place, so re-running this cell continues from the already-trained weights.
run_centralised(epochs=5, lr=0.01, model=model, train_dataset=train_dataset, test_dataset=test_dataset)
# run_centralised(epochs=5, lr=0.01)

loss = 100.25208443822339
accuracy = 0.972


In [45]:
# Display the model's current weights and biases.
model.state_dict()

OrderedDict([('fc1.weight',
              tensor([[ 7.2073e-01,  1.1847e+00, -6.9887e-01, -2.4762e+00, -8.6059e-01],
                      [-8.6437e-01, -1.7447e-01, -1.4074e+00,  3.8150e+00,  8.0337e-01],
                      [-7.7218e-02,  1.1339e-01,  1.5609e+00, -1.2647e+00, -1.1231e-02],
                      [ 7.0153e-01,  1.0174e+00, -9.8110e-01, -2.1112e+00, -6.0228e-01],
                      [-7.6304e-02,  4.8556e-01, -2.0388e-01, -1.9905e-01, -2.4395e-01],
                      [-3.9132e-01,  3.9288e-01, -2.4111e-01, -3.5332e-01, -2.7727e-01],
                      [ 1.5035e-01, -5.6717e-02, -1.9578e-01, -1.8157e-01,  5.3484e-02],
                      [-1.3667e-01, -2.4926e-01, -1.1325e-01,  8.5793e-02, -4.1751e-01],
                      [-3.4502e-01,  9.9287e-02,  2.5702e-01, -1.7292e-03, -4.3818e-01],
                      [ 6.4855e-03,  7.4033e-01, -4.0296e-01, -1.1668e+00, -4.5637e-01]])),
             ('fc1.bias',
              tensor([ 1.2658, -0.8143, -0.5112,  1.0

### Federated Learning

In [2]:
from torch.utils.data import random_split

# This function partitions the training set into N disjoint subsets; each one becomes the local dataset of a client.
# Each training partition is then further split into a train part and a validation part.
# The test set is left intact and is used by the central server to assess the performance of the global model.
def prepare_dataset(num_partitions: int, batch_size: int, val_ratio: float = 0.1):
    """Build per-client train/validation loaders plus one shared test loader.

    The training split (module-level ``x_train``/``y_train``) is divided into
    ``num_partitions`` disjoint random subsets. Each subset is then split into
    a training part and a validation part. The test split
    (``x_test``/``y_test``) is kept whole.

    Args:
        num_partitions: number of client partitions to create; must be >= 1.
        batch_size: batch size of every train and validation loader.
        val_ratio: fraction of each partition reserved for validation.

    Returns:
        tuple: ``(trainloaders, valloaders, testloader)`` where the first two
        are lists with one ``DataLoader`` per partition.

    Raises:
        ValueError: if ``num_partitions`` is smaller than 1.
    """
    if num_partitions < 1:
        raise ValueError(f"num_partitions must be >= 1, got {num_partitions}")

    train_dataset = CustomDataset(x_train, y_train)
    test_dataset = CustomDataset(x_test, y_test)

    # random_split requires the lengths to add up to len(train_dataset), so
    # hand the remainder out one sample at a time to the first partitions
    # instead of dropping it from the total.
    base_len, remainder = divmod(len(train_dataset), num_partitions)
    partition_len = [
        base_len + 1 if i < remainder else base_len for i in range(num_partitions)
    ]

    trainsets = random_split(
        train_dataset, partition_len, torch.Generator().manual_seed(42)
    )

    # Create DataLoaders with train and validation support.
    trainloaders = []
    valloaders = []
    for trainset_ in trainsets:
        num_total = len(trainset_)
        num_val = int(val_ratio * num_total)
        num_train = num_total - num_val

        # A fresh generator with the same seed keeps every partition's
        # train/validation split repeatable.
        for_train, for_val = random_split(
            trainset_, [num_train, num_val], torch.Generator().manual_seed(42)
        )
        trainloaders.append(
            DataLoader(for_train, batch_size=batch_size, shuffle=True, num_workers=2)
        )
        valloaders.append(
            DataLoader(for_val, batch_size=batch_size, shuffle=False, num_workers=2)
        )
    testloader = DataLoader(test_dataset, batch_size=64)

    return trainloaders, valloaders, testloader

In [12]:
# Create 10 client partitions with batches of 32 samples (validation ratio left at its default).
trainloaders, valloaders, testloader = prepare_dataset(
    num_partitions=10, batch_size=32
)

In [26]:
# Number of batches in the first partition's training loader.
len(trainloaders[0])

23