# This notebook explores how to train an MLP model to solve the HAR task

##  1. Instantiating the datamodule object and preparing the data

In [1]:
from data_modules.har import HarDataModule

# Build the HAR data module rooted at data/har. As the cell output shows, the
# dataset is downloaded and extracted when the CSV files are not found there.
my_datamodule = HarDataModule(
    root_data_dir="data/har",
    flatten=True,
    target_column="standard activity code",
    batch_size=16,
)

data/har/train.csv file is missing
Creating the root data directory: [data/har]
Could not find the zip file [data/har/har.zip]
Trying to download it.
Data downloaded and extracted


In [2]:
# Once the object is instantiated, we invoke the prepare_data() method to ensure the
# data is downloaded and extracted at the root_data_dir. Once you execute this command,
# you may inspect the data/har directory for the CSV data files.
my_datamodule.prepare_data()

## 2. Retrieving the training and validation set dataloaders

Besides defining the batch size, the data module manages the data loaders for the training, validation, and test partitions. 
The next command shows how to retrieve the training and validation data loaders.

In [3]:
# Ask the data module for the training and the validation dataloaders (dl)
train_dl, val_dl = my_datamodule.train_dataloader(), my_datamodule.val_dataloader()

## 3. Creating the ML model



In [4]:
from models.mlp import MultiLayerPerceptron

# MLP matching the flattened samples served by the dataloaders:
# 360 input features, 64 hidden units, and 6 output scores (one per class).
model = MultiLayerPerceptron(
    input_features=360,
    hidden_size=64,
    num_classes=6,
)
print(model)

MultiLayerPerceptron(
  (block): Sequential(
    (0): Linear(in_features=360, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=6, bias=True)
  )
  (loss): CrossEntropyLoss()
)


## 4. Test the untrained model

First, let's test the model in a very detailed way, so we can inspect the shape of the tensors being processed and produced.
NOTE: the model has not been trained yet; hence, we expect very poor results.


In [5]:
import torch

# Initialize counters:
number_of_predictions = 0
number_of_correct_predictions = 0

# We only want to measure accuracy here, so gradients are not needed.
# torch.no_grad() disables autograd tracking for every forward pass in the loop.
with torch.no_grad():
    # For each batch, compute the predictions and compare with the labels.
    for batch_idx, (X, y) in enumerate(train_dl):
        print(f"-- Processing the batch {batch_idx}")

        # X and y contain the input features and the expected labels for the B samples in the batch
        # Let's print their shape
        print(" X.shape = ", X.shape)
        print(" y.shape = ", y.shape)

        # Invoking the mlp model produces the logits. Logits are the raw scores output by the last layer 
        # of the neural network before applying the softmax function to convert them into probabilities. 
        # In this case, each sample gets an array with 6 raw scores.
        # Note: the following code invokes the forward() method from the MultiLayerPerceptron class
        logits = model(X)
        print(" logits.shape = ", logits.shape)

        # We'll utilize the argmax function to determine the index of the array position with the highest score. 
        # For instance, argmax([2, 8, 15, 0, 3, 9]) == 2, as the value 15 is located at index 2
        predictions = torch.argmax(logits, dim=1)  

        # Now, we have predictions for each one of the B input samples. 
        # Let's print the shape of the predictions tensor and print the predictions themselves.    
        print(" predictions.shape = ", predictions.shape)
        print(" predictions = ", predictions)

        # Next, let's compare the predictions against the expected labels
        correct = (predictions == y)
        print(" labels      = ", y)
        print(" correct     = ", correct)

        # Finally, let's count the total number of correct predictions and the total number of predictions.
        number_of_predictions += int(X.shape[0]) # Number of elements in the batch
        # .item() converts the 0-dim tensor returned by sum() into a plain Python int, 
        # so the counter (and the accuracy printed below) are numbers instead of tensors.
        number_of_correct_predictions += int(correct.sum().item()) # Number of True values in correct

print(f"Total number of predictions   = {number_of_predictions}")
print(f"Number of correct predictions = {number_of_correct_predictions}")
print("Accuracy = ", number_of_correct_predictions/number_of_predictions)



-- Processing the batch 0
 X.shape =  torch.Size([16, 360])
 y.shape =  torch.Size([16])
 logits.shape =  torch.Size([16, 6])
 predictions.shape =  torch.Size([16])
 predictions =  tensor([0, 3, 0, 4, 0, 0, 2, 2, 2, 0, 2, 0, 2, 2, 0, 0])
 labels      =  tensor([2, 5, 5, 4, 2, 2, 0, 2, 0, 2, 1, 2, 0, 3, 4, 4])
 correct     =  tensor([False, False, False,  True, False, False, False,  True, False, False,
        False, False, False, False, False, False])
-- Processing the batch 1
 X.shape =  torch.Size([16, 360])
 y.shape =  torch.Size([16])
 logits.shape =  torch.Size([16, 6])
 predictions.shape =  torch.Size([16])
 predictions =  tensor([2, 3, 2, 2, 0, 0, 0, 3, 4, 2, 3, 4, 3, 2, 4, 3])
 labels      =  tensor([0, 5, 2, 1, 3, 3, 3, 3, 5, 1, 5, 3, 2, 0, 5, 5])
 correct     =  tensor([False, False,  True, False, False, False, False,  True, False, False,
        False, False, False, False, False, False])
-- Processing the batch 2
 X.shape =  torch.Size([16, 360])
 y.shape =  torch.Size([16])

Take some time to inspect the output of the previous code block and make sure it makes sense.
Next, let's perform the same test, but without printing all the intermediate values.
We will also encapsulate the code in a function so we can reuse it later.

In [6]:
def evaluate_model(model, dataset_dl):
    """Count how many samples served by a dataloader the model classifies correctly.

    Args:
        model: Callable that maps a batch of inputs X to a logits tensor whose
            dimension 1 holds the per-class scores.
        dataset_dl: Iterable yielding (X, y) batches, where y holds the expected
            class index for each sample in X.

    Returns:
        A tuple (number_of_correct_predictions, number_of_predictions) of Python
        ints. An empty dataloader yields (0, 0).
    """
    # Initialize counters:
    number_of_predictions = 0
    number_of_correct_predictions = 0

    # Evaluation does not need gradients: disabling autograd avoids recording a
    # computation graph for every batch.
    with torch.no_grad():
        # For each batch, compute the predictions and compare with the labels.
        for X, y in dataset_dl:
            logits = model(X)
            predictions = torch.argmax(logits, dim=1)
            correct = (predictions == y)
            number_of_predictions += int(X.shape[0])
            # .item() yields a Python int, so the counter never turns into a tensor.
            number_of_correct_predictions += int(correct.sum().item())
    # Return a tuple with the number of correct predictions and the total number of predictions
    return (number_of_correct_predictions, number_of_predictions)

def report_accuracy(model, dataset_dl, prefix=""):
    """Print the accuracy of the model on the samples served by a dataloader.

    Args:
        model: Model forwarded to evaluate_model().
        dataset_dl: Dataloader forwarded to evaluate_model().
        prefix: Text printed right before the accuracy report.

    The report has the form "<prefix>Accuracy = 12.34 % (correct/total)".
    When the dataloader yields no samples, "<prefix>Accuracy = n/a (0/0)" is
    printed instead of dividing by zero.
    """
    number_of_correct_predictions, number_of_predictions = evaluate_model(model, dataset_dl)
    if number_of_predictions == 0:
        # No samples were evaluated: the percentage is undefined.
        print(prefix+"Accuracy = n/a (0/0)")
        return
    print(prefix+"Accuracy = {:0.2f} % ({}/{})".format(100*number_of_correct_predictions/number_of_predictions,
                                             number_of_correct_predictions, 
                                             number_of_predictions))

In [7]:
# Baseline: accuracy of the still-untrained model on both partitions
report_accuracy(model=model, dataset_dl=train_dl, prefix="Training dataset ")
report_accuracy(model=model, dataset_dl=val_dl, prefix="Validation dataset ")

Training dataset Accuracy = 10.00 % (6/60)
Validation dataset Accuracy = 4.17 % (1/24)


## 5. Train the model

In this section, we will explore how a model is trained with pytorch.

First, let's define the optimizer and the loss function.

In [8]:
# Stochastic gradient descent (SGD) from pytorch: it receives the model parameters
# it is going to update and the learning rate used to scale each update.
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

Now, let's instantiate the loss function. In this case, we will employ the pytorch CrossEntropyLoss function.

In [9]:
# Cross-entropy loss: it is called below as loss_fn(logits, y), i.e., with the raw
# scores produced by the model and the expected class labels.
loss_fn = torch.nn.CrossEntropyLoss()

Training the model involves:

- Making predictions on the training samples.
- Calculating the gradients of the loss function with respect to the model parameters.
- Adjusting the model parameters by applying the gradients scaled by the learning rate.

Let's first try it on a single batch.

In [10]:
# Fetch the first batch
first_batch = next(iter(train_dl))
# Unpack the batch into the input features (X) and the expected labels (y)
X, y = first_batch
# Print the shape of the batch tensors
print("X.shape = ", X.shape)
print("y.shape = ", y.shape)

X.shape =  torch.Size([16, 360])
y.shape =  torch.Size([16])


In [11]:
# Put the model in train mode. This only changes the behaviour of layers such as
# dropout or batch normalization; the intermediate results required by the backward
# pass are recorded by autograd whenever gradient tracking is enabled (the default).
model.train()

# Start from clean gradients. They live next to the model parameters, on the same
# tensor objects but in a different attribute, and would otherwise accumulate.
optimizer.zero_grad()

# Forward pass: compute the predictions for the batch
logits = model(X)

# Measure how far the predictions are from the expected labels
loss = loss_fn(logits, y)

# Backward pass: compute the gradients of the loss and accumulate them
loss.backward()

# Update the model weights using the computed gradients and the learning rate
optimizer.step()

# Check how the model performs after this single update
report_accuracy(model, train_dl)

Accuracy = 31.67 % (19/60)


The previous code computed the gradients and updated the model weights using only a subset (a batch) of the training set. 

The next code shows how to train the model using all the batches and for multiple epochs. 
Notice that each epoch consists of training the model with all batches in the dataset (hence, the second loop).

In [12]:
number_of_epochs = 10

for epoch in range(number_of_epochs):
    print(f" ** Epoch {epoch} **")

    # Training phase: put the model in train mode and visit every batch once.
    model.train()

    for X,y in train_dl:
        # Clear the gradients left over from the previous step
        optimizer.zero_grad()
        # Forward pass: compute the predictions
        logits = model(X)
        # Measure the loss of the predictions against the expected labels
        loss = loss_fn(logits, y)
        # Backward pass: compute the gradients
        loss.backward()
        # Update the model weights using the gradients and the learning rate
        optimizer.step()

    # Evaluation phase: switch to evaluation mode before measuring accuracy.
    # Eval mode affects layers such as dropout and batch normalization; the MLP
    # printed above has none, so here it does not change the results.
    model.eval()
    report_accuracy(model, train_dl, prefix="   Training ")
    report_accuracy(model, val_dl, prefix="   Validation ")

 ** Epoch 0 **
   Training Accuracy = 75.00 % (45/60)
   Validation Accuracy = 33.33 % (8/24)
 ** Epoch 1 **
   Training Accuracy = 78.33 % (47/60)
   Validation Accuracy = 37.50 % (9/24)
 ** Epoch 2 **
   Training Accuracy = 83.33 % (50/60)
   Validation Accuracy = 33.33 % (8/24)
 ** Epoch 3 **
   Training Accuracy = 83.33 % (50/60)
   Validation Accuracy = 33.33 % (8/24)
 ** Epoch 4 **
   Training Accuracy = 83.33 % (50/60)
   Validation Accuracy = 37.50 % (9/24)
 ** Epoch 5 **
   Training Accuracy = 83.33 % (50/60)
   Validation Accuracy = 37.50 % (9/24)
 ** Epoch 6 **
   Training Accuracy = 83.33 % (50/60)
   Validation Accuracy = 37.50 % (9/24)
 ** Epoch 7 **
   Training Accuracy = 83.33 % (50/60)
   Validation Accuracy = 37.50 % (9/24)
 ** Epoch 8 **
   Training Accuracy = 83.33 % (50/60)
   Validation Accuracy = 33.33 % (8/24)
 ** Epoch 9 **
   Training Accuracy = 83.33 % (50/60)
   Validation Accuracy = 33.33 % (8/24)


Our model has been trained for 10 epochs. Notice that the training accuracy increased while the validation accuracy might have increased up to a point and then decreased again (hint: this might characterize overfitting). 

You might want to change the previous code to record the loss and accuracy values so you can plot them.

## 6. Train the model using [Pytorch Lightning](https://lightning.ai/docs/pytorch/stable/)

[Pytorch Lightning](https://lightning.ai/docs/pytorch/stable/) provides an API to facilitate training ML models with Pytorch.

Before continuing, please read the [Lightning in 15 minutes](https://lightning.ai/docs/pytorch/stable/starter/introduction.html) section of the PyTorch Lightning documentation.

Let's create a LightningModule with our model.

In [13]:
import lightning as L
from models.mlp import MultiLayerPerceptron

# define the lightning module to train our model
class LitModule(L.LightningModule):
    """Lightning wrapper that trains a classification model with cross-entropy and SGD.

    Args:
        model: The network to train. It is stored as ``self.model`` and is called
            with the input features of each batch to produce the logits.
    """

    def __init__(self, model):
        super().__init__()
        # Keep the model handed in by the caller. Previously this argument was
        # ignored and a brand new MultiLayerPerceptron was built here, so the
        # model created outside of the module was never the one being trained.
        self.model = model
        self.loss_fn = torch.nn.CrossEntropyLoss()

    def training_step(self, batch, batch_idx):
        """Compute, log and return the loss for one training batch.

        Args:
            batch: Tuple (X, y) with the input features and the expected labels.
            batch_idx: Index of the batch (unused).

        Returns:
            The cross-entropy loss of the model predictions for the batch.
        """
        # Training_step defines the train loop.
        # Just compute and return the loss. The trainer will take care of zeroing 
        # the gradient, computing the gradients and updating the weights.
        X, y = batch
        logits = self.model(X)
        # Compute the loss
        loss = self.loss_fn(logits, y)
        # Logging to TensorBoard (if installed) by default
        self.log("train_loss", loss)
        return loss

    def configure_optimizers(self):
        """Return the SGD optimizer (learning rate 0.1) over the wrapped model parameters."""
        optimizer = torch.optim.SGD(params=self.model.parameters(), lr=0.1)
        return optimizer

In [14]:
import torch

# Build a fresh (untrained) MLP with the same dimensions used before
model = MultiLayerPerceptron(
    input_features=360,
    hidden_size=64,
    num_classes=6,
)

# Instantiate the lightning module, passing our model to its constructor.
lit_model = LitModule(model)

In [15]:
# Fit the lightning module
#  - at most 10 epochs, running on the CPU, logging at every training step
#  - batches come from the train data loader
trainer = L.Trainer(
    max_epochs=10,
    accelerator="cpu",
    log_every_n_steps=1,
)
trainer.fit(lit_model, train_dataloaders=train_dl)

GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/opt/homebrew/lib/python3.11/site-packages/lightning/pytorch/trainer/setup.py:187: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.

  | Name    | Type                 | Params
-------------------------------------------------
0 | model   | MultiLayerPerceptron | 23.5 K
1 | loss_fn | CrossEntropyLoss     | 0     
-------------------------------------------------
23.5 K    Trainable params
0         Non-trainable params
23.5 K    Total params
0.094     Total estimated model params size (MB)
/opt/homebrew/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:436: Consider setting `persistent_workers=True` in 'train_dataloader' to speed up the dataloader worker initialization.


Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


In [16]:
# Take the trained network out of the lightning module
model = lit_model.model

# Switch it to evaluation mode and report its accuracy on both partitions
model.eval()
report_accuracy(model=model, dataset_dl=train_dl, prefix="   Training ")
report_accuracy(model=model, dataset_dl=val_dl, prefix="   Validation ")

   Training Accuracy = 83.33 % (50/60)
   Validation Accuracy = 33.33 % (8/24)
