In [1]:
%load_ext watermark
%watermark -p torch,pytorch_lightning,torchmetrics,tensorboard

torch            : 1.10.1
pytorch_lightning: 1.5.9
torchmetrics     : 0.7.0
tensorboard      : 2.8.0



  warn(f"Failed to load image Python extension: {e}")


# MLP Classifier -- Cement Dataset

## General settings and hyperparameters

In [2]:
BATCH_SIZE = 64
NUM_EPOCHS = 200
LEARNING_RATE = 0.01
NUM_WORKERS = 0

## Setting up the PyTorch Lightning model

In [3]:
import pytorch_lightning as pl
import torch

import torchmetrics

- Set up model architecture
- Use loggers to track mean absolute errors for both the training and validation set during training; this allows us to select the best model (i.e., the epoch) based on validation set performance

In [4]:
class MultiLayerPerceptron(pl.LightningModule):
    def __init__(self, input_size, hidden_units, num_classes):
        super().__init__()

        self.train_mae = torchmetrics.MeanAbsoluteError()
        self.valid_mae = torchmetrics.MeanAbsoluteError()
        self.test_mae = torchmetrics.MeanAbsoluteError()
        
        all_layers = []
        for hidden_unit in hidden_units:
            layer = torch.nn.Linear(input_size, hidden_unit) 
            all_layers.append(layer) 
            all_layers.append(torch.nn.ReLU()) 
            input_size = hidden_unit
 
        all_layers.append(torch.nn.Linear(hidden_units[-1], num_classes)) 
        all_layers.append(torch.nn.Softmax(dim=1)) 
        self.model = torch.nn.Sequential(*all_layers)

    def forward(self, x):
        x = self.model(x)
        return x

    def training_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = torch.nn.functional.cross_entropy(logits, y)
        self.log("train_loss", loss, on_step=True)
        
        preds = torch.argmax(logits, dim=1)
        self.train_mae.update(preds, y)
        return loss

    def training_epoch_end(self, outs):
        self.log("train_mae", self.train_mae.compute())
        self.train_mae.reset()
    
    def validation_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = torch.nn.functional.cross_entropy(logits, y)
        self.log("valid_loss", loss, on_step=True)
        
        preds = torch.argmax(logits, dim=1)
        self.valid_mae.update(preds, y)
        return loss
    
    def validation_epoch_end(self, outs):
        self.log("valid_mae", self.valid_mae.compute())
        self.valid_mae.reset()

    def test_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = torch.nn.functional.cross_entropy(logits, y)
        self.log("test_loss", loss)
        
        preds = torch.argmax(logits, dim=1)
        self.test_mae.update(preds, y)
        self.log("test_mae", self.test_mae.compute())
        return loss

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=LEARNING_RATE)
        return optimizer

## Setting up the dataset

### Inspecting the dataset

In [5]:
import pandas as pd
import numpy as np


data_df = pd.read_csv("datasets/cement_strength.csv", skiprows=1)
data_df["response"] = data_df["response"]-1 # labels should start at 0

data_labels = data_df["response"]
data_features = data_df.loc[:, ["V1", "V2", "V3", "V4", "V5", "V6", "V7", "V8"]]

print('Number of features:', data_features.shape[1])
print('Number of examples:', data_features.shape[0])
print('Labels:', np.unique(data_labels.values))
print('Label distribution:', np.bincount(data_labels))

Number of features: 8
Number of examples: 998
Labels: [0 1 2 3 4]
Label distribution: [196 310 244 152  96]


**Performance Baseline**

In [6]:
avg_prediction = np.median(data_labels.values) # median minimizes MAE
baseline_mae = np.mean(np.abs(data_labels.values - avg_prediction))
print(f'Baseline MAE: {baseline_mae:.2f}')

Baseline MAE: 1.03


### Setting Dataset class

In [7]:
from torch.utils.data import Dataset


class MyDataset(Dataset):

    def __init__(self, feature_array, label_array, dtype=np.float32):

        self.features = feature_array.astype(np.float32)
        self.labels = label_array

    def __getitem__(self, index):
        inputs = self.features[index]
        label = self.labels[index]
        return inputs, label

    def __len__(self):
        return self.labels.shape[0]

### Setting up DataModule

In [8]:
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader


class DataModule(pl.LightningDataModule):
    def __init__(self, data_path='./'):
        super().__init__()
        self.data_path = data_path
        self.transform = None
        
    def prepare_data(self):
        data_df = pd.read_csv(
            os.path.join(self.data_path, 'cement_strength.csv'), skiprows=1)
        data_df["response"] = data_df["response"]-1 # labels should start at 0
        self.data_labels = data_df["response"]
        self.data_features = data_df.loc[:, [
            "V1", "V2", "V3", "V4", "V5", "V6", "V7", "V8"]]

    def setup(self, stage=None):
        
        # Split into
        # 70% train, 10% validation, 20% testing
        
        X_temp, X_test, y_temp, y_test = train_test_split(
            self.data_features.values,
            self.data_labels.values,
            test_size=0.2,
            random_state=1,
            stratify=self.data_labels.values)

        X_train, X_valid, y_train, y_valid = train_test_split(
            X_temp,
            y_temp,
            test_size=0.1,
            random_state=1,
            stratify=y_temp)
        
        # Standardize features
        sc = StandardScaler()
        X_train_std = sc.fit_transform(X_train)
        X_valid_std = sc.transform(X_valid)
        X_test_std = sc.transform(X_test)
        
        self.train = MyDataset(X_train_std, y_train)
        self.valid = MyDataset(X_valid_std, y_valid) 
        self.test = MyDataset(X_test_std, y_test)
        

    def train_dataloader(self):
        return DataLoader(self.train, batch_size=64, num_workers=NUM_WORKERS)

    def val_dataloader(self):
        return DataLoader(self.valid, batch_size=64, num_workers=NUM_WORKERS)

    def test_dataloader(self):
        return DataLoader(self.test, batch_size=64, num_workers=NUM_WORKERS)
    
    
torch.manual_seed(1) 
data_module = DataModule(data_path='datasets')

In [9]:
data_module.prepare_data()
data_module.setup()
len(data_module.train)

718

### Training the model using the PyTorch Lightning Trainer class

In [10]:
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import CSVLogger


model = MultiLayerPerceptron(
    input_size=data_features.shape[1],
    hidden_units=(32, 16),
    num_classes=np.bincount(data_labels).shape[0])


callbacks = [ModelCheckpoint(save_top_k=1, mode='max', monitor="valid_mae")] # save top 1 model

if torch.cuda.is_available(): # if you have GPUs
    gpus = 1 # number of GPUs to use
else:
    gpus = None

    
logger = CSVLogger(save_dir="logs/", name="mlp-crossentropy-cement", flush_logs_every_n_steps=1)
    
trainer = pl.Trainer(max_epochs=10, callbacks=callbacks, gpus=gpus, logger=logger)
trainer.fit(model=model, datamodule=data_module)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
  rank_zero_deprecation(

  | Name      | Type              | Params
------------------------------------------------
0 | train_mae | MeanAbsoluteError | 0     
1 | valid_mae | MeanAbsoluteError | 0     
2 | test_mae  | MeanAbsoluteError | 0     
3 | model     | Sequential        | 901   
------------------------------------------------
901       Trainable params
0         Non-trainable params
901       Total params
0.004     Total estimated model params size (MB)


                                                                                

  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(


Epoch 0:  86%|██████████▎ | 12/14 [00:00<00:00, 396.27it/s, loss=1.58, v_num=14]
Validating: 0it [00:00, ?it/s][A
Epoch 0: 100%|████████████| 14/14 [00:00<00:00, 353.40it/s, loss=1.58, v_num=14][A
Epoch 1:  86%|██████████▎ | 12/14 [00:00<00:00, 473.43it/s, loss=1.54, v_num=14][A
Validating: 0it [00:00, ?it/s][A
Epoch 1: 100%|████████████| 14/14 [00:00<00:00, 416.62it/s, loss=1.54, v_num=14][A
Epoch 2:  86%|██████████▎ | 12/14 [00:00<00:00, 514.91it/s, loss=1.47, v_num=14][A
Validating: 0it [00:00, ?it/s][A
Epoch 2: 100%|████████████| 14/14 [00:00<00:00, 449.45it/s, loss=1.47, v_num=14][A
Epoch 3:  86%|██████████▎ | 12/14 [00:00<00:00, 520.97it/s, loss=1.42, v_num=14][A
Validating: 0it [00:00, ?it/s][A
Epoch 3: 100%|████████████| 14/14 [00:00<00:00, 445.93it/s, loss=1.42, v_num=14][A
Epoch 4:  86%|██████████▎ | 12/14 [00:00<00:00, 456.83it/s, loss=1.37, v_num=14][A
Validating: 0it [00:00, ?it/s][A
Epoch 4: 100%|████████████| 14/14 [00:00<00:00, 408.06it/s, loss=1.37, v_num=

### Evaluating the model using TensorBoard

In [11]:
trainer.test(model=model, datamodule=data_module, ckpt_path='best')

Restoring states from the checkpoint path at logs/mlp-crossentropy-cement/version_14/checkpoints/epoch=0-step=11.ckpt
Loaded model weights from checkpoint at logs/mlp-crossentropy-cement/version_14/checkpoints/epoch=0-step=11.ckpt
  rank_zero_warn(


Testing: 0it [00:00, ?it/s]--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_loss': 1.5538876056671143, 'test_mae': 0.9420334100723267}
--------------------------------------------------------------------------------
Testing: 100%|███████████████████████████████████| 4/4 [00:00<00:00, 854.67it/s]


[{'test_loss': 1.5538876056671143, 'test_mae': 0.9420334100723267}]