In [1]:
import os
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt

from tqdm.notebook import tqdm
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Dataset

import mlflow
import mlflow.pytorch
from torch.utils.tensorboard import SummaryWriter

In [None]:
# Report the logical CPU count reported by the OS (os.cpu_count() may return None
# when it cannot be determined).
num_workers = os.cpu_count()
print(f"Number of workers: {num_workers}")

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

#### Load dataset

In [4]:
# Load scikit-learn's wine dataset: X is taken from `.data`, y from `.target`.
# NOTE: the container keeps the name `bc` even though this is the wine data.
bc = datasets.load_wine()
X = bc.data
y = bc.target

In [5]:
# Convert features to float32 and labels to int64 tensors (int64 is the label
# dtype nn.CrossEntropyLoss expects for class indices).
# torch.as_tensor accepts both NumPy arrays and tensors, so this cell can be
# re-run safely; the previous `X.astype(...)` form raised AttributeError on a
# second execution because X was already a tensor.
X = torch.as_tensor(X, dtype=torch.float32)
y = torch.as_tensor(y, dtype=torch.int64)
n_samples, n_features = X.shape

In [None]:
# First hold out 20% of all samples as the test set (stratified by label) ...
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=1,
)

# ... then carve 20% of the remaining samples off as the validation set,
# again stratified so each split keeps the class proportions.
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    stratify=y_train,
    random_state=1,
)

X_train.shape, X_val.shape, X_test.shape

### Prepare Dataset and Dataloader

In [7]:
class LRDataset(Dataset):
    """Map-style dataset pairing each row of ``X`` with the matching entry of ``y``."""

    def __init__(self, X, y):
        """Keep references to the features ``X`` and the targets ``y``."""
        self.X, self.y = X, y

    def __len__(self):
        """Return the number of samples, taken from the length of ``y``."""
        return len(self.y)

    def __getitem__(self, index):
        """Return the ``(features, target)`` pair stored at ``index``."""
        features = self.X[index]
        target = self.y[index]
        return features, target

In [8]:
# Wrap each split in an LRDataset so it can be fed to a DataLoader.
train_ds, val_ds, test_ds = (
    LRDataset(X_train, y_train),
    LRDataset(X_val, y_val),
    LRDataset(X_test, y_test),
)

In [9]:
# Load batches in the main process (0 worker processes); this overrides the
# CPU count stored in `num_workers` earlier in the notebook.
num_workers = 0

# Only the training loader shuffles; validation and test keep a fixed order.
train_dl = DataLoader(train_ds, batch_size=32, shuffle=True, num_workers=num_workers)
val_dl = DataLoader(val_ds, batch_size=32, shuffle=False, num_workers=num_workers)
test_dl = DataLoader(test_ds, batch_size=32, shuffle=False, num_workers=num_workers)

### Define MLP

In [10]:
class ImprovedMLP(nn.Module):
    """Feed-forward classifier with three hidden blocks (32, 16 and 8 units).

    Every hidden block is Linear -> BatchNorm1d -> ReLU -> Dropout. The final
    Linear layer maps to ``n_classes`` and applies no activation, so the
    network returns raw logits.

    Args:
        n_features: Size of the input feature vector.
        n_classes: Number of output logits.
        dropout_prob: Dropout probability used in every hidden block.
    """

    def __init__(self, n_features, n_classes, dropout_prob=0.3):
        super().__init__()
        widths = (n_features, 32, 16, 8)
        stack = []
        # Build the hidden blocks in order so the Sequential indices (and
        # therefore the state_dict keys) match a hand-written layer list.
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack.extend(
                [
                    nn.Linear(fan_in, fan_out),
                    nn.BatchNorm1d(fan_out),
                    nn.ReLU(),
                    nn.Dropout(dropout_prob),
                ]
            )
        # Output layer: logits only, no activation.
        stack.append(nn.Linear(widths[-1], n_classes))
        self.layers = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the logits."""
        logits = self.layers(x)
        return logits

#### Instantiate Model and Define Loss & Optimizer

In [11]:
# Build the three-class classifier and move its parameters to the chosen device.
model = ImprovedMLP(n_features=n_features, n_classes=3)
model = model.to(device)

# AdamW over all model parameters; cross-entropy on the raw logits.
optimizer = torch.optim.AdamW(params=model.parameters(), lr=0.005)
loss_fn = nn.CrossEntropyLoss()

#### MLOps

In [12]:
class MLOpsHandler:
    """Mirror training telemetry to TensorBoard and MLflow and manage checkpoints.

    Constructing the handler opens a TensorBoard ``SummaryWriter`` and starts an
    MLflow run; ``close()`` releases both. The handler can also be used as a
    context manager so that ``close()`` runs even when training is interrupted.

    Args:
        log_dir: Directory for TensorBoard event files. The MLflow tracking
            store is created in ``<log_dir>/mlruns``.
        model_save_dir: Directory that receives ``.pth`` checkpoints.
        experiment_name: MLflow experiment under which the run is recorded.
    """

    def __init__(
        self,
        log_dir="../../runs/mlp_ops",
        model_save_dir="../../runs/models",
        experiment_name="MLP_Experiment",
    ):
        self.writer = SummaryWriter(log_dir=log_dir)
        self.model_save_dir = model_save_dir
        os.makedirs(model_save_dir, exist_ok=True)

        # A run left open by an earlier handler (e.g. the cell was re-run or
        # training was interrupted before close()) would make start_run() raise.
        # End it before switching the tracking URI so it is closed in its own store.
        if mlflow.active_run() is not None:
            mlflow.end_run()

        # Initialize MLflow with a file-based store next to the TensorBoard logs.
        os.makedirs(f"{log_dir}/mlruns", exist_ok=True)
        mlflow.set_tracking_uri(f"{log_dir}/mlruns")
        mlflow.set_experiment(experiment_name)
        self.run = mlflow.start_run()

    def __enter__(self):
        """Return the handler itself so it can be bound in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the writer and the MLflow run; exceptions are not suppressed."""
        self.close()
        return False

    def log_metrics(self, metrics, epoch):
        """Log every ``name -> value`` pair in ``metrics`` to TensorBoard and MLflow.

        Args:
            metrics: Mapping from metric name to scalar value.
            epoch: Step index attached to each logged value.
        """
        for key, value in metrics.items():
            self.writer.add_scalar(key, value, epoch)
            mlflow.log_metric(key, value, step=epoch)

    def log_hyperparameters(self, params):
        """Log the ``params`` mapping as MLflow run parameters."""
        mlflow.log_params(params)

    def save_model(self, model, epoch):
        """Checkpoint ``model`` to disk and log it to MLflow.

        The state dict is written to ``<model_save_dir>/model_epoch_<epoch>.pth``
        and the full model is logged to MLflow under the artifact path
        ``model_epoch_<epoch>``.
        """
        model_path = os.path.join(self.model_save_dir, f"model_epoch_{epoch}.pth")
        torch.save(model.state_dict(), model_path)
        mlflow.pytorch.log_model(model, artifact_path=f"model_epoch_{epoch}")

    def load_model(self, model, model_path):
        """Load the state dict stored at ``model_path`` into ``model`` in place.

        Tensors are mapped to the CPU while reading so that a checkpoint saved
        on a GPU machine can be restored on a CPU-only one; ``load_state_dict``
        then copies the values into the model's parameters on whatever device
        they live on.
        """
        # NOTE(security): torch.load unpickles the file; only load checkpoints
        # from trusted sources (or pass weights_only=True where supported).
        state_dict = torch.load(model_path, map_location="cpu")
        model.load_state_dict(state_dict)

    def close(self):
        """Close the TensorBoard writer and end the MLflow run."""
        try:
            self.writer.close()
        finally:
            # End the run even if flushing the writer failed, so the next
            # handler can start a fresh run.
            mlflow.end_run()

#### Run training

In [None]:
# Create the tracking handler with its default directories and experiment name;
# constructing it opens a TensorBoard writer and starts an MLflow run.
mlops_handler = MLOpsHandler()

In [14]:
# Training-loop settings and loss history.
n_epochs = 100  # number of passes over the training set
verbose = 5  # print progress and checkpoint every `verbose` epochs (0 disables)
train_losses, val_losses = [], []  # one entry appended per epoch

In [None]:
# Train for `n_epochs`, evaluating on the validation set after every epoch.
# The whole loop sits in try/finally so the TensorBoard writer and the MLflow
# run are closed even if training is interrupted or raises.
try:
    for epoch in tqdm(range(n_epochs)):
        # ---- Training pass ----
        model.train()
        epoch_loss = 0.0
        n_train_seen = 0
        for X_batch, y_batch in train_dl:
            # Move data to device
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)

            # Clear gradients before the forward pass so leftovers (e.g. from
            # an interrupted earlier run of this cell) never leak into this step.
            optimizer.zero_grad()

            # Forward pass
            y_pred = model(X_batch)
            loss = loss_fn(y_pred, y_batch)

            # Backward pass
            loss.backward()
            optimizer.step()

            # `loss` is a batch mean; weight it by the batch size so the epoch
            # value is a true per-sample mean even when the last batch is smaller.
            batch_size = X_batch.size(0)
            epoch_loss += loss.item() * batch_size
            n_train_seen += batch_size

        epoch_loss /= n_train_seen
        train_losses.append(epoch_loss)

        # ---- Validation pass (no gradients, eval-mode BatchNorm/Dropout) ----
        model.eval()
        val_loss = 0.0
        n_val_seen = 0
        with torch.no_grad():
            for X_val_batch, y_val_batch in val_dl:
                X_val_batch, y_val_batch = X_val_batch.to(device), y_val_batch.to(device)
                y_val_pred = model(X_val_batch)
                batch_size = X_val_batch.size(0)
                val_loss += loss_fn(y_val_pred, y_val_batch).item() * batch_size
                n_val_seen += batch_size

        val_loss /= n_val_seen
        val_losses.append(val_loss)

        mlops_handler.log_metrics({"train_loss": epoch_loss, "val_loss": val_loss}, epoch)
        # Every `verbose` epochs: report progress and checkpoint the model.
        # The checkpoint is tagged with the 0-based epoch index, matching the
        # step used for the logged metrics.
        if verbose and (epoch + 1) % verbose == 0:
            print(
                f"Epoch {epoch + 1}/{n_epochs} | Training Loss: {epoch_loss:.4f} | Validation Loss: {val_loss:.4f}"
            )
            mlops_handler.save_model(model, epoch)

    print(f"Final Training Loss: {epoch_loss:.4f} | Validation Loss: {val_loss:.4f}")
finally:
    mlops_handler.close()