# Building and Managing a Simple CNN Model for MNIST with PyTorch and MLflow

## Prerequisites

In [5]:
# Before running the code, make sure you have the required libraries installed:
# !pip install torch torchvision mlflow

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import mlflow
import mlflow.pytorch
import os

## Model Definition
- A simple CNN model with two convolutional layers (each followed by ReLU and 2x2 max pooling) and two fully connected layers.

In [3]:
# Define the CNN model
class SimpleCNN(nn.Module):
    """Small convolutional classifier: two conv -> ReLU -> max-pool stages, then two linear layers.

    ``fc1`` expects 64 * 7 * 7 features per sample, which is what the two
    stride-2 poolings produce from a 1 x 28 x 28 input (the 3x3 convolutions
    use padding=1 and keep the spatial size). ``fc2`` emits 10 raw scores per
    sample; no softmax is applied inside the model.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Compute class scores for a batch of single-channel images.

        Args:
            x: Tensor of shape (batch, 1, 28, 28). A single unbatched image of
                shape (1, 28, 28) is also accepted and treated as a batch of one.

        Returns:
            Tensor of shape (batch, 10) holding the raw class scores.

        Raises:
            RuntimeError: If the spatial size of ``x`` does not reduce to 7 x 7,
                so the flattened features do not match ``fc1``.
        """
        # Promote an unbatched image to a batch of one so the per-sample
        # flatten below keeps returning a (1, 10) result for that case.
        if x.dim() == 3:
            x = x.unsqueeze(0)

        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))

        # Flatten everything except the batch dimension. Unlike
        # view(-1, 64 * 7 * 7), this never folds a wrongly sized input into
        # extra batch rows: a size mismatch surfaces as an error in fc1.
        x = x.flatten(1)

        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x
    
# Pick the compute device: CUDA when PyTorch reports it as available, CPU otherwise
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

## Data Loading
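- The MNIST training and test sets are downloaded (if not already present), converted to tensors, normalized, and wrapped in batched data loaders.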

In [2]:
# Hyperparameters (read by the data loaders below and by the training cell)
batch_size = 64
learning_rate = 0.001
num_epochs = 5

# Preprocessing applied to every image: convert to a tensor, then normalise
# the single channel with mean 0.5 and std 0.5
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# MNIST train/test splits, downloaded under ./data when not already present
# (training split first, then the test split)
train_dataset, test_dataset = (
    torchvision.datasets.MNIST(root='./data', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

# Batch both splits; only the training batches are shuffled
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100.0%


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100.0%


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100.0%


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100.0%

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw






## Experiment Tracking
### Training Loop
- The model is trained for a specified number of epochs; the batch loss is printed every 100 steps and the mean loss of each epoch is logged to MLflow.
### Model Evaluation
- The model's accuracy is computed on the test set and logged.
### MLflow Logging
- Parameters, metrics, and the trained model are logged using MLflow.

In [4]:
# Seed PyTorch's global RNG before the model is built so that weight
# initialisation and the shuffled batch order are repeatable each time this
# cell is re-run. NOTE: some GPU kernels may still be nondeterministic.
seed = 42
torch.manual_seed(seed)

# Initialize the model, loss function, and optimizer
model = SimpleCNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# MLflow experiment setup
mlflow.set_experiment("MNIST_CNN_Experiment")

with mlflow.start_run():
    # Record the hyperparameters (and the seed) with the run so it can be reproduced
    mlflow.log_param("batch_size", batch_size)
    mlflow.log_param("learning_rate", learning_rate)
    mlflow.log_param("num_epochs", num_epochs)
    mlflow.log_param("seed", seed)

    # Training loop
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for i, (images, labels) in enumerate(train_loader):
            images, labels = images.to(device), labels.to(device)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Read the scalar once and reuse it for the sum and the progress line
            batch_loss = loss.item()
            running_loss += batch_loss

            if (i+1) % 100 == 0:
                print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {batch_loss:.4f}')

        # Log the epoch's loss: the mean of the per-batch losses
        mlflow.log_metric("loss", running_loss / len(train_loader), step=epoch)

    # Evaluate the model on the test set (no gradients needed)
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            # Predicted class = index of the highest score for each sample
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # Accuracy in percent; the message reports the number of images actually evaluated
    accuracy = 100 * correct / total
    print(f'Accuracy of the model on the {total} test images: {accuracy:.2f}%')
    mlflow.log_metric("accuracy", accuracy)

    # Save the weights to a local file and attach that file to the run
    model_path = "mnist_cnn_model.pth"
    torch.save(model.state_dict(), model_path)
    mlflow.log_artifact(model_path)

    # Log the model with MLflow
    mlflow.pytorch.log_model(model, "model")

print("Training complete and model saved with MLflow.")

2024/05/26 19:27:37 INFO mlflow.tracking.fluent: Experiment with name 'MNIST_CNN_Experiment' does not exist. Creating a new experiment.


Epoch [1/5], Step [100/938], Loss: 0.1898
Epoch [1/5], Step [200/938], Loss: 0.0753
Epoch [1/5], Step [300/938], Loss: 0.1094
Epoch [1/5], Step [400/938], Loss: 0.0642
Epoch [1/5], Step [500/938], Loss: 0.0625
Epoch [1/5], Step [600/938], Loss: 0.0199
Epoch [1/5], Step [700/938], Loss: 0.0268
Epoch [1/5], Step [800/938], Loss: 0.0257
Epoch [1/5], Step [900/938], Loss: 0.0589
Epoch [2/5], Step [100/938], Loss: 0.0199
Epoch [2/5], Step [200/938], Loss: 0.0760
Epoch [2/5], Step [300/938], Loss: 0.0219
Epoch [2/5], Step [400/938], Loss: 0.0085
Epoch [2/5], Step [500/938], Loss: 0.0050
Epoch [2/5], Step [600/938], Loss: 0.0710
Epoch [2/5], Step [700/938], Loss: 0.0397
Epoch [2/5], Step [800/938], Loss: 0.2745
Epoch [2/5], Step [900/938], Loss: 0.0465
Epoch [3/5], Step [100/938], Loss: 0.0607
Epoch [3/5], Step [200/938], Loss: 0.0131
Epoch [3/5], Step [300/938], Loss: 0.0750
Epoch [3/5], Step [400/938], Loss: 0.0068
Epoch [3/5], Step [500/938], Loss: 0.0070
Epoch [3/5], Step [600/938], Loss:

## Run MLflow Server

In [None]:
# Launch the MLflow tracking UI to browse the logged runs (by default it is served at http://127.0.0.1:5000)
# !mlflow ui