In [1]:
import mlflow
import os
import torch
import torch.nn as nn
import torch.optim as optim
import intel_extension_for_pytorch as ipex
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Set MLflow tracking URI and experiment.
# An MLFLOW_TRACKING_URI environment variable, when set, takes precedence so the
# notebook can target another tracking server without a code edit; otherwise the
# local server on port 5000 is used.
mlflow.set_tracking_uri(os.environ.get("MLFLOW_TRACKING_URI", "http://localhost:5000"))
mlflow.set_experiment("FashionMNIST-CNN")

# Parameters
params = {
    "batch_size": 64,
    "epochs": 5,
    "learning_rate": 0.001,
    "seed": 42,
}

# Seed PyTorch's global RNG so weight initialisation and shuffle order repeat
# from one Restart & Run All to the next.
torch.manual_seed(params["seed"])

  import pkg_resources  # noqa: TID251


In [2]:
# Data Loaders
# Convert each image to a tensor, then normalize with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))]
)

# Train split first, then test split; both are downloaded into ./data if needed.
train_dataset, test_dataset = (
    datasets.FashionMNIST(root="./data", train=is_train, transform=transform, download=True)
    for is_train in (True, False)
)

# Only the training loader reshuffles; both use the configured batch size.
train_loader, test_loader = (
    DataLoader(split, batch_size=params["batch_size"], shuffle=reshuffle)
    for split, reshuffle in ((train_dataset, True), (test_dataset, False))
)


In [3]:
# model definition 
import torch.nn as nn
import torch.nn.functional as F

class CNN(nn.Module):
    """Two-stage convolutional classifier for single-channel 28x28 images.

    Each stage applies a 3x3 convolution (padding 1), ReLU, and a 2x2
    max-pool, halving the spatial size (28 -> 14 -> 7). The resulting
    64x7x7 feature map is flattened and fed through two fully connected
    layers that produce raw class logits (no softmax is applied).

    Args:
        num_classes: Number of output logits. Defaults to 10.
    """

    def __init__(self, num_classes=10):
        super().__init__()

        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.relu = nn.ReLU()

        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return logits of shape [batch, num_classes] for input [batch, 1, 28, 28].

        Raises:
            RuntimeError: If the spatial size is not 28x28, because the
                flattened features no longer match ``fc1``'s input width.
        """
        x = self.pool(self.relu(self.conv1(x)))  # [batch, 32, 14, 14]
        x = self.pool(self.relu(self.conv2(x)))  # [batch, 64, 7, 7]
        # Flatten everything except the batch dimension. Unlike
        # reshape(-1, 64 * 7 * 7), this can never fold extra features into the
        # batch dimension when the input has an unexpected size; fc1 raises instead.
        x = x.flatten(start_dim=1)
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x



In [4]:
# Train and Evaluate the Model 
def train(model, epochs, optimizer, train_loader):
    """Train ``model`` in place using cross-entropy loss.

    Args:
        model: Module mapping a batch of inputs to class logits.
        epochs: Number of full passes over ``train_loader``.
        optimizer: Optimizer already bound to ``model``'s parameters.
        train_loader: Re-iterable object yielding ``(inputs, targets)``
            batches. It does not need to implement ``__len__``.

    Returns:
        Tuple ``(model, train_losses)`` where ``train_losses[i]`` is the mean
        of the per-batch losses observed during epoch ``i``.

    Raises:
        ValueError: If an epoch yields no batches, since no average loss
            can be computed.
    """
    criterion = nn.CrossEntropyLoss()
    train_losses = []

    model.train()
    for _ in range(epochs):
        total_loss = 0.0
        num_batches = 0
        for x, y in train_loader:
            optimizer.zero_grad()
            output = model(x)
            loss = criterion(output, y)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            # Count batches actually seen rather than relying on len(train_loader),
            # so loaders without __len__ work too.
            num_batches += 1
        if num_batches == 0:
            raise ValueError("train_loader yielded no batches; cannot compute an average loss")
        train_losses.append(total_loss / num_batches)

    return model, train_losses

def evaluate(model, test_loader):
    """Compute top-1 accuracy of ``model`` over ``test_loader``.

    The model is switched to eval mode and left in that mode on return.

    Args:
        model: Module mapping a batch of inputs to per-class scores, with
            classes along dimension 1.
        test_loader: Iterable yielding ``(inputs, targets)`` batches.

    Returns:
        Fraction of samples whose arg-max prediction equals the target,
        as a float in [0, 1].

    Raises:
        ValueError: If ``test_loader`` yields no samples, since accuracy is
            undefined in that case.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for x, y in test_loader:
            pred = model(x).argmax(dim=1)
            correct += (pred == y).sum().item()
            total += y.size(0)
    if total == 0:
        raise ValueError("test_loader yielded no samples; accuracy is undefined")
    return correct / total


In [5]:
# MLflow logging and execution
with mlflow.start_run(run_name="CNN_Fashion"):
    try:
        # Record all hyperparameters in a single call
        mlflow.log_params(params)

        model = CNN()
        optimizer = optim.Adam(model.parameters(), lr=params["learning_rate"])
        # Let Intel Extension for PyTorch optimize the model/optimizer pair
        model, optimizer = ipex.optimize(model, optimizer=optimizer)

        model, train_losses = train(model, params["epochs"], optimizer, train_loader)

        # One train_loss point per epoch, using 1-based steps
        for epoch, loss in enumerate(train_losses, 1):
            mlflow.log_metric("train_loss", loss, step=epoch)

        test_accuracy = evaluate(model, test_loader)
        mlflow.log_metric("test_accuracy", test_accuracy)

        # Infer the model signature from one example forward pass; gradients
        # are not needed for this, so tracking is disabled.
        dummy_input = torch.randn(1, 1, 28, 28)
        with torch.no_grad():
            dummy_output = model(dummy_input)
        signature = mlflow.models.signature.infer_signature(
            dummy_input.numpy(), dummy_output.numpy()
        )

        mlflow.pytorch.log_model(
            pytorch_model=model,
            artifact_path="model",
            signature=signature
        )

        print(f"✅ Final Test Accuracy: {test_accuracy:.4f}")

    except Exception as e:
        print(f"❌ Run failed: {e}")
        # Re-raise so the exception leaves the `with` block: that is what makes
        # MLflow end the run as FAILED. Setting a "mlflow.runStatus" tag does
        # not change the run's status, and swallowing the error would let the
        # run be recorded as finished and let later cells keep executing.
        raise
    else:
        # Printed only when every step above succeeded
        print("✅ MLflow run completed.")




✅ Final Test Accuracy: 0.9070
✅ MLflow run completed.
