# Prequisites

In [1]:
import mlflow
import torchvision
from torchvision import transforms, datasets 
import torch
import os
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.nn.functional as F
from PIL import Image
from torchinfo import summary
from mlflow.models import infer_signature
import numpy as np

https://mlflow.org/docs/latest/ml/deep-learning/pytorch/guide

Command to run in the parent "mlruns" folder - **mlflow server --host 127.0.0.1 --port 8080**

# Data

In [2]:
train_data_path = "..//..//train"

transformations = transforms.Compose([
    transforms.Resize((64,64)),
    transforms.ToTensor(),
    transforms.Normalize(mean = [0.485, 0.456, 0.406],
                          std = [0.229, 0.224, 0.225])
])

train_data = torchvision.datasets.ImageFolder(train_data_path, transform=transformations)

val_data_path = "..//..//val"
val_data = torchvision.datasets.ImageFolder(val_data_path, transform=transformations)

test_data_path = "..//..//test"
test_data = torchvision.datasets.ImageFolder(test_data_path, transform=transformations)

# Dataloader

batch_size = 1024 #Num of images sent to the network once before updating it

train_data_loader = DataLoader(train_data, batch_size=batch_size)
val_data_loader = DataLoader(val_data, batch_size=batch_size)
test_data_loader = DataLoader(test_data, batch_size=batch_size)

num_classes = 151

# MobileNetV3

In [3]:
model1 = torchvision.models.mobilenet_v3_small(weights=torchvision.models.MobileNet_V3_Small_Weights.DEFAULT)

In [4]:
for layer in model1.parameters():
    layer.requires_grad = False

for name, layer in model1.named_parameters():
    if "classifier" in name:
        layer.requires_grad = True

in_features = model1.classifier[3].in_features 
model1.classifier[3] = torch.nn.Linear(in_features, num_classes)
model1.classifier[3].requires_grad_ = True

if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

model1 = model1.to(device)

In [5]:
#setting up training parameters 

params = { 
    "epochs" : 5,
    "learning_rate" : 0.001,
    "batch_size" : batch_size,
    "optimizer" : "SGD",
    "model_type" : "Mobile_Net_V3_Small",
    "pretrained" : "True",
    "frozen_layers" : "True",
    "classifier_out_features" : num_classes
}

In [6]:
#Mlflow Training and logging

with mlflow.start_run(run_name="mobilenet_v3_small"):
    
    # 1 -> Logging the parameters
    mlflow.log_params(params) 

    # 2. -> Model preparation
    model = model1
    loss_fn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.classifier.parameters(), lr=params["learning_rate"])

    # 3 -> Log Model Architecture
    with open("mobilenet_v3_small_summary.txt", "w") as f:
        f.write(str(summary(model, input_size = (1, 3, 64, 64))))
    mlflow.log_artifact("mobilenet_v3_small_summary.txt")

    # 4 -> Training loop + Metric Logging
    for epoch in tqdm(range(params['epochs'])):
        model.train()
        train_loss = 0 
        correct = 0 
        total = 0

        for batch_idx, (data, target) in enumerate(train_data_loader):
            data = data.to(device)
            target = target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = loss_fn(output, target)

            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            _, predicted = output.max(1)
            total += target.size(0)
            correct += predicted.eq(target).sum().item()

            if batch_idx % 100: #logging every 100 batches
                batch_loss = train_loss / (batch_idx + 1)
                batch_acc = 100 * correct/total # Percentage
                mlflow.log_metrics({"batch_loss" : batch_loss,
                                    "batch_accuracy" : batch_acc},
                                    step = epoch*len(train_data_loader) + batch_idx,)
                
            epoch_loss = train_loss / len(train_data_loader)
            epoch_acc = 100 * correct/total

            model.eval()
            val_loss = 0 
            val_correct = 0 
            val_total = 0

        with torch.no_grad():
            for data, target in val_data_loader:
                data = data.to(device)
                target = target.to(device)
                output = model(data)
                loss = loss_fn(output, target)
                val_loss += loss.item()
                _, predicted = output.max(1)
                val_total += target.size(0)
                val_correct += predicted.eq(target).sum().item()

        val_loss = val_loss / len(val_data_loader)
        val_acc = 100 * val_correct/val_total 
        mlflow.log_metrics({"train_loss" : epoch_loss,
                            "train_accuracy" : epoch_acc,
                            "val_loss" : val_loss,
                            "val_accuracy" : val_acc},
                            step = epoch
                            )
        print(
            f"Epoch : {epoch+1}/{params['epochs']}",
            f"Train Loss : {epoch_loss:.4f}, Train Accuracy : {epoch_acc:.4f}%",
            f"Val Loss : {val_loss:.4f}, Val Accuracy : {val_acc:.4f}%"
            )
            
            #Logging the trained model 
            # model_info = mlflow.pytorch.log_model(model, name = "mobilenet_v3_small")

    model.eval()
    test_loss = 0
    test_correct = 0 
    test_total = 0 

    with torch.no_grad():
        for data, target in test_data_loader:
            data = data.to(device)
            target = target.to(device)
            output = model(data)
            loss = loss_fn(output, target)

            test_loss += loss.item()
            _, predicted = output.max(1)
            test_total += target.size(0)
            test_correct += predicted.eq(target).sum().item()

    test_loss = test_loss / len(test_data_loader)
    test_acc = 100 * test_correct / test_total 
    mlflow.log_metrics({"test_loss" : test_loss,
                        "test_accuracy" : test_acc})
    print(f"Final Test Acc : {test_acc:.2f}%")

    sample_input = torch.randn(1, 3, 64, 64).to(device)
    output = model(sample_input)
    sample_output = output.cpu().detach().numpy()
    signature = infer_signature(sample_input.cpu().numpy(), sample_output)
    model_info = mlflow.pytorch.log_model(model,
                                          #artifact_path="mobilenet_v3_small", 
                                          name = "mobilenet_v3_small", 
                                          registered_model_name="mobilenet_v3_small",
                                          signature=signature)
            

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch : 1/5 Train Loss : 5.1343, Train Accuracy : 0.3839% Val Loss : 5.1048, Val Accuracy : 0.6623%
Epoch : 2/5 Train Loss : 5.1242, Train Accuracy : 0.4031% Val Loss : 5.1001, Val Accuracy : 0.5887%
Epoch : 3/5 Train Loss : 5.1156, Train Accuracy : 0.4031% Val Loss : 5.0957, Val Accuracy : 0.8094%
Epoch : 4/5 Train Loss : 5.1066, Train Accuracy : 0.4991% Val Loss : 5.0913, Val Accuracy : 0.8830%
Epoch : 5/5 Train Loss : 5.0986, Train Accuracy : 0.5375% Val Loss : 5.0871, Val Accuracy : 1.1038%




Final Test Acc : 0.84%


Successfully registered model 'mobilenet_v3_small'.
Created version '1' of model 'mobilenet_v3_small'.


In [None]:
sample_input.cpu().numpy()