In [48]:
# Root folder of the dataset; the cells below read its train/, val/ and test/ sub-folders.
DATASET_PATH = "../datasets/mobilenet_bowl"

# Base file name (no extension) shared by the saved .pth weights and the .onnx export.
MODEL_NAME = "mobilenetv2_bowl_level_classifier"

# Class labels. A label's position in this list is the class index used when
# scoring test folders and when turning a prediction back into a name.
CLASS_NAMES = [
    "bowl_empty",
    "bowl_full",
    "bowl_half",
]

In [49]:
######################
#### TRAINING LOOP
######################

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader

# Set device: use the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Standard ImageNet channel statistics used with torchvision's pretrained models
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]
BATCH_SIZE = 32


def _build_transform():
    """Return the resize -> center-crop -> tensor -> normalize pipeline used by every split."""
    return transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
    ])


# All three splits use the same deterministic preprocessing (no augmentation),
# so the pipeline is defined once instead of being copy-pasted per split.
data_transforms = {split: _build_transform() for split in ('train', 'val', 'test')}

# Load datasets with the defined transforms
train_dataset = datasets.ImageFolder(f"{DATASET_PATH}/train", transform=data_transforms['train'])
val_dataset = datasets.ImageFolder(f"{DATASET_PATH}/val", transform=data_transforms['val'])
test_dataset = datasets.ImageFolder(f"{DATASET_PATH}/test", transform=data_transforms['test'])

# ImageFolder derives label indices from the class sub-folder names, while the
# evaluation and inference cells map indices through CLASS_NAMES. Fail fast if
# the two orderings disagree, otherwise predictions would be silently mislabeled.
if train_dataset.classes != CLASS_NAMES:
    raise ValueError(
        f"Class folders {train_dataset.classes} do not match CLASS_NAMES {CLASS_NAMES}"
    )

# Create data loaders. Only the training loader is shuffled; evaluation loaders
# keep a fixed order so that any pass over them is repeatable.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Load MobileNetV2 with ImageNet-pretrained weights
model = models.mobilenet_v2(weights=models.MobileNet_V2_Weights.IMAGENET1K_V1)

# Freeze every pretrained parameter so that only the new head is trained
for param in model.parameters():
    param.requires_grad = False

# Replace the final linear layer with a fresh head that has one output per entry
# in CLASS_NAMES (same sizing rule as the evaluation and inference cells).
# A newly created layer is trainable by default, unlike the frozen layers above.
model.classifier[1] = nn.Linear(model.classifier[1].in_features, len(CLASS_NAMES))
model = model.to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()  # multi-class classification over raw logits
# Give the optimizer only the parameters that can actually receive gradients
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.Adam(trainable_params, lr=0.001)

# Train for a fixed number of epochs, reporting the per-sample average loss of each epoch
num_train_samples = len(train_loader.dataset)
for epoch in range(10):
    # NOTE(review): train mode also lets BatchNorm layers update their running
    # statistics even though their weights are frozen - confirm this is intended.
    model.train()
    running_loss = 0.0
    for batch_images, batch_labels in train_loader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        optimizer.zero_grad()
        batch_loss = criterion(model(batch_images), batch_labels)
        batch_loss.backward()
        optimizer.step()

        # The criterion yields a batch mean, so weight it by the batch size
        # before accumulating; the last batch may be smaller than the others.
        running_loss += batch_loss.item() * batch_images.size(0)

    epoch_loss = running_loss / num_train_samples
    print(f"Epoch {epoch+1}, Loss: {epoch_loss:.4f}")

# Save the weights only (state_dict); loading them requires rebuilding the architecture first
torch.save(model.state_dict(), f"{MODEL_NAME}.pth")

Using device: cuda
Epoch 1, Loss: 0.9021
Epoch 2, Loss: 0.4751
Epoch 3, Loss: 0.3146
Epoch 4, Loss: 0.2470
Epoch 5, Loss: 0.2032
Epoch 6, Loss: 0.1852
Epoch 7, Loss: 0.1399
Epoch 8, Loss: 0.1441
Epoch 9, Loss: 0.1439
Epoch 10, Loss: 0.1286


In [50]:
######################
#### GENERATE METRICS 
######################
import torch
import torch.nn as nn
import torchvision
from torchvision import transforms, models
import cv2
from PIL import Image
import os
import random
import numpy as np
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

# Select the compute device (GPU when available, CPU otherwise)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Rebuild the MobileNetV2 architecture with a head sized for CLASS_NAMES,
# then restore the weights stored in <MODEL_NAME>.pth
model = models.mobilenet_v2(weights=None)
head_in_features = model.classifier[1].in_features
model.classifier[1] = nn.Linear(head_in_features, len(CLASS_NAMES))
state_dict = torch.load(f"{MODEL_NAME}.pth", map_location=device)
model.load_state_dict(state_dict)
model = model.to(device)
model.eval()  # switch to evaluation mode before running predictions

# Preprocessing applied to each image before it is fed to the model
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=imagenet_mean, std=imagenet_std),
])

# Class names
# class_names = ["bowl_empty", "bowl_full", "bowl_half"]

# Run the model over every test image, one class folder at a time, recording the
# expected class index (position of the folder name in CLASS_NAMES) next to the
# index predicted by the model.
test_dir = f"{DATASET_PATH}/test"
y_true = []
y_pred = []

for idx, class_name in enumerate(CLASS_NAMES):
    class_folder = os.path.join(test_dir, class_name)
    for img_name in os.listdir(class_folder):
        # Only .jpg / .png files are evaluated (extension match is case-insensitive)
        if not img_name.lower().endswith(('.jpg', '.png')):
            continue
        img_path = os.path.join(class_folder, img_name)
        bgr_pixels = cv2.imread(img_path)
        if bgr_pixels is None:
            # cv2.imread signals an unreadable file by returning None
            print(f"Warning: Could not read {img_path}")
            continue
        # OpenCV decodes to BGR; convert to RGB before wrapping in a PIL image
        rgb_image = Image.fromarray(cv2.cvtColor(bgr_pixels, cv2.COLOR_BGR2RGB))
        input_tensor = preprocess(rgb_image).unsqueeze(0).to(device)
        with torch.no_grad():
            logits = model(input_tensor)
        y_true.append(idx)
        y_pred.append(logits.argmax(dim=1).item())

# KPIs
# Pin the label set to the indices of CLASS_NAMES so the confusion matrix and the
# report keep one row per class even when a class is missing from the results.
class_indices = list(range(len(CLASS_NAMES)))

print("\n--- Model Evaluation ---")
print(f"Accuracy: {accuracy_score(y_true, y_pred):.4f}")
print("\nConfusion Matrix:")
print(confusion_matrix(y_true, y_pred, labels=class_indices))
print("\nClassification Report:")
# Use the CLASS_NAMES constant: no lowercase `class_names` variable is defined
# in this notebook, so referencing it fails on a freshly started kernel.
print(classification_report(y_true, y_pred, labels=class_indices, target_names=CLASS_NAMES))

Using device: cuda

--- Model Evaluation ---
Accuracy: 0.9750

Confusion Matrix:
[[ 6  0  1]
 [ 0 16  0]
 [ 0  0 17]]

Classification Report:
              precision    recall  f1-score   support

  bowl_empty       1.00      0.86      0.92         7
   bowl_full       1.00      1.00      1.00        16
   bowl_half       0.94      1.00      0.97        17

    accuracy                           0.97        40
   macro avg       0.98      0.95      0.96        40
weighted avg       0.98      0.97      0.97        40



In [51]:
######################
#### INFERENCE 
######################

import torch
import torch.nn as nn
import torchvision
from torchvision import transforms, models
import cv2
from PIL import Image
import os
import random

# Select the compute device (GPU when available, CPU otherwise)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Rebuild the MobileNetV2 architecture with one output per entry in CLASS_NAMES
# and restore the weights stored in <MODEL_NAME>.pth
model = models.mobilenet_v2(weights=None)
head_in_features = model.classifier[1].in_features
model.classifier[1] = nn.Linear(head_in_features, len(CLASS_NAMES))
state_dict = torch.load(f"{MODEL_NAME}.pth", map_location=device)
model.load_state_dict(state_dict)
model = model.to(device)  # keep the model on the same device as its inputs
model.eval()

# Preprocessing applied to the image before it is fed to MobileNetV2
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=imagenet_mean, std=imagenet_std),
])

# Class names
# class_names = ["bowl_empty", "bowl_full", "bowl_half" ]

# Pick a random .jpg/.png image from the "bowl_full" test folder.
# To classify one specific file instead, assign its path to `image_path` below.
bowl_folder = f"{DATASET_PATH}/test/bowl_full"
bowl_images = [img for img in os.listdir(bowl_folder) if img.lower().endswith(('.jpg', '.png'))]
if not bowl_images:
    # random.choice would otherwise fail with an unhelpful IndexError
    raise FileNotFoundError(f"No .jpg/.png images found in {bowl_folder}")
random_img = random.choice(bowl_images)
image_path = os.path.join(bowl_folder, random_img)

# Decode with OpenCV; cv2.imread returns None (it does not raise) on unreadable files
bowl_bgr = cv2.imread(image_path)
if bowl_bgr is None:
    raise ValueError(f"Could not read {image_path}")

# OpenCV decodes to BGR; convert to RGB and wrap in a PIL image for the transforms
bowl_pil = Image.fromarray(cv2.cvtColor(bowl_bgr, cv2.COLOR_BGR2RGB))
# Add the batch dimension and move the input to the same device as the model
bowl_img = preprocess(bowl_pil).unsqueeze(0).to(device)

# Classify the bowl
with torch.no_grad():
    output = model(bowl_img)
_, predicted = torch.max(output, 1)

print(f"Predicted: {CLASS_NAMES[predicted.item()]}")


Using device: cuda
Predicted: bowl_full


In [52]:
######################
#### EXPORT TO ONNX
######################
import torch
from torchvision import models
import torch.nn as nn

device = torch.device("cpu")   # force CPU for export

# Example input handed to the exporter: a batch of one 3-channel 224x224 image
dummy_input = torch.randn(1, 3, 224, 224, device=device)

# Rebuild the MobileNetV2 architecture and load the trained weights.
# The head is sized from CLASS_NAMES, matching the evaluation and inference cells,
# so the checkpoint keeps loading if the class list ever changes.
model = models.mobilenet_v2(weights=None)
model.classifier[1] = nn.Linear(model.classifier[1].in_features, len(CLASS_NAMES))
ckpt = torch.load(f"{MODEL_NAME}.pth", map_location=device)
model.load_state_dict(ckpt)
model = model.to(device)  # Move model to the same device as the dummy input
model.eval()

# def patch_model(m: nn.Module):
#     # Replace Hardswish/Hardsigmoid with export-friendly forms if present
#     for name, child in list(m.named_children()):
#         if isinstance(child, nn.Hardswish):
#             setattr(m, name, nn.Sequential())  # identity; MobileNetV3 uses inplace hardswish in blocks
#         elif isinstance(child, nn.Hardsigmoid):
#             setattr(m, name, nn.Sigmoid())     # workable approximation for many exporters
#         elif isinstance(child, nn.Conv2d) and getattr(child, "padding_mode", "zeros") != "zeros":
#             # Force zero-pad mode
#             child.padding_mode = "zeros"
#         # If any Conv2d used padding='same', rewrite to explicit ZeroPad2d + Conv2d
#         if isinstance(child, nn.Conv2d) and isinstance(child.padding, str):  # 'same' or 'valid'
#             kH, kW = child.kernel_size if isinstance(child.kernel_size, tuple) else (child.kernel_size, child.kernel_size)
#             sH, sW = child.stride if isinstance(child.stride, tuple) else (child.stride, child.stride)
#             # compute SAME padding explicitly
#             pad_h = max((sH - 1), 0) + max(kH - sH, 0)//2
#             pad_w = max((sW - 1), 0) + max(kW - sW, 0)//2
#             pad = nn.ZeroPad2d((pad_w, pad_w, pad_h, pad_h))
#             new_conv = nn.Conv2d(child.in_channels, child.out_channels, (kH, kW),
#                                  stride=(sH, sW), padding=0, dilation=child.dilation,
#                                  groups=child.groups, bias=(child.bias is not None))
#             new_conv.load_state_dict(child.state_dict(), strict=False)
#             setattr(m, name, nn.Sequential(pad, new_conv))
#         else:
#             patch_model(child)

# def patch_convs(m: nn.Module):
#     for name, child in m.named_children():
#         if isinstance(child, nn.Conv2d):
#             if isinstance(child.padding, str):  # 'same' or 'valid'
#                 # replace with explicit ZeroPad2d + Conv2d
#                 kH, kW = child.kernel_size
#                 sH, sW = child.stride
#                 pad_h = max((sH - 1), 0) + max(kH - sH, 0)//2
#                 pad_w = max((sW - 1), 0) + max(kW - sW, 0)//2
#                 pad = nn.ZeroPad2d((pad_w, pad_w, pad_h, pad_h))
#                 new_conv = nn.Conv2d(child.in_channels, child.out_channels,
#                                      (kH, kW), stride=(sH, sW), padding=0,
#                                      dilation=child.dilation, groups=child.groups,
#                                      bias=(child.bias is not None))
#                 new_conv.load_state_dict(child.state_dict(), strict=False)
#                 setattr(m, name, nn.Sequential(pad, new_conv))
#         else:
#             patch_convs(child)

# patch_model(model)
# patch_convs(model)

# Export the model to <MODEL_NAME>.onnx using dummy_input as the example input.
# dynamic_axes is not passed, so no axis is declared dynamic here; the previously
# tried options are kept below as disabled hints.
onnx_path = f"{MODEL_NAME}.onnx"
torch.onnx.export(
    model,
    (dummy_input,),
    onnx_path,
    opset_version=17,   # check AxeleraAI docs for required opset
    do_constant_folding=True,
    input_names=["input"],
    output_names=["output"],
    # dynamic_axes={"input": {0: "batch_size"}, "output": {0: "batch_size"}},
    # dynamo=False
)


In [53]:
######################
#### CHECK ONNX FORMAT
######################
import onnx

# Load the exported file and run the ONNX checker on it; the success message
# is only printed when check_model returns without raising.
onnx_path = f"{MODEL_NAME}.onnx"
onnx_model = onnx.load(onnx_path)
onnx.checker.check_model(onnx_model)
print("ONNX model is valid!")

ONNX model is valid!


In [54]:
######################
#### CHECK THE ONNX OP
######################
import onnx

# Print the operator type of every node in the exported graph, one per line.
# NOTE(review): `model` is rebound here from the PyTorch module to the loaded
# ONNX model; cells that expect the PyTorch model must rebuild it first.
model = onnx.load(f"{MODEL_NAME}.onnx")
graph_nodes = model.graph.node
for graph_node in graph_nodes:
    print(graph_node.op_type)

Conv
Constant
Constant
Clip
Conv
Constant
Constant
Clip
Conv
Conv
Constant
Constant
Clip
Conv
Constant
Constant
Clip
Conv
Conv
Constant
Constant
Clip
Conv
Constant
Constant
Clip
Conv
Add
Conv
Constant
Constant
Clip
Conv
Constant
Constant
Clip
Conv
Conv
Constant
Constant
Clip
Conv
Constant
Constant
Clip
Conv
Add
Conv
Constant
Constant
Clip
Conv
Constant
Constant
Clip
Conv
Add
Conv
Constant
Constant
Clip
Conv
Constant
Constant
Clip
Conv
Conv
Constant
Constant
Clip
Conv
Constant
Constant
Clip
Conv
Add
Conv
Constant
Constant
Clip
Conv
Constant
Constant
Clip
Conv
Add
Conv
Constant
Constant
Clip
Conv
Constant
Constant
Clip
Conv
Add
Conv
Constant
Constant
Clip
Conv
Constant
Constant
Clip
Conv
Conv
Constant
Constant
Clip
Conv
Constant
Constant
Clip
Conv
Add
Conv
Constant
Constant
Clip
Conv
Constant
Constant
Clip
Conv
Add
Conv
Constant
Constant
Clip
Conv
Constant
Constant
Clip
Conv
Conv
Constant
Constant
Clip
Conv
Constant
Constant
Clip
Conv
Add
Conv
Constant
Constant
Clip
Conv
Constant
Constan