## Training using MobileNetV2

In [2]:
# os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
# os.environ['TORCH_USE_CUDA_DSA'] = "1"

In [3]:
import os
import torch
from PIL import Image
import numpy as np
from torchvision import models, transforms
from torch.utils.data import Dataset, DataLoader, random_split
from torch import nn, optim

In [4]:
import warnings
warnings.filterwarnings('ignore')

In [5]:
torch.__version__

'2.9.1+cu126'

In [6]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [7]:
# models.resnet152(weights='IMAGENET1K_V2')
# models.convnext_small(weights='IMAGENET1K_V1')
# models.efficientnet_v2_s(weights='IMAGENET1K_V1')
# models.mobilenet_v2(weights='IMAGENET1K_V2')

In [8]:
class FoodDataset(Dataset):
    def __init__(self, data_dir, transform=None):
        self.data_dir = data_dir
        self.transform = transform
        self.image_paths = []
        self.labels = []
        self.classes = sorted(os.listdir(data_dir))
        self.class_to_idx = {cls: i for i,cls in enumerate(self.classes)}

        for label_name in self.classes:
            label_dir = os.path.join(data_dir, label_name)
            for img_name in os.listdir(label_dir):
                self.image_paths.append(os.path.join(label_dir, img_name))
                self.labels.append(self.class_to_idx[label_name])

    
    def __len__(self):
        return len(self.image_paths)

    
    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        image = Image.open(img_path).convert('RGB')
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label

In [9]:
input_size = 224

# ImageNet Normalization values
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Simple transforms - just resize and normalize
train_transforms = transforms.Compose([
    transforms.Resize((232, 232)),
    transforms.RandomRotation(15),           # Rotate up to 10 degrees
    transforms.RandomResizedCrop(224, scale=(0.6, 1.0)),  # Zoom
    transforms.RandomHorizontalFlip(),       # Horizontal flip
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

val_transforms = transforms.Compose([
    transforms.Resize((input_size, input_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

In [10]:
train_dataset = FoodDataset(
    data_dir='./indfood-data/train',
    transform=train_transforms
)

val_dataset = FoodDataset(
    data_dir='./indfood-data/val',
    transform=val_transforms
)


In [11]:
total_train_len = len(train_dataset)
total_val_len = len(val_dataset)

# Define the length of the smaller dataset you want to use
subset_train_len = 5000
subset_val_len = 800

# Define the lengths for the split: [desired_length, remaining_length]
train_lengths = [subset_train_len, total_train_len - subset_train_len]
val_lengths = [subset_val_len, total_val_len - subset_val_len]

# Randomly split the original dataset into two new datasets, You get a list of datasets; take the first one [0]
smaller_train_dataset = random_split(train_dataset, train_lengths)[0]
smaller_val_dataset = random_split(val_dataset, val_lengths)[0]


In [12]:
train_loader = DataLoader(smaller_train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(smaller_val_dataset, batch_size=32, shuffle=False)

In [13]:
len(smaller_train_dataset), len(smaller_val_dataset)

(5000, 800)

In [14]:
classes = list(train_dataset.class_to_idx)
classes

['aloo_gobi',
 'aloo_matar',
 'aloo_methi',
 'aloo_paratha',
 'aloo_shimla_mirch',
 'aloo_tikki',
 'amritsari_kulcha',
 'anda_curry',
 'ariselu',
 'balushahi',
 'banana_chips',
 'bandar_laddu',
 'basundi',
 'besan_laddu',
 'bhindi_masala',
 'biryani',
 'boondi',
 'boondi_laddu',
 'butter_chicken',
 'chaas',
 'chak_hao_kheer',
 'cham_cham',
 'chana_masala',
 'chapati',
 'chicken_pizza',
 'chicken_razala',
 'chicken_tikka',
 'chicken_tikka_masala',
 'chicken_wings',
 'chikki',
 'chivda',
 'chole_bhature',
 'daal_baati_churma',
 'daal_puri',
 'dabeli',
 'dal_khichdi',
 'dal_makhani',
 'dal_tadka',
 'dharwad_pedha',
 'dhokla',
 'double_ka_meetha',
 'dum_aloo',
 'falooda',
 'fish_curry',
 'gajar_ka_halwa',
 'garlic_bread',
 'gavvalu',
 'ghevar',
 'grilled_sandwich',
 'gujhia',
 'gulab_jamun',
 'hara_bhara_kabab',
 'idiyappam',
 'idli',
 'imarti',
 'jalebi',
 'kachori',
 'kadai_paneer',
 'kadhi_pakoda',
 'kaju_katli',
 'kakinada_khaja',
 'kalakand',
 'karela_bharta',
 'khakhra',
 'kheer',
 '

In [15]:
num_classes = len(classes)
num_classes

131

In [39]:
class FoodClassifierMobileNet(nn.Module):
    def __init__(self, num_classes=131):
        super(FoodClassifierMobileNet, self).__init__()

        # load pre-trained mobilenet_v2
        self.base_model = models.mobilenet_v2(weights='IMAGENET1K_V2')

        # freeze base model parameters
        for param in self.base_model.parameters():
            param.requires_grad = False

        # remove original classifier
        self.base_model.classifier = nn.Identity()

        # add custom layers
        self.global_avg_pooling = nn.AdaptiveAvgPool2d((1,1))
        self.output_layer = nn.Linear(1280, num_classes)

    def forward(self, x):
        x = self.base_model.features(x)
        x = self.global_avg_pooling(x)
        x = torch.flatten(x, 1)
        x = self.output_layer(x)
        return x
        

In [40]:
model = FoodClassifierMobileNet(num_classes=131)
model.to(device)

FoodClassifierMobileNet(
  (base_model): MobileNetV2(
    (features): Sequential(
      (0): Conv2dNormActivation(
        (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU6(inplace=True)
      )
      (1): InvertedResidual(
        (conv): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
            (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): ReLU6(inplace=True)
          )
          (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
      )
      (2): InvertedResidual(
        (conv): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(16, 96,

In [18]:
criterion = nn.CrossEntropyLoss()

In [19]:
# tuning the learning rate
def make_model(learning_rate=0.01):
    model = FoodClassifierMobileNet(num_classes=131)
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    return model, optimizer


In [20]:
def train_and_evaluate(model, optimizer, train_loader, val_loader, criterion, num_epochs, device):
    best_val_accuracy = 0.0  # Initialize variable to track the best validation accuracy

    for epoch in range(num_epochs):
        # Training phase
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            # forward pass
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            # backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)

            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        train_loss = running_loss / len(train_loader)
        train_acc = correct / total

        # Validation phase
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0

        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = model(inputs)
                loss = criterion(outputs, labels)

                val_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                val_total += labels.size(0)
                val_correct += (predicted == labels).sum().item()

        val_loss /= len(val_loader)
        val_acc = val_correct / val_total

        print(f'Epoch {epoch+1}/{num_epochs}')
        print(f'  Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}')
        print(f'  Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}')

        if val_acc > best_val_accuracy:
            best_val_accuracy = val_acc
            checkpoint_path = f'food_mobilenet_v12_{epoch+1:02d}_{val_acc:.3f}.pth'
            torch.save(model.state_dict(), checkpoint_path)
            print(f'Checkpoint saved: {checkpoint_path}')

### Tuning the Learning Rate

In [44]:
num_epochs = 10
for lr in [0.1, 0.01, 0.001, 0.0001]:
    print("learning rate =", lr)
    model, optimizer = make_model(lr)
    train_and_evaluate(model, optimizer, train_loader, val_loader, criterion, num_epochs, device)
    print()
    print()

learning rate = 0.1
Epoch 1/10
  Train Loss: 15.6091, Train Acc: 0.3356
  Val Loss: 12.1315, Val Acc: 0.4238
Checkpoint saved: food_mobilenet_v11_01_0.424.pth
Epoch 2/10
  Train Loss: 10.0580, Train Acc: 0.4986
  Val Loss: 12.7408, Val Acc: 0.4662
Checkpoint saved: food_mobilenet_v11_02_0.466.pth
Epoch 3/10
  Train Loss: 8.7136, Train Acc: 0.5838
  Val Loss: 15.1680, Val Acc: 0.4550
Epoch 4/10
  Train Loss: 8.0598, Train Acc: 0.6072
  Val Loss: 17.3433, Val Acc: 0.4575
Epoch 5/10
  Train Loss: 7.7045, Train Acc: 0.6482
  Val Loss: 17.6519, Val Acc: 0.5025
Checkpoint saved: food_mobilenet_v11_05_0.502.pth
Epoch 6/10
  Train Loss: 7.1766, Train Acc: 0.6606
  Val Loss: 20.0583, Val Acc: 0.4537
Epoch 7/10
  Train Loss: 7.9855, Train Acc: 0.6716
  Val Loss: 19.1760, Val Acc: 0.5175
Checkpoint saved: food_mobilenet_v11_07_0.517.pth
Epoch 8/10
  Train Loss: 6.6399, Train Acc: 0.7036
  Val Loss: 21.0723, Val Acc: 0.5062
Epoch 9/10
  Train Loss: 7.1124, Train Acc: 0.7024
  Val Loss: 19.7149, Va

In [46]:
# best lr : 0.001

### Tuning the Number of Inner Layers to be added

In [21]:
class FoodClassifierMobileNet(nn.Module):
    def __init__(self, size_inner=100, num_classes=131):
        super(FoodClassifierMobileNet, self).__init__()

        # load pre-trained mobilenet_v2
        self.base_model = models.mobilenet_v2(weights='IMAGENET1K_V2')

        # freeze base model parameters
        for param in self.base_model.parameters():
            param.requires_grad = False

        # remove original classifier
        self.base_model.classifier = nn.Identity()

        # add custom layers
        self.global_avg_pooling = nn.AdaptiveAvgPool2d((1,1))
        # add inner layers
        self.inner = nn.Linear(1280, size_inner)  # New inner layer
        self.relu = nn.ReLU()
        self.output_layer = nn.Linear(size_inner, num_classes)

    def forward(self, x):
        x = self.base_model.features(x)
        x = self.global_avg_pooling(x)
        x = torch.flatten(x, 1)
        x = self.inner(x)
        x = self.relu(x)
        x = self.output_layer(x)
        return x


def make_model(
        learning_rate=0.01,
        size_inner=100
):
    model = FoodClassifierMobileNet(
        num_classes=131,
        size_inner=size_inner
    )
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    return model, optimizer


In [22]:
num_epochs = 10
for size_inner in [1000, 500, 200, 100]:
    print(f"size_inner : {size_inner}")
    model, optimizer = make_model(
        learning_rate=0.001,
        size_inner=size_inner
    )
    
    train_and_evaluate(model, optimizer, train_loader, val_loader, criterion, num_epochs, device)
    print()

size_inner : 1000
Epoch 1/10
  Train Loss: 2.8275, Train Acc: 0.3644
  Val Loss: 2.1877, Val Acc: 0.4450
Checkpoint saved: food_mobilenet_v12_01_0.445.pth
Epoch 2/10
  Train Loss: 1.7432, Train Acc: 0.5574
  Val Loss: 1.7905, Val Acc: 0.5325
Checkpoint saved: food_mobilenet_v12_02_0.532.pth
Epoch 3/10
  Train Loss: 1.4374, Train Acc: 0.6176
  Val Loss: 1.6958, Val Acc: 0.5563
Checkpoint saved: food_mobilenet_v12_03_0.556.pth
Epoch 4/10
  Train Loss: 1.2261, Train Acc: 0.6646
  Val Loss: 1.6512, Val Acc: 0.5713
Checkpoint saved: food_mobilenet_v12_04_0.571.pth
Epoch 5/10
  Train Loss: 1.0648, Train Acc: 0.7038
  Val Loss: 1.5784, Val Acc: 0.5763
Checkpoint saved: food_mobilenet_v12_05_0.576.pth
Epoch 6/10
  Train Loss: 0.9597, Train Acc: 0.7276
  Val Loss: 1.6305, Val Acc: 0.5700
Epoch 7/10
  Train Loss: 0.8676, Train Acc: 0.7518
  Val Loss: 1.5895, Val Acc: 0.5737
Epoch 8/10
  Train Loss: 0.7869, Train Acc: 0.7716
  Val Loss: 1.6780, Val Acc: 0.5700
Epoch 9/10
  Train Loss: 0.7210, Tra

In [23]:
# best inner layers size : 1000

### Tuning the Drop Rate

In [26]:
class FoodClassifierMobileNet(nn.Module):
    def __init__(self, size_inner=100, droprate=0.2, num_classes=131):
        super(FoodClassifierMobileNet, self).__init__()

        # load pre-trained mobilenet_v2
        self.base_model = models.mobilenet_v2(weights='IMAGENET1K_V2')

        # freeze base model parameters
        for param in self.base_model.parameters():
            param.requires_grad = False

        # remove original classifier
        self.base_model.classifier = nn.Identity()

        # add custom layers
        self.global_avg_pooling = nn.AdaptiveAvgPool2d((1,1))
        # add inner layers
        self.inner = nn.Linear(1280, size_inner)  # New inner layer
        self.relu = nn.ReLU()
        # add dropout
        self.dropout = nn.Dropout(droprate)
        self.output_layer = nn.Linear(size_inner, num_classes)

    def forward(self, x):
        x = self.base_model.features(x)
        x = self.global_avg_pooling(x)
        x = torch.flatten(x, 1)
        x = self.inner(x)
        x = self.relu(x)
        x = self.dropout(x)  # apply dropout
        x = self.output_layer(x)
        return x


def make_model(
        learning_rate=0.01,
        size_inner=1000,
        droprate=0.2
):
    model = FoodClassifierMobileNet(
        num_classes=131,
        size_inner=size_inner,
        droprate=droprate
    )
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    return model, optimizer


In [27]:
num_epochs = 50
for drop_rate in [0.1, 0.2, 0.3, 0.4]:
    print(f"drop_rate : {drop_rate}")
    model, optimizer = make_model(
        learning_rate=0.001,
        size_inner=1000,
        droprate=drop_rate
    )
    
    train_and_evaluate(model, optimizer, train_loader, val_loader, criterion, num_epochs, device)
    print()

drop_rate : 0.1
Epoch 1/50
  Train Loss: 2.8504, Train Acc: 0.3514
  Val Loss: 2.2383, Val Acc: 0.4363
Checkpoint saved: food_mobilenet_v12_01_0.436.pth
Epoch 2/50
  Train Loss: 1.7720, Train Acc: 0.5558
  Val Loss: 1.8477, Val Acc: 0.5363
Checkpoint saved: food_mobilenet_v12_02_0.536.pth
Epoch 3/50
  Train Loss: 1.4165, Train Acc: 0.6214
  Val Loss: 1.7015, Val Acc: 0.5613
Checkpoint saved: food_mobilenet_v12_03_0.561.pth
Epoch 4/50
  Train Loss: 1.2411, Train Acc: 0.6568
  Val Loss: 1.6817, Val Acc: 0.5413
Epoch 5/50
  Train Loss: 1.0813, Train Acc: 0.6900
  Val Loss: 1.6409, Val Acc: 0.5613
Epoch 6/50
  Train Loss: 0.9880, Train Acc: 0.7160
  Val Loss: 1.5979, Val Acc: 0.5962
Checkpoint saved: food_mobilenet_v12_06_0.596.pth
Epoch 7/50
  Train Loss: 0.8865, Train Acc: 0.7384
  Val Loss: 1.5938, Val Acc: 0.5988
Checkpoint saved: food_mobilenet_v12_07_0.599.pth
Epoch 8/50
  Train Loss: 0.8206, Train Acc: 0.7640
  Val Loss: 1.6362, Val Acc: 0.5850
Epoch 9/50
  Train Loss: 0.7505, Train

In [28]:
# best drop rate : 0.4

In [None]:
# num_epochs = 50
# model, optimizer = make_model(
#     learning_rate=0.001,
#     size_inner=1000,
#     droprate=0.4
# )

# train_and_evaluate(model, optimizer, train_loader, val_loader, criterion, num_epochs, device)

In [None]:
# from torchsummary import summary
# summary(model, input_size=(3, 224, 224))

In [None]:
# ## Exporting to ONNX for serverles model deployment
# dummy_input = torch.randn(1, 3, 224, 224).to(device)

# # Export to ONNX
# onnx_path = "food_classifier_mobilenet_v2.onnx"

# torch.onnx.export(
#     model,
#     dummy_input,
#     onnx_path,
#     verbose=True,
#     input_names=['input'],
#     output_names=['output'],
#     dynamic_axes={
#         'input': {0: 'batch_size'},
#         'output': {0: 'batch_size'}
#     },
# )

In [None]:
# ## load model from a checkpoint
# path = './clothing_v4_23_0.830.pth'
# model = ClothingClassifierMobileNet(size_inner=100, droprate=0.2, num_classes=10)
# model.load_state_dict(torch.load(path))
# model.to(device)
# model.eval();

In [None]:
# x = val_transforms(img)
# batch_t = torch.unsqueeze(x, 0).to(device)

# with torch.no_grad():
#     output = model(batch_t)

In [None]:
# dict(zip(classes, output[0].to('cpu')))