## Training using EfficientNet-V2-Small

In [1]:
import os
import torch
from PIL import Image
import numpy as np
from torchvision import models, transforms
from torch.utils.data import Dataset, DataLoader, random_split
from torch import nn, optim

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
# os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
# os.environ['TORCH_USE_CUDA_DSA'] = "1"

In [3]:
torch.__version__

'2.9.1+cu126'

In [4]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [5]:
# models.resnet152(weights='IMAGENET1K_V2')
# models.convnext_small(weights='IMAGENET1K_V1')
# models.efficientnet_v2_s(weights='IMAGENET1K_V1')
# models.mobilenet_v2(weights='IMAGENET1K_V2')

In [6]:
class FoodDataset(Dataset):
    def __init__(self, data_dir, transform=None):
        self.data_dir = data_dir
        self.transform = transform
        self.image_paths = []
        self.labels = []
        self.classes = sorted(os.listdir(data_dir))
        self.class_to_idx = {cls: i for i,cls in enumerate(self.classes)}

        for label_name in self.classes:
            label_dir = os.path.join(data_dir, label_name)
            for img_name in os.listdir(label_dir):
                self.image_paths.append(os.path.join(label_dir, img_name))
                self.labels.append(self.class_to_idx[label_name])

    
    def __len__(self):
        return len(self.image_paths)

    
    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        image = Image.open(img_path).convert('RGB')
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label

In [7]:
input_size = 384

# ImageNet Normalization values
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Simple transforms - just resize and normalize
train_transforms = transforms.Compose([
    transforms.Resize((512, 512)),
    transforms.RandomRotation(15),           # Rotate up to 10 degrees
    transforms.RandomResizedCrop(384, scale=(0.0, 1.0)),  # Zoom
    transforms.RandomHorizontalFlip(),       # Horizontal flip
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

val_transforms = transforms.Compose([
    transforms.Resize((input_size, input_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

In [8]:
train_dataset = FoodDataset(
    data_dir='./indfood-data/train',
    transform=train_transforms
)

val_dataset = FoodDataset(
    data_dir='./indfood-data/val',
    transform=val_transforms
)


In [9]:
total_train_len = len(train_dataset)
total_val_len = len(val_dataset)

# Define the length of the smaller dataset you want to use
subset_train_len = 6000
subset_val_len = 1500

# Define the lengths for the split: [desired_length, remaining_length]
train_lengths = [subset_train_len, total_train_len - subset_train_len]
val_lengths = [subset_val_len, total_val_len - subset_val_len]

# Randomly split the original dataset into two new datasets, You get a list of datasets; take the first one [0]
smaller_train_dataset = random_split(train_dataset, train_lengths)[0]
smaller_val_dataset = random_split(val_dataset, val_lengths)[0]


In [10]:
train_loader = DataLoader(smaller_train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(smaller_val_dataset, batch_size=32, shuffle=False)

In [11]:
len(train_dataset), len(val_dataset)

(114965, 20370)

In [12]:
len(smaller_train_dataset), len(smaller_val_dataset)

(6000, 1500)

In [13]:
classes = list(train_dataset.class_to_idx)
classes

['aloo_gobi',
 'aloo_matar',
 'aloo_methi',
 'aloo_paratha',
 'aloo_shimla_mirch',
 'aloo_tikki',
 'amritsari_kulcha',
 'anda_curry',
 'ariselu',
 'balushahi',
 'banana_chips',
 'bandar_laddu',
 'basundi',
 'besan_laddu',
 'bhindi_masala',
 'biryani',
 'boondi',
 'boondi_laddu',
 'butter_chicken',
 'chaas',
 'chak_hao_kheer',
 'cham_cham',
 'chana_masala',
 'chapati',
 'chicken_pizza',
 'chicken_razala',
 'chicken_tikka',
 'chicken_tikka_masala',
 'chicken_wings',
 'chikki',
 'chivda',
 'chole_bhature',
 'daal_baati_churma',
 'daal_puri',
 'dabeli',
 'dal_khichdi',
 'dal_makhani',
 'dal_tadka',
 'dharwad_pedha',
 'dhokla',
 'double_ka_meetha',
 'dum_aloo',
 'falooda',
 'fish_curry',
 'gajar_ka_halwa',
 'garlic_bread',
 'gavvalu',
 'ghevar',
 'grilled_sandwich',
 'gujhia',
 'gulab_jamun',
 'hara_bhara_kabab',
 'idiyappam',
 'idli',
 'imarti',
 'jalebi',
 'kachori',
 'kadai_paneer',
 'kadhi_pakoda',
 'kaju_katli',
 'kakinada_khaja',
 'kalakand',
 'karela_bharta',
 'khakhra',
 'kheer',
 '

In [14]:
num_classes = len(classes)
num_classes

131

In [25]:
class FoodClassifierEffNet(nn.Module):
    def __init__(self, num_classes=131):
        super(FoodClassifierEffNet, self).__init__()

        # load pre-trained eff_net
        self.base_model = models.efficientnet_v2_s(weights='IMAGENET1K_V1')

        # freeze base model parameters
        for param in self.base_model.parameters():
            param.requires_grad = False

        # remove original classifier
        self.base_model.classifier = nn.Identity()

        # add custom layers
        self.global_avg_pooling = nn.AdaptiveAvgPool2d((1,1))
        self.output_layer = nn.Linear(1280, num_classes)

    def forward(self, x):
        x = self.base_model.features(x)
        x = self.global_avg_pooling(x)
        x = torch.flatten(x, 1)
        x = self.output_layer(x)
        return x
        

In [26]:
model = FoodClassifierEffNet(num_classes=131)
model.to(device)

FoodClassifierEffNet(
  (base_model): EfficientNet(
    (features): Sequential(
      (0): Conv2dNormActivation(
        (0): Conv2d(3, 24, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): BatchNorm2d(24, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
        (2): SiLU(inplace=True)
      )
      (1): Sequential(
        (0): FusedMBConv(
          (block): Sequential(
            (0): Conv2dNormActivation(
              (0): Conv2d(24, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
              (1): BatchNorm2d(24, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
              (2): SiLU(inplace=True)
            )
          )
          (stochastic_depth): StochasticDepth(p=0.0, mode=row)
        )
        (1): FusedMBConv(
          (block): Sequential(
            (0): Conv2dNormActivation(
              (0): Conv2d(24, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
              (1): 

In [27]:
criterion = nn.CrossEntropyLoss()

In [28]:
# tuning the learning rate
def make_model(learning_rate=0.01):
    model = FoodClassifierEffNet(num_classes=131)
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    return model, optimizer


In [39]:
def train_and_evaluate(model, optimizer, train_loader, val_loader, criterion, num_epochs, device):
    best_val_accuracy = 0.0  # Initialize variable to track the best validation accuracy

    for epoch in range(num_epochs):
        # Training phase
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            # forward pass
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            # backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)

            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        train_loss = running_loss / len(train_loader)
        train_acc = correct / total

        # Validation phase
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0

        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = model(inputs)
                loss = criterion(outputs, labels)

                val_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                val_total += labels.size(0)
                val_correct += (predicted == labels).sum().item()

        val_loss /= len(val_loader)
        val_acc = val_correct / val_total

        print(f'Epoch {epoch+1}/{num_epochs}')
        print(f'  Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}')
        print(f'  Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}')

        if val_acc > best_val_accuracy:
            best_val_accuracy = val_acc
            checkpoint_path = f'food_effnet_v23_{epoch+1:02d}_{val_acc:.3f}.pth'
            torch.save(model.state_dict(), checkpoint_path)
            print(f'Checkpoint saved: {checkpoint_path}')

### Tuning the Learning Rate

In [30]:
num_epochs = 10
for lr in [0.1, 0.01, 0.001, 0.0001]:
    print("learning rate =", lr)
    model, optimizer = make_model(lr)
    train_and_evaluate(model, optimizer, train_loader, val_loader, criterion, num_epochs, device)
    print()
    print()

learning rate = 0.1
Epoch 1/10
  Train Loss: 14.8507, Train Acc: 0.3152
  Val Loss: 10.8857, Val Acc: 0.4593
Checkpoint saved: food_effnet_v21_01_0.459.pth
Epoch 2/10
  Train Loss: 14.5278, Train Acc: 0.4203
  Val Loss: 12.2458, Val Acc: 0.4980
Checkpoint saved: food_effnet_v21_02_0.498.pth
Epoch 3/10
  Train Loss: 14.6965, Train Acc: 0.4550
  Val Loss: 12.4444, Val Acc: 0.5300
Checkpoint saved: food_effnet_v21_03_0.530.pth
Epoch 4/10
  Train Loss: 15.1088, Train Acc: 0.4860
  Val Loss: 13.9250, Val Acc: 0.5320
Checkpoint saved: food_effnet_v21_04_0.532.pth
Epoch 5/10
  Train Loss: 15.2407, Train Acc: 0.5065
  Val Loss: 13.1603, Val Acc: 0.5613
Checkpoint saved: food_effnet_v21_05_0.561.pth
Epoch 6/10
  Train Loss: 14.6913, Train Acc: 0.5232
  Val Loss: 14.9979, Val Acc: 0.5293
Epoch 7/10
  Train Loss: 15.2218, Train Acc: 0.5287
  Val Loss: 14.2781, Val Acc: 0.5567
Epoch 8/10
  Train Loss: 14.8524, Train Acc: 0.5530
  Val Loss: 15.6292, Val Acc: 0.5633
Checkpoint saved: food_effnet_v21

In [46]:
# best lr : 0.001

### Tuning the Number of Inner Layers to be added

In [40]:
class FoodClassifierEffNet(nn.Module):
    def __init__(self, size_inner=100, num_classes=131):
        super(FoodClassifierEffNet, self).__init__()

        # load pre-trained effnet
        self.base_model = models.efficientnet_v2_s(weights='IMAGENET1K_V1')

        # freeze base model parameters
        for param in self.base_model.parameters():
            param.requires_grad = False

        # remove original classifier
        self.base_model.classifier = nn.Identity()

        # add custom layers
        self.global_avg_pooling = nn.AdaptiveAvgPool2d((1,1))
        # add inner layers
        self.inner = nn.Linear(1280, size_inner)  # New inner layer
        self.relu = nn.ReLU()
        self.output_layer = nn.Linear(size_inner, num_classes)

    def forward(self, x):
        x = self.base_model.features(x)
        x = self.global_avg_pooling(x)
        x = torch.flatten(x, 1)
        x = self.inner(x)
        x = self.relu(x)
        x = self.output_layer(x)
        return x


def make_model(
        learning_rate=0.001,
        size_inner=100
):
    model = FoodClassifierEffNet(
        num_classes=131,
        size_inner=size_inner
    )
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    return model, optimizer


In [38]:
num_epochs = 10
for size_inner in [1000, 500, 200, 100]:
    print(f"size_inner : {size_inner}")
    model, optimizer = make_model(
        learning_rate=0.001,
        size_inner=size_inner
    )
    
    train_and_evaluate(model, optimizer, train_loader, val_loader, criterion, num_epochs, device)
    print()

size_inner : 1000
Epoch 1/10
  Train Loss: 2.8462, Train Acc: 0.3553
  Val Loss: 1.8857, Val Acc: 0.5453
Checkpoint saved: food_effnet_v22_01_0.545.pth
Epoch 2/10
  Train Loss: 1.9414, Train Acc: 0.5120
  Val Loss: 1.5681, Val Acc: 0.6080
Checkpoint saved: food_effnet_v22_02_0.608.pth
Epoch 3/10
  Train Loss: 1.7070, Train Acc: 0.5578
  Val Loss: 1.4477, Val Acc: 0.6260
Checkpoint saved: food_effnet_v22_03_0.626.pth
Epoch 4/10
  Train Loss: 1.6109, Train Acc: 0.5755
  Val Loss: 1.4325, Val Acc: 0.6100
Epoch 5/10
  Train Loss: 1.5295, Train Acc: 0.5890
  Val Loss: 1.4073, Val Acc: 0.6340
Checkpoint saved: food_effnet_v22_05_0.634.pth
Epoch 6/10
  Train Loss: 1.4479, Train Acc: 0.6077
  Val Loss: 1.3562, Val Acc: 0.6547
Checkpoint saved: food_effnet_v22_06_0.655.pth
Epoch 7/10
  Train Loss: 1.3825, Train Acc: 0.6270
  Val Loss: 1.3834, Val Acc: 0.6447
Epoch 8/10
  Train Loss: 1.3320, Train Acc: 0.6313
  Val Loss: 1.4052, Val Acc: 0.6473
Epoch 9/10
  Train Loss: 1.2942, Train Acc: 0.6477


In [None]:
# best inner layers size : 500

### Tuning the Drop Rate

In [41]:
class FoodClassifierEffNet(nn.Module):
    def __init__(self, size_inner=500, droprate=0.2, num_classes=131):
        super(FoodClassifierEffNet, self).__init__()

        # load pre-trained eff_net
        self.base_model = models.efficientnet_v2_s(weights='IMAGENET1K_V1')

        # freeze base model parameters
        for param in self.base_model.parameters():
            param.requires_grad = False

        # remove original classifier
        self.base_model.classifier = nn.Identity()

        # add custom layers
        self.global_avg_pooling = nn.AdaptiveAvgPool2d((1,1))
        # add inner layers
        self.inner = nn.Linear(1280, size_inner)  # New inner layer
        self.relu = nn.ReLU()
        # add dropout
        self.dropout = nn.Dropout(droprate)
        self.output_layer = nn.Linear(size_inner, num_classes)

    def forward(self, x):
        x = self.base_model.features(x)
        x = self.global_avg_pooling(x)
        x = torch.flatten(x, 1)
        x = self.inner(x)
        x = self.relu(x)
        x = self.dropout(x)  # apply dropout
        x = self.output_layer(x)
        return x


def make_model(
        learning_rate=0.01,
        size_inner=500,
        droprate=0.2
):
    model = FoodClassifierEffNet(
        num_classes=131,
        size_inner=size_inner,
        droprate=droprate
    )
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    return model, optimizer


In [42]:
num_epochs = 40
for drop_rate in [0.1, 0.2, 0.3, 0.4, 0.5]:
    print(f"drop_rate : {drop_rate}")
    model, optimizer = make_model(
        learning_rate=0.001,
        size_inner=500,
        droprate=drop_rate
    )
    
    train_and_evaluate(model, optimizer, train_loader, val_loader, criterion, num_epochs, device)
    print()

drop_rate : 0.1
Epoch 1/40
  Train Loss: 3.0088, Train Acc: 0.3297
  Val Loss: 2.0578, Val Acc: 0.5213
Checkpoint saved: food_effnet_v23_01_0.521.pth
Epoch 2/40
  Train Loss: 2.0867, Train Acc: 0.4882
  Val Loss: 1.6176, Val Acc: 0.6060
Checkpoint saved: food_effnet_v23_02_0.606.pth
Epoch 3/40
  Train Loss: 1.8119, Train Acc: 0.5423
  Val Loss: 1.4795, Val Acc: 0.6293
Checkpoint saved: food_effnet_v23_03_0.629.pth
Epoch 4/40
  Train Loss: 1.7139, Train Acc: 0.5578
  Val Loss: 1.3892, Val Acc: 0.6513
Checkpoint saved: food_effnet_v23_04_0.651.pth
Epoch 5/40
  Train Loss: 1.5703, Train Acc: 0.5853
  Val Loss: 1.3855, Val Acc: 0.6407
Epoch 6/40
  Train Loss: 1.5266, Train Acc: 0.5930
  Val Loss: 1.3304, Val Acc: 0.6607
Checkpoint saved: food_effnet_v23_06_0.661.pth
Epoch 7/40
  Train Loss: 1.4455, Train Acc: 0.6098
  Val Loss: 1.3359, Val Acc: 0.6507
Epoch 8/40
  Train Loss: 1.3987, Train Acc: 0.6202
  Val Loss: 1.2725, Val Acc: 0.6747
Checkpoint saved: food_effnet_v23_08_0.675.pth
Epoch 

In [43]:
# best drop rate : 0.3

In [None]:
### Final training on best hyperparameters
# num_epochs = 50
# model, optimizer = make_model(
#     learning_rate=0.001,
#     size_inner=500,
#     droprate=0.3
# )

# train_and_evaluate(model, optimizer, train_loader, val_loader, criterion, num_epochs, device)


# ## Exporting to ONNX for serverles model deployment
# dummy_input = torch.randn(1, 3, 224, 224).to(device)

# # Export to ONNX
# onnx_path = "food_classifier_mobilenet_v2.onnx"

# torch.onnx.export(
#     model,
#     dummy_input,
#     onnx_path,
#     verbose=True,
#     input_names=['input'],
#     output_names=['output'],
#     dynamic_axes={
#         'input': {0: 'batch_size'},
#         'output': {0: 'batch_size'}
#     },
# )

In [None]:
# from torchsummary import summary
# summary(model, input_size=(3, 224, 224))

In [None]:
# ## load model from a checkpoint
# path = './clothing_v4_23_0.830.pth'
# model = FoodClassifierEffNet(size_inner=100, droprate=0.2, num_classes=131)
# model.load_state_dict(torch.load(path))
# model.to(device)
# model.eval();

In [None]:
# img

In [None]:
# x = val_transforms(img)
# batch_t = torch.unsqueeze(x, 0).to(device)

# with torch.no_grad():
#     output = model(batch_t)

In [None]:
# dict(zip(classes, output[0].to('cpu')))