In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import models
import torchvision.transforms.v2 as transforms
from PIL import Image
from tqdm import tqdm

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

torch.backends.cudnn.benchmark = True
print(device)

cuda


In [None]:
class FruitVegDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.images = []
        self.labels = []
        self.class_to_idx = {}

        print(f"Initializing dataset from directory: {root_dir}")
        if not os.path.exists(root_dir):
            print(f"ERROR: Directory {root_dir} does not exist")
            return

        class_folders = [d for d in os.listdir(root_dir) if os.path.isdir(os.path.join(root_dir, d))]
        print(f"Found {len(class_folders)} class folders: {class_folders}")

        self.class_to_idx = {class_name: idx for idx, class_name in enumerate(sorted(class_folders))}

        for class_name in class_folders:
            class_dir = os.path.join(root_dir, class_name)
            class_idx = self.class_to_idx[class_name]

            for img_name in os.listdir(class_dir):
                if img_name.endswith('.jpg') or img_name.endswith('.jpeg') or img_name.endswith('.png'):
                    img_path = os.path.join(class_dir, img_name)
                    self.images.append(img_path)
                    self.labels.append(class_idx)

        self.idx_to_class = {idx: class_name for class_name, idx in self.class_to_idx.items()}
        print(f"Dataset created with {len(self.images)} images")
        print(f"Found {len(self.class_to_idx)} classes: {', '.join(self.class_to_idx.keys())}")

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_path = self.images[idx]
        label = self.labels[idx]

        try:
            img = Image.open(img_path).convert('RGB')

            if self.transform:
                img = self.transform(img)

            return img, label

        except Exception as e:
            print(f"Error loading image {img_path}: {e}")
            return label

In [None]:
class FruitVegClassifier(nn.Module):
    def __init__(self, num_classes):
        super(FruitVegClassifier, self).__init__()

        self.backbone = models.resnet34(weights=models.ResNet34_Weights.DEFAULT)

        if hasattr(self.backbone, 'fc'):
            num_ftrs = self.backbone.fc.in_features
            self.backbone.fc = nn.Sequential(
                nn.Linear(num_ftrs, 512),
                nn.ReLU(inplace=True),
                nn.Dropout(0.3),
                nn.Linear(512, num_classes)
            )
        elif hasattr(self.backbone, 'classifier'):
            num_ftrs = self.backbone.classifier[1].in_features
            self.backbone.classifier = nn.Sequential(
                nn.Dropout(p=0.3, inplace=True),
                nn.Linear(num_ftrs, 512),
                nn.ReLU(inplace=True),
                nn.Dropout(0.3),
                nn.Linear(512, num_classes)
            )
        self.extra_compute = False

    def forward(self, x):
        return self.backbone(x)

In [None]:
def train_model(model, train_loader, val_loader, criterion, optimizer,
                         num_epochs=10, device='cuda', patience=5,
                         checkpoint_path='best_model.pth'):
    best_val_accuracy = 0.0
    no_improve_epochs = 0

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='max', factor=0.5, patience=2
    )

    scaler = torch.amp.GradScaler() if device == 'cuda' else None

    for epoch in range(num_epochs):
        # Training
        model.train()
        running_loss = 0.0

        print(f"VRAM Usage: {torch.cuda.memory_allocated() / 1e9:.2f} GB / {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")

        for inputs, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} - Training"):
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad(set_to_none=True)

            # Most of the below were just forced configurations/computations
            # to force some workload on my GPU (suggested online)
            if scaler:
                with torch.amp.autocast():
                    inputs = inputs + 0.001 * torch.sin(inputs * 10) 

                    outputs = model(inputs)
                    loss = criterion(outputs, labels)

                scaler.scale(loss).backward()
                torch.cuda.synchronize()
                scaler.step(optimizer)
                scaler.update()
            else:
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

            running_loss += loss.item() * inputs.size(0)

            if torch.cuda.is_available() and torch.cuda.memory_allocated() > 5e9:  # 5 GB threshold
                torch.cuda.empty_cache()

        epoch_loss = running_loss / len(train_loader.dataset)

        # Validation
        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0

        with torch.no_grad():
            for inputs, labels in tqdm(val_loader, desc=f"Epoch {epoch+1}/{num_epochs} - Validation"):
                inputs = inputs.to(device)
                labels = labels.to(device)

                outputs = model(inputs)
                loss = criterion(outputs, labels)

                val_loss += loss.item() * inputs.size(0)

                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        val_accuracy = correct / total
        avg_val_loss = val_loss / len(val_loader.dataset)

        scheduler.step(val_accuracy)

        print(f"Epoch {epoch+1}/{num_epochs}:")
        print(f"  Train Loss: {epoch_loss:.4f}, Val Loss: {avg_val_loss:.4f}")
        print(f"  Val Accuracy: {val_accuracy:.4f}")

        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
            no_improve_epochs = 0

            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'val_accuracy': val_accuracy,
                'class_mapping': train_dataset.class_to_idx
            }, checkpoint_path)

            print(f"  Saved new best model with accuracy: {val_accuracy:.4f}")
        else:
            no_improve_epochs += 1
            print(f"  No improvement for {no_improve_epochs} epochs. Best accuracy: {best_val_accuracy:.4f}")

        if no_improve_epochs >= patience:
            print(f"Early stopping triggered after {epoch+1} epochs")
            break

    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint['model_state_dict'])

    return model

In [None]:
def main():
    train_dir = 'Train'
    val_dir = 'val' 

    # Data transforms
    train_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(15),
        transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1),
        transforms.ToImage(),
        transforms.ToDtype(torch.float32, scale=True),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    val_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToImage(),
        transforms.ToDtype(torch.float32, scale=True),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    global train_dataset
    train_dataset = FruitVegDataset(
        root_dir=train_dir,
        transform=train_transform
    )

    val_dataset = FruitVegDataset(
        root_dir=val_dir,
        transform=val_transform
    )

    train_loader = DataLoader(
        train_dataset,
        batch_size=32,
        shuffle=True,
        num_workers=4,
        pin_memory=True,
        prefetch_factor=2
    )

    val_loader = DataLoader(
        val_dataset,
        batch_size=32,
        shuffle=False,
        num_workers=4,
        pin_memory=True
    )

    # Create model
    num_classes = len(train_dataset.class_to_idx)
    model = FruitVegClassifier(num_classes)

    # Initially added this as True to try to "force" my system's 
    # GPU to get to work, but that didn't work.
    model.extra_compute = False

    model = model.to(device)


    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.01)
    # ^ One of many attempts of optimizing the training to get faster speeds
    # as our systems weren't properly utilizing our GPUs for some reason.

    trained_model = train_model(
        model,
        train_loader,
        val_loader,
        criterion,
        optimizer,
        num_epochs=20,
        device=device,
        patience=5,
        checkpoint_path='ResNet34.pth'
    )

    # Model renamed in post for clarity
    print("Model training complete and saved to ResNet32.pth")

In [8]:
if __name__ == "__main__":
    main()

Initializing dataset from directory: Train
Found 51 class folders: ['Onion', 'Cabbage', 'Pumpkin', 'Potato', 'Corn', 'Garlic', 'Dragon_fruit', 'Raddish', 'Amaranth', 'Bottle Gourd', 'Watermelon', 'Beetroot', 'Tomato', 'Eggplant', 'Paprika', 'Bell pepper', 'Bitter Gourd', 'Ridge Gourd', 'Jalepeno', 'Lemon', 'Cantaloupe', 'Strawberry', 'Mango', 'Spiny Gourd', 'Apple', 'Cauliflower', 'Okra', 'Kiwi', 'Banana', 'Blueberry', 'Sweetcorn', 'Pineapple', 'Peas', 'Grapes', 'Orange', 'Ginger', 'Sweetpotato', 'Turnip', 'Capsicum', 'Raspberry', 'Pomegranate', 'Soy beans', 'Fig', 'Spinach', 'Broccoli', 'Coconut', 'Cucumber', 'Pear', 'Carrot', 'Sponge Gourd', 'Chilli pepper']
Dataset created with 3990 images
Found 51 classes: Amaranth, Apple, Banana, Beetroot, Bell pepper, Bitter Gourd, Blueberry, Bottle Gourd, Broccoli, Cabbage, Cantaloupe, Capsicum, Carrot, Cauliflower, Chilli pepper, Coconut, Corn, Cucumber, Dragon_fruit, Eggplant, Fig, Garlic, Ginger, Grapes, Jalepeno, Kiwi, Lemon, Mango, Okra, On

Downloading: "https://download.pytorch.org/models/resnet34-b627a593.pth" to /root/.cache/torch/hub/checkpoints/resnet34-b627a593.pth
100%|██████████| 83.3M/83.3M [00:00<00:00, 186MB/s]


VRAM Usage: 0.09 GB / 15.83 GB


Epoch 1/20 - Training: 100%|██████████| 125/125 [02:05<00:00,  1.00s/it]
Epoch 1/20 - Validation: 100%|██████████| 9/9 [00:09<00:00,  1.08s/it]


Epoch 1/20:
  Train Loss: 3.5038, Val Loss: 3.2553
  Val Accuracy: 0.0909
  Saved new best model with accuracy: 0.0909
VRAM Usage: 0.38 GB / 15.83 GB


Epoch 2/20 - Training: 100%|██████████| 125/125 [02:00<00:00,  1.03it/s]
Epoch 2/20 - Validation: 100%|██████████| 9/9 [00:09<00:00,  1.05s/it]


Epoch 2/20:
  Train Loss: 3.2356, Val Loss: 3.0563
  Val Accuracy: 0.1636
  Saved new best model with accuracy: 0.1636
VRAM Usage: 0.38 GB / 15.83 GB


Epoch 3/20 - Training: 100%|██████████| 125/125 [02:01<00:00,  1.03it/s]
Epoch 3/20 - Validation: 100%|██████████| 9/9 [00:08<00:00,  1.00it/s]


Epoch 3/20:
  Train Loss: 3.1543, Val Loss: 2.9442
  Val Accuracy: 0.1673
  Saved new best model with accuracy: 0.1673
VRAM Usage: 0.38 GB / 15.83 GB


Epoch 4/20 - Training: 100%|██████████| 125/125 [02:00<00:00,  1.04it/s]
Epoch 4/20 - Validation: 100%|██████████| 9/9 [00:08<00:00,  1.07it/s]


Epoch 4/20:
  Train Loss: 3.1124, Val Loss: 2.9424
  Val Accuracy: 0.1636
  No improvement for 1 epochs. Best accuracy: 0.1673
VRAM Usage: 0.38 GB / 15.83 GB


Epoch 5/20 - Training: 100%|██████████| 125/125 [02:01<00:00,  1.03it/s]
Epoch 5/20 - Validation: 100%|██████████| 9/9 [00:08<00:00,  1.11it/s]


Epoch 5/20:
  Train Loss: 2.9931, Val Loss: 3.0609
  Val Accuracy: 0.1564
  No improvement for 2 epochs. Best accuracy: 0.1673
VRAM Usage: 0.38 GB / 15.83 GB


Epoch 6/20 - Training: 100%|██████████| 125/125 [02:01<00:00,  1.03it/s]
Epoch 6/20 - Validation: 100%|██████████| 9/9 [00:08<00:00,  1.11it/s]


Epoch 6/20:
  Train Loss: 2.8951, Val Loss: 2.7499
  Val Accuracy: 0.1527
  No improvement for 3 epochs. Best accuracy: 0.1673
VRAM Usage: 0.38 GB / 15.83 GB


Epoch 7/20 - Training: 100%|██████████| 125/125 [02:00<00:00,  1.04it/s]
Epoch 7/20 - Validation: 100%|██████████| 9/9 [00:08<00:00,  1.10it/s]


Epoch 7/20:
  Train Loss: 2.6804, Val Loss: 2.0992
  Val Accuracy: 0.3491
  Saved new best model with accuracy: 0.3491
VRAM Usage: 0.38 GB / 15.83 GB


Epoch 8/20 - Training: 100%|██████████| 125/125 [01:59<00:00,  1.05it/s]
Epoch 8/20 - Validation: 100%|██████████| 9/9 [00:08<00:00,  1.10it/s]


Epoch 8/20:
  Train Loss: 2.5985, Val Loss: 2.0667
  Val Accuracy: 0.3855
  Saved new best model with accuracy: 0.3855
VRAM Usage: 0.38 GB / 15.83 GB


Epoch 9/20 - Training: 100%|██████████| 125/125 [02:00<00:00,  1.04it/s]
Epoch 9/20 - Validation: 100%|██████████| 9/9 [00:09<00:00,  1.01s/it]


Epoch 9/20:
  Train Loss: 2.5092, Val Loss: 2.1522
  Val Accuracy: 0.3273
  No improvement for 1 epochs. Best accuracy: 0.3855
VRAM Usage: 0.38 GB / 15.83 GB


Epoch 10/20 - Training: 100%|██████████| 125/125 [01:59<00:00,  1.05it/s]
Epoch 10/20 - Validation: 100%|██████████| 9/9 [00:09<00:00,  1.01s/it]


Epoch 10/20:
  Train Loss: 2.4429, Val Loss: 1.9390
  Val Accuracy: 0.3891
  Saved new best model with accuracy: 0.3891
VRAM Usage: 0.38 GB / 15.83 GB


Epoch 11/20 - Training: 100%|██████████| 125/125 [01:59<00:00,  1.04it/s]
Epoch 11/20 - Validation: 100%|██████████| 9/9 [00:10<00:00,  1.18s/it]


Epoch 11/20:
  Train Loss: 2.4278, Val Loss: 1.9377
  Val Accuracy: 0.4364
  Saved new best model with accuracy: 0.4364
VRAM Usage: 0.38 GB / 15.83 GB


Epoch 12/20 - Training: 100%|██████████| 125/125 [01:59<00:00,  1.05it/s]
Epoch 12/20 - Validation: 100%|██████████| 9/9 [00:09<00:00,  1.04s/it]


Epoch 12/20:
  Train Loss: 2.3644, Val Loss: 2.1493
  Val Accuracy: 0.3345
  No improvement for 1 epochs. Best accuracy: 0.4364
VRAM Usage: 0.38 GB / 15.83 GB


Epoch 13/20 - Training: 100%|██████████| 125/125 [01:58<00:00,  1.05it/s]
Epoch 13/20 - Validation: 100%|██████████| 9/9 [00:09<00:00,  1.07s/it]


Epoch 13/20:
  Train Loss: 2.3025, Val Loss: 1.8926
  Val Accuracy: 0.3491
  No improvement for 2 epochs. Best accuracy: 0.4364
VRAM Usage: 0.38 GB / 15.83 GB


Epoch 14/20 - Training: 100%|██████████| 125/125 [02:02<00:00,  1.02it/s]
Epoch 14/20 - Validation: 100%|██████████| 9/9 [00:09<00:00,  1.03s/it]


Epoch 14/20:
  Train Loss: 2.2883, Val Loss: 2.2344
  Val Accuracy: 0.2764
  No improvement for 3 epochs. Best accuracy: 0.4364
VRAM Usage: 0.38 GB / 15.83 GB


Epoch 15/20 - Training: 100%|██████████| 125/125 [01:59<00:00,  1.04it/s]
Epoch 15/20 - Validation: 100%|██████████| 9/9 [00:09<00:00,  1.01s/it]


Epoch 15/20:
  Train Loss: 2.1080, Val Loss: 1.5461
  Val Accuracy: 0.5273
  Saved new best model with accuracy: 0.5273
VRAM Usage: 0.38 GB / 15.83 GB


Epoch 16/20 - Training: 100%|██████████| 125/125 [02:00<00:00,  1.04it/s]
Epoch 16/20 - Validation: 100%|██████████| 9/9 [00:08<00:00,  1.11it/s]


Epoch 16/20:
  Train Loss: 2.0380, Val Loss: 1.5347
  Val Accuracy: 0.4836
  No improvement for 1 epochs. Best accuracy: 0.5273
VRAM Usage: 0.38 GB / 15.83 GB


Epoch 17/20 - Training: 100%|██████████| 125/125 [02:01<00:00,  1.03it/s]
Epoch 17/20 - Validation: 100%|██████████| 9/9 [00:08<00:00,  1.10it/s]


Epoch 17/20:
  Train Loss: 2.0053, Val Loss: 1.5422
  Val Accuracy: 0.5345
  Saved new best model with accuracy: 0.5345
VRAM Usage: 0.38 GB / 15.83 GB


Epoch 18/20 - Training: 100%|██████████| 125/125 [02:00<00:00,  1.04it/s]
Epoch 18/20 - Validation: 100%|██████████| 9/9 [00:08<00:00,  1.03it/s]


Epoch 18/20:
  Train Loss: 1.9462, Val Loss: 1.4898
  Val Accuracy: 0.6036
  Saved new best model with accuracy: 0.6036
VRAM Usage: 0.38 GB / 15.83 GB


Epoch 19/20 - Training: 100%|██████████| 125/125 [01:59<00:00,  1.05it/s]
Epoch 19/20 - Validation: 100%|██████████| 9/9 [00:09<00:00,  1.02s/it]


Epoch 19/20:
  Train Loss: 1.9381, Val Loss: 1.4375
  Val Accuracy: 0.5709
  No improvement for 1 epochs. Best accuracy: 0.6036
VRAM Usage: 0.38 GB / 15.83 GB


Epoch 20/20 - Training: 100%|██████████| 125/125 [02:00<00:00,  1.04it/s]
Epoch 20/20 - Validation: 100%|██████████| 9/9 [00:09<00:00,  1.02s/it]


Epoch 20/20:
  Train Loss: 1.8866, Val Loss: 1.4127
  Val Accuracy: 0.5418
  No improvement for 2 epochs. Best accuracy: 0.6036
Model training complete and saved to fruit_veg_model.pth


For this model, we used this particular kaggle dataset:

https://www.kaggle.com/datasets/sunnyagarwal427444/food-ingredient-dataset-51

This was the first model we worked on, and it was a bit of a learning experience. Initially, we had a different dataset we were going to use:

https://www.kaggle.com/datasets/pes12017000148/food-ingredients-and-recipe-dataset-with-images/data

It wasn't until we got to training this model that we realized--if our goal is to have our model(s) identify, from an image of ingredients that a person uploaded (maybe they just bought from the grocery store or they grouped together what ingredients they already had), which ingredients are in that image and feed back recipes containing those ingredients--then this dataset wouldn't work at all. This dataset contains images of recipes themselves; we can't use that to train our model for ingredient recognition. 

It was a tad bit of a grave oversight, and so we ultimately landed on the former dataset. If you're familiar with ResNet, you'd know that it is an "image classification" network--and we need "object detection" models. Networks like ResNet take an image as input and output a single class prediction for the whole image. That doesn't give us what we need for ingredient detection. Nonetheless, mistakes are part of the experience, so we figure instead of just scrapping it we may as well keep it and document it.

Output Metrics:

Peak validation accuracy was obtained in Epoch 18/20 (60.36%); accuracy decreased after the subsequent final epochs, showing potential overfitting. 

This is probably because our training set has 3990 images across 51 classes, while our validation set only has 275 (~7%) images across 28 classes. 

I attempted retraining the model with these changes:

* Creating a random split from the overall data set and creating train/validation subsets based off that (w/ torch .random_split()) to ensure all 51 classes are represented in both the training and validation splits

* Increased dropout rate to reduce the model's tendency to memorize the training data and actually recognize patterns

With that said, my ideas did not work as the peak accuracy for this version was 38.6%. Just under half of the first version. The train and validation loss values were fluctuating, unlike the first version, where it was consistently decreasing with each epoch too, likely meaning the model was not learning efficiently.