# FoodVision has 101 food categories. 
We've had a lot of success with FoodVision Mini; now it's time to tackle the big one: all 101 food categories.

In [1]:
import os
from pathlib import Path

import sys
sys.path.append("../")
from src.utils import get_device, plot_loss_curves
from going_modular import data_setup, engine
from going_modular.get_data import get_data
from going_modular.utils import save_model

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from PIL import Image

import torch
import torchvision
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchinfo import summary

from tqdm.notebook import tqdm

# Report the library versions this notebook was executed with.
print("PyTorch version: ", torch.__version__)
print("Torchvision version: ", torchvision.__version__)

PyTorch version:  2.0.0
Torchvision version:  0.15.2a0


## Load model and transforms
Since we're working with a large dataset, we may want to introduce data augmentation techniques. Because we are also working with a large number of classes, let's use TrivialAugment as the data augmentation technique during model training.

In [6]:
def create_effnetb2_model(num_classes: int = 3,
                          seed: int = 42):
    """Build a frozen EfficientNet-B2 with a fresh, trainable classifier head.

    Loads torchvision's default pretrained EfficientNet-B2 weights, turns off
    gradients for every pretrained parameter, and replaces the classifier with
    a new dropout + linear head that outputs `num_classes` values.

    Args:
        num_classes: Number of output units of the new classifier head.
        seed: Value passed to `torch.manual_seed` immediately before the new
            head is created, so its initial weights are reproducible.

    Returns:
        A tuple `(model, train_transforms, test_transforms)` where
        `test_transforms` is the transform bundled with the pretrained weights
        and `train_transforms` applies `TrivialAugmentWide` before it.
    """
    effnetb2_weights = torchvision.models.EfficientNet_B2_Weights.DEFAULT

    # Evaluation uses the weights' own preprocessing; training prepends augmentation.
    test_transforms = effnetb2_weights.transforms()
    train_transforms = torchvision.transforms.Compose([
        torchvision.transforms.TrivialAugmentWide(),
        test_transforms,
    ])

    effnetb2 = torchvision.models.efficientnet_b2(weights=effnetb2_weights)

    # Freeze the pretrained parameters; only the replacement head below is trainable.
    for param in effnetb2.parameters():
        param.requires_grad = False

    # Take the head's input width from the pretrained classifier being replaced
    # rather than hard-coding it (1408 for EfficientNet-B2).
    head_in_features = effnetb2.classifier[1].in_features

    # Seed right before creating the new layers so their initialisation is repeatable.
    torch.manual_seed(seed)
    effnetb2.classifier = nn.Sequential(
        nn.Dropout(p=0.3, inplace=True),
        nn.Linear(in_features=head_in_features, out_features=num_classes),
    )
    return effnetb2, train_transforms, test_transforms

# Build the 101-class model together with its training and testing transforms.
model, train_transforms, test_transforms = create_effnetb2_model(num_classes=101)

# Summarise the architecture for a single 3-channel 224x224 input.
# NOTE(review): the transforms printed in the next cell crop to 288, not 224 —
# confirm 224 is the intended size for this summary.
summary(
    model,
    input_size=(1, 3, 224, 224),
    col_names=["input_size", "output_size", "num_params", "trainable"],
    col_width=20,
    row_settings=["var_names"],
)

Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
EfficientNet (EfficientNet)                                  [1, 3, 224, 224]     [1, 101]             --                   Partial
├─Sequential (features)                                      [1, 3, 224, 224]     [1, 1408, 7, 7]      --                   False
│    └─Conv2dNormActivation (0)                              [1, 3, 224, 224]     [1, 32, 112, 112]    --                   False
│    │    └─Conv2d (0)                                       [1, 3, 224, 224]     [1, 32, 112, 112]    (864)                False
│    │    └─BatchNorm2d (1)                                  [1, 32, 112, 112]    [1, 32, 112, 112]    (64)                 False
│    │    └─SiLU (2)                                         [1, 32, 112, 112]    [1, 32, 112, 112]    --                   --
│    └─Sequential (1)                                        [1, 32, 112, 112]    [1, 1

In [7]:
# Display both transform pipelines returned by create_effnetb2_model for inspection.
train_transforms, test_transforms

(Compose(
     TrivialAugmentWide(num_magnitude_bins=31, interpolation=InterpolationMode.NEAREST, fill=None)
     ImageClassification(
     crop_size=[288]
     resize_size=[288]
     mean=[0.485, 0.456, 0.406]
     std=[0.229, 0.224, 0.225]
     interpolation=InterpolationMode.BICUBIC
 )
 ),
 ImageClassification(
     crop_size=[288]
     resize_size=[288]
     mean=[0.485, 0.456, 0.406]
     std=[0.229, 0.224, 0.225]
     interpolation=InterpolationMode.BICUBIC
 ))

## Get FoodVision 101 data

In [9]:
# Root folder that the Food101 archive is downloaded to and read from
data_dir = Path("data")

# Training split with the augmenting transforms (750 images per class, 101 classes)
train_data = datasets.Food101(
    root=data_dir,
    split="train",
    transform=train_transforms,
    download=True,
)

# Testing split with the plain transforms (250 images per class, 101 classes)
test_data = datasets.Food101(
    root=data_dir,
    split="test",
    transform=test_transforms,
    download=True,
)

Downloading https://data.vision.ee.ethz.ch/cvl/food-101.tar.gz to data/food-101.tar.gz


100%|██████████| 4996278331/4996278331 [19:44<00:00, 4219134.36it/s] 


Extracting data/food-101.tar.gz to data


In [11]:
# Keep the class names exposed by the training dataset's `classes` attribute.
food101_class_names = train_data.classes
# Preview the first ten class names.
food101_class_names[:10]

['apple_pie',
 'baby_back_ribs',
 'baklava',
 'beef_carpaccio',
 'beef_tartare',
 'beet_salad',
 'beignets',
 'bibimbap',
 'bread_pudding',
 'breakfast_burrito']