Loading the dataset


In [1]:
import kagglehub

# Fetch the latest published version of the Food-101 dataset via kagglehub
DATASET_HANDLE = "dansbecker/food-101"
path = kagglehub.dataset_download(DATASET_HANDLE)

print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/food-101


Copying the dataset into the working directory

In [2]:
import shutil
import os

# Copy from the directory kagglehub actually reported (``path`` is assigned in
# the download cell) instead of assuming the Kaggle mount point, so this cell
# also works when the dataset is downloaded to a different location.
source_path = path
target_path = "datasets/food-101"

# Create target folder if it does not exist
os.makedirs(target_path, exist_ok=True)

# Copy all files and folders from source to target
print(f"Copying data from {source_path} to {target_path}...")

for item in os.listdir(source_path):
    s = os.path.join(source_path, item)
    d = os.path.join(target_path, item)
    if os.path.isdir(s):
        # dirs_exist_ok lets the cell be re-run over an existing copy
        shutil.copytree(s, d, dirs_exist_ok=True)
    else:
        shutil.copy2(s, d)

print("Copy complete.")


Copying data from /kaggle/input/food-101 to datasets/food-101...
Copy complete.


Libraries

In [3]:
import os
import time
import shutil
import torch
import torch.nn as nn
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader

Paths and hyperparameters

In [4]:
# Enable cuDNN auto-tuner for better performance on fixed-size input
torch.backends.cudnn.benchmark = True

# Paths
# All paths are relative to the working directory so that they agree with the
# copy target ("datasets/food-101") and with SPLIT_DIR, instead of resolving
# only when the notebook happens to run from /content.
DATASET_ROOT = os.path.join("datasets", "food-101", "food-101", "food-101")
DATA_DIR = os.path.join(DATASET_ROOT, "images")
TRAIN_META = os.path.join(DATASET_ROOT, "meta", "train.txt")
TEST_META = os.path.join(DATASET_ROOT, "meta", "test.txt")
SPLIT_DIR = "datasets/food-101_split"

# Hyperparameters
BATCH_SIZE = 32
NUM_EPOCHS = 10
LEARNING_RATE = 1e-4
NUM_CLASSES = 101  # output size of the replacement classification layer

Device selection and image transforms

In [5]:
# Device: run on the GPU when CUDA is usable, otherwise on the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Transforms
# Both pipelines finish with the same tensor conversion and channel-wise
# normalisation; only the geometric part differs.
_to_normalized_tensor = [
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
]

# Training: random crop + random flip as augmentation
train_transforms = transforms.Compose(
    [transforms.RandomResizedCrop(224), transforms.RandomHorizontalFlip()]
    + _to_normalized_tensor
)

# Validation: deterministic resize + centre crop
val_transforms = transforms.Compose(
    [transforms.Resize(256), transforms.CenterCrop(224)]
    + _to_normalized_tensor
)


Train/validation split

In [6]:
# Split folders creation
def create_split_folders(root_dir, train_file, test_file, output_dir):
    """Copy images into ``<output_dir>/train`` and ``<output_dir>/val`` class folders.

    Args:
        root_dir: Directory holding one sub-folder of ``.jpg`` images per class.
        train_file: Text file with one ``class_name/image_id`` entry per line
            (no extension); these images are copied under ``train``.
        test_file: Same format as ``train_file``; these images are copied
            under ``val``.
        output_dir: Directory in which the ``train`` and ``val`` trees are created.

    Raises:
        ValueError: If a non-blank line is not of the form ``class_name/image_id``.
        FileNotFoundError: If a list file or a listed source image is missing.

    Files that already exist at the destination are skipped, so the function
    can safely be called again after an interrupted run.
    """
    train_output = os.path.join(output_dir, "train")
    val_output = os.path.join(output_dir, "val")

    os.makedirs(train_output, exist_ok=True)
    os.makedirs(val_output, exist_ok=True)

    def copy_files(file_list_path, dest_root):
        # Class folders already created during this pass; avoids one
        # makedirs call per image.
        created_classes = set()
        with open(file_list_path, 'r') as f:
            for line in f:
                entry = line.strip()
                if not entry:
                    # Blank lines (e.g. a trailing newline) carry no entry
                    continue
                class_name, img_file = entry.split('/')
                src_path = os.path.join(root_dir, class_name, img_file + ".jpg")
                class_folder = os.path.join(dest_root, class_name)
                if class_name not in created_classes:
                    os.makedirs(class_folder, exist_ok=True)
                    created_classes.add(class_name)
                dst_path = os.path.join(class_folder, img_file + ".jpg")
                if not os.path.exists(dst_path):
                    shutil.copy2(src_path, dst_path)

    copy_files(train_file, train_output)
    copy_files(test_file, val_output)
    print(f"Created training folder at: {train_output}")
    print(f"Created validation folder at: {val_output}")

Create the split folders (this step may take a while)

In [7]:
# Materialise the train/val split on disk. Re-running is harmless:
# create_split_folders skips every image that already exists at its destination.
create_split_folders(DATA_DIR, TRAIN_META, TEST_META, SPLIT_DIR)

Created training folder at: datasets/food-101_split/train
Created validation folder at: datasets/food-101_split/val


Loading the split datasets

In [8]:
# os.cpu_count() returns None when the CPU count cannot be determined, but
# DataLoader needs an integer; 0 means "load batches in the main process".
num_workers = os.cpu_count() or 0

# One ImageFolder per split; class labels come from the sub-folder names
train_dataset = datasets.ImageFolder(os.path.join(SPLIT_DIR, "train"), transform=train_transforms)
val_dataset = datasets.ImageFolder(os.path.join(SPLIT_DIR, "val"), transform=val_transforms)

# Shuffle only the training data; validation keeps a fixed order
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True,
                          num_workers=num_workers, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False,
                        num_workers=num_workers, pin_memory=True)

print(f"Number of training samples: {len(train_dataset)}")
print(f"Number of validation samples: {len(val_dataset)}")

Number of training samples: 75750
Number of validation samples: 25250


Model: ResNet-50 pretrained on ImageNet

In [9]:
# Model setup
# `pretrained=True` is deprecated in recent torchvision releases; the explicit
# weights enum selects the same ImageNet checkpoint (resnet50-0676ba61.pth).
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)

# Replace the ImageNet classification head with one sized for our classes
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, NUM_CLASSES)
model = model.to(device)

# Loss and optimizer (all layers are fine-tuned, not just the new head)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 133MB/s]


Training function with per-epoch validation

In [10]:
# Training loop
def _run_epoch(model, criterion, loader, run_device, optimizer=None):
    """Run one pass over ``loader`` and return ``(mean_loss, accuracy)`` as floats.

    When ``optimizer`` is given the model is put in training mode and updated
    after every batch; otherwise the model is put in eval mode and gradients
    are disabled. Both values are averaged over the samples actually seen and
    are ``0.0`` when the loader yields no batches.
    """
    training = optimizer is not None
    model.train(training)  # model.train(False) is equivalent to model.eval()

    total_loss = 0.0
    total_correct = 0
    total_seen = 0

    with torch.set_grad_enabled(training):
        for inputs, labels in loader:
            inputs = inputs.to(run_device, non_blocking=True)
            labels = labels.to(run_device, non_blocking=True)

            if training:
                optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            if training:
                loss.backward()
                optimizer.step()

            _, preds = torch.max(outputs, 1)
            batch_size = inputs.size(0)
            # criterion returns a batch mean; weight it by the batch size so
            # a smaller final batch does not skew the epoch average.
            total_loss += loss.item() * batch_size
            total_correct += torch.sum(preds == labels).item()
            total_seen += batch_size

    if total_seen == 0:
        return 0.0, 0.0
    return total_loss / total_seen, total_correct / total_seen


def train_model(model, criterion, optimizer, train_loader, val_loader, num_epochs,
                checkpoint_path="best_food101_resnet50.pth"):
    """Train ``model`` and checkpoint the weights with the best validation accuracy.

    Args:
        model: Network to train; batches are moved to the device of its
            first parameter (CPU if it has no parameters).
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepping the model's parameters.
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        val_loader: Iterable of ``(inputs, labels)`` validation batches.
        num_epochs: Number of train + validate passes to run.
        checkpoint_path: Where the best ``state_dict`` is written with
            ``torch.save``; it is overwritten whenever validation accuracy improves.

    Returns:
        The best validation accuracy observed, as a float in ``[0, 1]``.
    """
    # Use the model's own device rather than a notebook-level global so the
    # function does not depend on which cells were executed before it.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    best_acc = 0.0

    for epoch in range(num_epochs):
        start_time = time.time()

        epoch_loss, epoch_acc = _run_epoch(model, criterion, train_loader,
                                           run_device, optimizer)
        val_loss, val_acc = _run_epoch(model, criterion, val_loader, run_device)

        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(model.state_dict(), checkpoint_path)

        elapsed = time.time() - start_time
        print(f"Epoch {epoch+1}/{num_epochs} | "
              f"Train Loss: {epoch_loss:.4f} | Train Acc: {epoch_acc:.4f} | "
              f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f} | "
              f"Time: {elapsed:.1f}s")

    print(f"Best validation accuracy: {best_acc:.4f}")
    return best_acc

Feature extraction

In [11]:
# Feature extractor class
class FeatureExtractor(nn.Module):
    """Wrap a network, drop its last child module, and emit flattened features."""

    def __init__(self, base_model):
        super().__init__()
        # Keep every child except the final one (the fc layer for a ResNet)
        backbone_layers = list(base_model.children())
        backbone_layers.pop()
        self.features = nn.Sequential(*backbone_layers)
        self.flatten = nn.Flatten()

    @torch.no_grad()
    def forward(self, x):
        # Gradients are disabled: the wrapper is used for inference only
        feature_maps = self.features(x)
        return self.flatten(feature_maps)  # [B, 2048] for a ResNet-50 backbone

# Extract features from dataloader
def extract_features(model, dataloader, device=None):
    """Run ``model`` over every batch of ``dataloader`` and collect the outputs.

    Args:
        model: Module called as ``model(inputs)``; it is switched to eval mode.
        dataloader: Iterable of ``(inputs, labels)`` batches.
        device: Device the inputs are moved to. Defaults to the device of the
            model's first parameter (CPU if the model has no parameters), so
            the function does not rely on a notebook-level global.

    Returns:
        ``(features, labels)``: the model outputs concatenated along dim 0 and
        moved to the CPU, and the labels concatenated along dim 0. Two empty
        tensors are returned when the dataloader yields no batches.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    all_features = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs = inputs.to(device, non_blocking=True)
            features = model(inputs)
            # Move each batch off the accelerator right away so the full
            # feature matrix is accumulated in host memory.
            all_features.append(features.cpu())
            all_labels.append(labels)

    if not all_features:
        # torch.cat rejects an empty list; return empty tensors instead
        return torch.empty(0), torch.empty(0, dtype=torch.long)

    features_tensor = torch.cat(all_features)
    labels_tensor = torch.cat(all_labels)
    return features_tensor, labels_tensor


Training the model and extracting features

In [12]:
# Train model
train_model(model, criterion, optimizer, train_loader, val_loader, NUM_EPOCHS)

# Feature extraction after training
# Use the fine-tuned network, restored to its best checkpoint, rather than a
# fresh ImageNet-only ResNet-50, which would ignore the training above.
best_checkpoint = "best_food101_resnet50.pth"  # file written by train_model
if os.path.exists(best_checkpoint):
    model.load_state_dict(torch.load(best_checkpoint, map_location=device))
feature_model = FeatureExtractor(model)

# Embed the training images with the deterministic validation transforms and
# in a fixed order: train_loader shuffles and applies random crops/flips,
# which would make the saved features differ from run to run.
train_feature_dataset = datasets.ImageFolder(os.path.join(SPLIT_DIR, "train"),
                                             transform=val_transforms)
train_feature_loader = DataLoader(train_feature_dataset, batch_size=BATCH_SIZE,
                                  shuffle=False, num_workers=num_workers,
                                  pin_memory=True)

train_feats, train_lbls = extract_features(feature_model, train_feature_loader)
val_feats, val_lbls = extract_features(feature_model, val_loader)

torch.save((train_feats, train_lbls), "train_features.pt")
torch.save((val_feats, val_lbls), "val_features.pt")
print("Extracted and saved feature embeddings.")

Epoch 1/10 | Train Loss: 2.0729 | Train Acc: 0.4980 | Val Loss: 1.0413 | Val Acc: 0.7179 | Time: 857.3s
Epoch 2/10 | Train Loss: 1.4989 | Train Acc: 0.6203 | Val Loss: 0.8457 | Val Acc: 0.7653 | Time: 852.1s
Epoch 3/10 | Train Loss: 1.3403 | Train Acc: 0.6584 | Val Loss: 0.7847 | Val Acc: 0.7824 | Time: 850.4s
Epoch 4/10 | Train Loss: 1.2386 | Train Acc: 0.6816 | Val Loss: 0.7152 | Val Acc: 0.8019 | Time: 852.8s
Epoch 5/10 | Train Loss: 1.1737 | Train Acc: 0.6944 | Val Loss: 0.6895 | Val Acc: 0.8101 | Time: 851.3s
Epoch 6/10 | Train Loss: 1.1046 | Train Acc: 0.7123 | Val Loss: 0.6703 | Val Acc: 0.8160 | Time: 852.5s
Epoch 7/10 | Train Loss: 1.0565 | Train Acc: 0.7232 | Val Loss: 0.6502 | Val Acc: 0.8222 | Time: 852.7s
Epoch 8/10 | Train Loss: 1.0235 | Train Acc: 0.7327 | Val Loss: 0.6274 | Val Acc: 0.8290 | Time: 852.7s
Epoch 9/10 | Train Loss: 0.9848 | Train Acc: 0.7414 | Val Loss: 0.6433 | Val Acc: 0.8258 | Time: 847.6s
Epoch 10/10 | Train Loss: 0.9404 | Train Acc: 0.7513 | Val Loss:

Test on external and unseen data

In [3]:
import torch
import torch.nn as nn
from torchvision import models, transforms
from PIL import Image
import os

In [4]:
# Parameters
# Checkpoint file to load; same file name that the training loop saves to
MODEL_PATH = "best_food101_resnet50.pth"
# Output size of the classification layer rebuilt in load_model
NUM_CLASSES = 101
# Run inference on the GPU when CUDA is available, otherwise on the CPU
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [5]:
# Image transform (must match validation transforms)
_preprocess_steps = [
    transforms.Resize(256),        # resize first ...
    transforms.CenterCrop(224),    # ... then take the central 224x224 crop
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
]
transform = transforms.Compose(_preprocess_steps)

In [6]:
# Load model
def load_model(model_path):
    """Rebuild the fine-tuned ResNet-50 and load its weights from ``model_path``.

    Args:
        model_path: Path to a ``state_dict`` saved with ``torch.save``.

    Returns:
        The model on ``DEVICE`` in eval mode, with an ``fc`` layer of
        ``NUM_CLASSES`` outputs.
    """
    # weights=None replaces the deprecated pretrained=False: no ImageNet
    # download is needed because every weight comes from the checkpoint.
    model = models.resnet50(weights=None)
    num_ftrs = model.fc.in_features
    # The head must have the checkpoint's shape before load_state_dict runs
    model.fc = nn.Linear(num_ftrs, NUM_CLASSES)
    # map_location lets a checkpoint saved on GPU load on a CPU-only machine
    model.load_state_dict(torch.load(model_path, map_location=DEVICE))
    model.to(DEVICE)
    model.eval()
    return model

In [7]:
# Predict function
def predict(image_path, model, class_names):
    """Classify a single image file and return the predicted class name.

    Args:
        image_path: Path of the image to classify.
        model: Model called on a batch of one preprocessed image.
        class_names: Sequence mapping the model's output index to a class name.

    Returns:
        The entry of ``class_names`` at the index of the highest output score.
    """
    # Open inside a context manager so the file handle is released as soon
    # as the pixels have been converted, instead of waiting for garbage collection.
    with Image.open(image_path) as raw_image:
        image = raw_image.convert('RGB')

    # unsqueeze(0) adds the batch dimension the model expects
    image_tensor = transform(image).unsqueeze(0).to(DEVICE)

    with torch.no_grad():
        outputs = model(image_tensor)
        _, predicted = torch.max(outputs, 1)
        return class_names[predicted.item()]

In [8]:
# Load class names (from folder structure)
def load_class_names(train_dir):
    """Return the sorted names of the class sub-folders of ``train_dir``.

    Only directories are considered. torchvision's ImageFolder derives its
    class indices from the sorted sub-directories, so a stray file
    (e.g. ``.DS_Store``) must not appear in the list or every later index
    would point at the wrong class name.
    """
    with os.scandir(train_dir) as entries:
        return sorted(entry.name for entry in entries if entry.is_dir())

In [9]:

model = load_model(MODEL_PATH)
class_names = load_class_names("datasets/food-101_split/train")

# Pick an image from the validation split, which the model was not trained on
# (the training split would overstate accuracy), and use a relative path so it
# resolves from the same working directory as the class-name folder above.
val_class_dir = os.path.join("datasets/food-101_split", "val", "beet_salad")
test_image_path = os.path.join(val_class_dir, sorted(os.listdir(val_class_dir))[0])
prediction = predict(test_image_path, model, class_names)
print(f"Predicted class: {prediction}")



Predicted class: beet_salad


Saving the class names to JSON

In [10]:
import os
import json

# Path to training dataset (its entries are used as the class names)
TRAIN_DIR = "datasets/food-101_split/train"
# File the sorted class-name list is written to
OUTPUT_JSON = "class_names.json"

def save_class_names(train_dir, output_path):
    """Write the sorted class-folder names of ``train_dir`` to a JSON file.

    Args:
        train_dir: Directory containing one sub-folder per class.
        output_path: Destination JSON file; it receives a list of names.

    Only directories are listed. torchvision's ImageFolder builds its class
    indices from the sorted sub-directories, so stray files must be excluded
    to keep list positions aligned with the model's output indices.
    """
    # Get sorted list of class names (sub-directories only)
    with os.scandir(train_dir) as entries:
        class_names = sorted(entry.name for entry in entries if entry.is_dir())

    # Save to JSON
    with open(output_path, 'w') as f:
        json.dump(class_names, f, indent=2)

    print(f"Saved {len(class_names)} class names to {output_path}")

# Run: write the class names found under TRAIN_DIR to OUTPUT_JSON.
# The guard keeps this from running if the code is imported as a module.
if __name__ == "__main__":
    save_class_names(TRAIN_DIR, OUTPUT_JSON)


Saved 101 class names to class_names.json
