In [None]:
import torch
from torchvision import models
from torch import nn
import os
import json
import csv
import pandas as pd
import tarfile
from torchvision import transforms
from PIL import Image
import torch.optim as optim
from torch.utils.data import DataLoader
import shutil


In [None]:
# @dataset{Food Recognition 2022,
# 	author={AIcrowd},
# 	title={Food Recognition 2022},
# 	year={2022},
# 	url={https://www.kaggle.com/datasets/awsaf49/food-recognition-2022-dataset}
# }


##This is citation of our datasource

In [None]:
#Paths for csv/sorted data
tar_path = "./food-recognition-DatasetNinja.tar"
training_csv = "train/training_annotations.csv"
validation_csv = "val/validation_annotations.csv"
testing_csv = "test/testing_annotations.csv"
extracted_dir = "./extracted_ann"
output_meta_json = "meta.json"
training_image_dir = "train/images"
validation_image_dir = "val/images"
testing_image_dir = "test/images"


training_data = []
validation_data = []
testing_data = []

os.makedirs(extracted_dir, exist_ok=True)
os.makedirs(training_image_dir, exist_ok=True)
os.makedirs(validation_image_dir, exist_ok=True)
os.makedirs(testing_image_dir, exist_ok=True)

with tarfile.open(tar_path, "r") as tar:
    tar_contents = tar.getnames()
    for name in tar_contents:
        if name.endswith(".json"):
            tar.extract(name, path=extracted_dir)
            try:
                if "meta" in name:
                    extracted_meta_path = os.path.join(extracted_dir, name)
                    os.rename(extracted_meta_path, output_meta_json)
                    print("Meta Data Saved!")
                    continue
                if "test" in name:
                    current_list = testing_data
                elif name.startswith("training/"):
                    current_list = training_data
                elif name.startswith("validation/"):
                    current_list = validation_data
                else:
                    continue

                extracted_path = os.path.join(extracted_dir, name)
                with open(extracted_path, 'r') as f:
                    annotation = json.load(f)

                    image_name = os.path.basename(name).replace(".json", "")
                    image_size = annotation.get("size", {})
                    image_height = image_size.get("height", None)
                    image_width = image_size.get("width", None)

                    objects = annotation.get("objects", [])
                    for obj in objects:
                        class_title = obj.get("classTitle", "unknown")
                        points = obj.get("points", {}).get("exterior", [])
                        points_str = ";".join([f"({x},{y})" for x, y in points])

                        current_list.append({
                            "filename": image_name,
                            "labels": class_title,
                            "polygon": points_str,
                            "image_width": image_width,
                            "image_height": image_height
                        })
            except json.JSONDecodeError as e:
                print("Error message:", e)
                continue

        elif name.endswith(".jpg"):
            if "test" in name:
                image_dir = testing_image_dir
            elif name.startswith("training/"):
                image_dir = training_image_dir
            elif name.startswith("validation/"):
                image_dir = validation_image_dir
            else:
                continue

            tar.extract(name, path=extracted_dir)
            extracted_image_path = os.path.join(extracted_dir, name)
            destination_path = os.path.join(image_dir, os.path.basename(name))
            shutil.move(extracted_image_path, destination_path)


training_df = pd.DataFrame(training_data)
validation_df = pd.DataFrame(validation_data)
testing_df = pd.DataFrame(testing_data)

training_df.to_csv(training_csv, index=False)
validation_df.to_csv(validation_csv, index=False)
testing_df.to_csv(testing_csv, index=False)

print("Data saved:")
print("Number of Training Samples: ", len(training_df))
print("Number of Validation Samples: ", len(validation_df))
print("Number of Testing Samples: ", len(testing_df))


Meta Data Saved!
Data saved:
Number of Training Samples:  15561
Number of Validation Samples:  468
Number of Testing Samples:  0


In [None]:
#Inspecting the labels for the data model
annotations = pd.read_csv("train/training_annotations.csv")
print(annotations['labels'].head())


0        chips-french-fries
1                 hamburger
2             hamburger-bun
3             hamburger-bun
4    salad-leaf-salad-green
Name: labels, dtype: object


In [None]:
#Label Encoder Class for Encoding the specific food labels
#Encoder makes string labels into numeric values
class LabelEncoder:
    def __init__(self, labels):
        self.classes = sorted(labels)
        self.label_to_index = {label: idx for idx, label in enumerate(self.classes)}
        self.index_to_label = {idx: label for idx, label in enumerate(self.classes)}

    def encode(self, label):
        #Converts the labels into numbers
        return self.label_to_index[label]

    def decode(self, index):
        #Converts the labels back into string variables
        return self.index_to_label[index]


MODEL DEVELOPMENT!

In [None]:
#Dataset is based on documentation from resnet18
class Image_Data(torch.utils.data.Dataset):
    def __init__(self, annotations_file, img_dir, transform=None):
        self.annotations = pd.read_csv(annotations_file)
        self.img_dir = img_dir
        self.transform = transform
        unique_labels = self.annotations['labels'].unique()
        self.encoder = LabelEncoder(unique_labels)

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.annotations.iloc[idx, 0])
        image = Image.open(img_path).convert("RGB")
        label = self.annotations.iloc[idx, 1]
        encoded_label = self.encoder.encode(label)
        #given the transformation, apply to the image
        if self.transform:
            image = self.transform(image)
        return image, encoded_label


In [None]:
#Defining transformations for both the training and validations set
train_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

val_transform = transforms.Compose([
    #resnet18 needs images to be 256,256
    transforms.Resize((256, 256)),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [None]:
#We encounter the error that there are some mis labeled files so we
#are removing them from our datasets

#Start with the training dataset
annotations = "train/training_annotations.csv"
img_dir = "train/images"

filtered_metadata = []
with open(annotations, 'r') as csv_file:
    reader = csv.DictReader(csv_file)
    for row in reader:
        file_path = os.path.join(img_dir, row["filename"])
        if os.path.exists(file_path):
            filtered_metadata.append(row)


filtered_metadata_file = "train/train_annotations_filtered.csv"
with open(filtered_metadata_file, 'w', newline='') as csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=filtered_metadata[0].keys())
    writer.writeheader()
    writer.writerows(filtered_metadata)

print(f"Filtered metadata saved to {filtered_metadata_file}.")
print(f"Number of valid entries: {len(filtered_metadata)}")



Filtered metadata saved to train/train_annotations_filtered.csv.
Number of valid entries: 15545


In [None]:
#Now we are doing the same thing for the validation set
annotations = "val/validation_annotations.csv"
img_dir = "val/images"

filtered_metadata = []
with open(annotations, 'r') as csv_file:
    reader = csv.DictReader(csv_file)
    for row in reader:
        file_path = os.path.join(img_dir, row["filename"])
        if os.path.exists(file_path):
            filtered_metadata.append(row)


# Save the filtered metadata
filtered_metadata_file = "val/val_annotations_filtered.csv"
with open(filtered_metadata_file, 'w', newline='') as csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=filtered_metadata[0].keys())
    writer.writeheader()
    writer.writerows(filtered_metadata)

print(f"Filtered metadata saved to {filtered_metadata_file}.")
print(f"Number of valid entries: {len(filtered_metadata)}")

Filtered metadata saved to val/val_annotations_filtered.csv.
Number of valid entries: 468


In [None]:
#We really don't need this function since it was created when we thought our dataset was multi-label
def custom_collate_fn(batch):
    images, targets = zip(*batch)
    images = torch.stack(images)  
    targets = torch.tensor(targets)  
    return images, targets

train_dataset = Image_Data(annotations_file="train/train_annotations_filtered.csv",
                                  img_dir="train/images",
                                  transform=train_transform)

val_dataset = Image_Data(annotations_file="val/val_annotations_filtered.csv",
                                img_dir="val/images",
                                transform=val_transform)

## Creating Dataloaders to put into our model
train_loader = DataLoader( train_dataset,
    batch_size=40,
    shuffle=True,
    collate_fn=custom_collate_fn)

val_loader = DataLoader( val_dataset,
    batch_size=40,
    shuffle=True,
    collate_fn=custom_collate_fn)

In [None]:
model = models.resnet18(pretrained=True)

num_classes = 498

num_classes_calculated = len(train_dataset.encoder.classes)  
model.fc = nn.Linear(model.fc.in_features, num_classes_calculated)

for param in model.parameters():
    param.requires_grad = False
for param in model.fc.parameters():
    param.requires_grad = True




device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 173MB/s]


In [None]:
# Some Training parameters
batch_size = 20
num_epochs = 10
learning_rate = 0.001  ##Started off with a very small learning rate
criterion = nn.CrossEntropyLoss()  ##We googled best loss function for single labeled image recognition
optimizer = optim.Adam(model.parameters(), lr=0.001)




In [None]:
#DEBUGGING:
missing_files = []
print(len(train_dataset))
for i in range(len(train_dataset)):
    img_path = os.path.join(train_dataset.img_dir, train_dataset.annotations.iloc[i, 0])
    if not os.path.exists(img_path):
        missing_files.append(img_path)

print(f"Missing files: {len(missing_files)}")
if missing_files:
    print("Titles:", missing_files[:5])


15545
Missing files: 0


In [None]:
##TRAINING CELL, LOTS OF COMPUTE
num_epochs = 10

for epoch in range(num_epochs):
    #Starting the Training Phase
    model.train()
    train_loss = 0.0
    for images, targets in train_loader:
        images, targets = images.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss/len(train_loader):.4f}")

    #Starting the evaluation phase
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for images, targets in val_loader:
            images, targets = images.to(device), targets.to(device)
            outputs = model(images)
            loss = criterion(outputs, targets)

            val_loss += loss.item()

    print(f"Validation Loss: {val_loss/len(val_loader):.4f}")



Epoch [1/10], Train Loss: 5.5887
Validation Loss: 7.3996
Epoch [2/10], Train Loss: 4.4455
Validation Loss: 7.9944
Epoch [3/10], Train Loss: 3.9524
Validation Loss: 8.5747
Epoch [4/10], Train Loss: 3.6555
Validation Loss: 8.8458
Epoch [5/10], Train Loss: 3.4404
Validation Loss: 9.2469
Epoch [6/10], Train Loss: 3.2976
Validation Loss: 9.4683
Epoch [7/10], Train Loss: 3.1864
Validation Loss: 9.8549
Epoch [8/10], Train Loss: 3.0844
Validation Loss: 9.8250
Epoch [9/10], Train Loss: 2.9938
Validation Loss: 10.0877
Epoch [10/10], Train Loss: 2.9283
Validation Loss: 10.3156


In [None]:
##SAVING OUR MODEL

torch.save(model.state_dict(), "resnet18_images.pth")
print("Model saved!")

Model saved!


In [1]:

#Testing out the model with image from our test dataset
model.load_state_dict(torch.load("resnet18_images.pth"))
model.eval()

image_path = "test/images/041845.jpg"

image = Image.open(image_path).convert("RGB")
transform = val_transform
input_tensor = transform(image).unsqueeze(0).to(device)


with torch.no_grad():
    outputs = model(input_tensor)
    _, predicted_index = torch.max(outputs, 1)

predicted_label = train_dataset.encoder.decode(predicted_index.item())

print(f"Predicted label: {predicted_label}")


NameError: name 'model' is not defined