In [1]:
import polars as pl
import io
import numpy as np
from PIL import Image
from sklearn.preprocessing import LabelBinarizer
from sklearn.preprocessing import LabelEncoder

In [2]:
!pip install datasets



In [3]:
from datasets import load_dataset

ds = load_dataset("Scuccorese/food-ingredients-dataset")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [4]:
# Removing uneccessary data
ds = ds.remove_columns(["subcategory", "ingredient"])
ds = ds['train'].train_test_split(test_size=0.2, seed=42)
print(ds)

DatasetDict({
    train: Dataset({
        features: ['category', 'image'],
        num_rows: 5340
    })
    test: Dataset({
        features: ['category', 'image'],
        num_rows: 1336
    })
})


In [5]:
from PIL import Image
import numpy as np
import pandas as pd
import torch
from datasets import load_dataset
from sklearn.preprocessing import LabelEncoder
from transformers import AutoProcessor
from transformers import AutoImageProcessor, AutoModelForImageClassification


# Initialize the processor
processor = AutoProcessor.from_pretrained("Kaludi/food-category-classification-v2.0")

label_encoder = LabelEncoder()
label_encoder.fit(ds["train"]["category"])

# Function to encode labels for a batch of examples
def encode_labels(examples):
    examples["label"] = label_encoder.transform(examples["category"])
    return examples

def preprocess_images_and_labels(examples):

    # Convert images to RGB format
    examples["image"] = [
        img.convert("RGB") if img.mode != "RGB" else img
        for img in examples["image"]
    ]

    examples["image"] = [
        img.resize((512, 512)) for img in examples["image"]
    ]

    # Convert images to numpy arrays for processing
    examples["image"] = [
        np.array(img) for img in examples["image"]
    ]

    inputs = processor(images=examples["image"], return_tensors="pt")

    inputs["label"] = examples["label"]
    return inputs


ds = ds.map(encode_labels, batched=True, batch_size=256)
ds = ds.with_transform(preprocess_images_and_labels)

# Check the dataset
print(ds)


DatasetDict({
    train: Dataset({
        features: ['category', 'image', 'label'],
        num_rows: 5340
    })
    test: Dataset({
        features: ['category', 'image', 'label'],
        num_rows: 1336
    })
})


In [6]:
# Freeze all layers except the classifier (output layer)
model = AutoModelForImageClassification.from_pretrained("Kaludi/food-category-classification-v2.0", num_labels=12)
for param in model.parameters():
    param.requires_grad = False

# Unfreeze the classifier layer (final classification head)
model.classifier.requires_grad = True

In [7]:
import torch
import torch.nn as nn

if isinstance(model.classifier, nn.Sequential):
    in_features = model.classifier[-1].in_features

    model.classifier[-1] = nn.Linear(in_features, 12)

print(model)


SwinForImageClassification(
  (swin): SwinModel(
    (embeddings): SwinEmbeddings(
      (patch_embeddings): SwinPatchEmbeddings(
        (projection): Conv2d(3, 128, kernel_size=(4, 4), stride=(4, 4))
      )
      (norm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): SwinEncoder(
      (layers): ModuleList(
        (0): SwinStage(
          (blocks): ModuleList(
            (0-1): 2 x SwinLayer(
              (layernorm_before): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
              (attention): SwinAttention(
                (self): SwinSelfAttention(
                  (query): Linear(in_features=128, out_features=128, bias=True)
                  (key): Linear(in_features=128, out_features=128, bias=True)
                  (value): Linear(in_features=128, out_features=128, bias=True)
                  (dropout): Dropout(p=0.0, inplace=False)
                )
                (output): SwinSelfO

In [8]:
from torch.utils.data import DataLoader

# Create a DataLoader for the validation dataset
val_loader = DataLoader(ds["test"], batch_size=16)

In [9]:
from transformers import AutoProcessor
from sklearn.metrics import accuracy_score
import torch


def evaluate_model_batch(model, dataloader):
    all_preds = []
    all_labels = []

    for batch in dataloader:
        pixel_values = batch["pixel_values"].to("cuda") if torch.cuda.is_available() else batch["pixel_values"]
        labels = torch.tensor(batch["label"]).to("cuda") if torch.cuda.is_available() else torch.tensor(batch["label"])

        with torch.no_grad():
            outputs = model(pixel_values=pixel_values)
            preds = torch.argmax(outputs.logits, dim=-1)

        # Collect predictions and labels
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

    # Compute accuracy
    accuracy = accuracy_score(all_labels, all_preds)
    return accuracy


In [10]:
accuracy = evaluate_model_batch(model, val_loader)

  labels = torch.tensor(batch["label"]).to("cuda") if torch.cuda.is_available() else torch.tensor(batch["label"])


In [11]:
print(f"Accuracy: {accuracy}")

Accuracy: 0.26047904191616766
