# Image classification

In [1]:
# !pip install -U -q datasets
# !pip install -U -q accelerate
# !pip install -U -q evaluate

import warnings
warnings.filterwarnings('ignore')
import os
os.environ["HF_HUB_ETAG_TIMEOUT"]="86400"
os.environ["HF_HUB_DOWNLOAD_TIMEOUT"]="86400"
os.environ["HF_ENDPOINT"]="http://localhost:8081/artifactory/api/huggingfaceml/mldemo-hg-remote"
os.environ["HF_TOKEN"]=os.environ["localtoken"]


### Before training the model 

In [2]:
from transformers import pipeline
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
pipe = pipeline("image-classification", model='google/vit-base-patch16-224-in21k', device=device)
import requests
from PIL import Image
from io import BytesIO

# url = 'https://www.indianhealthyrecipes.com/wp-content/uploads/2015/10/pizza-recipe-1.jpg'
url = 'https://parafit.in/wp-content/uploads/2019/03/Tawa-Roti-600x500.jpg'
response = requests.get(url,verify=False)
image = Image.open(BytesIO(response.content))

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.
Device set to use cpu


In [3]:
pipe(image)

[{'label': 'LABEL_0', 'score': 0.5102763175964355},
 {'label': 'LABEL_1', 'score': 0.48972368240356445}]

In [4]:
image.show()

### Train the model with custom Data

In [5]:
from datasets import load_dataset
food = load_dataset("rajistics/indian_food_images",token=True)


In [6]:
food

DatasetDict({
    train: Dataset({
        features: ['image', 'label'],
        num_rows: 5328
    })
    test: Dataset({
        features: ['image', 'label'],
        num_rows: 941
    })
})

In [7]:
type(food['train'].features['label'])

datasets.features.features.ClassLabel

In [8]:
food['train'].features['label']

ClassLabel(names=['burger', 'butter_naan', 'chai', 'chapati', 'chole_bhature', 'dal_makhani', 'dhokla', 'fried_rice', 'idli', 'jalebi', 'kaathi_rolls', 'kadai_paneer', 'kulfi', 'masala_dosa', 'momos', 'paani_puri', 'pakode', 'pav_bhaji', 'pizza', 'samosa'], id=None)

In [9]:
labels = food['train'].features['label'].names
label2id, id2label = dict(), dict()

for i, label in enumerate(labels):
    label2id[label] = i
    id2label[i] = label

print(label2id)
print(id2label)

{'burger': 0, 'butter_naan': 1, 'chai': 2, 'chapati': 3, 'chole_bhature': 4, 'dal_makhani': 5, 'dhokla': 6, 'fried_rice': 7, 'idli': 8, 'jalebi': 9, 'kaathi_rolls': 10, 'kadai_paneer': 11, 'kulfi': 12, 'masala_dosa': 13, 'momos': 14, 'paani_puri': 15, 'pakode': 16, 'pav_bhaji': 17, 'pizza': 18, 'samosa': 19}
{0: 'burger', 1: 'butter_naan', 2: 'chai', 3: 'chapati', 4: 'chole_bhature', 5: 'dal_makhani', 6: 'dhokla', 7: 'fried_rice', 8: 'idli', 9: 'jalebi', 10: 'kaathi_rolls', 11: 'kadai_paneer', 12: 'kulfi', 13: 'masala_dosa', 14: 'momos', 15: 'paani_puri', 16: 'pakode', 17: 'pav_bhaji', 18: 'pizza', 19: 'samosa'}


## Preprocessing and Evaluator

In [10]:
from transformers import AutoImageProcessor

model_ckpt = "google/vit-base-patch16-224-in21k"
image_processor = AutoImageProcessor.from_pretrained(model_ckpt, use_fast=True)

In [11]:
from torchvision.transforms import RandomResizedCrop, Compose, Normalize, ToTensor

normalize = Normalize(mean=image_processor.image_mean, std=image_processor.image_std)

size = (
    image_processor.size['shorted_edge']
    if "shorted_edge" in image_processor.size
    else (image_processor.size['height'], image_processor.size['width'])
)


_transforms = Compose([RandomResizedCrop(size), ToTensor(), normalize])

def transforms(examples):
    examples['pixel_values'] = [_transforms(img.convert('RGB')) for img in examples['image']]
    del examples['image']

    return examples


In [12]:
food = food.with_transform(transforms)

## Evaluate the model

In [None]:
# Ensure the evaluate library is installed
!git clone https://github.com/huggingface/evaluate.git

import evaluate
import numpy as np

# Load the accuracy metric
accuracy = evaluate.load('evaluate/metrics/accuracy/accuracy.py')

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)

    return accuracy.compute(predictions=predictions, references=labels)


UsageError: Line magic function `%git` not found.


## Vision Transformer (ViT) Fine Tuning

In [27]:
from transformers import AutoModelForImageClassification, TrainingArguments, Trainer
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = AutoModelForImageClassification.from_pretrained(
    model_ckpt,
    num_labels = len(labels),
    id2label=id2label,
    label2id=label2id
).to(device)

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [28]:
from transformers import AutoModelForImageClassification, TrainingArguments, Trainer

args = TrainingArguments(
    output_dir = "train_dir",
    remove_unused_columns=False,
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=5e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    gradient_accumulation_steps=4,
    num_train_epochs=4,
    load_best_model_at_end=True,
    metric_for_best_model='accuracy'
)

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=food['train'],
    eval_dataset=food['test'],
    tokenizer=image_processor,
    compute_metrics=compute_metrics
)

trainer.train()


Epoch,Training Loss,Validation Loss,Accuracy
1,No log,1.633377,0.825717
2,No log,1.098817,0.892667
3,No log,0.864141,0.895855


TrainOutput(global_step=332, training_loss=1.3279238781297062, metrics={'train_runtime': 850.4617, 'train_samples_per_second': 25.059, 'train_steps_per_second': 0.39, 'total_flos': 1.635654783678677e+18, 'train_loss': 1.3279238781297062, 'epoch': 3.960960960960961})

In [29]:
trainer.save_model('naga-food_classification')

### Upload the model to JFrog ML repo

In [30]:
import os
os.environ["JF_URL"] = "http://localhost:8081/artifactory"
os.environ["JF_ACCESS_TOKEN"] = os.environ["localtoken"]


In [31]:
import frogml
frogml.files.log_model(
   source_path="./naga-food_classification-to-mlrepo",
   repository="mldemo-ml-local",
   model_name="naga-food_classification-to-mlrepo",
   version="2.0.0", # optional
   properties={"usage": "fordemo"} # optional
)


./naga-food_classification-to-mlrepo/training_args.bin: 100%|██████████| 5.30k/5.30k [00:00<00:00, 4.21MB/s]
./naga-food_classification-to-mlrepo/config.json: 100%|██████████| 1.47k/1.47k [00:00<00:00, 1.16MB/s]
./naga-food_classification-to-mlrepo/preprocessor_config.json: 100%|██████████| 431/431 [00:00<00:00, 477kB/s]
./naga-food_classification-to-mlrepo/model.safetensors: 100%|██████████| 343M/343M [00:00<00:00, 9.00TB/s]

2025-03-19 11:30:34,572 - INFO - frogml_storage._log_config.frog_ml.__upload_model:523 - Model: "naga-food_classification-to-mlrepo", version: "2.0.0" has been uploaded successfully





### Load the model and validate it

In [34]:
import frogml
import os
from pathlib import Path
os.environ["JF_URL"] = "http://localhost:8081/artifactory"
os.environ["JF_ACCESS_TOKEN"] = os.environ["localtoken"]

loaded_model: Path = frogml.files.load_model(
    repository="mldemo-ml-local",
    model_name="naga-food_classification-to-mlrepo",
    version="1.0.0",
    target_path="./naga-food_classification-to-mlrepo"
)



2025-03-19 11:31:00,728 - INFO - frogml_storage._log_config.frog_ml.__download_entity_version:314 - File './naga-food_classification-to-mlrepo/config.json' already exists locally, will not download it.
2025-03-19 11:31:00,729 - INFO - frogml_storage._log_config.frog_ml.__download_entity_version:314 - File './naga-food_classification-to-mlrepo/training_args.bin' already exists locally, will not download it.
2025-03-19 11:31:00,729 - INFO - frogml_storage._log_config.frog_ml.__download_entity_version:314 - File './naga-food_classification-to-mlrepo/preprocessor_config.json' already exists locally, will not download it.
2025-03-19 11:31:00,730 - INFO - frogml_storage._log_config.frog_ml.__download_entity_version:314 - File './naga-food_classification-to-mlrepo/model.safetensors' already exists locally, will not download it.


In [35]:
from transformers import pipeline
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
pipe = pipeline("image-classification", model='./naga-food_classification-to-mlrepo', device=device)

In [36]:
import requests
from PIL import Image
from io import BytesIO

# url = 'https://www.indianhealthyrecipes.com/wp-content/uploads/2015/10/pizza-recipe-1.jpg'
url = 'https://parafit.in/wp-content/uploads/2019/03/Tawa-Roti-600x500.jpg'
# url='https://images.pexels.com/photos/1460872/pexels-photo-1460872.jpeg'
response = requests.get(url,verify=False)
image = Image.open(BytesIO(response.content))
image.show()

In [37]:
pipe(image)

[{'label': 'chapati', 'score': 0.5631769299507141},
 {'label': 'butter_naan', 'score': 0.05679315701127052},
 {'label': 'chole_bhature', 'score': 0.03309432417154312},
 {'label': 'kaathi_rolls', 'score': 0.02934757061302662},
 {'label': 'fried_rice', 'score': 0.02585352398455143}]