Hi there.
This is a demo of creating a simple custom text classification model.


In [27]:
import warnings

# Silence all warnings from this point on so the cell outputs stay uncluttered.
warnings.simplefilter("ignore")

In [1]:
# Install dependencies (this is mostly for Google Colab, as the other dependences are available by default in Colab)
try:
  import datasets, evaluate, accelerate
  import gradio as gr
except ModuleNotFoundError:
  !pip install -U datasets evaluate accelerate gradio # -U stands for "upgrade" so we'll get the latest version by default
  import datasets, evaluate, accelerate
  import gradio as gr

import random

import numpy as np
import pandas as pd

import torch
import transformers

# Report the versions of the core libraries used in this notebook
for library in (transformers, datasets, torch):
    print(f"Using {library.__name__} version: {library.__version__}")

  from .autonotebook import tqdm as notebook_tqdm


Using transformers version: 4.49.0
Using datasets version: 3.4.1
Using torch version: 2.6.0+cu126


Loading the dataset from the Hugging Face Hub

Dataset credits: Daniel Bourke

In [2]:
# Download the food / not-food caption dataset from the Hugging Face Hub
dataset = datasets.load_dataset("mrdbourke/learn_hf_food_not_food_image_captions")

# Show the splits and columns that were loaded
dataset

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 250
    })
})

In [3]:
# Number of rows in the "train" split
len(dataset["train"])

250

In [4]:
# Range of valid row indices for the "train" split (sampled from in the next cell)
range(len(dataset["train"]))

range(0, 250)

In [5]:
# Visualise the data: draw 5 distinct row indices at random from the training
# split, fetch those rows, and print each caption next to its label.

import random

random_indexs = random.sample(range(len(dataset["train"])), k=5)
random_samples = dataset["train"][random_indexs]

for text, label in zip(random_samples["text"], random_samples["label"]):
    print(f"Text: {text} | Label: {label}")

Text: Cucumbers on a plate, served with a side of tangy tzatziki sauce. | Label: food
Text: A bowl of sliced bananas with a sprinkle of cocoa powder and a side of peanut butter | Label: food
Text: Sushi roll with premium ingredients like uni or wagyu beef. | Label: food
Text: Surfboard leaning against a fence | Label: not_food
Text: A slice of pepperoni pizza with a layer of melted cheese | Label: food


In [6]:
# Check number of each label (to see whether the two classes are balanced)
from collections import Counter

Counter(dataset["train"]["label"])

Counter({'food': 125, 'not_food': 125})

In [7]:
# Copy the training split into a pandas DataFrame and preview 5 random rows
data_df = pd.DataFrame(dataset['train'])
data_df.sample(5)

Unnamed: 0,text,label
150,Plate of sushi served with pickled ginger and ...,food
53,"A slice of pizza from a Detroit-style pie, wit...",food
0,"Creamy cauliflower curry with garlic naan, fea...",food
60,A close-up of a family playing a board game wi...,not_food
50,Red brick fireplace with a mantel serving as a...,not_food


Tokenization 

In [8]:
# Create the label <-> ID mappings programmatically from the dataset.
# The labels are sorted (rather than taken in the order they first appear in
# the rows) so the IDs stay the same if the dataset is shuffled or re-ordered.
# Reverse alphabetical order gives "not_food" -> 0 and "food" -> 1.
unique_labels = sorted(dataset["train"].unique("label"), reverse=True)
id2label = dict(enumerate(unique_labels))
label2id = {label: idx for idx, label in id2label.items()}

print(f"Label to ID mapping: {label2id}")
print(f"ID to Label mapping: {id2label}")

Label to ID mapping: {'not_food': 0, 'food': 1}
ID to Label mapping: {0: 'not_food', 1: 'food'}


In [9]:
def map_label_to_number(example):
    """
    Replace the string label of `example` with its integer ID from `label2id`.

    The dictionary is updated in place and the same object is returned.
    """
    label_id = label2id[example["label"]]
    example["label"] = label_id
    return example

example_sample = {"text": "This is a sentence about my favourite food: Biriyani.", "label": "food"}

# Quick check of the function on a hand-written example
map_label_to_number(example_sample)

{'text': 'This is a sentence about my favourite food: Biriyani.', 'label': 1}

In [10]:
# We map the string labels to numbers with map_label_to_number.
# Note: `dataset` is re-bound here from the loaded DatasetDict to the mapped
# "train" split, so this cell cannot simply be run a second time.

dataset = dataset["train"].map(map_label_to_number)
dataset[:5]

{'text': ['Creamy cauliflower curry with garlic naan, featuring tender cauliflower in a rich sauce with cream and spices, served with garlic naan bread.',
  'Set of books stacked on a desk',
  'Watching TV together, a family has their dog stretched out on the floor',
  'Wooden dresser with a mirror reflecting the room',
  'Lawn mower stored in a shed'],
 'label': [1, 0, 0, 0, 0]}

Creating a test dataset to evaluate the performance

In [11]:
# Create train/test splits: hold out 20% of the rows for testing.
# A fixed seed is passed so the split is the same on every run.
dataset = dataset.train_test_split(test_size=0.2, seed=42) 
dataset

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 200
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 50
    })
})

In [12]:
from transformers import AutoTokenizer

# Load the tokenizer of the DistilBERT checkpoint, requesting the fast implementation
tokenizer = AutoTokenizer.from_pretrained(
    "distilbert/distilbert-base-uncased",
    use_fast=True,
)


In [13]:
# Tokenize a short sentence to inspect the input_ids and attention_mask it produces
tokenizer("I love pizza")

{'input_ids': [101, 1045, 2293, 10733, 102], 'attention_mask': [1, 1, 1, 1, 1]}

In [14]:
# A word that is split into several IDs by the tokenizer
tokenizer("Sreedeep")

{'input_ids': [101, 5034, 13089, 4402, 2361, 102], 'attention_mask': [1, 1, 1, 1, 1, 1]}

In [15]:

# Convert the IDs back into token strings to see the individual word pieces
tokenizer.convert_ids_to_tokens(tokenizer("sreedeep").input_ids)

['[CLS]', 'sr', '##eed', '##ee', '##p', '[SEP]']

In [16]:
def tokenize_text(examples):
    """
    Run the tokenizer over the "text" field of `examples` and return its output.
    """
    texts = examples["text"]
    # padding=True pads short sequences to the longest sequence in the batch;
    # truncation is enabled as well.
    return tokenizer(texts, padding=True, truncation=True)

In [17]:
# Apply tokenize_text to every split; with batched=True the function receives
# batches of examples (up to batch_size rows at a time) instead of single rows.
tokenized_dataset = dataset.map(function=tokenize_text,
                                batched=True,
                                batch_size=1000)


# Show the resulting splits and columns
tokenized_dataset

Map: 100%|██████████| 50/50 [00:00<00:00, 1780.43 examples/s]


DatasetDict({
    train: Dataset({
        features: ['text', 'label', 'input_ids', 'attention_mask'],
        num_rows: 200
    })
    test: Dataset({
        features: ['text', 'label', 'input_ids', 'attention_mask'],
        num_rows: 50
    })
})

In [18]:
import evaluate
import numpy as np 
from typing import Tuple

accuracy_metric = evaluate.load("accuracy")

def compute_accuracy(predictions_and_labels: Tuple[np.ndarray, np.ndarray]):
    """
    Compute accuracy for a (predictions, labels) pair.

    Args:
        predictions_and_labels: Pair of arrays. Predictions with 2 or more
            dimensions are reduced to class IDs with an argmax over axis 1;
            1-D predictions are used as they are.

    Returns:
        The result of `accuracy_metric.compute(...)` for the predictions
        against the reference labels.
    """
    predictions, labels = predictions_and_labels

    # Predictions with a class axis are turned into class IDs first
    if len(predictions.shape) >= 2:
        predictions = np.argmax(predictions, axis=1)

    # Always return the metric. Previously the return sat inside the `if`,
    # so 1-D predictions silently produced None.
    return accuracy_metric.compute(predictions=predictions, references=labels)
    

Creating the model 

In [19]:
from transformers import AutoModelForSequenceClassification

# Load the DistilBERT checkpoint with a sequence classification head.
# The number of labels is taken from the label mapping rather than hard-coded,
# so it stays consistent with id2label / label2id.
model = AutoModelForSequenceClassification.from_pretrained(pretrained_model_name_or_path="distilbert/distilbert-base-uncased",
                                                           num_labels=len(id2label),
                                                           id2label=id2label,
                                                           label2id=label2id)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [20]:
# Display the layer structure of the model
model

DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): DistilBertSdpaAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)


In [21]:

from pathlib import Path

# Folder that holds all saved models (created if it does not exist yet)
models_dir = Path("models")
models_dir.mkdir(exist_ok=True)

# Name and location used for this notebook's model
model_save_name = "food-not-food-model"
model_save_dir = models_dir / model_save_name

model_save_dir

WindowsPath('models/food-not-food-model')

In [22]:
from transformers import TrainingArguments

print(f"[INFO] Saving model checkpoints to: {model_save_dir}")

# Create training arguments
training_args = TrainingArguments(
    # Checkpointing
    output_dir=model_save_dir,
    save_strategy="epoch",
    save_total_limit=3,
    load_best_model_at_end=True,
    # Optimisation
    learning_rate=1e-4,
    num_train_epochs=10,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    seed=42,
    use_cpu=False,
    # Evaluation and logging
    eval_strategy="epoch",
    logging_strategy="epoch",
    report_to="none",
    # Hugging Face Hub
    hub_private_repo=False,
)

[INFO] Saving model checkpoints to: models\food-not-food-model


In [23]:
from transformers import Trainer

# Setup Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
    # `tokenizer=` is deprecated for Trainer in the Transformers version used
    # here (4.49.0) and triggers a warning; `processing_class=` replaces it.
    processing_class=tokenizer,
    compute_metrics=compute_accuracy
)

  trainer = Trainer(


In [24]:
# Run the fine-tuning loop; the returned object exposes the training metrics via `.metrics`
results = trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy
1,0.46,0.154046,1.0
2,0.0963,0.012809,1.0
3,0.0097,0.003567,1.0
4,0.003,0.002182,1.0
5,0.0017,0.001513,1.0
6,0.0011,0.001116,1.0
7,0.001,0.000911,1.0
8,0.0008,0.000802,1.0
9,0.0008,0.000749,1.0
10,0.0007,0.000732,1.0


In [25]:
# Print every metric reported by the training run, one per line
for metric_name, metric_value in results.metrics.items():
    print(f"{metric_name}: {metric_value}")

train_runtime: 41.9408
train_samples_per_second: 47.686
train_steps_per_second: 1.669
total_flos: 18110777160000.0
train_loss: 0.05750087087840906
epoch: 10.0


In [26]:
# Save the trained model into the model directory defined earlier
print(f"Saving model to {model_save_dir}")
trainer.save_model(output_dir=model_save_dir)

Saving model to models\food-not-food-model


In [31]:
# Push the trained model to the Hugging Face Hub and print the returned URL.
# NOTE(review): this presumably needs an authenticated Hub session — no login
# step is visible in this notebook; confirm before running on a fresh kernel.
model_upload_url = trainer.push_to_hub(
    commit_message="Upload food-not-food text classification model",
)
print(f"[INFO] Model successfully uploaded to Hugging Face Hub with at URL: {model_upload_url}")

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]
training_args.bin: 100%|██████████| 5.30k/5.30k [00:00<00:00, 5.41kB/s]
model.safetensors: 100%|██████████| 268M/268M [00:37<00:00, 7.05MB/s] 
Upload 2 LFS files: 100%|██████████| 2/2 [00:39<00:00, 19.64s/it]


[INFO] Model successfully uploaded to Hugging Face Hub with at URL: https://huggingface.co/sreedeepEK/food-not-food-model/tree/main/


In [32]:
# Run the trained model over the test split
predictions = trainer.predict(tokenized_dataset["test"])
# Raw model outputs (turned into probabilities with softmax in the next cell)
prediction_values = predictions.predictions
# Loss, accuracy and speed metrics for the test split
predictions_metrics = predictions.metrics

predictions_metrics

{'test_loss': 0.0007317797862924635,
 'test_accuracy': 1.0,
 'test_runtime': 0.8504,
 'test_samples_per_second': 58.798,
 'test_steps_per_second': 2.352}

In [34]:
import torch
from sklearn.metrics import accuracy_score

# 1. Turn the raw prediction values into probabilities (softmax across the class axis)
pred_probs = torch.tensor(prediction_values).softmax(dim=1)

# 2. The predicted label is the index of the highest probability in each row
pred_labels = pred_probs.argmax(dim=1)

# 3. Ground-truth labels of the test split
true_labels = dataset["test"]["label"]

# 4. Fraction of predictions that match the true labels
test_accuracy = accuracy_score(true_labels, pred_labels)

print(f"[INFO] Test accuracy: {test_accuracy*100}%")

[INFO] Test accuracy: 100.0%


In [36]:
# Collect the test texts, their true labels, the predicted labels and the
# probability of the predicted class (row-wise maximum of pred_probs) in one table
test_predictions_df = pd.DataFrame({"text": dataset["test"]["text"],
                                    "true_labels" : true_labels,
                                    "pred_label": pred_labels,
                                    "pred_prob": torch.max(pred_probs,dim=1).values})

test_predictions_df.head(5)

Unnamed: 0,text,true_labels,pred_label,pred_prob
0,A slice of pepperoni pizza with a layer of mel...,1,1,0.999438
1,Red brick fireplace with a mantel serving as a...,0,0,0.999365
2,A bowl of sliced bell peppers with a sprinkle ...,1,1,0.999447
3,Set of mugs hanging on a hook,0,0,0.99942
4,Standing floor lamp providing light next to an...,0,0,0.999406


Loading the model from both the local path and the Hugging Face Hub path

In [38]:
# Folder the trained model was saved to locally
local_model_path = "models/food-not-food-model/"
# Repository the trained model was pushed to on the Hugging Face Hub
huggingface_model_path = "sreedeepEK/food-not-food-model"

We can make predictions in two ways: with a pipeline, or directly with PyTorch.

1. transformers.pipeline 
2. transformers.AutoTokenizer

In [39]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [41]:
import torch
from transformers import pipeline 

BATCH_SIZE = 32

# Build a text-classification pipeline from the locally saved model;
# top_k=1 asks for only the highest-scoring label per input.
food_not_food_classifier = pipeline(
    task="text-classification",
    model=local_model_path,
    batch_size=BATCH_SIZE,
    top_k=1,
    device=device,
)

# Displaying the object shows that a pipeline instance has been created
food_not_food_classifier

Device set to use cuda


<transformers.pipelines.text_classification.TextClassificationPipeline at 0x179cf2527f0>

In [42]:
# Classify a sentence that is about food
sample_text_food = "A delicious photo of a plate of Biriyani"
food_not_food_classifier(sample_text_food)

[[{'label': 'food', 'score': 0.9989838004112244}]]

In [44]:
# Classify a sentence that is not about food
sample_text_not_food = "A yellow tractor driving over the hill"
# Pass the not-food sentence defined above (the food sample was passed here by mistake)
food_not_food_classifier(sample_text_not_food)

[[{'label': 'not_food', 'score': 0.9993836879730225}]]

Making predictions with PyTorch

Performing predictions with PyTorch requires an extra step compared to pipeline, we have to prepare our inputs first (turn the text into numbers).

We can prepare our inputs with the tokenizer that got automatically saved with our model.

In [46]:
from transformers import AutoTokenizer

# Use the copy of the model that was pushed to the Hugging Face Hub
model_path = huggingface_model_path

# Note: this re-assigns `sample_text_food` (previously the Biriyani sentence)
sample_text_food = "A delicious photo of a plate of scrambled eggs, bacon and toast"

# Prepare the tokenizer and tokenize the inputs
# (this re-binds `tokenizer` to the one loaded from `model_path`)
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_path)
inputs = tokenizer(sample_text_food, 
                   return_tensors="pt") # return the output as PyTorch tensors 
inputs

{'input_ids': tensor([[  101,  1037, 12090,  6302,  1997,  1037,  5127,  1997, 13501,  6763,
          1010, 11611,  1998, 15174,   102]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}

In [47]:
from transformers import AutoModelForSequenceClassification

# Load our text classification model
# (this re-binds `model`, which previously referred to the model passed to the Trainer)
model = AutoModelForSequenceClassification.from_pretrained(pretrained_model_name_or_path=model_path)


In [49]:
import torch 
# Gradients are not needed for inference, so the forward pass runs under no_grad
with torch.no_grad():
    outputs = model(**inputs)
    
# The logits in the output are the raw scores for each class
outputs

SequenceClassifierOutput(loss=None, logits=tensor([[-3.9462,  3.4914]]), hidden_states=None, attentions=None)

In [50]:
# `outputs.logits` has one row per input text. Select the row of our single
# sample before reducing, so the argmax is a class ID rather than an index
# into the flattened (batch x classes) tensor.
sample_probabilities = torch.softmax(outputs.logits, dim=1)[0]
predicted_class_id = sample_probabilities.argmax().item()
prediction_probability = sample_probabilities[predicted_class_id].item()

print(f"Text: {sample_text_food}")
print(f"Predicted label: {model.config.id2label[predicted_class_id]}")
print(f"Prediction probability: {prediction_probability}")

Text: A delicious photo of a plate of scrambled eggs, bacon and toast
Predicted label: food
Prediction probability: 0.9994117021560669


Building Gradio interface 

In [51]:
from functools import lru_cache
from typing import Dict 

@lru_cache(maxsize=1)
def _load_food_not_food_pipeline():
    """Create the text-classification pipeline once; later calls reuse the cached instance."""
    return pipeline(task="text-classification", 
                    model=local_model_path,
                    batch_size=32,
                    device="cuda" if torch.cuda.is_available() else "cpu", 
                    top_k=None) # return the scores of all labels

def food_not_food_classifier(text: str) -> Dict[str, float]:
    """
    Classify `text` with the locally saved model.

    Args:
        text: The sentence to classify.

    Returns:
        A dictionary mapping each label to its score, in the
        {"label_1": score_1, "label_2": score_2} form expected by Gradio.
    """
    # The pipeline is loaded on the first call only, instead of on every call
    classifier = _load_food_not_food_pipeline()

    # One list of {"label": ..., "score": ...} dicts for the single input text
    outputs = classifier(text)[0]

    return {item["label"]: item["score"] for item in outputs}


food_not_food_classifier("My lunch was chicken biriyani")

Device set to use cuda


{'food': 0.9992489218711853, 'not_food': 0.0007510686991736293}

In [None]:
import gradio as gr

# Example sentences offered in the interface (one not about food, one about food)
example_inputs = [
    ["I whipped up a fresh batch of code, but it seems to have a syntax error."],
    ["A delicious photo of a plate of scrambled eggs, bacon and toast."],
]

# 2. Setup a Gradio interface to accept text and output labels
demo = gr.Interface(
    fn=food_not_food_classifier,
    inputs="text",
    outputs=gr.Label(num_top_classes=2),  # show top 2 classes (that's all we have)
    title="Food or Not Food Classifier",
    description="A text classifier to determine if a sentence is about food or not food.",
    examples=example_inputs,
)

# 3. Launch the interface
demo.launch()

Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




Device set to use cuda
Device set to use cuda
Device set to use cuda
Device set to use cuda


In [55]:
from pathlib import Path

# Root folder that holds every demo
demos_dir = Path("./demos")
demos_dir.mkdir(exist_ok=True)

# Sub-folder for the food_not_food_text_classifier demo
food_not_food_text_classifier_demo_dir = demos_dir / "food_not_food_text_classifier"
food_not_food_text_classifier_demo_dir.mkdir(exist_ok=True)

In [59]:
%%writefile ./demos/food_not_food_text_classifier/app.py
# 1. Import the required packages
import torch
import gradio as gr

from functools import lru_cache
from typing import Dict
from transformers import pipeline

# The model fine-tuned in the accompanying notebook and pushed to the Hugging Face Hub
MODEL_ID = "sreedeepEK/food-not-food-model"

# 2. Define function to use our model on given text 
@lru_cache(maxsize=1)
def _get_classifier_pipeline():
    """Build the text classification pipeline once and reuse it for every request."""
    return pipeline(task="text-classification", 
                    # Because our model is on Hugging Face already, we can pass in the model name directly
                    model=MODEL_ID,
                    device="cuda" if torch.cuda.is_available() else "cpu",
                    top_k=None) # return all possible scores (not just top-1)

def food_not_food_classifier(text: str) -> Dict[str, float]:
    """
    Classify `text` as food or not food.

    Args:
        text: The sentence to classify.

    Returns:
        A dictionary mapping each label to its score, formatted for Gradio
        (e.g. {"label_1": probability_1, "label_2": probability_2}).
    """
    # The pipeline is created on the first request only (not on every request)
    classifier = _get_classifier_pipeline()

    # Get outputs from pipeline (as a list of dicts)
    outputs = classifier(text)[0]

    # Format output for Gradio
    return {item["label"]: item["score"] for item in outputs}

# 3. Create a Gradio interface with details about our app
description = """
A text classifier to determine if a sentence is about food or not food. 

Fine-tuned from [DistilBERT](https://huggingface.co/distilbert/distilbert-base-uncased) on a [small dataset of food and not food text](https://huggingface.co/datasets/mrdbourke/learn_hf_food_not_food_image_captions).

See [source code](https://github.com/sreedeepEK/food-not-food/blob/main/file.ipynb).
"""

demo = gr.Interface(fn=food_not_food_classifier, 
             inputs="text", 
             outputs=gr.Label(num_top_classes=2), # show top 2 classes (that's all we have)
             title="🍗🚫🥑 Food or Not Food Text Classifier",
             description=description,
             examples=[["I whipped up a fresh batch of code, but it seems to have a syntax error."],
                       ["A delicious photo of a plate of scrambled eggs, bacon and toast."]])

# 4. Launch the interface
if __name__ == "__main__":
    demo.launch()

Writing ./demos/food_not_food_text_classifier/app.py


In [60]:
# 1. Import the required methods for uploading to the Hugging Face Hub
from huggingface_hub import (
    create_repo,
    get_full_repo_name,
    upload_file, 
    upload_folder 
)

# 2. Define the parameters we'd like to use for the upload
# NOTE(review): only app.py is visibly written into this folder by the notebook.
# The Space presumably also needs a requirements.txt listing torch and
# transformers — confirm it exists before uploading.
LOCAL_DEMO_FOLDER_PATH_TO_UPLOAD = "./demos/food_not_food_text_classifier"
HF_TARGET_SPACE_NAME = "food-not-food"
HF_REPO_TYPE = "space" 
HF_SPACE_SDK = "gradio"

# 3. Create a Space repository on Hugging Face Hub 
# (exist_ok=True is passed so re-running the cell does not fail if the repo already exists)
print(f"[INFO] Creating repo on Hugging Face Hub with name: {HF_TARGET_SPACE_NAME}")
create_repo(
    repo_id=HF_TARGET_SPACE_NAME,
    
    repo_type=HF_REPO_TYPE,
    private=False, 
    space_sdk=HF_SPACE_SDK,
    exist_ok=True, 
)

# 4. Get the full repository name (e.g. {username}/{model_id} or {username}/{space_name})
full_hf_repo_name = get_full_repo_name(model_id=HF_TARGET_SPACE_NAME)
print(f"[INFO] Full Hugging Face Hub repo name: {full_hf_repo_name}")

# 5. Upload our demo folder to the root of the Space repository
print(f"[INFO] Uploading {LOCAL_DEMO_FOLDER_PATH_TO_UPLOAD} to repo: {full_hf_repo_name}")
folder_upload_url = upload_folder(
    repo_id=full_hf_repo_name,
    folder_path=LOCAL_DEMO_FOLDER_PATH_TO_UPLOAD,
    path_in_repo=".",
    # token=HF_TOKEN,
    repo_type=HF_REPO_TYPE,
    commit_message="Uploading food not food text classifier demo app.py"
)
print(f"[INFO] Demo folder successfully uploaded with commit URL: {folder_upload_url}")

[INFO] Creating repo on Hugging Face Hub with name: food-not-food
[INFO] Full Hugging Face Hub repo name: sreedeepEK/food-not-food
[INFO] Uploading ./demos/food_not_food_text_classifier to repo: sreedeepEK/food-not-food
[INFO] Demo folder successfully uploaded with commit URL: https://huggingface.co/spaces/sreedeepEK/food-not-food/tree/main/.
