In [1]:
# Fine-Tuning
"""
Fine-tuning means taking a pre-trained model (like gpt-3.5-turbo) and training it further on custom examples so it better understands your specific task, tone, or domain.

When to Fine-Tune?

You want a specific tone, style, or format in every output.

You want the model to follow strict instructions (e.g., output tables).

You want it to remember patterns (e.g., specific Q&A style).
"""

'\nFine-tuning means taking a pre-trained model (like gpt-3.5-turbo) and training it further on custom examples so it better understands your specific task, tone, or domain.\n\nWhen to Fine-Tune?\n\nYou want a specific tone, style, or format in every output.\n\nYou want the model to follow strict instructions (e.g., output tables).\n\nYou want it to remember patterns (e.g., specific Q&A style).\n'

In [None]:
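# Hugging Face API key: [REDACTED]. Never commit tokens to a notebook; revoke the token that was previously pasted here and load a new one from the HF_TOKEN environment variable instead. (The public distilbert-base-uncased checkpoint used below does not need a token.)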

In [2]:
# Simple example: fine-tuning a Hugging Face Transformers model (DistilBERT) for binary sentiment classification

In [6]:
# STEP 1: Install dependencies (only once)
!pip install -q transformers datasets

# STEP 2: Import required libraries
import torch
from datasets import Dataset
from transformers import (
    DataCollatorWithPadding,
    DistilBertForSequenceClassification,
    DistilBertTokenizerFast,
    Trainer,
    TrainingArguments,
)

# STEP 3: Define a small sentiment dataset
# Fixed seed so that the newly initialised classification head (see the
# "newly initialized" warning printed on model load) starts from the same
# weights on every Restart & Run All.
SEED = 42

train_data = {
    "text": [
        "I love this product!",
        "This is the worst experience ever.",
        "It was okay, not great.",  # NOTE(review): lukewarm sentence labelled Positive — confirm this is intended
        "Absolutely fantastic!",
        "I will never buy this again.",
        "Such a waste of money.",
        "I'm really happy with the service.",
        "Terrible support, very disappointed."
    ],
    "label": [1, 0, 1, 1, 0, 0, 1, 0]  # 1 = Positive, 0 = Negative
}

dataset = Dataset.from_dict(train_data)

# STEP 4: Load tokenizer and model
model_name = "distilbert-base-uncased"
tokenizer = DistilBertTokenizerFast.from_pretrained(model_name)
torch.manual_seed(SEED)  # seed BEFORE the model is built so the random head init is repeatable
model = DistilBertForSequenceClassification.from_pretrained(model_name, num_labels=2)

# STEP 5: Tokenize dataset
def preprocess(examples):
    """Tokenize a batch of examples.

    Args:
        examples: a batch from ``dataset.map(batched=True)``; its ``"text"``
            entry is the list of sentences to encode.

    Returns:
        The tokenizer output for the batch, truncated but NOT padded.
        Padding is deferred to the data collator so that every training
        batch is padded to its own longest sequence, instead of depending
        on how ``map`` happened to chunk the dataset.
    """
    return tokenizer(examples["text"], truncation=True)

tokenized_dataset = dataset.map(preprocess, batched=True)

# Pads each training batch dynamically (same approach as the multiclass section).
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# STEP 6: Training arguments (NO wandb)
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    logging_dir="./logs",
    seed=SEED,
    report_to="none"  # 👈 Disables wandb
)

# STEP 7: Trainer setup
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    data_collator=data_collator,
)

# STEP 8: Train
trainer.train()


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8 [00:00<?, ? examples/s]

Step,Training Loss


TrainOutput(global_step=12, training_loss=0.6796274185180664, metrics={'train_runtime': 33.0574, 'train_samples_per_second': 0.726, 'train_steps_per_second': 0.363, 'total_flos': 68303502432.0, 'train_loss': 0.6796274185180664, 'epoch': 3.0})

In [8]:
def chat():
    """Interactive console loop that classifies typed sentences as Positive/Negative.

    Uses the notebook-level ``tokenizer`` and ``model`` (the binary classifier
    fine-tuned above). Reads one line at a time with ``input``; typing
    ``exit`` (any case, surrounding whitespace ignored), sending EOF, or
    interrupting the prompt ends the loop. Blank lines are ignored.

    Returns:
        None. Results are printed to stdout.
    """
    # Nothing else in this notebook switches the model out of training mode,
    # so disable dropout explicitly to get deterministic predictions.
    model.eval()
    # Inputs must live on the same device as the model (e.g. after GPU training).
    device = model.device

    print("🗨️  Sentiment Bot is ready! Type 'exit' to quit.")
    while True:
        try:
            text = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin / interrupted prompt: leave the loop cleanly.
            print("👋 Goodbye!")
            break
        if text.lower() == "exit":
            print("👋 Goodbye!")
            break
        if not text:
            # Nothing to classify; ask again.
            continue
        inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
        inputs = {name: tensor.to(device) for name, tensor in inputs.items()}
        with torch.no_grad():
            logits = model(**inputs).logits
        prediction = torch.argmax(logits, dim=1).item()
        sentiment = "Positive 😊" if prediction == 1 else "Negative 😞"
        print("Sentiment:", sentiment)

chat()



🗨️  Sentiment Bot is ready! Type 'exit' to quit.
You: i am awsom
Sentiment: Positive 😊
You: i m hungry
Sentiment: Positive 😊
You: i am happy with product
Sentiment: Positive 😊
You: I am not happy with service
Sentiment: Negative 😞
You: I am happy with product but not happy with service
Sentiment: Positive 😊
You: exit
👋 Goodbye!


In [None]:
# Multiclass Sentiment: Negative, Neutral, Positive

In [10]:
# STEP 1: Install required packages
!pip install -q transformers datasets

# STEP 2: Imports
from transformers import DistilBertTokenizerFast, DistilBertForSequenceClassification, Trainer, TrainingArguments, DataCollatorWithPadding
from datasets import Dataset
import torch

# STEP 3: Prepare labeled dataset with 3 classes
# Fixed seed so that the newly initialised classification head (see the
# "newly initialized" warning printed on model load) starts from the same
# weights on every Restart & Run All.
SEED = 42

data = {
    "text": [
        "I love this product!",
        "This is the worst experience ever.",
        "It was okay, not great.",
        "Absolutely fantastic!",
        "I will never buy this again.",
        "Such a waste of money.",
        "I'm really happy with the service.",
        "Terrible support, very disappointed.",
        "I don’t feel strongly about this.",
        "It’s neither good nor bad."
    ],
    "label": [2, 0, 1, 2, 0, 0, 2, 0, 1, 1]  # 0 = Negative, 1 = Neutral, 2 = Positive
}
dataset = Dataset.from_dict(data)

# STEP 4: Tokenizer and preprocessing
tokenizer = DistilBertTokenizerFast.from_pretrained("distilbert-base-uncased")

def preprocess(example):
    """Tokenize a single example.

    Args:
        example: one dataset row; its ``"text"`` entry is the sentence to encode.

    Returns:
        The tokenizer output for that sentence, truncated but not padded
        (padding is applied per batch by the data collator below).
    """
    return tokenizer(example["text"], truncation=True)

tokenized_dataset = dataset.map(preprocess)

# STEP 5: Load model for 3-label classification
torch.manual_seed(SEED)  # seed BEFORE the model is built so the random head init is repeatable
model = DistilBertForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=3)

# STEP 6: Use data collator for padding
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# STEP 7: Define training arguments
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=5,
    per_device_train_batch_size=2,
    logging_strategy="no",
    save_strategy="no",
    seed=SEED,
    report_to="none"
)

# STEP 8: Train model
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    # `tokenizer=` is deprecated on Trainer (it triggered the warning seen in
    # this cell's output); `processing_class=` is the supported replacement.
    processing_class=tokenizer,
    data_collator=data_collator,
)
trainer.train()

# STEP 9: Simple inference function
def chat():
    """Interactive console loop that classifies typed sentences into three sentiments.

    Uses the notebook-level ``tokenizer`` and ``model`` (the 3-label classifier
    fine-tuned above) and maps the predicted class id to
    Negative / Neutral / Positive. Reads one line at a time with ``input``;
    typing ``exit`` (any case, surrounding whitespace ignored), sending EOF,
    or interrupting the prompt ends the loop. Blank lines are ignored.

    Returns:
        None. Results are printed to stdout.
    """
    label_map = {0: "Negative 😞", 1: "Neutral 😐", 2: "Positive 😊"}

    # Nothing else in this notebook switches the model out of training mode,
    # so disable dropout explicitly to get deterministic predictions.
    model.eval()
    # Inputs must live on the same device as the model (e.g. after GPU training).
    device = model.device

    print("🗨️  Sentiment Bot is ready! Type 'exit' to quit.")
    while True:
        try:
            user_input = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin / interrupted prompt: leave the loop cleanly.
            print("👋 Goodbye!")
            break
        if user_input.lower() == "exit":
            print("👋 Goodbye!")
            break
        if not user_input:
            # Nothing to classify; ask again.
            continue
        inputs = tokenizer(user_input, return_tensors="pt", truncation=True, padding=True)
        inputs = {name: tensor.to(device) for name, tensor in inputs.items()}
        with torch.no_grad():
            outputs = model(**inputs)
            prediction = torch.argmax(outputs.logits, dim=1).item()
        print("Sentiment:", label_map[prediction])

# STEP 10: Run chatbot
chat()


Map:   0%|          | 0/10 [00:00<?, ? examples/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


🗨️  Sentiment Bot is ready! Type 'exit' to quit.
You: service was good
Sentiment: Neutral 😐
You: service was okay
Sentiment: Neutral 😐
You: service was excellent
Sentiment: Negative 😞
You: nice product
Sentiment: Positive 😊
You: ok product
Sentiment: Negative 😞
You: exit
👋 Goodbye!


In [11]:
# End of the Notebook