In [3]:
# ==========================================
# Flipkart Product Review Classifier
# DistilBERT + Focal Loss + Gradio UI
# ==========================================
!pip install -q transformers datasets scikit-learn pandas torch evaluate gradio joblib

import os, shutil, joblib
import pandas as pd
import numpy as np
import torch
import gradio as gr
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.utils.class_weight import compute_class_weight
from datasets import Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer
)
import evaluate
import inspect

# -------------------------
# 0. Settings
# -------------------------
CSV_PATH = "flipkart_reviews.csv"   # <-- Flipkart file
text_column = "review_body"              # change if needed
label_column = "sentiment"          # change if needed
MODEL_NAME = "distilbert-base-uncased"
MODEL_DIR = "fine-tuned-flipkart-model"
MAX_LENGTH = 128
NUM_EPOCHS = 12
BATCH_SIZE = 8

# -------------------------
# 1. Load CSV & basic checks
# -------------------------
df = pd.read_csv(CSV_PATH)
print("Columns found:", df.columns.tolist())

# Keep only expected labels
df = df[df[label_column].isin(["positive", "neutral", "negative"])].copy()
print("Value counts:", df[label_column].value_counts())

# -------------------------
# 2. Balance dataset (optional)
# -------------------------
samples_per_class = min(df[label_column].value_counts().min(), 200)
df_balanced = (
    df.groupby(label_column, group_keys=False)
      .apply(lambda x: x.sample(samples_per_class, random_state=42))
      .reset_index(drop=True)
)
print("Balanced counts:", df_balanced[label_column].value_counts())

# -------------------------
# 3. Encode labels
# -------------------------
label_encoder = LabelEncoder()
df_balanced["label"] = label_encoder.fit_transform(df_balanced[label_column])
joblib.dump(label_encoder, "label_encoder.pkl")
print("Classes:", label_encoder.classes_)

# -------------------------
# 4. Train/test split
# -------------------------
train_df, test_df = train_test_split(
    df_balanced, test_size=0.2, stratify=df_balanced["label"], random_state=42
)

# -------------------------
# 5. Tokenize
# -------------------------
train_dataset = Dataset.from_pandas(train_df.reset_index(drop=True))
test_dataset = Dataset.from_pandas(test_df.reset_index(drop=True))

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
def tokenize_function(batch):
    return tokenizer(batch[text_column], truncation=True, padding="max_length", max_length=MAX_LENGTH)

train_dataset = train_dataset.map(tokenize_function, batched=True)
test_dataset = test_dataset.map(tokenize_function, batched=True)

remove_cols = [c for c in [label_column, text_column, "__index_level_0__"] if c in train_dataset.column_names]
train_dataset = train_dataset.remove_columns(remove_cols)
test_dataset = test_dataset.remove_columns(remove_cols)

# -------------------------
# 6. Class weights
# -------------------------
class_weights = compute_class_weight(
    class_weight="balanced",
    classes=np.unique(train_df["label"]),
    y=train_df["label"]
)
class_weights = torch.tensor(class_weights, dtype=torch.float)
print("Class weights:", class_weights)

# -------------------------
# 7. Model
# -------------------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=len(label_encoder.classes_)
)
model.to(device)

# -------------------------
# 8. Focal Loss
# -------------------------
class FocalLoss(torch.nn.Module):
    def __init__(self, alpha=None, gamma=2.0, reduction='mean'):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.reduction = reduction

    def forward(self, logits, targets):
        ce_loss = torch.nn.functional.cross_entropy(logits, targets, weight=self.alpha, reduction='none')
        pt = torch.exp(-ce_loss)
        focal = ((1 - pt) ** self.gamma) * ce_loss
        return focal.mean() if self.reduction == 'mean' else focal.sum()

focal_loss_fn = FocalLoss(alpha=class_weights.to(device), gamma=2.0, reduction='mean')

class FocalTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        labels = inputs.get("labels").to(device)
        inputs_for_model = {k: v.to(device) for k, v in inputs.items() if k != "labels"}
        outputs = model(**inputs_for_model)
        logits = outputs.logits
        loss = focal_loss_fn(logits, labels)
        return (loss, outputs) if return_outputs else loss

print("\nBase Trainer compute_loss signature:")
print(inspect.signature(Trainer.compute_loss))

# -------------------------
# 9. Metrics
# -------------------------
accuracy_metric = evaluate.load("accuracy")
f1_metric = evaluate.load("f1")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=-1)
    return {
        "accuracy": accuracy_metric.compute(predictions=preds, references=labels)["accuracy"],
        "f1": f1_metric.compute(predictions=preds, references=labels, average="weighted")["f1"]
    }

training_args = TrainingArguments(
    output_dir="./results_flipkart",
    eval_strategy="epoch",
    save_strategy="no",
    learning_rate=2e-5,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    num_train_epochs=NUM_EPOCHS,
    weight_decay=0.01,
    logging_dir="./logs_flipkart",
    load_best_model_at_end=False,
    report_to="none"
)

# -------------------------
# 10. Train
# -------------------------
trainer = FocalTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

trainer.train()
eval_results = trainer.evaluate()
print("Eval results:", eval_results)

# -------------------------
# 11. Save
# -------------------------
shutil.rmtree(MODEL_DIR, ignore_errors=True)
model.save_pretrained(MODEL_DIR)
tokenizer.save_pretrained(MODEL_DIR)
print(f"Model saved to {MODEL_DIR}")

# -------------------------
# 12. Inference
# -------------------------
tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
inference_model = AutoModelForSequenceClassification.from_pretrained(MODEL_DIR)
inference_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
inference_model.to(inference_device)
inference_model.eval()
label_encoder = joblib.load("label_encoder.pkl")

def predict_sentiment(review_text: str):
    if (not isinstance(review_text, str)) or (len(review_text.strip()) == 0):
        return {c: 0.0 for c in label_encoder.classes_}
    inputs = tokenizer(
        review_text,
        return_tensors="pt",
        padding="max_length",
        truncation=True,
        max_length=MAX_LENGTH
    )
    inputs = {k: v.to(inference_device) for k, v in inputs.items()}
    with torch.no_grad():
        outputs = inference_model(**inputs)
        logits = outputs.logits
        probs = torch.nn.functional.softmax(logits, dim=1).cpu().numpy()[0]
    return {label_encoder.classes_[i]: float(probs[i]) for i in range(len(probs))}

print("Local test output:", predict_sentiment("The product quality is excellent and delivery was fast"))

# -------------------------
# 13. Gradio UI
# -------------------------
demo = gr.Interface(
    fn=predict_sentiment,
    inputs=gr.Textbox(lines=3, placeholder="Enter a Flipkart product review..."),
    outputs=gr.Label(num_top_classes=3),
    title="Flipkart Product Review Sentiment Classifier",
    description="Predicts if a Flipkart review is Positive, Neutral, or Negative."
)

demo.launch(share=True)

Columns found: ['product_id', 'product_name', 'review_title', 'review_body', 'Score', 'sentiment', 'review_date', 'Unnamed: 7']
Value counts: sentiment
positive    393
neutral      43
negative     24
Name: count, dtype: int64
Balanced counts: sentiment
negative    24
neutral     24
positive    24
Name: count, dtype: int64
Classes: ['negative' 'neutral' 'positive']


  .apply(lambda x: x.sample(samples_per_class, random_state=42))


Map:   0%|          | 0/57 [00:00<?, ? examples/s]

Map:   0%|          | 0/15 [00:00<?, ? examples/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Class weights: tensor([1., 1., 1.])

Base Trainer compute_loss signature:
(self, model: torch.nn.modules.module.Module, inputs: dict[str, typing.Union[torch.Tensor, typing.Any]], return_outputs: bool = False, num_items_in_batch: Optional[torch.Tensor] = None)


  trainer = FocalTrainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,No log,0.458902,0.4,0.28655
2,No log,0.389244,0.8,0.805387
3,No log,0.299646,0.733333,0.724868
4,No log,0.241686,0.8,0.805387
5,No log,0.203672,0.866667,0.865993
6,No log,0.171975,0.866667,0.865993
7,No log,0.153888,0.866667,0.865993
8,No log,0.136672,0.866667,0.865993
9,No log,0.124442,0.866667,0.865993
10,No log,0.117766,0.933333,0.93266


Eval results: {'eval_loss': 0.11078289896249771, 'eval_accuracy': 0.9333333333333333, 'eval_f1': 0.9326599326599326, 'eval_runtime': 3.5346, 'eval_samples_per_second': 4.244, 'eval_steps_per_second': 0.566, 'epoch': 12.0}
Model saved to fine-tuned-flipkart-model
Local test output: {'negative': 0.274962455034256, 'neutral': 0.2421865016222, 'positive': 0.4828510582447052}
Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://3280661549785e9d3b.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


