In [None]:
!pip install -U "transformers[torch]" datasets evaluate accelerate scikit-learn

In [None]:
import os
import numpy as np
from datasets import load_dataset
import evaluate

from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    DataCollatorWithPadding,
    TrainingArguments,
    Trainer,
    set_seed,
)

def main():
    """Fine-tune a small BERT model on GLUE SST-2 and print a short demo.

    The function:

    1. seeds the RNGs and loads the ``glue``/``sst2`` dataset,
    2. tokenizes the ``sentence`` column (truncated to 128 tokens),
    3. trains for one epoch with ``Trainer``, evaluating and checkpointing
       every 200 steps and reloading the checkpoint with the best accuracy,
    4. prints the final evaluation metrics,
    5. saves the model and tokenizer to ``bert_sst2_poc/final_model``,
    6. prints class probabilities for two example sentences.

    Takes no arguments and returns ``None``; all results are printed or
    written to disk under ``bert_sst2_poc``.
    """
    # Local imports: the block only needs these two extra names, and torch is
    # already a hard requirement of the Trainer used below.
    import inspect

    import torch

    set_seed(42)

    # Small, fast BERT variant. You can swap this to:
    # - "bert-base-uncased" (bigger)
    # - "prajjwal1/bert-tiny" (tiny, very fast)
    # - "distilbert-base-uncased" (fast & decent)
    model_name = "prajjwal1/bert-mini"
    output_dir = "bert_sst2_poc"
    final_dir = f"{output_dir}/final_model"
    max_length = 128

    # Dataset: GLUE SST-2 (binary sentiment)
    dataset = load_dataset("glue", "sst2")

    tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)

    def tokenize_fn(batch):
        return tokenizer(batch["sentence"], truncation=True, max_length=max_length)

    # Drop the raw text and index columns so only model inputs and the label
    # column reach the collator.
    tokenized = dataset.map(tokenize_fn, batched=True, remove_columns=["sentence", "idx"])
    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

    metric = evaluate.load("accuracy")

    def compute_metrics(eval_pred):
        logits, labels = eval_pred
        preds = np.argmax(logits, axis=-1)
        return metric.compute(predictions=preds, references=labels)

    # The notebook installs transformers with ``-U``, so the installed version
    # is not pinned. Newer releases renamed ``evaluation_strategy`` to
    # ``eval_strategy``; pick whichever keyword the installed class accepts.
    args_params = inspect.signature(TrainingArguments).parameters
    eval_strategy_kw = (
        "eval_strategy" if "eval_strategy" in args_params else "evaluation_strategy"
    )

    # Enable mixed precision only when torch actually sees a CUDA device.
    # CUDA_VISIBLE_DEVICES is not a reliable signal: it is normally unset on
    # GPU machines and may be set (even to an empty string) on CPU-only ones.
    use_fp16 = torch.cuda.is_available()

    # Keep it small + fast for PoC
    args = TrainingArguments(
        output_dir=output_dir,
        eval_steps=200,
        save_strategy="steps",
        save_steps=200,
        logging_steps=50,
        learning_rate=2e-5,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=32,
        num_train_epochs=1,              # PoC speed
        weight_decay=0.01,
        warmup_ratio=0.06,
        fp16=use_fp16,
        report_to="none",                # disable wandb etc
        load_best_model_at_end=True,
        metric_for_best_model="accuracy",
        greater_is_better=True,
        save_total_limit=2,
        **{eval_strategy_kw: "steps"},
    )

    # Same version concern for Trainer: newer releases take the tokenizer as
    # ``processing_class`` instead of ``tokenizer``.
    trainer_params = inspect.signature(Trainer.__init__).parameters
    tokenizer_kw = "processing_class" if "processing_class" in trainer_params else "tokenizer"

    trainer = Trainer(
        model=model,
        args=args,
        train_dataset=tokenized["train"],
        eval_dataset=tokenized["validation"],
        data_collator=data_collator,
        compute_metrics=compute_metrics,
        **{tokenizer_kw: tokenizer},
    )

    trainer.train()
    results = trainer.evaluate()
    print("Eval:", results)

    # Save final artifacts
    trainer.save_model(final_dir)
    tokenizer.save_pretrained(final_dir)

    # Quick inference demo
    demo_texts = [
        "This movie was surprisingly good!",
        "I regret watching this, it was awful.",
    ]
    enc = tokenizer(
        demo_texts,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=max_length,  # same length cap as training
    )
    # The tokenizer returns CPU tensors; move them to wherever the model's
    # weights are so the forward pass does not fail with a device mismatch
    # after GPU training.
    enc = {name: tensor.to(model.device) for name, tensor in enc.items()}

    model.eval()  # make sure dropout is off for the demo
    with torch.no_grad(), trainer.accelerator.autocast():
        out = model(**enc)
    probs = out.logits.softmax(dim=-1).float().cpu().numpy()
    for t, p in zip(demo_texts, probs):
        print(f"\nText: {t}\nProb(neg, pos): {p}")

# Run the fine-tuning PoC only when this file is executed directly (as a
# script or notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()
