<a href="https://colab.research.google.com/github/snithshibu/cl-genai-bertsentiment/blob/main/bert.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install -q torch torchvision torchaudio
!pip install -q transformers datasets accelerate scikit-learn wandb

In [2]:
import torch
import numpy as np
import pandas as pd

from datasets import load_dataset
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix

from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
)

# Report the PyTorch build and whether a CUDA device is visible to it.
print("Torch:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    # Name of the CUDA device at index 0.
    print("GPU:", torch.cuda.get_device_name(0))

# Weights & Biases is the reporting backend configured for training further
# down (report_to=["wandb"]), so log in before training starts.
import wandb
wandb.login()

Torch: 2.9.0+cu126
CUDA available: True
GPU: Tesla T4


[34m[1mwandb[0m: Currently logged in as: [33msnithshibu[0m ([33msnithshibu-mar-baselios-college-of-engineering-and-techn[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [3]:
# Load the sentiment dataset by its identifier; displaying the resulting
# DatasetDict shows the available splits, their columns and their sizes.
dataset = load_dataset("syedkhalid076/Sentiment-Analysis")
dataset

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 83989
    })
    validation: Dataset({
        features: ['text', 'label'],
        num_rows: 10499
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 10499
    })
})

In [4]:
# Peek at the first three training examples, one per line.
print(*(dataset["train"][row] for row in range(3)), sep="\n")

{'text': 'almost got in a giant car accident on the 101', 'label': 0}
{'text': 'like something wholly original ', 'label': 2}
{'text': 'b.s. one another ', 'label': 0}


In [5]:
model_name = "bert-base-uncased"

# Single source of truth for the label scheme: class id -> readable name.
id2label = {0: "negative", 1: "neutral", 2: "positive"}
label2id = {v: k for k, v in id2label.items()}
num_labels = len(id2label)  # derived, so it cannot drift from the mapping

tokenizer = AutoTokenizer.from_pretrained(model_name)

# Hand the label mappings to from_pretrained so the model config carries them
# from construction onwards, instead of patching model.config afterwards.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=num_labels,
    id2label=id2label,
    label2id=label2id,
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [6]:
def tokenize_function(example):
    """Tokenize the ``text`` field of ``example`` with the notebook tokenizer.

    Every sequence is truncated to at most 128 tokens and padded up to that
    same fixed length.

    Args:
        example: Mapping with a ``"text"`` entry (a single string or, when
            used with ``map(..., batched=True)``, a batch of strings).

    Returns:
        Whatever the tokenizer call returns for that text.
    """
    encode_options = {
        "padding": "max_length",
        "truncation": True,
        "max_length": 128,
    }
    encoded = tokenizer(example["text"], **encode_options)
    return encoded

# Tokenize every split; batched=True hands tokenize_function batches of rows
# rather than one example at a time.
tokenized_dataset = dataset.map(tokenize_function, batched=True)
# Drop the raw strings now that the tokenizer outputs have been added.
tokenized_dataset = tokenized_dataset.remove_columns(["text"])
# Switch the output format so that indexing yields torch tensors.
tokenized_dataset.set_format("torch")
# Display one processed training example as a sanity check.
tokenized_dataset["train"][0]

Map:   0%|          | 0/83989 [00:00<?, ? examples/s]

Map:   0%|          | 0/10499 [00:00<?, ? examples/s]

Map:   0%|          | 0/10499 [00:00<?, ? examples/s]

{'label': tensor(0),
 'input_ids': tensor([ 101, 2471, 2288, 1999, 1037, 5016, 2482, 4926, 2006, 1996, 7886,  102,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0]),
 'token_type_ids': tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

In [7]:
# Sizes of the subsets used for training and for per-epoch evaluation
# (presumably chosen to shorten the run — the full splits are larger).
train_subset_size = 40000
val_subset_size = 8000

# NOTE(review): select(range(n)) keeps the first n rows in stored order; no
# shuffling happens here — confirm the dataset is not ordered by label.
small_train = tokenized_dataset["train"].select(range(train_subset_size))
small_val = tokenized_dataset["validation"].select(range(val_subset_size))

# Show the resulting sizes.
len(small_train), len(small_val)

(40000, 8000)

In [8]:
def compute_metrics(eval_pred):
    """Turn raw evaluation outputs into accuracy and macro-averaged F1.

    Args:
        eval_pred: Pair ``(logits, labels)``; the predicted class of each row
            is the argmax of its logits over the last axis.

    Returns:
        dict with the keys ``"accuracy"`` and ``"f1_macro"``.
    """
    scores, references = eval_pred
    predicted = np.argmax(scores, axis=-1)
    return {
        "accuracy": accuracy_score(references, predicted),
        "f1_macro": f1_score(references, predicted, average="macro"),
    }

In [9]:
# Batch size shared by training and evaluation, plus the output locations.
batch_size = 16
logging_dir = "./logs"
output_dir = "./bert_sentiment_output"

# Hyper-parameters and bookkeeping options for the fine-tuning run.
training_args = TrainingArguments(
    output_dir=output_dir,
    # Evaluate and save a checkpoint once per epoch.
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=2,
    weight_decay=0.01,
    # NOTE(review): no metric_for_best_model is given, so the library default
    # decides which checkpoint counts as "best" — presumably eval loss; confirm.
    load_best_model_at_end=True,
    logging_dir=logging_dir,
    logging_steps=50,
    # Send training logs to Weights & Biases.
    report_to=["wandb"],
)

In [10]:
# Wire the model, the training configuration, the train/eval subsets and the
# metric function together.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=small_train,
    eval_dataset=small_val,
    # `tokenizer=` is deprecated for Trainer.__init__ (it raised the warning
    # previously shown under this cell); `processing_class` is its replacement.
    processing_class=tokenizer,
    compute_metrics=compute_metrics,
)
trainer

  trainer = Trainer(


<transformers.trainer.Trainer at 0x79b106e3c740>

In [11]:
# Fine-tune the model; evaluation and checkpointing follow training_args.
train_result = trainer.train()
# A bare `train_result` in the middle of a cell is never displayed, so print
# the run's summary metrics explicitly.
print(train_result.metrics)

# Persist the model and the tokenizer side by side. training_args sets
# load_best_model_at_end=True, so the trainer should be holding the best
# checkpoint (not necessarily the last one) at this point.
trainer.save_model("./best_bert_sentiment")
tokenizer.save_pretrained("./best_bert_sentiment")

Epoch,Training Loss,Validation Loss,Accuracy,F1 Macro
1,0.3554,0.309181,0.87625,0.839421
2,0.206,0.349637,0.883875,0.844584


('./best_bert_sentiment/tokenizer_config.json',
 './best_bert_sentiment/special_tokens_map.json',
 './best_bert_sentiment/vocab.txt',
 './best_bert_sentiment/added_tokens.json',
 './best_bert_sentiment/tokenizer.json')

In [12]:
# Score the model on the complete validation split (training only evaluated
# on the small_val subset) ...
val_metrics = trainer.evaluate(eval_dataset=tokenized_dataset["validation"])
print(f"Validation metrics: {val_metrics}")

# ... and on the held-out test split.
test_metrics = trainer.evaluate(eval_dataset=tokenized_dataset["test"])
print(f"Test metrics: {test_metrics}")

Validation metrics: {'eval_loss': 0.30910947918891907, 'eval_accuracy': 0.879226593008858, 'eval_f1_macro': 0.8421492918197452, 'eval_runtime': 75.2635, 'eval_samples_per_second': 139.497, 'eval_steps_per_second': 8.729, 'epoch': 2.0}
Test metrics: {'eval_loss': 0.3101738393306732, 'eval_accuracy': 0.882560243832746, 'eval_f1_macro': 0.8465617150292654, 'eval_runtime': 74.6547, 'eval_samples_per_second': 140.634, 'eval_steps_per_second': 8.801, 'epoch': 2.0}


In [13]:
# Predict on the test split and reduce the logits to hard class ids.
predictions = trainer.predict(tokenized_dataset["test"])
pred_labels = np.argmax(predictions.predictions, axis=-1)
true_labels = predictions.label_ids

# Derive the class order from id2label and pass it explicitly: without
# `labels=` the matrix only covers classes that occur in the data, so a
# missing class would make it smaller than the three names used below.
label_ids = sorted(id2label)
label_names = [id2label[i] for i in label_ids]
cm = confusion_matrix(true_labels, pred_labels, labels=label_ids)

# Rows follow the first argument (true labels), columns the second (predictions).
cm_df = pd.DataFrame(cm, index=label_names, columns=label_names)
cm_df

Unnamed: 0,negative,neutral,positive
negative,3722,202,263
neutral,163,970,122
positive,265,218,4574


In [14]:
label_names = ["negative", "neutral", "positive"]

def predict_sentiment(text):
    """Classify ``text`` with the fine-tuned model.

    Args:
        text: The text to classify. If a batch of strings is passed, only
            the result for the first entry is returned.

    Returns:
        A ``(label, probs)`` tuple: the name of the most probable class taken
        from ``label_names``, and a NumPy array with the softmax probability
        of every class, in class-id order.
    """
    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=128,
    )
    # Send the inputs to wherever the model currently lives instead of
    # hard-coding "cuda" and re-issuing model.to("cuda") on every call.
    device = model.device
    inputs = {k: v.to(device) for k, v in inputs.items()}
    model.eval()  # inference mode: disables dropout
    with torch.no_grad():
        outputs = model(**inputs)
        probs = torch.softmax(outputs.logits, dim=-1)[0].cpu().numpy()
    pred_id = int(np.argmax(probs))
    return label_names[pred_id], probs

# Try the classifier on one sentence per expected sentiment, one result per line.
print(
    *map(
        predict_sentiment,
        (
            "This movie was absolutely amazing!",
            "It was okay, nothing special.",
            "Terrible experience, I hated it.",
        ),
    ),
    sep="\n",
)

('positive', array([0.00180659, 0.00238805, 0.99580526], dtype=float32))
('neutral', array([0.37343866, 0.56381404, 0.06274729], dtype=float32))
('negative', array([0.98805445, 0.00563746, 0.00630815], dtype=float32))


In [15]:
import shutil

# Zip the saved model directory into best_bert_sentiment.zip.
shutil.make_archive(
    base_name="best_bert_sentiment",
    format="zip",
    root_dir="best_bert_sentiment",
)

'/content/best_bert_sentiment.zip'

In [16]:
from google.colab import files
# Hand the zip archive to the Colab download helper (google.colab-only API).
files.download("best_bert_sentiment.zip")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>