In [1]:
# Importing all required Libraries
import torch
from datasets import load_dataset
from transformers import (
    BertTokenizer,
    BertForSequenceClassification,
    DataCollatorWithPadding,
    Trainer,
    TrainingArguments
)
from sklearn.metrics import accuracy_score, f1_score
from datasets import Dataset, DatasetDict
from torch.nn import CrossEntropyLoss
from collections import Counter

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the "imdb" dataset; its "train" and "test" splits are sampled in the next cell

dataset = load_dataset("imdb")

In [3]:
# Build small class-balanced subsets: 500 + 500 training reviews and
# 250 + 250 validation reviews, each taken from a seeded shuffle of its split.
def first_n_of_class(split, label, n):
    """Return the first ``n`` examples of ``split`` whose "label" equals ``label``.

    Args:
        split: Dataset with a "label" column, supporting ``filter`` and ``select``.
        label: Class id to keep.
        n: Number of matching examples to take, in the split's current order.

    Returns:
        The selected subset of ``split``.
    """
    return split.filter(lambda x: x["label"] == label).select(range(n))


def stack_datasets(first, second):
    """Return a new Dataset with the rows of ``first`` followed by those of ``second``.

    Columns are taken from ``first.features``; ``second`` must provide the same
    columns. Each column is materialised as a list so ``+`` is plain list
    concatenation.
    """
    return Dataset.from_dict(
        {k: list(first[k]) + list(second[k]) for k in first.features}
    )


train_data = dataset["train"].shuffle(seed=42)
positive_train = first_n_of_class(train_data, 1, 500)
negative_train = first_n_of_class(train_data, 0, 500)
# Rows are ordered positives first, then negatives.
balanced_train = stack_datasets(positive_train, negative_train)


test_data = dataset["test"].shuffle(seed=42)
positive_test = first_n_of_class(test_data, 1, 250)
negative_test = first_n_of_class(test_data, 0, 250)
balanced_test = stack_datasets(positive_test, negative_test)

Filter: 100%|██████████| 25000/25000 [00:00<00:00, 108694.97 examples/s]
Filter: 100%|██████████| 25000/25000 [00:00<00:00, 125849.10 examples/s]


In [5]:

# Bundle the balanced splits and rename the "label" column to "labels".
# NOTE: this rebinds `dataset`, replacing the object returned by load_dataset.
balanced_splits = {"train": balanced_train, "test": balanced_test}
dataset = DatasetDict(balanced_splits).rename_column("label", "labels")

# Report how many examples of each class ended up in each split
for heading, split_name in (("Training labels:", "train"), ("Validation labels:", "test")):
    print(heading, Counter(dataset[split_name]["labels"]))

Training labels: Counter({1: 500, 0: 500})
Validation labels: Counter({1: 250, 0: 250})


In [6]:
# Tokenizer matching the "bert-base-uncased" checkpoint
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")


def tokenize_function(example):
    """Tokenize the "text" field of ``example`` with truncation enabled.

    Called through ``dataset.map(..., batched=True)`` below, so ``example``
    is a batch of rows rather than a single row.
    """
    texts = example["text"]
    return tokenizer(texts, truncation=True)


tokenized_datasets = dataset.map(tokenize_function, batched=True)

# Expose only these columns, formatted as torch tensors
model_input_columns = ["input_ids", "attention_mask", "token_type_ids", "labels"]
tokenized_datasets.set_format("torch", columns=model_input_columns)

Map: 100%|██████████| 1000/1000 [00:06<00:00, 142.97 examples/s]
Map: 100%|██████████| 500/500 [00:02<00:00, 188.14 examples/s]


In [7]:
# Load the pretrained "bert-base-uncased" checkpoint with a 2-label sequence-classification head
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [8]:
# Batch collator (padding variant) built from the tokenizer; passed to the trainer below
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

In [9]:
# Metric callback: accuracy and F1 of the arg-max predictions
def compute_metrics(eval_pred):
    """Score a ``(logits, labels)`` pair.

    Args:
        eval_pred: Two-item sequence of logits and reference labels.

    Returns:
        dict with keys "accuracy" and "f1".
    """
    logits, labels = eval_pred
    # Predicted class = index of the largest logit along the last axis
    predicted_classes = torch.tensor(logits).argmax(dim=-1).numpy()
    if isinstance(labels, torch.Tensor):
        labels = labels.numpy()
    scores = {}
    scores["accuracy"] = accuracy_score(labels, predicted_classes)
    scores["f1"] = f1_score(labels, predicted_classes, zero_division=0)
    return scores

In [10]:
# Training configuration: evaluate and checkpoint once per epoch, then reload the
# checkpoint with the highest validation accuracy when training ends.
training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",
    save_strategy="epoch",  # kept equal to eval_strategy so the best checkpoint can be reloaded
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    learning_rate=2e-5,
    warmup_steps=100,
    logging_dir="./logs",
    logging_steps=10,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",  # the "accuracy" key returned by compute_metrics
    greater_is_better=True,
    use_cpu=True,  # force CPU training; replaces the deprecated `no_cuda=True`
)



In [12]:
# Trainer subclass that computes a class-weighted cross-entropy loss
class CustomTrainer(Trainer):
    """Trainer whose loss is a class-weighted cross-entropy over the model logits.

    Args:
        *args: Positional arguments forwarded unchanged to ``Trainer``.
        class_weights: Per-class loss weights, one value per label. The default
            ``(1.0, 1.0)`` weights both classes equally; pass other values for an
            imbalanced dataset, or ``None`` for an unweighted loss.
        **kwargs: Keyword arguments forwarded unchanged to ``Trainer``.
    """

    def __init__(self, *args, class_weights=(1.0, 1.0), **kwargs):
        super().__init__(*args, **kwargs)
        self.class_weights = class_weights

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Run a forward pass and return the weighted cross-entropy loss.

        Args:
            model: Model to call with ``**inputs``.
            inputs: Batch mapping; must contain "labels".
            return_outputs: If True, return ``(loss, outputs)`` instead of ``loss``.
            num_items_in_batch: Accepted for signature compatibility; unused.

        Returns:
            The loss tensor, or ``(loss, outputs)`` when ``return_outputs`` is True.
        """
        labels = inputs.get("labels")
        outputs = model(**inputs)
        logits = outputs.get("logits")
        weight = None
        if self.class_weights is not None:
            # Follow the logits' device and dtype so the weights always match
            # the tensor they are applied to.
            weight = torch.as_tensor(
                self.class_weights, dtype=logits.dtype, device=logits.device
            )
        loss_fct = CrossEntropyLoss(weight=weight)
        loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))
        return (loss, outputs) if return_outputs else loss

In [14]:
# Wire the model, training arguments, data splits, collator and metric callback into the trainer
trainer = CustomTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"].shuffle(seed=42).select(range(1000)),
    eval_dataset=tokenized_datasets["test"].select(range(500)),
    processing_class=tokenizer,  # `tokenizer=` is deprecated in favour of `processing_class=`
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

  trainer = CustomTrainer(


In [15]:
# Run fine-tuning; the per-epoch metrics table and the final TrainOutput appear below
trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.3746,0.35179,0.87,0.877127
2,0.3345,0.31416,0.898,0.901734
3,0.1204,0.349927,0.904,0.90625


TrainOutput(global_step=375, training_loss=0.3316861193180084, metrics={'train_runtime': 7801.3883, 'train_samples_per_second': 0.385, 'train_steps_per_second': 0.048, 'total_flos': 733734511194240.0, 'train_loss': 0.3316861193180084, 'epoch': 3.0})

In [16]:
# Write the fine-tuned model and the tokenizer files into one directory
save_directory = "./sentiment-model"
model.save_pretrained(save_directory)
tokenizer.save_pretrained(save_directory)

('./sentiment-model\\tokenizer_config.json',
 './sentiment-model\\special_tokens_map.json',
 './sentiment-model\\vocab.txt',
 './sentiment-model\\added_tokens.json')

In [17]:
# Smoke-test the saved model on two hand-written reviews, then check how the
# trainer's predictions are distributed over the validation subset.

from transformers import pipeline
sentiment_pipeline = pipeline("sentiment-analysis", model="./sentiment-model", tokenizer="./sentiment-model")

print(sentiment_pipeline("This movie was absolutely amazing!"))
print(sentiment_pipeline("The plot was dull and the acting was terrible."))

# Check prediction distribution
eval_results = trainer.predict(tokenized_datasets["test"].select(range(500)))
predictions = torch.argmax(torch.tensor(eval_results.predictions), dim=-1).numpy()
# .tolist() turns NumPy scalars into plain ints so the Counter keys print as 0 / 1
# instead of np.int64(0) / np.int64(1).
print("Prediction distribution:", Counter(predictions.tolist()))

Device set to use cpu


[{'label': 'LABEL_1', 'score': 0.9899222254753113}]
[{'label': 'LABEL_0', 'score': 0.9911323189735413}]


Prediction distribution: Counter({np.int64(1): 262, np.int64(0): 238})


In [None]:
# In the pipeline output above, LABEL_1 means the review is predicted to be positive and LABEL_0 means it is predicted to be negative.
# The score is the model's confidence in the predicted label.