In [1]:
!pip install torch transformers datasets tqdm --quiet

Collecting datasets
  Downloading datasets-2.21.0-py3-none-any.whl.metadata (21 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch)
  Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cu

In [3]:
import torch
from torch.utils.data import DataLoader
from transformers import BertForSequenceClassification, BertTokenizer, AdamW, get_linear_schedule_with_warmup
from datasets import load_dataset
from tqdm import tqdm
import numpy as np

# Fix the torch RNG seed so repeated runs are reproducible
SEED = 42
torch.manual_seed(SEED)

# Fetch the IMDb dataset and pull out its train / test splits
dataset = load_dataset("imdb")
train_dataset, test_dataset = dataset["train"], dataset["test"]

# Build the tokenizer and a 2-label classification model from the same checkpoint
MODEL_NAME = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)
model = BertForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=2)

# Tokenize and prepare the dataset
def tokenize_function(examples):
    """Tokenize the "text" field of a batch of examples.

    Every sequence is truncated and padded to exactly 512 tokens, so all
    encoded rows have the same length.

    Args:
        examples: Mapping with a "text" entry holding the raw review text(s).

    Returns:
        Whatever the module-level ``tokenizer`` returns for those texts.
    """
    texts = examples["text"]
    encoded = tokenizer(
        texts,
        truncation=True,
        max_length=512,
        padding="max_length",
    )
    return encoded

# Tokenize both splits; the raw columns are dropped so only the tokenizer
# outputs remain in the mapped datasets.
tokenized_train = train_dataset.map(tokenize_function, batched=True, remove_columns=train_dataset.column_names)
tokenized_test = test_dataset.map(tokenize_function, batched=True, remove_columns=test_dataset.column_names)

# Re-attach the targets under the name "labels": each batch dict is later
# unpacked straight into model(**batch), so the column name must match.
tokenized_train = tokenized_train.add_column("labels", train_dataset["label"])
tokenized_test = tokenized_test.add_column("labels", test_dataset["label"])

# Set format for PyTorch so indexing yields tensors
tokenized_train.set_format("torch")
tokenized_test.set_format("torch")

# Create DataLoaders (only the training split is shuffled)
train_dataloader = DataLoader(tokenized_train, shuffle=True, batch_size=8)
eval_dataloader = DataLoader(tokenized_test, batch_size=8)

# Move the model to the target device before building the optimizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Set up optimizer and learning rate scheduler.
# torch.optim.AdamW replaces the deprecated transformers.AdamW. eps and
# weight_decay are spelled out to keep the deprecated class's defaults
# (1e-6 and 0.0) instead of silently switching to PyTorch's (1e-8 and 0.01).
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5, eps=1e-6, weight_decay=0.0)
num_epochs = 3
num_training_steps = num_epochs * len(train_dataloader)  # one optimizer step per batch
num_warmup_steps = int(0.1 * num_training_steps)  # 10% warmup

# To compare against a constant learning rate, comment out the scheduler below
# together with the scheduler.step() call in the training loop.
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=num_warmup_steps,
    num_training_steps=num_training_steps
)

def compute_metrics(pred_labels, true_labels):
    """Return the fraction of predictions that equal their target.

    Args:
        pred_labels: Tensor of predicted class ids.
        true_labels: Tensor of reference class ids, same shape as ``pred_labels``.

    Returns:
        Accuracy as a NumPy float: the mean of the element-wise equality mask.
    """
    # Bring both tensors to host memory and compare element-wise
    matches = pred_labels.cpu().numpy() == true_labels.cpu().numpy()
    return matches.mean()

# Fine-tune for num_epochs, evaluating on the test split after every epoch.
# Loss and accuracy are accumulated per *sample* (batch value weighted by the
# batch size) and divided by the number of samples seen. Averaging the
# per-batch values over the number of batches would over-weight a final batch
# that is smaller than batch_size.
for epoch in range(num_epochs):
    # ---- Training ----
    model.train()
    train_loss_sum = 0.0
    train_correct = 0.0
    train_seen = 0
    for batch in tqdm(train_dataloader, desc=f"Epoch {epoch + 1}"):
        batch = {k: v.to(device) for k, v in batch.items()}
        batch_size = batch["labels"].size(0)

        outputs = model(**batch)
        loss = outputs.loss
        # loss is treated as the mean over the batch, so scale it back to a sum
        train_loss_sum += loss.item() * batch_size

        pred_labels = outputs.logits.argmax(dim=-1)
        train_correct += compute_metrics(pred_labels, batch["labels"]) * batch_size
        train_seen += batch_size

        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)  # Gradient clipping

        optimizer.step()
        scheduler.step()  # the schedule advances once per optimizer step
        optimizer.zero_grad()

    avg_train_loss = train_loss_sum / train_seen
    avg_train_accuracy = train_correct / train_seen
    print(f"Average training loss: {avg_train_loss:.4f}")
    print(f"Average training accuracy: {avg_train_accuracy:.4f}")

    # ---- Evaluation ----
    model.eval()
    eval_loss_sum = 0.0
    eval_correct = 0.0
    eval_seen = 0
    with torch.no_grad():
        for batch in tqdm(eval_dataloader, desc="Evaluating"):
            batch = {k: v.to(device) for k, v in batch.items()}
            batch_size = batch["labels"].size(0)

            outputs = model(**batch)

            eval_loss_sum += outputs.loss.item() * batch_size
            pred_labels = outputs.logits.argmax(dim=-1)
            eval_correct += compute_metrics(pred_labels, batch["labels"]) * batch_size
            eval_seen += batch_size

    avg_eval_accuracy = eval_correct / eval_seen
    avg_eval_loss = eval_loss_sum / eval_seen

    print(f"Epoch {epoch + 1}: Eval Accuracy: {avg_eval_accuracy:.4f}, Eval Loss: {avg_eval_loss:.4f}")

print("Training completed!")

# Save only the weights (state_dict); reloading requires rebuilding the model first
torch.save(model.state_dict(), 'bert_sentiment_model.pth')
print("Model saved!")

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/25000 [00:00<?, ? examples/s]

Map:   0%|          | 0/25000 [00:00<?, ? examples/s]

Epoch 1: 100%|██████████| 3125/3125 [41:40<00:00,  1.25it/s]


Average training loss: 0.3301
Average training accuracy: 0.8823


Evaluating: 100%|██████████| 3125/3125 [12:50<00:00,  4.06it/s]


Epoch 1: Eval Accuracy: 0.9116, Eval Loss: 0.3276


Epoch 2: 100%|██████████| 3125/3125 [41:36<00:00,  1.25it/s]


Average training loss: 0.1792
Average training accuracy: 0.9556


Evaluating: 100%|██████████| 3125/3125 [12:48<00:00,  4.07it/s]


Epoch 2: Eval Accuracy: 0.9393, Eval Loss: 0.2067


Epoch 3: 100%|██████████| 3125/3125 [41:37<00:00,  1.25it/s]


Average training loss: 0.0812
Average training accuracy: 0.9834


Evaluating: 100%|██████████| 3125/3125 [12:48<00:00,  4.07it/s]


Epoch 3: Eval Accuracy: 0.9398, Eval Loss: 0.3190
Training completed!
Model saved!
