In [None]:
import os
import torch
import pandas as pd
from sklearn.model_selection import train_test_split
from transformers import (
    BertTokenizer,
    BertForSequenceClassification,
    Trainer,
    TrainingArguments,
    TrainerCallback
)
from datasets import Dataset
import wandb

In [None]:
!pip install datasets

Collecting datasets
  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.1.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m14.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m12.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl 

In [None]:
os.environ["WANDB_DISABLED"] = "false"
wandb.init(project="bert-sentiment-analysis")

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [None]:
# Load dataset
train_df = pd.read_csv('IMDB Dataset.csv')
train_df['sentiment'] = train_df['sentiment'].apply(lambda x: 1 if x == 'positive' else 0)

In [None]:
train_texts, val_texts, train_labels, val_labels = train_test_split(
    train_df['review'].tolist(),
    train_df['sentiment'].tolist(),
    test_size=0.2,
    random_state=42
)

In [None]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]



In [None]:
def tokenize(batch):
    return tokenizer(batch['text'], padding=True, truncation=True, max_length=512)

In [None]:
train_data = Dataset.from_dict({'text': train_texts, 'label': train_labels})
val_data = Dataset.from_dict({'text': val_texts, 'label': val_labels})

In [12]:
train_data = train_data.map(tokenize, batched=True, batch_size=len(train_data))
val_data = val_data.map(tokenize, batched=True, batch_size=len(val_data))

Map:   0%|          | 0/40000 [00:00<?, ? examples/s]

Map:   0%|          | 0/10000 [00:00<?, ? examples/s]

In [13]:
training_args = TrainingArguments(
    output_dir='./results',  # Directory to save model checkpoints
    evaluation_strategy="epoch",  # Evaluate after every epoch
    save_strategy="epoch",  # Save checkpoints at the end of each epoch
    learning_rate=2e-5,  # Reduced learning rate for BERT fine-tuning
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,  # Adjust epochs to prevent overfitting
    weight_decay=1e-4,
    logging_dir='./logs',  # Directory for logs
    logging_steps=10,
    save_total_limit=1,  # Only keep the latest checkpoint
    load_best_model_at_end=True,  # Restore best model based on validation
    report_to="wandb"  # Enable wandb logging
)



In [15]:
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)
model.to(device)

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [16]:
for param in model.bert.encoder.layer[-2:].parameters():
    param.requires_grad = True

In [17]:
for param in model.bert.encoder.layer[:-2].parameters():
    param.requires_grad = False

In [18]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_data,
    eval_dataset=val_data,
)

In [19]:
trainer.train()



Epoch,Training Loss,Validation Loss
1,0.2721,0.227991
2,0.172,0.200357
3,0.1225,0.213096


TrainOutput(global_step=7500, training_loss=0.19578389985958736, metrics={'train_runtime': 10094.4319, 'train_samples_per_second': 11.888, 'train_steps_per_second': 0.743, 'total_flos': 3.15733266432e+16, 'train_loss': 0.19578389985958736, 'epoch': 3.0})

In [24]:
for key, value in results.items():
    print(f"{key}: {value:.4f}")

eval_loss: 0.2004
eval_runtime: 313.4228
eval_samples_per_second: 31.9060
eval_steps_per_second: 1.9940
epoch: 3.0000


In [25]:
wandb.finish()

0,1
eval/loss,█▁▄▁
eval/runtime,▄▁▃█
eval/samples_per_second,▅█▆▁
eval/steps_per_second,▅█▆▁
train/epoch,▁▁▁▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇██
train/global_step,▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇█
train/grad_norm,▂▂▄▃▄▃█▃▄▃▃▁▂▃▃▁▃▃▃▂▅▅▃▄▂▃▄▁▃▃▃▃▃▃▄▃▄▁▄▁
train/learning_rate,████▇▇▇▇▇▆▆▆▆▅▅▅▅▅▅▅▅▄▄▄▄▄▃▃▃▃▂▂▂▂▂▁▁▁▁▁
train/loss,▇█▆▇█▅▅▅▇▅▅▇▆▅▄▅▃█▄▅▄▅▅▆▄▄▅▃▁▅▆▁▄▃▅▅▅█▇▅

0,1
eval/loss,0.20036
eval/runtime,313.4228
eval/samples_per_second,31.906
eval/steps_per_second,1.994
total_flos,3.15733266432e+16
train/epoch,3.0
train/global_step,7500.0
train/grad_norm,3.99228
train/learning_rate,0.0
train/loss,0.1225


In [26]:
# I will use additional metrics
from sklearn.metrics import precision_recall_fscore_support


In [27]:
def compute_metrics(p):
    predictions, labels = p
    preds = predictions.argmax(axis=-1)  # Convert logits to predicted class

    # Calculate precision, recall, and F1 score
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='binary')

    # Calculate accuracy
    accuracy = (preds == labels).mean()

    return {
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1": f1
    }


In [28]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_data,
    eval_dataset=val_data,
    compute_metrics=compute_metrics  # Pass the custom metric function
)


In [29]:
# Evaluate the model
results = trainer.evaluate()

# Print the evaluation results
print(f"Validation Accuracy: {results['eval_accuracy'] * 100:.2f}%")
print(f"Validation Precision: {results['eval_precision'] * 100:.2f}%")
print(f"Validation Recall: {results['eval_recall'] * 100:.2f}%")
print(f"Validation F1 Score: {results['eval_f1'] * 100:.2f}%")

[34m[1mwandb[0m: Currently logged in as: [33mkaranlvm123[0m ([33mkaranlvm123-ut-arlington-uta-the-university-of-texas-at-[0m). Use [1m`wandb login --relogin`[0m to force relogin


Validation Accuracy: 92.99%
Validation Precision: 91.42%
Validation Recall: 95.00%
Validation F1 Score: 93.18%
