In [29]:
import torch
import numpy as np
from torch.utils.data import DataLoader, RandomSampler, TensorDataset
from transformers import BertTokenizer, BertForSequenceClassification, AdamW
from transformers import get_linear_schedule_with_warmup
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from datasets import load_dataset
from tqdm.notebook import tqdm

# Function to calculate accuracy
def flat_accuracy(preds, labels):
    pred_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()
    return np.sum(pred_flat == labels_flat) / len(labels_flat)

# Load a pre-trained model and tokenizer
model_name = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=2)

# Load and preprocess the MRPC dataset
dataset = load_dataset('glue', 'mrpc')
texts = [(tokenizer(example['sentence1'], example['sentence2'], truncation=True, padding='max_length', max_length=128), example['label']) for example in dataset['train']]
input_ids = torch.tensor([t[0]['input_ids'] for t in texts])
attention_masks = torch.tensor([t[0]['attention_mask'] for t in texts])
labels = torch.tensor([t[1] for t in texts])

# Split the dataset into training and validation sets
train_inputs, validation_inputs, train_labels, validation_labels = train_test_split(input_ids, labels, random_state=42, test_size=0.1)
train_masks, validation_masks, _, _ = train_test_split(attention_masks, labels, random_state=42, test_size=0.1)

# Convert all of our data into torch tensors, the required datatype for our model
train_inputs = torch.tensor(train_inputs)
validation_inputs = torch.tensor(validation_inputs)
train_labels = torch.tensor(train_labels)
validation_labels = torch.tensor(validation_labels)
train_masks = torch.tensor(train_masks)
validation_masks = torch.tensor(validation_masks)

# Create the DataLoader for our training set
train_data = TensorDataset(train_inputs, train_masks, train_labels)
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=32)

# Evaluation before fine-tuning
model.eval()
eval_dataloader = DataLoader(TensorDataset(validation_inputs, validation_masks, validation_labels), batch_size=32)
total_eval_accuracy = 0
for batch in eval_dataloader:
    b_input_ids, b_input_mask, b_labels = batch
    with torch.no_grad():
        outputs = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)
    logits = outputs.logits
    logits = logits.detach().cpu().numpy()
    label_ids = b_labels.to('cpu').numpy()
    total_eval_accuracy += flat_accuracy(logits, label_ids)
pre_fine_tune_accuracy = total_eval_accuracy / len(eval_dataloader)
print(f'Accuracy before fine-tuning: {pre_fine_tune_accuracy:.4f}')

# Fine-tune the model
model.train()
optimizer = AdamW(model.parameters(), lr=0.0001, eps=1e-8)
epochs = 3  # Can be adjusted
total_steps = len(train_dataloader) * epochs
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)
for epoch in tqdm(range(epochs), desc='Epoch'):
    model.train()
    total_loss = 0
    for batch in train_dataloader:
        b_input_ids, b_input_mask, b_labels = batch
        model.zero_grad()
        outputs = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask, labels=b_labels)
        loss = outputs.loss
        total_loss += loss.item()
        loss.backward()
        optimizer.step()
        scheduler.step()

# Evaluation after fine-tuning
model.eval()
total_eval_accuracy = 0
for batch in eval_dataloader:
    b_input_ids, b_input_mask, b_labels = batch
    with torch.no_grad():
        outputs = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)
    logits = outputs.logits
    logits = logits.detach().cpu().numpy()
    label_ids = b_labels.to('cpu').numpy()
    total_eval_accuracy += flat_accuracy(logits, label_ids)
post_fine_tune_accuracy = total_eval_accuracy / len(eval_dataloader)
print(f'Accuracy after fine-tuning: {post_fine_tune_accuracy:.4f}')


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  train_inputs = torch.tensor(train_inputs)
  validation_inputs = torch.tensor(validation_inputs)
  train_labels = torch.tensor(train_labels)
  validation_labels = torch.tensor(validation_labels)
  train_masks = torch.tensor(train_masks)
  validation_masks = torch.tensor(validation_masks)


Accuracy before fine-tuning: 0.6493




Epoch:   0%|          | 0/3 [00:00<?, ?it/s]

KeyboardInterrupt: 