In [1]:
import pandas as pd

import torch.nn as nn
from transformers import BertTokenizer, BertModel
from datasets import load_dataset

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import os
import sys

# Directory containing the local `transfer_learning` package. The location can
# be overridden with the SATURN_SRC environment variable so the notebook is not
# tied to one machine; when the variable is unset the original path is used.
project_root = os.path.abspath(
    os.environ.get('SATURN_SRC', '/Users/subhojit/workspace/saturn/src')
)
if project_root not in sys.path:
    sys.path.append(project_root)

# NOTE(review): the star import hides which names this notebook relies on
# (the cells below use at least `FrozenBERTClassifier`). Consider importing the
# needed names explicitly once the full list is confirmed.
from transfer_learning.bert_plus import *

In [3]:
# Load the 'imdb' dataset and keep separate handles to its train/test splits.
dataset = load_dataset('imdb')
train_dataset, test_dataset = dataset['train'], dataset['test']


In [4]:
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")


def tokenize(x):
    """Run the BERT tokenizer over the ``"text"`` field of ``x``.

    The tokenizer is called with ``padding="max_length"``, ``truncation=True``
    and ``max_length=512``, so every example is requested at a fixed length of
    512 tokens.

    Args:
        x: Mapping with a ``"text"`` entry (a single example or a batch, as
            passed by ``Dataset.map``).

    Returns:
        Whatever the tokenizer call returns for that text.
    """
    return tokenizer(x["text"], padding="max_length", truncation=True, max_length=512)
import torch.nn as nn


In [5]:
# Tokenize both splits with `tokenize`, processing examples in batches.
train_tokenized, test_tokenized = (
    train_dataset.map(tokenize, batched=True),
    test_dataset.map(tokenize, batched=True),
)

In [6]:
# Restrict both tokenized splits to the model inputs and the label, using the
# 'torch' output format.
torch_columns = ('input_ids', 'attention_mask', 'label')
for tokenized_split in (train_tokenized, test_tokenized):
    # A fresh list per call so the two splits never share one column list.
    tokenized_split.set_format(type='torch', columns=list(torch_columns))

In [7]:
from torch.utils.data import DataLoader
# Both loaders use the same batch size; only the training loader shuffles.
batch_size = 64
train_loader = DataLoader(dataset=train_tokenized, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_tokenized, batch_size=batch_size, shuffle=False)


In [8]:
# Peek at a single training batch to see which fields the loader yields.
batch = next(iter(train_loader), None)
if batch is not None:
    print(batch.keys())

dict_keys(['label', 'input_ids', 'attention_mask'])


In [9]:
# --- Model dimensions ---
# NOTE(review): these four values are not referenced by the cells visible in
# this notebook; `seq_len` also differs from the tokenizer's max_length of 512.
# Confirm whether they are still needed.
embedding_dim = 32
hidden_size = 64
output_size = 2
seq_len = 10

# --- Optimisation settings ---
learning_rate = 0.001   # passed to AdamW as `lr`
max_iter = 5_000
eval_interval = 500

import torch
# Pick the compute device by priority: MPS first, then CUDA, otherwise CPU.
device = (
    "mps" if torch.backends.mps.is_available()
    else "cuda" if torch.cuda.is_available()
    else "cpu"
)

In [10]:
# 1-batch overfit: fit the classifier head on one fixed batch for 100 steps
# and print the loss after every step.
batch = next(iter(train_loader))
model = FrozenBERTClassifier().to(device)
# Only the parameters of `model.classifier` are handed to the optimizer.
optimizer = torch.optim.AdamW(model.classifier.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

# The batch never changes, so transfer its tensors to the device once instead
# of repeating the host-to-device copy on every iteration.
input_ids = batch['input_ids'].to(device)
attention_mask = batch['attention_mask'].to(device)
labels = batch['label'].to(device)

# Training mode is loop-invariant as well; set it once up front.
model.train()
for step in range(100):
    logits = model(input_ids, attention_mask)
    loss = criterion(logits, labels)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    print(loss.item())


KeyboardInterrupt: 

In [14]:
# Start from a fresh model and make one pass over the training loader; the
# optimizer only receives the parameters of `model.classifier`.
model = FrozenBERTClassifier().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.classifier.parameters(), lr=learning_rate)

model.train()
step = 0
for batch in train_loader:
    # Move the three tensors this step needs onto the compute device.
    input_ids, attention_mask, labels = (
        batch[key].to(device) for key in ('input_ids', 'attention_mask', 'label')
    )

    logits = model(input_ids, attention_mask)
    loss = criterion(logits, labels)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Report the loss on every 100th step, starting with step 0.
    if not step % 100:
        print(f"Step {step} Loss: {loss.item():.4f}")
    step += 1

Step 0 Loss: 0.6896
Step 100 Loss: 0.3358
Step 200 Loss: 0.2901
Step 300 Loss: 0.3526


In [17]:
from sklearn.metrics import accuracy_score

@torch.no_grad()
def compute_accuracy(model, dataloader, device=None):
    """Return the classification accuracy of ``model`` over ``dataloader``.

    The model is switched to eval mode (and left in it), run on every batch
    with gradients disabled, and the arg-max over the last logits dimension is
    compared with the batch labels via ``accuracy_score``.

    Args:
        model: Module called as ``model(input_ids, attention_mask)`` and
            returning logits.
        dataloader: Iterable of batches exposing ``'input_ids'``,
            ``'attention_mask'`` and ``'label'`` tensors.
        device: Device the inputs are moved to. When ``None`` (the default) it
            is taken from the model's first parameter, so the function no
            longer depends on a notebook-level ``device`` variable; a model
            without parameters falls back to the CPU.

    Returns:
        The accuracy as computed by ``accuracy_score(labels, predictions)``.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    all_predictions = []
    all_labels = []

    for batch in dataloader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        logits = model(input_ids, attention_mask)
        predictions = torch.argmax(logits, dim=-1)
        all_predictions.extend(predictions.cpu().tolist())
        # Labels are only compared on the host, so skip the round trip
        # through the accelerator.
        all_labels.extend(batch['label'].cpu().tolist())
    return accuracy_score(all_labels, all_predictions)

compute_accuracy(model, test_loader)

KeyboardInterrupt: 