In [1]:
!pip install datasets
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting datasets
  Downloading datasets-2.11.0-py3-none-any.whl (468 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m468.7/468.7 kB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting responses<0.19
  Downloading responses-0.18.0-py3-none-any.whl (38 kB)
Collecting xxhash
  Downloading xxhash-3.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (212 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m212.2/212.2 kB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
Collecting multiprocess
  Downloading multiprocess-0.70.14-py39-none-any.whl (132 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m132.9/132.9 kB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
Collecting aiohttp
  Downloading aiohttp-3.8.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m

In [2]:
from datasets import load_dataset
from transformers import AutoTokenizer
from torch.utils.data import DataLoader


def tokenize_function(examples):
    max_len = 0
    for sent in examples["text"]:
      max_len = max(max_len, len(sent))
    return tokenizer(examples["text"], padding=True, max_length=max_len, truncation=True)


def dataloader(name, num_train, num_test, batch_size):
    # Download dataset
    datasets = load_dataset(name, cache_dir="./dataset")
    # Tokenizing dataset
    tokenized_datasets = datasets.map(tokenize_function, batched=True)
    #tokenized_datasets = tokenized_datasets.map(map_to_zero_label)
    tokenized_datasets = tokenized_datasets.remove_columns(["text"])
    tokenized_datasets = tokenized_datasets.rename_column("label", "labels")
    tokenized_datasets.set_format("torch")
    # print("train_datasets: \n", tokenized_datasets)
    train_datasets = tokenized_datasets["train"].select(range(num_train))
    test_datasets = tokenized_datasets["test"].select(range(num_test))

    # print("train_datasets: \n", train_datasets)
    # print(train_datasets["input_ids"].shape)

    # Here we shuffle our train dataloader
    train_dataloader = DataLoader(train_datasets, shuffle=True, batch_size=batch_size)
    test_dataloader = DataLoader(test_datasets, batch_size=batch_size)

    return train_dataloader, test_dataloader


In [4]:
# IMPORT function and class from our own files

from transformers import AutoModelForSequenceClassification
import torch
# from torch.distributed.pipeline.sync import Pipe
# from torch.distributed import rpc
import argparse

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def calcuate_accuracy(preds, labels):
  idx_max = torch.argmax(preds, dim=-1)
  n_correct = (idx_max==labels).sum().item()
  return n_correct


def train(model, train_loader, optimizer, scheduler):
    model.train()
    total_loss = 0
    num_correct = 0
    num_total = 0
    for batch in train_loader:
        labels = batch['labels'].to(device)
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        
        outputs = model(input_ids, attention_mask).logits
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()

        num_correct += calcuate_accuracy(outputs, labels)
        total_loss += loss.item()
        num_total += labels.size(0)
        
    avg_train_loss = total_loss / num_total
    avg_train_acc = num_correct / num_total
    return avg_train_loss, avg_train_acc, outputs


def evaluate(model, test_loader):
    model.eval()
    total_loss = 0
    total_correct = 0
    total_samples = 0
    num_total = 0
    with torch.no_grad():
        for batch in test_loader:
            
            labels = batch['labels'].to(device)
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)

            output = model(input_ids, attention_mask).logits
            loss = criterion(output, labels)

            total_loss += loss.item()
            total_correct += calcuate_accuracy(output, labels)
            total_samples += labels.size(0)
            num_total += labels.size(0)

    average_loss = total_loss / num_total
    accuracy = total_correct / num_total

    return average_loss, accuracy


parser = argparse.ArgumentParser()
parser.add_argument('--pretrained_model_name', type=str, default='bert-base-uncased', help='Name of the pre-trained BERT model')
parser.add_argument('--epochs', type=int, default=3, help='Number of training epochs')
parser.add_argument('--num_classes', type=int, default=4, help='Number of classes')
parser.add_argument('--lr', type=int, default=5e-5, help='Learning Rate')
#opts = parser.parse_args()
args = parser.parse_args(args=[])

tokenizer = AutoTokenizer.from_pretrained(args.pretrained_model_name)

train_loader, test_loader = dataloader(name="ag_news", num_train=1000, num_test=1000, batch_size=8)
model = AutoModelForSequenceClassification.from_pretrained(args.pretrained_model_name, num_labels=args.num_classes)
criterion = torch.nn.CrossEntropyLoss()

criterion = criterion.to(device)
model.to(device)

optimizer = torch.optim.SGD(model.parameters(), lr=args.lr)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.95)

for epoch in range(args.epochs):
    avg_train_loss, avg_train_acc, aaa = train(model, train_loader, optimizer, scheduler)
    avg_test_loss, avg_test_acc = evaluate(model, test_loader)
    print("Epoch: ", epoch)
    print(f'\tTrain Loss: {avg_train_loss:.5f} | Train Acc: {avg_train_acc:.2f}%')
    print(f'\tTest. Loss: {avg_test_loss:.5f} |  Test Acc: {avg_test_acc:.2f}%')

print(aaa)



  0%|          | 0/2 [00:00<?, ?it/s]



Map:   0%|          | 0/7600 [00:00<?, ? examples/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

RuntimeError: ignored