In [1]:
from transformers import BertTokenizer, BertForSequenceClassification
from torch.utils.data import DataLoader, TensorDataset
import torch
import torch.nn as nn
import torch.optim as optim

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score, accuracy_score

from utils import *

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

def tokenize_data(content_list):
    input_ids = []
    attention_masks = []
    for content in content_list:
        content_str = ' '.join(content)  # Join the tokenized content
        encoded = tokenizer(content_str, truncation=True, padding='max_length', max_length=512)
        input_ids.append(encoded['input_ids'])
        attention_masks.append(encoded['attention_mask'])
    return torch.tensor(input_ids), torch.tensor(attention_masks)

train_data = read_train_data()
train_inputs, train_masks = tokenize_data(train_data['content'].values)
train_labels = torch.tensor(train_data['label'].values)



In [3]:
# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

for fold, (train_index, val_index) in enumerate(skf.split(train_inputs, train_labels)):
    print(f"Fold {fold + 1}")

    # Split data into training and validation sets
    train_inputs_fold = train_inputs[train_index].to(device)
    train_masks_fold = train_masks[train_index].to(device)
    train_labels_fold = train_labels[train_index].to(device)

    val_inputs_fold = train_inputs[val_index].to(device)
    val_masks_fold = train_masks[val_index].to(device)
    val_labels_fold = train_labels[val_index].to(device)

    # Create Data Loaders
    train_data_fold = TensorDataset(train_inputs_fold, train_masks_fold, train_labels_fold)
    train_loader_fold = DataLoader(train_data_fold, batch_size=32, shuffle=True)

    val_data_fold = TensorDataset(val_inputs_fold, val_masks_fold, val_labels_fold)
    val_loader_fold = DataLoader(val_data_fold, batch_size=32, shuffle=False)

    # Define the Model
    model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)
    if torch.cuda.device_count() > 1:
        print(f"Using {torch.cuda.device_count()} GPUs")
        model = nn.DataParallel(model)
    model.to(device)


    # Train the Model
    optimizer = optim.AdamW(model.parameters(), lr=2e-5)
    loss_function = nn.CrossEntropyLoss()

    for epoch in range(10):
        model.train()
        for batch in train_loader_fold:
            inputs, masks, labels = batch
            inputs, masks, labels = inputs.to(device), masks.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs, attention_mask=masks, labels=labels)
            loss = outputs.loss
            loss.backward()
            optimizer.step()
        
        print(f'Epoch: {epoch + 1}, Loss: {loss}')

    # Evaluate the Model on Validation Set
    model.eval()
    total_correct = 0
    for batch in val_loader_fold:
        inputs, masks, labels = batch
        inputs, masks, labels = inputs.to(device), masks.to(device), labels.to(device)
        with torch.no_grad():
            outputs = model(inputs, attention_mask=masks)
            predictions = torch.argmax(outputs.logits, dim=1)
            total_correct += (predictions == labels).sum().item()

    accuracy = total_correct / len(val_data_fold)
    print(f'Accuracy: {accuracy}')

    # Save the model, log results, etc.


Fold 1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Using 2 GPUs


OutOfMemoryError: Caught OutOfMemoryError in replica 0 on device 0.
Original Traceback (most recent call last):
  File "/home/user/miniconda3/envs/suzhao/lib/python3.10/site-packages/torch/nn/parallel/parallel_apply.py", line 64, in _worker
    output = module(*input, **kwargs)
  File "/home/user/miniconda3/envs/suzhao/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/user/miniconda3/envs/suzhao/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py", line 1562, in forward
    outputs = self.bert(
  File "/home/user/miniconda3/envs/suzhao/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/user/miniconda3/envs/suzhao/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py", line 1022, in forward
    encoder_outputs = self.encoder(
  File "/home/user/miniconda3/envs/suzhao/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/user/miniconda3/envs/suzhao/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py", line 612, in forward
    layer_outputs = layer_module(
  File "/home/user/miniconda3/envs/suzhao/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/user/miniconda3/envs/suzhao/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py", line 497, in forward
    self_attention_outputs = self.attention(
  File "/home/user/miniconda3/envs/suzhao/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/user/miniconda3/envs/suzhao/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py", line 427, in forward
    self_outputs = self.self(
  File "/home/user/miniconda3/envs/suzhao/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/user/miniconda3/envs/suzhao/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py", line 325, in forward
    attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2))
torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 192.00 MiB (GPU 0; 10.75 GiB total capacity; 7.97 GiB already allocated; 58.75 MiB free; 8.02 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
