In [1]:
import torch
from transformers import BertTokenizer, BertForSequenceClassification
from torch.utils.data import TensorDataset, random_split, DataLoader, RandomSampler, SequentialSampler

# Load the pre-trained BERT model and tokenizer
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=3, return_dict=False)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Define the dataset
sentences = ['This is a positive sentence.', 'This is a negative sentence.', 'I am neutral.']
labels = [1, 0, 2]  # 1 for positive, 0 for negative, and 2 for neutral

# Tokenize the sentences and convert them to PyTorch tensors
input_ids = []
attention_masks = []
for sent in sentences:
    encoded_dict = tokenizer.encode_plus(
                        sent,                      # Sentence to encode
                        add_special_tokens = True, # Add '[CLS]' and '[SEP]'
                        max_length = 64,           # Pad or truncate to this length
                        pad_to_max_length = True,
                        return_attention_mask = True,   # Construct attention masks
                        return_tensors = 'pt',      # Return PyTorch tensors
                   )
    
    # Add the encoded sentence to the list of input_ids
    input_ids.append(encoded_dict['input_ids'])
    
    # Add its attention mask (also encoded) to the list of attention_masks
    attention_masks.append(encoded_dict['attention_mask'])
    
# Convert the lists to tensors
input_ids = torch.cat(input_ids, dim=0)
attention_masks = torch.cat(attention_masks, dim=0)
labels = torch.tensor(labels)

# Combine the input_ids and attention_masks into a TensorDataset
dataset = TensorDataset(input_ids, attention_masks, labels)

# Split the dataset into a training set and a validation set
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size

train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

# Define a batch size and create the DataLoader for the training set
batch_size = 32
train_dataloader = DataLoader(
            train_dataset,  
            sampler = RandomSampler(train_dataset), # Select batches randomly
            batch_size = batch_size
        )

# Create the DataLoader for the validation set
validation_dataloader = DataLoader(
            val_dataset, 
            sampler = SequentialSampler(val_dataset), # Evaluate batches sequentially
            batch_size = batch_size
        )

# Set the device to GPU, if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Set the optimizer and learning rate
optimizer = torch.optim.Adam(model.parameters(), lr = 2e-5)

# Set the number of training epochs
epochs = 4

# Set the seed value for reproducibility
seed_val = 42
torch.manual_seed(seed_val)

# Define a function for calculating accuracy
def flat_accuracy(preds, labels):
    pred_flat = torch.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()
    return torch.sum(pred_flat == labels_flat).item() / len(labels_flat)



total_train_loss = 0
total_train_accuracy = 0
# Train the model
for epoch in range(epochs):


    model.train()

    for step, batch in enumerate(train_dataloader):
        batch_input_ids = batch[0].to(device)
        batch_attention_masks = batch[1].to(device)
        batch_labels = batch[2].to(device)
        
        optimizer.zero_grad()

        loss, logits = model(batch_input_ids, attention_mask=batch_attention_masks, labels=batch_labels)

        total_train_loss += loss.item()

        loss.backward()

        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        optimizer.step()

        total_train_accuracy += flat_accuracy(logits, batch_labels)

# avg_train_loss = total_train_loss / len(train_dataloader)
# avg_train_accuracy = total_train_accuracy / len(train_dataloader)
        
        

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

In [2]:
avg_train_loss = total_train_loss / len(train_dataloader)
avg_train_accuracy = total_train_accuracy / len(train_dataloader)

print("Epoch:", epoch+1)
print("Training loss:", avg_train_loss)
print("Training accuracy:", avg_train_accuracy)

# Evaluate the model on the validation set
model.eval()

total_val_loss = 0
total_val_accuracy = 0

for batch in validation_dataloader:
    batch_input_ids = batch[0].to(device)
    batch_attention_masks = batch[1].to(device)
    batch_labels = batch[2].to(device)

    with torch.no_grad():
        loss, logits = model(batch_input_ids, attention_mask=batch_attention_masks, labels=batch_labels)

    total_val_loss += loss.item()
    total_val_accuracy += flat_accuracy(logits, batch_labels)

avg_val_loss = total_val_loss / len(validation_dataloader)
avg_val_accuracy = total_val_accuracy / len(validation_dataloader)

print("Validation loss:", avg_val_loss)
print("Validation accuracy:", avg_val_accuracy)

Epoch: 4
Training loss: 4.397352695465088
Training accuracy: 2.0
Validation loss: 1.3158397674560547
Validation accuracy: 0.0
