In [None]:
!pip install transformers torch

In [2]:
import torch
from transformers import BertTokenizer, BertForSequenceClassification
from torch.utils.data import DataLoader, TensorDataset

#  data for sentiment analysis (20 observations)
dummy_data = [
    ("I love this product!", 1), 
    ("This product is great.", 1),
    ("This product is terrible.", 0),
    ("I hate this product.", 0),
    ("This product is okay.", 1),
    ("The quality is amazing.", 1),
    ("Not recommended.", 0),
    ("Excellent service!", 1),
    ("The worst experience ever.", 0),
    ("Highly disappointing.", 0),
    ("Good value for money.", 1),
    ("Could be better.", 0),
    ("Very satisfied.", 1),
    ("Totally worth it.", 1),
    ("Poor performance.", 0),
    ("Superb!", 1),
    ("Bad customer service.", 0),
    ("Extremely unhappy.", 0),
    ("Amazing!", 1),
    ("Not worth it.", 0)
]

# Define the BERT model
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Tokenize input text and add special tokens
input_texts = [text for text, _ in dummy_data]
input_ids = tokenizer(input_texts, padding=True, truncation=True, return_tensors='pt')['input_ids']

# Add dummy labels
labels = torch.tensor([label for _, label in dummy_data])

# Create DataLoader
dataset = TensorDataset(input_ids, labels)
dataloader = DataLoader(dataset, batch_size=4, shuffle=True)

# Fine-tuning BERT
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

for epoch in range(3):
    model.train()
    total_loss = 0
    for batch in dataloader:
        batch = tuple(t.to(device) for t in batch)
        inputs, labels = batch

        optimizer.zero_grad()

        outputs = model(inputs, labels=labels)
        loss = outputs[0]
        total_loss += loss.item()
        
        loss.backward()
        optimizer.step()

    print("Epoch {} Loss: {}".format(epoch+1, total_loss))

# Save the fine-tuned model
model.save_pretrained("fine_tuned_bert")

print("Fine-tuning completed and model saved.")


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.


Epoch 1 Loss: 3.5283971428871155
Epoch 2 Loss: 3.0824933648109436
Epoch 3 Loss: 2.5538235902786255
Fine-tuning completed and model saved.


In [3]:
import torch
from transformers import BertTokenizer, BertForSequenceClassification
from torch.utils.data import DataLoader, TensorDataset

# Dummy evaluation data for sentiment analysis (10 observations)
evaluation_data = [
    ("This product is amazing!", 1), 
    ("I'm satisfied with this purchase.", 1),
    ("Poor quality.", 0),
    ("Don't waste your money.", 0),
    ("Great experience overall.", 1),
    ("Could be better.", 0),
    ("Very happy with the service.", 1),
    ("Disappointing.", 0),
    ("Impressed with the results.", 1),
    ("Not recommended at all.", 0)
]

# Tokenize input text and add special tokens
evaluation_texts = [text for text, _ in evaluation_data]
evaluation_labels = torch.tensor([label for _, label in evaluation_data])

# Load fine-tuned model
model = BertForSequenceClassification.from_pretrained('fine_tuned_bert')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Tokenize input text and add special tokens
evaluation_input_ids = tokenizer(evaluation_texts, padding=True, truncation=True, return_tensors='pt')['input_ids']

# Create DataLoader
evaluation_dataset = TensorDataset(evaluation_input_ids, evaluation_labels)
evaluation_dataloader = DataLoader(evaluation_dataset, batch_size=1)

# Define device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Evaluation metric: Accuracy
def evaluate(model, dataloader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs[0], 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return correct / total

# Evaluate the model
accuracy = evaluate(model, evaluation_dataloader)
print("Accuracy:", accuracy)


Accuracy: 0.8
