In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer, BertForSequenceClassification
from torch.utils.data import DataLoader, Dataset
import torch
from torch.optim import AdamW
from sklearn.metrics import accuracy_score, classification_report

# Load the dataset
file_path = '/content/BenchmarkUddinSO-ConsoliatedAspectSentiment.xls'
data = pd.read_excel(file_path)

# Selecting and preparing the relevant columns
data = data[['codes', 'sent', 'ManualLabel']]
data['codes'] = data['codes'].apply(eval)
data = data.explode('codes')
data = data[data['codes'].str.contains('Performance')]  # Filter to only 'Bug' aspect

# Mapping labels to new non-negative numerical values
label_mapping = {'p': 2, 'n': 1, 'o': 0}  # Positive, Negative, Other
data['ManualLabel'] = data['ManualLabel'].map(label_mapping)

In [10]:
# Splitting the data
X_train, X_test, y_train, y_test = train_test_split(data['sent'], data['ManualLabel'], test_size=0.3, random_state=42)

# Tokenizer and model setup
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=3)  # for three labels p, n, o

# Dataset class
class ReviewDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_len=128):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = str(self.texts[idx])
        label = self.labels[idx]
        inputs = self.tokenizer.encode_plus(
            text,
            None,
            add_special_tokens=True,
            max_length=self.max_len,
            padding='max_length',
            return_attention_mask=True,
            truncation=True,
            return_tensors='pt'
        )
        return {
            'input_ids': inputs['input_ids'].flatten(),
            'attention_mask': inputs['attention_mask'].flatten(),
            'labels': torch.tensor(label, dtype=torch.long)
        }

# DataLoader setup
def create_data_loader(X, y, tokenizer, batch_size, max_len):
    ds = ReviewDataset(
        texts=X.to_numpy(),
        labels=y.to_numpy(),
        tokenizer=tokenizer,
        max_len=max_len
    )
    return DataLoader(
        ds,
        batch_size=batch_size,
        num_workers=0  # Set workers to 0 to avoid multiprocessing issues in certain environments
    )

batch_size = 16
train_data_loader = create_data_loader(X_train, y_train, tokenizer, batch_size, max_len=128)
test_data_loader = create_data_loader(X_test, y_test, tokenizer, batch_size, max_len=128)

# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [11]:
# Optimizer
optimizer = AdamW(model.parameters(), lr=2e-5)

# Training Loop
def train_epoch(model, data_loader, optimizer, device, n_examples):
    model = model.train()
    losses = []
    correct_predictions = 0

    for d in data_loader:
        input_ids = d["input_ids"].to(device)
        attention_mask = d["attention_mask"].to(device)
        labels = d["labels"].to(device)

        outputs = model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            labels=labels
        )
        loss = outputs.loss
        preds = torch.argmax(outputs.logits, dim=1)
        correct_predictions += torch.sum(preds == labels)
        losses.append(loss.item())
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

    return correct_predictions.double() / n_examples, sum(losses) / len(losses)

# Train the model
for epoch in range(3):  # Number of epochs
    print(f'Epoch {epoch + 1}')
    train_acc, train_loss = train_epoch(
        model,
        train_data_loader,
        optimizer,
        device,
        len(X_train)
    )
    print(f'Train loss {train_loss} accuracy {train_acc}')

Epoch 1
Train loss 1.1335205286741257 accuracy 0.2757201646090535
Epoch 2
Train loss 1.0727288611233234 accuracy 0.37448559670781895
Epoch 3
Train loss 1.0426955707371235 accuracy 0.411522633744856


In [12]:
import time
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score

# Set the model to evaluation mode
model.eval()
predictions, true_labels = [], []
total_inference_time = 0
total_samples = 0

# Ensure that 'no_grad' is used to prevent updating the model during inference
with torch.no_grad():
    for i, batch in enumerate(test_data_loader):
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)

        print(f"Processing batch {i+1}/{len(test_data_loader)}...")  # Log current batch number

        start_time = time.time()
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        inference_time = time.time() - start_time

        total_inference_time += inference_time
        total_samples += input_ids.size(0)

        logits = outputs.logits
        preds = logits.argmax(dim=1).cpu().numpy()
        labels = batch['labels'].cpu().numpy()

        predictions.extend(preds)
        true_labels.extend(labels)

        if i == 3:  # for instance, break after 3 batches
            break

# Compute metrics
overall_accuracy = accuracy_score(true_labels, predictions)
overall_recall = recall_score(true_labels, predictions, average='macro')
overall_precision = precision_score(true_labels, predictions, average='macro')
overall_f1_score = f1_score(true_labels, predictions, average='macro')

print(f'Overall Accuracy: {overall_accuracy:.4f}')
print(f'Overall Recall: {overall_recall:.4f}')
print(f'Overall Precision: {overall_precision:.4f}')
print(f'Overall F1 Score: {overall_f1_score:.4f}')

# Calculate total and average inference times
total_time_seconds = total_inference_time
sample_latency_seconds = total_inference_time / total_samples
print(f'Total Time (seconds): {total_time_seconds:.6f} seconds')
print(f'Samples Processed: {total_samples}')
print(f'Latency Per Sample (seconds): {sample_latency_seconds:.6f} seconds')

Processing batch 1/7...
Processing batch 2/7...
Processing batch 3/7...
Processing batch 4/7...
Overall Accuracy: 0.4219
Overall Recall: 0.3531
Overall Precision: 0.2548
Overall F1 Score: 0.2596
Total Time (seconds): 26.362602 seconds
Samples Processed: 64
Latency Per Sample (seconds): 0.411916 seconds


  _warn_prf(average, modifier, msg_start, len(result))
