In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from transformers import RobertaTokenizer, RobertaForSequenceClassification, TrainingArguments, Trainer
from datasets import Dataset
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, roc_auc_score, confusion_matrix
import torch
import time
import numpy as np
import os
from torch.utils.data import DataLoader

start_time = time.time()

# Load dataset
data = pd.read_csv('datasetofsenti.csv').drop(columns=["Unnamed: 0"], errors="ignore")

# Split dataset
train_texts, test_texts, train_labels, test_labels = train_test_split(
    data["text"], data["label"], test_size=0.375, random_state=42
)

# Tokenizer initialization
tokenizer = RobertaTokenizer.from_pretrained("roberta-base")

def tokenize_function(examples):
    return tokenizer(examples["text"], truncation=True, padding="max_length", max_length=128)

# Prepare datasets
train_dataset = Dataset.from_dict({"text": train_texts, "label": train_labels})
test_dataset = Dataset.from_dict({"text": test_texts, "label": test_labels})
train_dataset = train_dataset.map(tokenize_function, batched=True, remove_columns=["text"])
test_dataset = test_dataset.map(tokenize_function, batched=True, remove_columns=["text"])
train_dataset.set_format("torch")
test_dataset.set_format("torch")

# DataLoader for efficient data loading
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32)

# Model and device setup
model = RobertaForSequenceClassification.from_pretrained("roberta-base", num_labels=len(data["label"].unique()))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Training arguments and settings
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=10,
    save_steps=500,
    load_best_model_at_end=True,
    metric_for_best_model="eval_accuracy",
    save_total_limit=1,
    fp16=True,
    gradient_accumulation_steps=2,  # Gradient accumulation
    report_to=[]
)

def compute_metrics(pred):
    logits, labels = pred
    predictions = np.argmax(logits, axis=-1)
    acc = accuracy_score(labels, predictions)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, predictions, average="weighted")
    return {
        "accuracy": acc,
        "f1": f1,
        "precision": precision,
        "recall": recall
    }

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

# Training
train_start_time = time.time()
trainer.train()
train_end_time = time.time()

# Evaluation
eval_start_time = time.time()
results = trainer.evaluate()
eval_end_time = time.time()

predictions = trainer.predict(test_dataset)
logits = predictions.predictions
true_labels = test_labels.to_numpy()
predicted_probs = torch.nn.functional.softmax(torch.tensor(logits), dim=-1).numpy()

if predicted_probs.shape[1] == 2:
    auc_roc = roc_auc_score(true_labels, predicted_probs[:, 1])
else:
    true_labels_one_hot = np.eye(predicted_probs.shape[1])[true_labels]
    auc_roc = roc_auc_score(true_labels_one_hot, predicted_probs, average="weighted", multi_class="ovr")

conf_matrix = confusion_matrix(true_labels, np.argmax(predicted_probs, axis=1))

metrics = {
    "Training Time (s)": train_end_time - train_start_time,
    "Evaluation Time (s)": eval_end_time - eval_start_time,
    "Accuracy": f"{results['eval_accuracy']:.2%}",
    "F1 Score": f"{results['eval_f1']:.4f}",
    "Precision": f"{results['eval_precision']:.4f}",
    "Recall": f"{results['eval_recall']:.4f}",
    "AUC-ROC": f"{auc_roc:.4f}",
    "Confusion Matrix": conf_matrix,
    "Model Size (MB)": os.path.getsize("./results") / (1024 * 1024),
    "Device": device,
    "Total Execution Time (s)": time.time() - start_time
}

for key, value in metrics.items():
    if key == "Confusion Matrix":
        print(f"{key}:\n{value}")
    else:
        print(f"{key}: {value}")

Map:   0%|          | 0/260505 [00:00<?, ? examples/s]

Map:   0%|          | 0/156304 [00:00<?, ? examples/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.1731,0.101104,0.941153,0.9402,0.946144,0.941153
2,0.1634,0.090646,0.941863,0.942491,0.948851,0.941863


Training Time (s): 2079.88729429245
Evaluation Time (s): 96.07790231704712
Accuracy: 94.19%
F1 Score: 0.9425
Precision: 0.9489
Recall: 0.9419
AUC-ROC: 0.9983
Confusion Matrix:
[[43364    33     0  1077   820    46]
 [   17 48620  3837    41    17   270]
 [    5     7 12808     1     0     0]
 [   49    45     1 21460    50     0]
 [   29    21     0   923 17078     0]
 [    2   223     0     0  1573  3887]]
Model Size (MB): 0.0
Device: cuda
Total Execution Time (s): 2331.8662889003754
