In [1]:
import pandas as pd
from sklearn.metrics import classification_report
from transformers import DistilBertTokenizerFast, DistilBertForSequenceClassification, AutoTokenizer, AutoModelForSequenceClassification
from transformers import pipeline
import torch
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk
import os
from dotenv import load_dotenv

  from .autonotebook import tqdm as notebook_tqdm
2025-05-11 11:24:20.646523: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-05-11 11:24:20.658667: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1746937460.673781   12362 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1746937460.679052   12362 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1746937460.695087   12362 computation_placer.cc:177] computation placer already r

In [2]:
# Fetch the lexicon that NLTK's VADER analyzer needs
nltk.download("vader_lexicon")

# Read the evaluation CSV and extract the review texts and their labels.
# Reviews are cast to str so every entry is a string before being handed
# to the tokenizers / VADER below.
df = pd.read_csv("../data/test.csv")
texts, labels = (
    df["user_review"].astype(str).tolist(),
    df["user_suggestion"].tolist(),
)

### 1. DISTILBERT Evaluation
# Baseline: the SST-2 fine-tuned DistilBERT checkpoint, applied to the
# reviews as-is.
model_name = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = DistilBertTokenizerFast.from_pretrained(model_name)
model = DistilBertForSequenceClassification.from_pretrained(model_name)

# Select the device explicitly instead of relying on the pipeline's
# version-dependent default: first CUDA GPU when available, otherwise CPU (-1).
device = 0 if torch.cuda.is_available() else -1
classifier = pipeline(
    "sentiment-analysis",
    model=model,
    tokenizer=tokenizer,
    device=device,
)

# Run in batches so that padding=True actually has something to pad;
# truncation=True clips reviews that exceed the model's maximum length.
predictions = classifier(texts, batch_size=32, truncation=True, padding=True)

# Translate the pipeline's string labels into the 0/1 classes compared
# against `labels`. This mapping is reused by the fine-tuned model section.
label_map = {"NEGATIVE": 0, "POSITIVE": 1}
bert_preds = [label_map[p["label"]] for p in predictions]

print("Distilbert")
print(classification_report(labels, bert_preds, digits=4))

### 2. Finetuned DistilBERT

# Read the Hugging Face access token from the environment (optionally
# populated from a local .env file). os.getenv returns None when the
# variable is not set.
load_dotenv()
hf_token = os.getenv("HUGGINGFACE_TOKEN")

checkpoint = "GaaS-Team/DistilBERT-finetuned-GaaS"
ft_tokenizer = AutoTokenizer.from_pretrained(checkpoint, token=hf_token)
ft_model = AutoModelForSequenceClassification.from_pretrained(checkpoint, token=hf_token)

# Select the device explicitly instead of relying on the pipeline's
# version-dependent default: first CUDA GPU when available, otherwise CPU (-1).
ft_device = 0 if torch.cuda.is_available() else -1
ft_classifier = pipeline(
    "sentiment-analysis",
    model=ft_model,
    tokenizer=ft_tokenizer,
    device=ft_device,
)

# Run in batches so that padding=True actually has something to pad;
# truncation=True clips reviews that exceed the model's maximum length.
ft_predictions = ft_classifier(texts, batch_size=32, truncation=True, padding=True)

# Reuses `label_map` from the DistilBERT section, so this checkpoint must
# emit the same "NEGATIVE"/"POSITIVE" label names (a KeyError is raised otherwise).
ft_preds = [label_map[p["label"]] for p in ft_predictions]

print("Finetuned Distilbert")
print(classification_report(labels, ft_preds, digits=4))

### 3. VADER Evaluation
# Lexicon-based sentiment analyzer from NLTK, used as a non-neural baseline.
# It relies on the "vader_lexicon" resource downloaded at the top of this cell.
vader = SentimentIntensityAnalyzer()

def vader_to_label(text, threshold=0.0, analyzer=None):
    """Map a text to a binary sentiment label using VADER's compound score.

    Args:
        text: String to score.
        threshold: Minimum compound score that counts as positive. The
            default of 0.0 keeps the original rule, under which texts with
            a compound score of exactly 0 are labelled positive. VADER's
            documentation suggests 0.05 as the cut-off for positive
            sentiment, which can be passed here to stop neutral texts from
            inflating the positive class.
        analyzer: Object exposing ``polarity_scores(text)`` that returns a
            mapping containing a ``"compound"`` entry. When omitted, the
            notebook-level ``vader`` analyzer is used.

    Returns:
        1 (positive) if the compound score is >= ``threshold``, otherwise
        0 (negative).
    """
    if analyzer is None:
        # Looked up at call time so the function can be defined before
        # (or tested without) the notebook-level analyzer.
        analyzer = vader
    score = analyzer.polarity_scores(text)["compound"]
    return 1 if score >= threshold else 0  # 1 = positive, 0 = negative

# Apply the VADER labelling rule to every review, in order
vader_preds = list(map(vader_to_label, texts))

# Same report format as the transformer models, for side-by-side comparison
print("Vader")
print(classification_report(labels, vader_preds, digits=4))

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /home/huhyhuvinh/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
Device set to use cuda:0


Distilbert
              precision    recall  f1-score   support

           0     0.7747    0.7703    0.7725      3348
           1     0.6149    0.6208    0.6179      1978

    accuracy                         0.7148      5326
   macro avg     0.6948    0.6956    0.6952      5326
weighted avg     0.7154    0.7148    0.7151      5326



Device set to use cuda:0


Finetuned Distilbert
              precision    recall  f1-score   support

           0     0.8944    0.9259    0.9099      3348
           1     0.8667    0.8150    0.8400      1978

    accuracy                         0.8847      5326
   macro avg     0.8805    0.8704    0.8750      5326
weighted avg     0.8841    0.8847    0.8839      5326

Vader
              precision    recall  f1-score   support

           0     0.9040    0.4050    0.5594      3348
           1     0.4794    0.9272    0.6320      1978

    accuracy                         0.5989      5326
   macro avg     0.6917    0.6661    0.5957      5326
weighted avg     0.7463    0.5989    0.5864      5326

