In [11]:
import pandas as pd
from sklearn.metrics import classification_report
from transformers import DistilBertTokenizerFast, DistilBertForSequenceClassification
from transformers import pipeline
import torch
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk

In [12]:
# Fetch the VADER lexicon through NLTK's downloader.
nltk.download("vader_lexicon")

# Read the CSV, then split it into the review text and the target column.
df = pd.read_csv("/kaggle/input/test-csv/test.csv")
review_column = df["user_review"]
texts = review_column.astype(str).tolist()  # cast so every entry is a str
label_column = df["user_suggestion"]
labels = label_column.tolist()

### 1. DISTILBERT Evaluation
# Load the checkpoint named below; the model is used as-is (no training in this cell).
model_name = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = DistilBertTokenizerFast.from_pretrained(model_name)
model = DistilBertForSequenceClassification.from_pretrained(model_name)

# Run on the first CUDA device when one is present, otherwise fall back to the
# CPU (-1). A hard-coded device=0 makes this cell fail on a runtime without a GPU.
classifier = pipeline(
    "sentiment-analysis",
    model=model,
    tokenizer=tokenizer,
    device=0 if torch.cuda.is_available() else -1,
)

# Score every review in one call; each result is a dict with a "label" entry.
predictions = classifier(texts, truncation=True, padding=True)

# Translate the pipeline's string labels to the 0/1 encoding used for scoring.
# NOTE(review): assumes user_suggestion uses 1 for a positive recommendation - confirm.
label_map = {"NEGATIVE": 0, "POSITIVE": 1}
bert_preds = [label_map[p["label"]] for p in predictions]

print("Distilbert")
print(classification_report(labels, bert_preds, digits=4))

### 2. VADER Evaluation
vader = SentimentIntensityAnalyzer()


def vader_to_label(text):
    """Turn VADER's compound score for ``text`` into a binary label.

    Returns 1 (positive) when the compound score is greater than or equal to
    zero, otherwise 0 (negative). A score of exactly 0 therefore counts as
    positive.
    """
    compound = vader.polarity_scores(text)["compound"]
    return int(compound >= 0)  # 1 = positive, 0 = negative


# Label every review with the rule above, in the same order as `texts`.
vader_preds = list(map(vader_to_label, texts))

print("Vader")
vader_report = classification_report(labels, vader_preds, digits=4)
print(vader_report)

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /usr/share/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
Device set to use cuda:0


Distilbert
              precision    recall  f1-score   support

           0     0.5676    0.9253    0.7036      1084
           1     0.9268    0.5729    0.7081      1789

    accuracy                         0.7059      2873
   macro avg     0.7472    0.7491    0.7059      2873
weighted avg     0.7913    0.7059    0.7064      2873

Vader
              precision    recall  f1-score   support

           0     0.7650    0.5406    0.6335      1084
           1     0.7636    0.8994    0.8260      1789

    accuracy                         0.7640      2873
   macro avg     0.7643    0.7200    0.7297      2873
weighted avg     0.7642    0.7640    0.7534      2873

