In [None]:
import os
import re

# transformers picks its deep-learning backend when it is first imported, so
# USE_TORCH has to be exported *before* `import transformers`; setting it
# afterwards has no effect.
os.environ["USE_TORCH"] = "1"

import numpy as np
import pandas as pd
import torch
import transformers
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Environment report: makes it obvious from the cell output which library
# versions produced the results and whether a GPU is visible to torch.
print("torch version :", torch.__version__)
print("torch use cuda :", torch.cuda.is_available())
print("transformers version :", transformers.__version__)

# Hugging Face Hub ids of the candidate sentiment checkpoints.
distilbert_model_name = "distilbert/distilbert-base-uncased-finetuned-sst-2-english"
finiteautomata_model_name = "finiteautomata/bertweet-base-sentiment-analysis"
# NOTE(review): the pipeline cell further down loads the DistilBERT checkpoint,
# not `model_name` - confirm which model is actually intended.
model_name = finiteautomata_model_name

# Force model and tokenizer download so that later cells can rely on the
# weights already being cached locally.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

print("Model and tokenizer loaded successfully.")



In [None]:
# Read the book-review dataset from the notebook's working directory and
# preview the first rows.
data = pd.read_csv(filepath_or_buffer="book_reviews.csv")
data.head(5)

In [None]:
# Print a structural summary of the loaded frame (columns, dtypes and
# non-null counts) to spot missing values before any text processing.
data.info()

In [None]:
# Show the raw text of the review stored under index label 0.
data.loc[0, "reviewText"]

In [None]:
# Normalise the review text: drop every character that is neither a word
# character nor whitespace, then lower-case the result.
# Vectorised `.str` methods replace the row-wise `apply`; missing reviews
# (NaN) are turned into empty strings first, because `re.sub` raises a
# TypeError on a float and the later cells expect plain `str` values.
data['reviewText_clean'] = (
    data['reviewText']
    .fillna('')
    .astype(str)
    .str.replace(r'[^\w\s]', '', regex=True)
    .str.lower()
)
data.head()

In [None]:
# Rule-based baseline: score every cleaned review with VADER and keep only
# the 'compound' entry of the returned score dictionary.
vader_sentiment = SentimentIntensityAnalyzer()

compound_scores = data['reviewText_clean'].map(
    lambda text: vader_sentiment.polarity_scores(text)['compound']
)
data['vader_sentiment_score'] = compound_scores
data.head()

In [None]:
# Bucket the compound score into three classes:
#   [-1, -0.1] -> negative, (-0.1, 0.1] -> neutral, (0.1, 1] -> positive.
bins = [-1, -0.1, 0.1, 1]
names = ['negative', 'neutral', 'positive']
# `pd.cut` builds right-closed intervals and, by default, leaves the lowest
# edge open, so a score of exactly -1.0 would get a NaN label.
# `include_lowest=True` closes the first interval on the left.
data['vader_sentiment_label'] = pd.cut(
    data['vader_sentiment_score'],
    bins,
    labels=names,
    include_lowest=True,
)
data['vader_sentiment_label'].value_counts().plot.bar()

In [None]:
# Build the transformer-based sentiment classifier on the PyTorch backend.
# The checkpoint id comes from the `distilbert_model_name` constant defined in
# the setup cell rather than a second copy of the literal, so the model is
# named in exactly one place.
transformer_pipeline = pipeline(
    "sentiment-analysis",
    model=distilbert_model_name,
    framework="pt",
)

In [None]:
# Classify every cleaned review with the transformer pipeline.
# - The reviews are passed as one list, so the pipeline returns one
#   {'label', 'score'} dict per review.
# - `truncation=True` cuts reviews that exceed the model's maximum sequence
#   length instead of letting the forward pass fail on them.
# - Only the label *string* is stored: keeping a one-element list per row
#   makes the column unhashable and breaks `value_counts()`.
reviews = data['reviewText_clean'].tolist()
predictions = transformer_pipeline(reviews, truncation=True)
transformer_labels = [prediction['label'] for prediction in predictions]

data['transformer_sentiment_label'] = transformer_labels
data['transformer_sentiment_label'].value_counts().plot.bar()