In [None]:
import pandas as pd
import string
from transformers import pipeline
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import matplotlib.pyplot as plt

In [None]:
# Load the review sample into the notebook-wide `data` frame and preview
# the first rows so the available columns are visible before processing.
csv_path = "book_review_sample.csv"
data = pd.read_csv(csv_path)

sample_rows = data.head()
print("Sample data:\n", sample_rows, "\n")

In [None]:
# Translation table that deletes every ASCII punctuation character; built
# once instead of re-testing each character against string.punctuation.
_punctuation_table = str.maketrans("", "", string.punctuation)


def _clean_review(text):
    """Return `text` lower-cased with ASCII punctuation removed.

    Missing values (NaN/None, e.g. empty CSV cells) become an empty string
    instead of raising a TypeError; other non-string values are converted
    with str() before cleaning.
    """
    if not isinstance(text, str):
        if pd.isna(text):
            return ""
        text = str(text)
    return text.translate(_punctuation_table).lower()


data["review_text_clean"] = data["review_text"].apply(_clean_review)

print("Cleaned text sample:\n", data["review_text_clean"].head(), "\n")

In [None]:
# Create a single VADER analyzer instance; the scoring cell below reuses it
# for every review rather than constructing a new analyzer per row.
vader = SentimentIntensityAnalyzer()

In [None]:
# Score every cleaned review with VADER and keep only the "compound" value
# from the returned score dictionary.
# NOTE(review): VADER's documentation describes punctuation and
# capitalisation as intensity cues, and the cleaned column has neither —
# confirm that scoring the cleaned text (not the raw text) is intended.
compound_scores = data["review_text_clean"].apply(
    lambda text: vader.polarity_scores(text)["compound"]
)
data["Vader_Sentiment_Score"] = compound_scores

In [None]:
# Map the compound score onto three labels:
#   [-1, -0.1]  -> Negative
#   (-0.1, 0.1] -> Neutral
#   (0.1, 1]    -> Positive
bins = [-1, -0.1, 0.1, 1]
labels = ["Negative", "Neutral", "Positive"]
data["Vader_Sentiment_Label"] = pd.cut(
    data["Vader_Sentiment_Score"],
    bins=bins,
    labels=labels,
    # Intervals are right-closed by default, which leaves the lowest edge
    # open: a score of exactly -1.0 would otherwise be labelled NaN.
    include_lowest=True,
)



In [None]:
print("VADER Sentiment Distribution:\n")

# value_counts() orders bars by frequency, so a positional colour list would
# paint whichever label is most common red. Fix the bar order explicitly so
# each colour always belongs to the same sentiment label.
sentiment_colors = {"Negative": "red", "Neutral": "gray", "Positive": "green"}
vader_counts = (
    data["Vader_Sentiment_Label"]
    .value_counts()
    .reindex(list(sentiment_colors), fill_value=0)
)
vader_counts.plot.bar(color=list(sentiment_colors.values()))
plt.title("VADER Sentiment Distribution")
plt.xlabel("Sentiment Type")
plt.ylabel("Count")
plt.show()

In [None]:
# Name the checkpoint explicitly instead of relying on the library's implicit
# default for the "sentiment-analysis" task, so results do not silently change
# if that default is updated. This is the SST-2 DistilBERT checkpoint the task
# has defaulted to; add `revision=...` as well for fully pinned results.
transformer_pipeline = pipeline(
    "sentiment-analysis",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
)


In [None]:
# Classify all cleaned reviews in batches of 32. truncation=True cuts reviews
# that exceed the model's maximum sequence length; without it such reviews
# make the forward pass raise instead of producing a prediction.
results = transformer_pipeline(
    data["review_text_clean"].tolist(),
    batch_size=32,
    truncation=True,
)

In [None]:
# Each pipeline result is a dict; keep only its "label" entry, in the same
# order as the reviews that were passed in.
predicted_labels = [prediction["label"] for prediction in results]
data["Transformer_Sentiment_Labels"] = predicted_labels

In [None]:
print("Transformer Sentiment Distribution:\n")

# value_counts() orders bars by frequency, so a positional colour list makes
# the colour of each label depend on the data. Sort by label and look the
# colour up per label so the mapping is stable across runs.
# NOTE(review): assumes the pipeline emits "NEGATIVE"/"POSITIVE" labels —
# confirm for the model in use; any other label is drawn in gray.
label_colors = {"NEGATIVE": "pink", "POSITIVE": "lightblue"}
transformer_counts = (
    data["Transformer_Sentiment_Labels"].value_counts().sort_index()
)
transformer_counts.plot.bar(
    color=[label_colors.get(label, "gray") for label in transformer_counts.index]
)
plt.title("Transformer Sentiment Distribution")
plt.xlabel("Sentiment Type")
plt.ylabel("Count")
plt.show()

In [None]:
# Preview the first rows of the frame, now including the VADER score/label
# columns and the transformer label column added above.
final_preview = data.head()
print("Final Data with Sentiment Columns:\n", final_preview)
