In [None]:
import os

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer


In [None]:
# 1. VADER Sentiment Analysis & Categorization
# A single analyzer instance is created at notebook scope; the scoring cell
# further down calls `sia.polarity_scores(...)` on it for every review.
sia = SentimentIntensityAnalyzer()

def categorize_netflix_sentiment(score):
    """Translate a numeric sentiment score into a three-way label.

    Args:
        score: Numeric score to classify (the notebook passes the VADER
            compound score here).

    Returns:
        "positive" when ``score >= 0.05``, "negative" when
        ``score <= -0.05``, and "neutral" for anything in between.
        NaN fails both comparisons and therefore also yields "neutral".
    """
    # Guard clause for the positive band; the remaining two outcomes are
    # decided by a single conditional expression.
    if score >= 0.05:
        return "positive"
    return "negative" if score <= -0.05 else "neutral"


In [None]:
# 2. Apply VADER to each review
# Every `clean_review` value is passed through str() before scoring, so the
# analyzer always receives text. Only the "compound" entry of the
# polarity_scores() result is stored.
df["vader_score"] = df["clean_review"].apply(lambda x: sia.polarity_scores(str(x))["compound"])
# Bucket each compound score into "positive" / "negative" / "neutral".
df["sentiment"] = df["vader_score"].apply(categorize_netflix_sentiment)

In [None]:
# 3. Sentiment from star ratings
# -----------------------------
def sentiment_from_rating(rating):
    """Convert a star rating into a sentiment label.

    Args:
        rating: Numeric star rating. May be None or NaN when the rating
            is missing.

    Returns:
        "positive" for ratings of 4 and above, "negative" for ratings of
        2 and below, and "neutral" otherwise. The neutral bucket covers a
        3-star rating, any value strictly between 2 and 4, and missing
        ratings (None / NaN).
    """
    # A missing rating carries no signal; it must not be counted as negative.
    if rating is None:
        return "neutral"
    if rating >= 4:   # 4 or 5 stars
        return "positive"
    if rating <= 2:   # 1 or 2 stars
        return "negative"
    # Fall-through: 3 stars, in-between values, and NaN (which fails both
    # comparisons above). Making "neutral" the catch-all keeps unexpected
    # inputs from being silently labeled "negative".
    return "neutral"

In [None]:
# 4. Apply to dataset
# Derive a second, independent label for each review from its `rating` value.
df["sentiment_rating"] = df["rating"].apply(sentiment_from_rating)


In [None]:
# 5. Combined sentiment function
def combined_sentiment(vader, rating):
    """Merge the VADER label and the rating label into one label.

    Args:
        vader: Sentiment label derived from the VADER score.
        rating: Sentiment label derived from the star rating.

    Returns:
        The shared label when both agree; the other label when exactly one
        of them is "neutral"; and "neutral" when the two labels disagree
        and neither is "neutral" (e.g. positive vs. negative).
    """
    # Agreement: either label can be returned.
    if vader == rating:
        return vader
    # VADER abstained, so defer to the rating.
    if vader == "neutral":
        return rating
    # The rating abstained -> keep VADER; otherwise the two conflict outright.
    return vader if rating == "neutral" else "neutral"


In [None]:
# 6. Apply to dataset
# Walk the two label columns in lockstep instead of using a row-wise
# DataFrame.apply(axis=1), which constructs a Series object for every row.
# The resulting list is assigned positionally, one label per row.
df["sentiment_combined"] = [
    combined_sentiment(vader_label, rating_label)
    for vader_label, rating_label in zip(df["sentiment"], df["sentiment_rating"])
]

In [None]:
# 7. Compare VADER vs Rating
# -----------------------------
# The element-wise equality yields a boolean Series; its mean is the fraction
# of rows where the VADER label equals the rating-derived label.
comparison = (df["sentiment"] == df["sentiment_rating"]).mean()
print(f"✅ VADER matches rating-based sentiment {comparison*100:.2f}% of the time.")

In [None]:
# 8. Compare combined sentiment vs rating
# Fraction of rows where the combined label equals the rating-derived label.
# NOTE: `comparison` is reassigned here, replacing the VADER-vs-rating value
# computed in the previous cell.
comparison = (df["sentiment_combined"] == df["sentiment_rating"]).mean()
print(f"✅ Combined sentiment matches rating-based sentiment {comparison*100:.2f}% of the time.")

In [None]:
# 9. Summary statistics
# Number of reviews per VADER-derived label (the `sentiment` column).
print("\n📊 Sentiment Summary")
print(df['sentiment'].value_counts())

In [None]:
# 10. Summary statistics for rating-based sentiment
# Number of reviews per rating-derived label (the `sentiment_rating` column).
print("\n📊 Rating-based Sentiment Summary")
print(df['sentiment_rating'].value_counts())

In [None]:
# 11. Summary statistics for combined sentiment
# Number of reviews per combined label (the `sentiment_combined` column).
print("\n📊 Combined Sentiment Summary")
print(df['sentiment_combined'].value_counts())

In [None]:
# 12. Save labeled dataset
# The destination defaults to the original absolute Windows path, but it can
# be overridden by setting the NETFLIX_REVIEWS_CSV environment variable, so
# the notebook can also be run on machines where that path does not exist.
OUTPUT_CSV_PATH = os.environ.get(
    "NETFLIX_REVIEWS_CSV",
    r"C:\Users\user\Documents\netflix_reviews_cleaned.csv",
)
# index=False keeps the DataFrame index out of the written file.
df.to_csv(OUTPUT_CSV_PATH, index=False)