In [2]:
import pandas as pd
from textblob import TextBlob

# Read the review CSV into a DataFrame, decoding the file as latin-1
df = pd.read_csv(
    "AmazonFineFoods.csv",
    encoding="latin-1",
)

# Map a star rating onto a coarse sentiment label
def classify_sentiment(score):
    """Return the sentiment label implied by a star rating.

    A rating of 4 or more is 'positive', a rating equal to 3 is 'neutral',
    and any other value is 'negative'.
    """
    if score >= 4:
        return 'positive'
    return 'neutral' if score == 3 else 'negative'

# Label every review with the sentiment implied by its star rating
df['star_sentiment'] = df['Score'].map(classify_sentiment)

# Classify review text as negative / neutral / positive from its TextBlob polarity
def classify_polarity(review_text, *, threshold=0.05):
    """Return the sentiment label of a review's text.

    Args:
        review_text: The review body. Anything that is not a string (for
            example the NaN pandas uses for an empty CSV cell, or None) and
            any blank string is treated as carrying no sentiment.
        threshold: Half-width of the neutral band around zero polarity.
            Polarity below ``-threshold`` is 'negative', above ``threshold``
            is 'positive'. Defaults to 0.05.

    Returns:
        One of 'negative', 'neutral' or 'positive'.
    """
    # TextBlob raises TypeError for non-string input, which would abort a
    # whole-column apply on the first missing review; blank text has nothing
    # to score, so both cases are reported as 'neutral' without calling it.
    if not isinstance(review_text, str) or not review_text.strip():
        return 'neutral'

    polarity = TextBlob(review_text).sentiment.polarity
    if polarity < -threshold:
        return 'negative'
    if polarity > threshold:
        return 'positive'
    return 'neutral'

# Label every review with the sentiment detected in its text
df['text_sentiment'] = df['ReviewText'].map(classify_polarity)

# Report whether a review's star-based and text-based labels coincide
def sentiment_agreement(row):
    """Return True when the row's 'star_sentiment' equals its 'text_sentiment'."""
    star_label = row['star_sentiment']
    text_label = row['text_sentiment']
    return bool(star_label == text_label)

# Flag each review whose star-based and text-based labels match. Comparing the
# two columns directly yields the same booleans as calling sentiment_agreement
# on every row, without building a Series per row through apply(axis=1).
df['sentiment_agreement'] = df['star_sentiment'] == df['text_sentiment']

# Print a one-line summary of how often star and text sentiment agree overall
total_count = len(df)
# 'sentiment_agreement' holds booleans, so summing it counts the True rows
agree_count = int(df['sentiment_agreement'].sum())
disagree_count = total_count - agree_count
# Guard the percentages so an empty dataframe prints 0.00% instead of raising
# ZeroDivisionError
agree_pct = agree_count / total_count * 100 if total_count else 0.0
disagree_pct = disagree_count / total_count * 100 if total_count else 0.0
print(f"Overall sentiment agreement: {agree_count} ({agree_pct:.2f}%) agree, {disagree_count} ({disagree_pct:.2f}%) disagree")

# Print the same agreement summary separately for each star rating, 1 through 5
for i in range(1, 6):
    subset = df[df['Score'] == i]
    total_count = len(subset)
    # 'sentiment_agreement' holds booleans, so summing it counts the True rows
    agree_count = int(subset['sentiment_agreement'].sum())
    disagree_count = total_count - agree_count
    # A rating with no reviews has total_count == 0; report 0.00% for it
    # instead of raising ZeroDivisionError and aborting the remaining ratings
    agree_pct = agree_count / total_count * 100 if total_count else 0.0
    disagree_pct = disagree_count / total_count * 100 if total_count else 0.0
    print(f"{i}-star reviews: {agree_count} ({agree_pct:.2f}%) agree, {disagree_count} ({disagree_pct:.2f}%) disagree")




Overall sentiment agreement: 831 (74.73%) agree, 281 (25.27%) disagree
1-star reviews: 38 (34.23%) agree, 73 (65.77%) disagree
2-star reviews: 7 (12.73%) agree, 48 (87.27%) disagree
3-star reviews: 15 (17.65%) agree, 70 (82.35%) disagree
4-star reviews: 125 (85.62%) agree, 21 (14.38%) disagree
5-star reviews: 646 (90.35%) agree, 69 (9.65%) disagree
