In [3]:
import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [5]:
netflix = pd.read_csv('subreddit_netflix_data.csv')
disney = pd.read_csv('subreddit_DisneyPlus_data.csv')

In [7]:
# Add platform labels
netflix['platform'] = 'Netflix'
disney['platform'] = 'Disney+'

In [9]:
# Create Dataframe of all platforms
df = pd.concat([netflix, disney])

# Get sentiment scores from 'body' column

In [34]:
# Get sentiment scores from 'body' column
analyzer = SentimentIntensityAnalyzer()
df['sentiment_body'] = df['body'].apply(lambda x: analyzer.polarity_scores(str(x))['compound'])
df['sentiment_body']

0      0.7959
1     -0.1734
2      0.1280
3      0.7178
4     -0.5318
        ...  
925    0.0000
926    0.0000
927   -0.6418
928    0.0000
929    0.5023
Name: sentiment_body, Length: 1465, dtype: float64

In [36]:
# Calculate if each sentiment is positive or not
df['is_positive_body'] = df['sentiment_body'] > 0
df['is_positive_body']

0       True
1      False
2       True
3       True
4      False
       ...  
925    False
926    False
927    False
928    False
929     True
Name: is_positive_body, Length: 1465, dtype: bool

In [None]:
# Create function that can indicate if the score is positive, neutral or negative
# VADER's default thresholds would look something like this:
def get_sentiment_category(score):
    if score >= 0.05:
        return 'positive'
    elif score <= -0.05:
        return 'negative'
    else:
        return 'neutral'

In [38]:
# Apply get_sentiment_category function on sentiment score
df['sentiment_category_body'] = df['sentiment_body'].apply(get_sentiment_category)
df['sentiment_category_body']

0      positive
1      negative
2      positive
3      positive
4      negative
         ...   
925     neutral
926     neutral
927    negative
928     neutral
929    positive
Name: sentiment_category_body, Length: 1465, dtype: object

In [21]:
# Result from Sentiment scores from 'body' column by Platform

# Average Sentiment Score
print("Average Sentiment Score by Platform:")
print(df.groupby('platform')['sentiment_body'].mean())

# Sentiment Score Distribution
print("\nSentiment Distribution by Platform:")
print(df.groupby(['platform', 'sentiment_category_body']).size().unstack(fill_value=0))

# Percent Positive Sentiment Score by Platform:
print("\nPercent Positive by Platform:")
print(df.groupby('platform')['is_positive_body'].mean() * 100)

# Percentages of the three groups
sentiment_pcts = df.groupby('platform')['sentiment_category_body'].value_counts(normalize=True).unstack() * 100
print("\nSentiment Percentages by Platform:")
print(sentiment_pcts.round(2))

Average Sentiment Score by Platform:
platform
Disney+    0.133633
Netflix    0.107340
Name: sentiment_body, dtype: float64

Sentiment Distribution by Platform:
sentiment_category_body  negative  neutral  positive
platform                                            
Disney+                       197      376       357
Netflix                       147      163       225

Percent Positive by Platform:
platform
Disney+    39.032258
Netflix    42.429907
Name: is_positive_body, dtype: float64

Sentiment Percentages by Platform:
sentiment_category_body  negative  neutral  positive
platform                                            
Disney+                     21.18    40.43     38.39
Netflix                     27.48    30.47     42.06


# Get sentiment scores from 'title' column

In [40]:
# Get sentiment scores from 'title' column
analyzer = SentimentIntensityAnalyzer()
df['sentiment_title'] = df['title'].apply(lambda x: analyzer.polarity_scores(str(x))['compound'])
df['sentiment_title']

0     -0.4019
1      0.0000
2     -0.2732
3     -0.2960
4     -0.4767
        ...  
925    0.0000
926    0.0000
927    0.0000
928    0.0000
929    0.2415
Name: sentiment_title, Length: 1465, dtype: float64

In [46]:
# Calculate if each sentiment is positive or not
# Should be expanded to handle pos/neu/neg instead
df['is_positive_title'] = df['sentiment_title'] > 0
df['is_positive_title']

0      False
1      False
2      False
3      False
4      False
       ...  
925    False
926    False
927    False
928    False
929     True
Name: is_positive_title, Length: 1465, dtype: bool

In [51]:
# Apply get_sentiment_category function on sentiment score
df['sentiment_category_title'] = df['sentiment_title'].apply(get_sentiment_category)
df['sentiment_category_title']

0      negative
1       neutral
2      negative
3      negative
4      negative
         ...   
925     neutral
926     neutral
927     neutral
928     neutral
929    positive
Name: sentiment_category_title, Length: 1465, dtype: object

In [53]:
# Result from Sentiment scores from 'title' column by Platform

# Average Sentiment Score
print("Average Sentiment Score by Platform:")
print(df.groupby('platform')['sentiment_title'].mean())

# Sentiment Score Distribution
print("\nSentiment Distribution by Platform:")
print(df.groupby(['platform', 'sentiment_category_title']).size().unstack(fill_value=0))

# Percent Positive Sentiment Score by Platform:
print("\nPercent Positive by Platform:")
print(df.groupby('platform')['is_positive_title'].mean() * 100)

# Percentages of the three groups
sentiment_pcts = df.groupby('platform')['sentiment_category_title'].value_counts(normalize=True).unstack() * 100
print("\nSentiment Percentages by Platform:")
print(sentiment_pcts.round(2))

Average Sentiment Score by Platform:
platform
Disney+    0.042807
Netflix   -0.016393
Name: sentiment_title, dtype: float64

Sentiment Distribution by Platform:
sentiment_category_title  negative  neutral  positive
platform                                             
Disney+                        156      537       237
Netflix                        133      278       124

Percent Positive by Platform:
platform
Disney+    25.698925
Netflix    23.364486
Name: is_positive_title, dtype: float64

Sentiment Percentages by Platform:
sentiment_category_title  negative  neutral  positive
platform                                             
Disney+                      16.77    57.74     25.48
Netflix                      24.86    51.96     23.18
