In [37]:
import pandas as pd
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Load the merged sentiment data. A row with no platform recorded is a
# Twitter row, so fill the gaps in that one column only.
# ('Platfrom' is the column's actual spelling in the CSV.)
df = (
    pd.read_csv('../../data/merged_data_sentiment.csv')
    .fillna({'Platfrom': 'Twitter'})
)

# Number of rows per platform (displayed as the cell output).
df['Platfrom'].value_counts()

youtube    44621
Reddit     19319
Twitter      360
Name: Platfrom, dtype: int64

In [38]:
# Compute the mean sentiment score of each comment.
# 'Sentiment' is parsed as a comma-separated string of numbers; the mean of
# those numbers is stored in a new 'Avg_Sentiment' column.
def _mean_sentiment(raw_scores):
    """Return the arithmetic mean of a comma-separated string of numbers.

    Raises ValueError if any piece is not parseable as a float.
    """
    scores = [float(piece) for piece in raw_scores.split(',')]
    return sum(scores) / len(scores)


# Series.map builds the whole column in one pass; the previous
# iterrows() + df.at[...] loop wrote one cell at a time, which is slow on
# tens of thousands of rows and never created the column for an empty frame.
df['Avg_Sentiment'] = df['Sentiment'].map(_mean_sentiment)

In [39]:
# Derive a 'YYYY-MM' month label from each row's date.
print(df.columns)
df['Month'] = pd.to_datetime(df['Date']).dt.to_period('M').astype(str)

# Mean of every numeric column per (month, platform) pair.
# numeric_only=True is passed explicitly: relying on the implicit default is
# deprecated, and pandas 2.x raises a TypeError here because the frame still
# contains text columns (e.g. 'Content', 'User').
# NOTE: this rebinds `df` to the aggregated frame, so the cell is not
# idempotent -- re-run the cells above before re-running this one.
df = (
    df.groupby(['Month', 'Platfrom'])
    .mean(numeric_only=True)
    .reset_index()
)


Index(['ID', 'Content', 'User', 'Date', 'Location', 'Reactions', 'N_Children',
       'Post Title', 'Platfrom', 'meta', 'lang', 'Subreddit', 'Unnamed: 0',
       'Sentiment', 'Avg_Sentiment'],
      dtype='object')



The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.



In [49]:
# Line chart of monthly mean sentiment, one line per platform.
# Twitter is drawn against the primary (left) y-axis; YouTube and Reddit
# share the secondary (right) y-axis.
fig = make_subplots(specs=[[{"secondary_y": True}]])

# (value in 'Platfrom', legend label, line colour, plot on secondary y-axis?)
platform_traces = [
    ('youtube', 'YouTube', 'red', True),
    ('Twitter', 'Twitter', 'blue', False),
    ('Reddit', 'Reddit', 'green', True),
]

for platform, label, colour, on_secondary in platform_traces:
    # Filter once per platform and reuse the slice for both axes.
    platform_rows = df[df['Platfrom'] == platform]
    fig.add_trace(
        go.Scatter(
            x=platform_rows['Month'],
            y=platform_rows['Avg_Sentiment'],
            name=label,
            line=dict(color=colour),
        ),
        secondary_y=on_secondary,
    )

fig.update_layout(title_text="Sentiment Over Time by Platform")

# Label the axes so the two y-scales are not mistaken for one another.
fig.update_xaxes(title_text="Month")
fig.update_yaxes(title_text="Avg sentiment (Twitter)", secondary_y=False)
fig.update_yaxes(title_text="Avg sentiment (YouTube, Reddit)", secondary_y=True)
fig.show()
