In [13]:
import pandas as pd
import datetime
import plotly.express as px
import plotly.graph_objs as go

In [6]:
# set period under observation
start_date = datetime.datetime(2022, 7, 1)
end_date = datetime.datetime(2024, 5, 31)

In [30]:
# read twitter sentiment data 
tweets_sentiment = pd.read_csv('../Data/tweets_sentiment.csv', index_col='Unnamed: 0',  lineterminator='\n')

# compute tweet sentiment score and label
tweets_sentiment['sentiment_score'] = tweets_sentiment['Positive'] - tweets_sentiment['Negative']

# read twitter topic data 
tweets_topic = pd.read_csv('../Data/tweets_topic.csv', index_col='Unnamed: 0')

# merge twitter sentiment and topic into one dataframe
sentiment_df = tweets_sentiment.merge(tweets_topic, how='inner', on='id', suffixes=('', '_copy'))
sentiment_df.drop(columns=[col for col in sentiment_df.columns if col.endswith('_copy')], inplace=True)

# filter for tweets within date range
sentiment_df['date'] = pd.to_datetime(sentiment_df['date'])
sentiment_df = sentiment_df[(sentiment_df['date'] >= start_date) & (sentiment_df['date'] <= end_date)]
sentiment_df.head()

Unnamed: 0,id,date,lang,rawContent,user_id,username,displayname,rawDescription,Positive,Negative,Neutral,sentiment_score,topic
0,1576358723295281152,2022-10-01,en,Goals? 1) Energy security2) Energy affordabili...,1622604644,Patricia_Energy,Patricia Schouker,💡Energy & Security • Non-Resident Fellow @cosc...,0.072202,0.010789,0.917008,0.061413,2
1,1576354710415720448,2022-10-01,en,"#SP500 3,585.62 0% #DJIA 28,725.51 0%...",1558597273386291200,EconSnapped,🤖 EconSnapped,Your economic update. Hourly.\n\n💰 Asset - Pr...,0.050506,0.04205,0.907444,0.008455,2
2,1576353909349941248,2022-10-01,en,Having this made for our new home #Brent,275455521,MarkSJordan,Mark,Married to the blonde.\nComplacency is the enemy,0.325245,0.00922,0.665534,0.316025,2
3,1576352995675021312,2022-10-01,en,SCOTT RITTER: The Onus Is on Biden #OOTT,29962180,CarlosFGarciaG,Carlos Felipe García,Economía UCV. Magister CGU. Estudios de filoso...,0.041617,0.019267,0.939116,0.022349,1
4,1576352573623263233,2022-10-01,en,Local Victorian park sometimes looks pretty #S...,9921452,gn0me,Andrei Ianovskii,Views expressed here are my own and do not nec...,0.044931,0.034805,0.920264,0.010126,0


In [31]:
# Group tweets by date and calculate average sentiment
daily_sentiment = sentiment_df.loc[:, ['date', 'sentiment_score']]
daily_sentiment = daily_sentiment.groupby(['date']).mean()

daily_sentiment.columns = [f'{col}_0' for col in daily_sentiment.columns]
daily_sentiment.head()

Unnamed: 0_level_0,sentiment_score_0
date,Unnamed: 1_level_1
2022-07-01,-0.133128
2022-07-02,0.212321
2022-07-03,-0.217177
2022-07-04,-0.123233
2022-07-05,-0.139811


In [32]:
# Group by date and topic and calculate average sentiment
daily_topic_sentiment = sentiment_df.loc[:, ['date', 'topic', 'sentiment_score']]
daily_topic_sentiment['topic'] = daily_topic_sentiment['topic']+1
daily_topic_sentiment = daily_topic_sentiment.groupby(['date', 'topic']).mean()

# Unstack 'topic' to become columns
daily_topic_sentiment = daily_topic_sentiment.unstack(level='topic')

# Flatten the column MultiIndex resulting from unstack
daily_topic_sentiment.columns = daily_topic_sentiment.columns.map('{0[0]}_{0[1]}'.format) 

daily_topic_sentiment.head()

Unnamed: 0_level_0,sentiment_score_1,sentiment_score_2,sentiment_score_3,sentiment_score_4
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-07-01,-0.323286,0.019052,-0.196079,0.037853
2022-07-02,-0.070201,-0.004047,0.648673,0.839905
2022-07-03,-0.283336,-0.173141,-0.271328,-0.135003
2022-07-04,-0.12921,-0.082532,-0.274846,-0.060758
2022-07-05,-0.233597,-0.049497,0.061214,-0.010699


In [33]:
# merge daily sentiment by topic and overall into one dataframe
daily_sentiment = pd.concat([daily_sentiment, daily_topic_sentiment], axis=1)
daily_sentiment.head()

Unnamed: 0_level_0,sentiment_score_0,sentiment_score_1,sentiment_score_2,sentiment_score_3,sentiment_score_4
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-07-01,-0.133128,-0.323286,0.019052,-0.196079,0.037853
2022-07-02,0.212321,-0.070201,-0.004047,0.648673,0.839905
2022-07-03,-0.217177,-0.283336,-0.173141,-0.271328,-0.135003
2022-07-04,-0.123233,-0.12921,-0.082532,-0.274846,-0.060758
2022-07-05,-0.139811,-0.233597,-0.049497,0.061214,-0.010699


In [34]:
test_chart_df = pd.melt(daily_sentiment, ignore_index=False)
test_chart_df.reset_index(inplace=True)


In [35]:
test_chart_df.head()

Unnamed: 0,date,variable,value
0,2022-07-01,sentiment_score_0,-0.133128
1,2022-07-02,sentiment_score_0,0.212321
2,2022-07-03,sentiment_score_0,-0.217177
3,2022-07-04,sentiment_score_0,-0.123233
4,2022-07-05,sentiment_score_0,-0.139811


In [37]:
# df = px.data.gapminder().query("continent=='Oceania'")
fig = px.line(test_chart_df, x="date", y="value", color='variable')

fig.update_layout(legend=dict(
    orientation="h",
    yanchor="bottom",
    y=1.02,
    xanchor="right",
    x=1
))

fig.show()

In [11]:
price_df = pd.read_csv('../Data/DCOILBRENTEU.csv')
price_df['DATE'] = pd.to_datetime(price_df['DATE'])
price_df = price_df[(price_df['DATE'] > start_date) & (price_df['DATE'] < end_date)]

price_df.columns=['date', 'price']
price_df['returns'] = price_df['price'].pct_change(1, fill_method=None)
price_df.head()

Unnamed: 0,date,price,returns
653,2022-07-04,121.8,
654,2022-07-05,110.49,-0.092857
655,2022-07-06,108.54,-0.017649
656,2022-07-07,113.4,0.044776
657,2022-07-08,113.95,0.00485


In [15]:
# Get price and returns
price_df = pd.read_csv('../Data/DCOILBRENTEU.csv')
price_df['DATE'] = pd.to_datetime(price_df['DATE'])
price_df = price_df[(price_df['DATE'] > start_date) & (price_df['DATE'] < end_date)]

price_df.columns=['date', 'price']
price_df['returns'] = price_df['price'].pct_change(1, fill_method=None)
price_df.head()

# Create the figure object
fig = go.Figure()

# Add price trace on the left y-axis
fig.add_trace(go.Scatter(x=price_df['date'], y=price_df['price'], 
                         mode='lines', name='Price',
                         line=dict(color='blue')))

# Add returns trace on the right y-axis
fig.add_trace(go.Scatter(x=price_df['date'], y=price_df['returns'], 
                         mode='lines', name='Returns',
                         line=dict(color='orange'), yaxis='y2'))

# Create a secondary y-axis for returns
fig.update_layout(
    title='Price and Returns Over Time',
    xaxis=dict(title='Date'),
    yaxis=dict(title='Price', titlefont=dict(color='blue')),
    yaxis2=dict(title='Returns', titlefont=dict(color='orange'), 
                overlaying='y', side='right')
)

# Show the plot
fig.show()