In [1]:
import pandas as pd
import numpy as np
import yfinance as yf
import json
from datetime import datetime, timedelta
import plotly.graph_objects as go
import plotly.express as px


***
# Loading Datasets

In [22]:
# BITCOIN DATA
today = datetime.now().date()

# Fetch historical data
btc = yf.download('BTC-USD', start='2023-08-01', end=(today + timedelta(days=1)).strftime('%Y-%m-%d'))

# Use the features of interest
btc = btc[['Open', 'High', 'Low', 'Volume', 'Close']]
btc.reset_index(inplace=True)
btc.Date = btc.Date.apply(lambda x: x.strftime('%Y-%m-%d'))


[*********************100%%**********************]  1 of 1 completed


In [85]:
# CRYPTONEWS DATA
with open('data/cryptonews.json', 'r') as f:
    dict_data = json.load(f)

news = pd.DataFrame(dict_data['data'])

# Converting timezones to UTC
news['date_utc'] = pd.to_datetime(news.date).dt.tz_convert('UTC')

# Only date, topics and sentiment is relevant
news = news[['date', 'date_utc', 'title', 'text', 'rank_score', 'topics','sentiment',]]

***
# Analytics

* _Context_: during the third week of August 2023, BTC's price plummeted.

* _Hypothesis_: could we have foreseen such drastic downward trend with news sentiment data?

In [39]:
fig = go.Figure(data=[go.Candlestick(x=btc.Date,
                open=btc['Open'], high=btc['High'],
                low=btc['Low'], close=btc['Close'])
                     ])

fig.update_layout(
     title="<b>Bitcoin USD Price - August 2023</b>",
     yaxis_title="Price in USD",
     xaxis_title="Date",
     xaxis_rangeslider_visible=False,
     template="ggplot2"
     )
fig.update_xaxes(type='category')

fig.show()

In [82]:
news_ = news.copy()
news_.date_utc = news_.date_utc.apply(lambda x: x.strftime('%Y-%m-%d'))
sents = news_.groupby('date_utc')['sentiment'].value_counts(normalize=True).to_frame('count')
sents['count'] = round(sents['count']*100)

fig = px.bar(
    sents.reset_index(), 
    x='date_utc', 
    y='count', 
    text=[f'{c}%' for c in sents['count']],
    color='sentiment', 
    color_discrete_map={'Positive': '#77dd77',
                        'Negative': '#ff6961',
                        'Neutral': '#ffb347'
                        }
    )

fig.update_layout(
     title="<b>Cryptonews Sentiment - August 2023</b>",
     yaxis_title="Count per Sentiment",
     xaxis_title="Date",
     template="ggplot2"
     )

fig.update_xaxes(type='category')
fig.add_vline(x='2023-08-17',line_width=2, line_dash="dash", line_color="black")
fig.show()

Which does not seem to predict the downward trend. 

* Which news are related to whales offloading?

* What if we weight the sentiments based on rank?

In [116]:
keywords = ['Elon', 'Musk', 'SpaceX', 'Whale', 'whale']
kwstr = '|'.join(keywords)
mask = news[['title', 'text']].stack().str.contains(kwstr).any(level=0)

whales = news[mask].sort_values('date_utc')

whales['date_utc'] = whales['date_utc'].apply(lambda x: x.strftime('%Y-%m-%d'))
whales_sent = whales.groupby('date_utc')['sentiment'].value_counts().to_frame('count').reset_index()


fig = px.bar(
    whales_sent, 
    x='date_utc', 
    y='count', 
    # text=[f'{c}%' for c in sents['count']],
    color='sentiment', 
    color_discrete_map={'Positive': '#77dd77',
                        'Negative': '#ff6961',
                        'Neutral': '#ffb347'
                        }
    )

fig.update_layout(
     title="<b>Cryptonews Whales-Related Sentiment - August 2023</b>",
     yaxis_title="Count per Sentiment",
     xaxis_title="Date",
     template="ggplot2"
     )

# fig.update_xaxes(type='category')
fig.add_vline(x='2023-08-17',line_width=2, line_dash="dash", line_color="black")
fig.show()


Using the level keyword in DataFrame and Series aggregations is deprecated and will be removed in a future version. Use groupby instead. df.any(level=1) should use df.groupby(level=1).any()



Still inconclusive.

In [145]:
condlist = [
    news['sentiment']=="Positive",
    news['sentiment']=="Neutral",
    news['sentiment']=="Negative"
]

choices = [1, 0, -1 ]

news['date'] = news['date_utc'].apply(lambda x: x.strftime('%Y-%m-%d'))
news['polarity'] = np.select(condlist,choices)

news.rank_score = pd.to_numeric(news.rank_score)

wm = lambda x: np.average(x, weights=news.loc[x.index, 'rank_score'])

weighted = news.groupby(['date']).agg(weighted_avg=('polarity', wm))


fig = px.line(
    weighted.reset_index()[1:],
    x='date',
    y='weighted_avg',
    markers=True
)

fig.update_layout(
     title="<b>Cryptonews Weighted Sentiment - August 2023</b>",
     yaxis_title="Weighted Sentiment",
     xaxis_title="Date",
     template="ggplot2"
     )

fig.update_xaxes(type='category')
fig.add_vline(x='2023-08-17',line_width=2, line_dash="dash", line_color="black")
fig.show()

In [172]:
# What about pct change?
weighted['pct_chg'] = weighted['weighted_avg'].pct_change()

fig = px.line(
    weighted.reset_index()[2:],
    x='date',
    y='pct_chg',
    text=[f'{int(round(p*100,0))}%' for p in weighted['pct_chg'][2:]],
    markers=True
)

fig.update_layout(
     title="<b>Cryptonews Weighted Sentiment Percentual Change - August 2023</b>",
     yaxis_title="Weighted Sentiment",
     xaxis_title="Date",
     template="ggplot2",
     width=1500,
     height=500
     )

fig.update_xaxes(type='category')
fig.update_traces(marker={'size':18, 'symbol':'square'})
fig.update_traces(textposition='middle center', textfont_size=6, textfont_color='white')
fig.add_vline(x='2023-08-17',line_width=2, line_dash="dash", line_color="black")
fig.add_hline(y=0,line_width=2, line_color="black")
fig.show()

Weighted sentiment seems to work better somewhat. Further conclusions would require going beyond analysis.