# Aug 2023 BTC crash analysis - daily data

***

# Import libraries and set some initial parameters

## Import relevant libraries

In [1]:
import pandas as pd
import numpy as np
from binance.client import Client
from dotenv import load_dotenv

import json
from datetime import datetime, timedelta, date
import plotly.graph_objects as go
import plotly.express as px
import os

import sys
sys.path.append('../src')

import cryptonews as cn


## Set some initial parameters

In [None]:
# Load the variables defined in the project's .env file into the process environment
# (the Binance credentials are read from os.environ further down).
ENV_FILE = '../config_files/.env'
load_dotenv(dotenv_path=ENV_FILE)

# Get Data

## [OPTIONAL] Download the data from the APIs
Run this section only if you have API keys for `cryptonews-api` and `binance-api` and want to download the data from the APIs yourself; otherwise skip ahead to "Import data from local files".

### Download Binance BTCUSDT data

In [None]:
# Parameters for the Binance klines request.
asset_ticket = 'BTCUSDT'
# Stored as the dotted *name* of the Client interval constant; it is resolved
# when the request is made.
timestamp = "Client.KLINE_INTERVAL_1DAY"
# Date range in "DD Mon, YYYY" form (parsed with '%d %b, %Y' when naming the CSV).
start_date, end_date = '01 Aug, 2023', '20 Aug, 2023'

In [None]:
# Build the Binance API client from credentials held in the environment.
# A missing variable raises KeyError here rather than failing later on a request.
client = Client(
    api_key=os.environ["BINANCE_API_KEY"],
    api_secret=os.environ["BINANCE_SECRET_KEY"],
)

In [None]:
# Resolve the interval by attribute lookup instead of eval(): `timestamp` holds the
# dotted name of a Client constant (e.g. 'Client.KLINE_INTERVAL_1DAY'), so only the
# part after the last dot is looked up on Client. If `timestamp` already holds a raw
# interval value rather than a constant name, it is passed through unchanged.
interval = getattr(Client, timestamp.rpartition('.')[2], timestamp)
klines = client.get_historical_klines(asset_ticket, interval, start_date, end_date)

In [None]:
# Column names for the raw kline rows, in the order the values appear in each row.
kline_columns = [
    'open_time', 'open', 'high', 'low', 'close', 'volume',
    'close_time', 'quote_asset_volume', 'number_of_trades',
    'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume', 'ignore',
]
# Columns kept for the analysis (besides the date).
price_columns = ['open', 'high', 'low', 'close', 'volume']


def _yyyymmdd(day: str) -> str:
    """Reformat a 'DD Mon, YYYY' date string (e.g. '01 Aug, 2023') as 'YYYYMMDD'."""
    return datetime.strptime(day, '%d %b, %Y').strftime('%Y%m%d')


df_klines = pd.DataFrame(klines, columns=kline_columns)

# Convert the price/volume columns from 'object' to numeric dtypes
for column in price_columns:
    df_klines[column] = pd.to_numeric(df_klines[column])

# 'open_time' is an epoch timestamp in milliseconds. A single conversion is enough;
# flooring to whole seconds reproduces the second-level precision of the previous
# '%Y-%m-%d %H:%M:%S' string round-trip without the redundant re-parsing.
df_klines['date'] = pd.to_datetime(df_klines['open_time'], unit='ms').dt.floor('s')

# Get the columns we need
df_klines = df_klines[['date'] + price_columns]

# Export the data to a CSV file
csv_path = f"../data/{asset_ticket}_daily_data_from_{_yyyymmdd(start_date)}_to_{_yyyymmdd(end_date)}.csv"
df_klines.to_csv(csv_path, index=False)

### Download Crypto News data

In [None]:
# Asset symbol and date range for the crypto-news download.
ticket = 'BTC'
sd, ed = date(year=2023, month=8, day=1), date(year=2023, month=8, day=20)

In [None]:
# Use the `ticket` parameter defined above instead of repeating the literal, so that
# changing the asset in one place is enough.
# NOTE(review): presumably this downloads and stores the news for the range — confirm
# against src/cryptonews.
cn.get_cryptonews(ticket, sd, ed)

## Import data from local files

In [2]:
# Daily BTCUSDT candles from the local CSV, with 'date' parsed into datetimes.
btc = (
    pd.read_csv('../data/BTCUSDT_daily_data_from_20230801_to_20230820.csv')
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
)

In [3]:
# Preview the first rows of the price data.
btc.head(n=5)

Unnamed: 0,date,open,high,low,close,volume
0,2023-08-01,29232.26,29739.25,28585.7,29705.99,44719.65162
1,2023-08-02,29705.99,30047.5,28927.5,29186.01,48181.65141
2,2023-08-03,29186.0,29433.33,28968.0,29193.64,26476.91994
3,2023-08-04,29193.65,29333.08,28807.54,29113.99,23551.95217
4,2023-08-05,29114.0,29152.23,28978.64,29072.13,11645.52018


In [4]:
# News items from the local CSV, with 'utc_datetime' parsed into datetimes.
news = (
    pd.read_csv('../data/formatted_cryptonews_BTC_from_20230801_to_20230820.csv')
    .assign(utc_datetime=lambda frame: pd.to_datetime(frame['utc_datetime']))
)

In [5]:
# Preview the first rows of the news data.
news.head(n=5)

Unnamed: 0,news_url,image_url,title,text,source_name,date,topics,sentiment,type,tickers,news_id,rank_score,eventid,datetime,utc_zone,utc_datetime
0,https://blockworks.co/news/microstrategy-buys-...,https://crypto.snapi.dev/images/v1/h/o/falling...,MicroStrategy buys added 467 BTC to balance sh...,Michael Saylor's MicroStrategy released earnin...,Blockworks,"Tue, 01 Aug 2023 16:35:54 -0400",['regulations'],Positive,Article,['BTC'],341957,8.9,AAC320,2023-08-01 16:35:54,-400,2023-08-01 20:35:54
1,https://www.coindesk.com/policy/2023/08/01/bit...,https://crypto.snapi.dev/images/v1/i/e/ctk3lb5...,"Bitcoin Won't Back U.S Dollar, Presidential Ho...",The Republican candidate's comments stand in c...,Coindesk,"Tue, 01 Aug 2023 12:14:00 -0400",[],Positive,Article,['BTC'],341841,8.63,,2023-08-01 12:14:00,-400,2023-08-01 16:14:00
2,https://www.forbes.com/sites/emilymason/2023/0...,https://crypto.snapi.dev/images/v1/p/s/microst...,"MicroStrategy Adds To Its Bitcoin Stash In Q2,...",Earnings beat estimates as impairment charges ...,Forbes,"Tue, 01 Aug 2023 19:21:39 -0400",[],Positive,Article,['BTC'],341992,7.0,AAC320,2023-08-01 19:21:39,-400,2023-08-01 23:21:39
3,https://www.coindesk.com/markets/2023/08/01/mi...,https://crypto.snapi.dev/images/v1/w/g/3rfemlx...,Bitcoin Whale Michael Saylor Might Buy a Lot M...,"MicroStrategy, the software developer that's a...",Coindesk,"Tue, 01 Aug 2023 19:18:54 -0400",['whales'],Positive,Article,['BTC'],341993,6.94,AAC320,2023-08-01 19:18:54,-400,2023-08-01 23:18:54
4,https://decrypt.co/150871/bitcoin-ordinals-tea...,https://crypto.snapi.dev/images/v1/j/z/digital...,Bitcoin Ordinals Team Launches Nonprofit as In...,The team behind Ordinals said its Bitcoin-base...,Decrypt,"Tue, 01 Aug 2023 14:01:02 -0400",[],Positive,Article,['BTC'],341888,6.73,AAC329,2023-08-01 14:01:02,-400,2023-08-01 18:01:02


***
# Analytics

* _Context_: during the third week of August 2023, BTC's price plummeted.

* _Hypothesis_: could we have foreseen such a drastic downward trend using news sentiment data?

In [6]:
# Daily OHLC candlestick chart of the BTCUSDT price.
candles = go.Candlestick(
    x=btc['date'],
    open=btc['open'],
    high=btc['high'],
    low=btc['low'],
    close=btc['close'],
)
fig = go.Figure(data=[candles])

fig.update_layout(
    title="<b>Bitcoin USDT Price - August 2023</b>",
    xaxis_title="Date",
    yaxis_title="Price in USDT",
    template="ggplot2",
    xaxis_rangeslider_visible=False,
)
# Treat the dates as discrete categories on the x-axis.
fig.update_xaxes(type='category')

fig.show()

In [7]:
# Share of each sentiment label per UTC day, as a percentage of that day's news.
news_ = news.copy()
news_['utc_datetime'] = news_['utc_datetime'].dt.strftime('%Y-%m-%d')
sents = news_.groupby('utc_datetime')['sentiment'].value_counts(normalize=True).to_frame('count')
# The 'count' column holds rounded percentages (0-100), not raw counts.
sents['count'] = (sents['count'] * 100).round()

# Flatten the (day, sentiment) index once so the bars and their labels come from the same rows.
sents_plot = sents.reset_index()

fig = px.bar(
    sents_plot,
    x='utc_datetime',
    y='count',
    # Whole-number labels ("63%" rather than "63.0%").
    text=[f'{pct:.0f}%' for pct in sents_plot['count']],
    color='sentiment',
    color_discrete_map={'Positive': '#77dd77',
                        'Negative': '#ff6961',
                        'Neutral': '#ffb347'
                        }
    )

fig.update_layout(
     title="<b>Cryptonews Sentiment - August 2023</b>",
     # The bars show percentages of the day's news, so the axis is labelled accordingly.
     yaxis_title="Share of News per Sentiment (%)",
     xaxis_title="Date",
     template="ggplot2"
     )

fig.update_xaxes(type='category')
# Dashed marker at 2023-08-17 (NOTE(review): presumably the day of the price drop — confirm).
fig.add_vline(x='2023-08-17', line_width=2, line_dash="dash", line_color="black")
fig.show()

The daily sentiment split does not seem to predict the downward trend.

* Which news items are related to whales offloading?

* What if we weight the sentiments based on rank?

In [28]:
# Flag news items whose title or text mentions any of the keywords.
keywords = ['Elon', 'Musk', 'SpaceX', 'Whale', 'whale']
kwstr = '|'.join(keywords)  # regex alternation: match any one keyword
# Each column is tested directly with na=False, so a missing title/text simply counts
# as "no match" and the mask always has exactly one entry per row of `news`.
mask = (
    news['title'].str.contains(kwstr, na=False)
    | news['text'].str.contains(kwstr, na=False)
)


In [29]:
# Keyword-matched news in chronological order, with the timestamp reduced to its UTC day.
# Built as one chain so no column is assigned onto a filtered slice of `news`.
whales = (
    news[mask]
    .sort_values('utc_datetime')
    .assign(utc_datetime=lambda frame: frame['utc_datetime'].dt.strftime('%Y-%m-%d'))
)

# Number of matched news items per day and sentiment label, as a flat frame for plotting.
whales_sent = (
    whales.groupby('utc_datetime')['sentiment']
    .value_counts()
    .to_frame('count')
    .reset_index()
)

In [30]:
# Bar chart of keyword-matched news counts per day, coloured by sentiment.
sentiment_colors = {
    'Positive': '#77dd77',
    'Negative': '#ff6961',
    'Neutral': '#ffb347',
}

fig = px.bar(
    whales_sent,
    x='utc_datetime',
    y='count',
    color='sentiment',
    color_discrete_map=sentiment_colors,
)

fig.update_layout(
    title="<b>Cryptonews Whales-Related Sentiment - August 2023</b>",
    xaxis_title="Date",
    yaxis_title="Count per Sentiment",
    template="ggplot2",
)

# The x-axis type is left to Plotly's default here (no forced 'category' axis).
fig.add_vline(x='2023-08-17', line_width=2, line_dash="dash", line_color="black")
fig.show()

Still inconclusive.

In [31]:
# Map each sentiment label to a signed polarity score.
condlist = [
    news['sentiment'] == "Positive",
    news['sentiment'] == "Neutral",
    news['sentiment'] == "Negative",
]
choices = [1, 0, -1]

# NOTE: this overwrites the 'date' column loaded from the CSV with the UTC day string.
news['date'] = news['utc_datetime'].dt.strftime('%Y-%m-%d')
# np.select falls back to its default (0) for any label not listed in condlist.
news['polarity'] = np.select(condlist, choices)

news['rank_score'] = pd.to_numeric(news['rank_score'])


def wm(x):
    """Return the average of `x` weighted by the `rank_score` of the same rows of `news`.

    `x` is a slice of a `news` column (one group's polarity values); its index is used
    to look up the matching weights. Returns NaN when the group's weights sum to zero,
    where np.average would otherwise raise ZeroDivisionError.
    """
    weights = news.loc[x.index, 'rank_score']
    if weights.sum() == 0:
        return np.nan
    return np.average(x, weights=weights)


# One row per UTC day with the rank-weighted mean polarity.
weighted = news.groupby(['date']).agg(weighted_avg=('polarity', wm))

In [32]:
# Line chart of the daily weighted sentiment. The first row of `weighted` is left out
# (NOTE(review): the reason is not stated in the notebook — confirm it is intentional).
weighted_plot = weighted.reset_index().iloc[1:]

fig = px.line(weighted_plot, x='date', y='weighted_avg', markers=True)

fig.update_layout(
    title="<b>Cryptonews Weighted Sentiment - August 2023</b>",
    xaxis_title="Date",
    yaxis_title="Weighted Sentiment",
    template="ggplot2",
)

# Treat the dates as discrete categories on the x-axis.
fig.update_xaxes(type='category')
fig.add_vline(x='2023-08-17', line_width=2, line_dash="dash", line_color="black")
fig.show()

In [33]:
# What about pct change?
# Day-over-day relative change of the weighted sentiment (a fraction: 0.5 means +50%).
weighted['pct_chg'] = weighted['weighted_avg'].pct_change()

# Rows 0 and 1 are left out: row 0 has no previous day (its change is NaN), and row 1
# is measured against row 0, which the weighted-sentiment chart also omits
# (NOTE(review): presumably the reason for skipping it — confirm).
pct_plot = weighted.reset_index().iloc[2:]


def _pct_label(change):
    """Format a fractional change as a whole-number percent; blank when not finite.

    pct_change yields NaN/inf when a day's value is missing or the previous day's is 0,
    and int() cannot convert those, so such points simply get no label.
    """
    if not np.isfinite(change):
        return ''
    return f'{int(round(change * 100, 0))}%'


fig = px.line(
    pct_plot,
    x='date',
    y='pct_chg',
    # Labels are built from the same rows that are plotted.
    text=[_pct_label(p) for p in pct_plot['pct_chg']],
    markers=True
)

fig.update_layout(
     title="<b>Cryptonews Weighted Sentiment Percentual Change - August 2023</b>",
     # The y-axis shows the relative change, not the sentiment score itself.
     yaxis_title="Relative Change in Weighted Sentiment",
     xaxis_title="Date",
     template="ggplot2",
     width=1500,
     height=500
     )

fig.update_xaxes(type='category')
# Large square markers double as boxes for the small white percentage labels.
fig.update_traces(
    marker={'size': 18, 'symbol': 'square'},
    textposition='middle center',
    textfont_size=6,
    textfont_color='white',
)
fig.add_vline(x='2023-08-17', line_width=2, line_dash="dash", line_color="black")
fig.add_hline(y=0, line_width=2, line_color="black")
fig.show()

Weighted sentiment seems to work somewhat better. Drawing further conclusions would require going beyond this exploratory analysis.