In [1]:
import os
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import requests
from datetime import datetime as dt

In [2]:
ALPHAVANTAGE_KEY = os.getenv('ALPHAVANTAGE_KEY')
assert ALPHAVANTAGE_KEY is not None, "You must export ALPHAVANTAGE_KEY..."
ALPHAVANTAGE_KEY

'032HZXCB7T21N7SC'

In [3]:
SAMPLE_START_DATE = dt.fromisoformat('2019-12-19').date()
SAMPLE_END_DATE = dt.fromisoformat('2024-04-30').date()
(SAMPLE_START_DATE, SAMPLE_END_DATE)

(datetime.date(2019, 12, 19), datetime.date(2024, 4, 30))

In [4]:
DIGITAL_CURRENCY_URL = 'https://www.alphavantage.co/query?function=DIGITAL_CURRENCY_DAILY&symbol=BTC&market=USD&apikey={}&datatype=csv'.format(ALPHAVANTAGE_KEY)
SPY_URL = 'https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol={symbol}&outputsize=full&apikey={apikey}&datatype=csv'.format(symbol='SPY', apikey=ALPHAVANTAGE_KEY)
QQQ_URL = 'https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol={symbol}&outputsize=full&apikey={apikey}&datatype=csv'.format(symbol='QQQ', apikey=ALPHAVANTAGE_KEY)
(DIGITAL_CURRENCY_URL, SPY_URL, QQQ_URL)

('https://www.alphavantage.co/query?function=DIGITAL_CURRENCY_DAILY&symbol=BTC&market=USD&apikey=032HZXCB7T21N7SC&datatype=csv',
 'https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol=SPY&outputsize=full&apikey=032HZXCB7T21N7SC&datatype=csv',
 'https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol=QQQ&outputsize=full&apikey=032HZXCB7T21N7SC&datatype=csv')

In [5]:
%matplotlib inline
pd.options.display.float_format = '{:,.4f}'.format
sns.set_style("whitegrid")

mpl.rcParams.update({
                     'text.color' : "white",
                     'lines.linewidth': 3,
                     'font.size': 16.0,
                     'legend.facecolor': "182742",
                     'legend.edgecolor': 'CFB023',
                     'legend.fancybox': True,
                     'legend.framealpha': 1.,
                     'legend.loc': 'right',
                     'xtick.color': 'white',
                     'ytick.color': 'white',
                     'ytick.minor.visible': True,
                     'axes.facecolor': '#182742',
                     'axes.edgecolor': 'white',
#                      'axes.grid.axis': 'y',
#                      'axes.grid.which': 'minor',
                     'axes.labelcolor' : "white",
                     'axes.labelsize': "24",
                     'axes.labelweight': "bold",
                     'axes.titlesize': 28,
                     'axes.titleweight': 'bold',
                     'axes.titlepad': 20,
                     'figure.facecolor': '#182742',
#                      'xtick.bottom': False,
                    }
                   )

In [6]:
def total_return(prices):
  return prices.iloc[-1] / prices.iloc[0] - 1

# Download Bitcoin Historical Data

In [7]:
# TODO: Figure out how to add error checking
# r = requests.get(URL)
# data = r.json()
# assert 'Error Message' not in data.keys(), "Error Received. {}".format(data['Error Message'])

df = pd.read_csv(DIGITAL_CURRENCY_URL)
df['timestamp'] = pd.to_datetime(df.timestamp)
df.index = df.timestamp.dt.date
df = df[::-1]
print(df.columns)
df.rename(columns={"close": "btc_close"}, inplace=True)
# df = df.drop(['open (USD)', 'high (USD)', 'low (USD)', 'open (USD).1', 'high (USD).1', 'low (USD).1', 'close (USD).1'], axis=1)
df['pct_change'] = df['btc_close'].pct_change()
df[['timestamp', 'btc_close', 'pct_change']].to_csv('bitcoin_daily_returns.csv')
df[['timestamp', 'btc_close', 'pct_change']]
df

Index(['timestamp', 'open', 'high', 'low', 'close', 'volume'], dtype='object')


Unnamed: 0_level_0,timestamp,open,high,low,btc_close,volume,pct_change
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2023-07-23,2023-07-23,29793.6200,30350.7000,29733.5500,30081.6100,4513.8321,
2023-07-24,2023-07-24,30081.6100,30099.9900,28850.0000,29176.9800,14484.2754,-0.0301
2023-07-25,2023-07-25,29176.9800,29376.5100,29046.0000,29225.1400,7139.2107,0.0017
2023-07-26,2023-07-26,29225.1100,29686.4100,29089.5400,29353.2300,9635.0350,0.0044
2023-07-27,2023-07-27,29350.8800,29571.4200,29075.0000,29214.9200,7327.2908,-0.0047
...,...,...,...,...,...,...,...
2024-05-13,2024-05-13,61453.0100,63456.1800,60738.5100,62932.3700,9170.7541,0.0241
2024-05-14,2024-05-14,62927.4400,63103.0200,61024.5100,61539.8400,13456.6131,-0.0221
2024-05-15,2024-05-15,61539.7000,66480.0000,61299.5700,66244.6900,21821.3886,0.0765
2024-05-16,2024-05-16,66244.6800,66772.8300,64588.5000,65252.1800,12012.7020,-0.0150


In [8]:
btc_monthly_returns = df.groupby([df.timestamp.dt.year, df.timestamp.dt.month])['btc_close'].apply(total_return)
btc_monthly_returns.to_csv("bitcoin_monthly_returns.csv")
btc_monthly_returns

timestamp  timestamp
2023       7           -0.0283
           8           -0.1268
           9            0.0452
           10           0.2379
           11           0.0647
           12           0.0926
2024       1           -0.0378
           2            0.4202
           3            0.1418
           4           -0.1300
           5            0.1217
Name: btc_close, dtype: float64

In [9]:

def read_equities(url, start_date, end_date, outfile):
    df = pd.read_csv(url)
    df['timestamp'] = pd.to_datetime(df.timestamp)
    df.index = df.timestamp.dt.date
    df = df[::-1]
    sample = df[start_date:end_date]
    # df.rename(columns={"close (USD)": "btc_close"}, inplace=True)
    # df = df.drop(['open (USD)', 'high (USD)', 'low (USD)', 'open (USD).1', 'high (USD).1', 'low (USD).1', 'close (USD).1'], axis=1)
    sample['pct_change'] = sample['close'].pct_change().copy()
    sample['growth_of_10k'] = (sample['pct_change'].iloc[1:] + 1.).cumprod() * 10000.
    sample['growth_of_10k'].iloc[0] = 10000.
#     sample.insert(4, 'growth_of_10k', 0.)
#     sample['growth_of_10k'].iloc[0] = 10000.0
# #     print(sample.iloc[1:]['growth_of_10k'])
#     print(sample['growth_of_10k'].shift(1))
#     print((1 + sample['pct_change']))
#     sample['growth_of_10k'] = sample['growth_of_10k'].shift(1) * (1 + sample['pct_change'])
    sample[['timestamp', 'close', 'pct_change', 'growth_of_10k']].to_csv(outfile)
    return sample[['timestamp', 'close', 'pct_change', 'growth_of_10k']]

In [10]:
spy_history = read_equities(SPY_URL, SAMPLE_START_DATE, SAMPLE_END_DATE, 'spy_history.csv')
spy_history

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sample['pct_change'] = sample['close'].pct_change().copy()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sample['growth_of_10k'] = (sample['pct_change'].iloc[1:] + 1.).cumprod() * 10000.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sample['growth_of_10k'].iloc[0] = 10000.


Unnamed: 0_level_0,timestamp,close,pct_change,growth_of_10k
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-12-19,2019-12-19,320.9000,,10000.0000
2019-12-20,2019-12-20,320.7300,-0.0005,9994.7024
2019-12-23,2019-12-23,321.2200,0.0015,10009.9720
2019-12-24,2019-12-24,321.2300,0.0000,10010.2836
2019-12-26,2019-12-26,322.9400,0.0053,10063.5712
...,...,...,...,...
2024-04-24,2024-04-24,505.4100,-0.0005,15749.7663
2024-04-25,2024-04-25,503.4900,-0.0038,15689.9346
2024-04-26,2024-04-26,508.2600,0.0095,15838.5790
2024-04-29,2024-04-29,510.0600,0.0035,15894.6712


In [11]:
qqq_history = read_equities(QQQ_URL, SAMPLE_START_DATE, SAMPLE_END_DATE, 'qqq_history.csv')
qqq_history

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sample['pct_change'] = sample['close'].pct_change().copy()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sample['growth_of_10k'] = (sample['pct_change'].iloc[1:] + 1.).cumprod() * 10000.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sample['growth_of_10k'].iloc[0] = 10000.


Unnamed: 0_level_0,timestamp,close,pct_change,growth_of_10k
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-12-19,2019-12-19,210.8600,,10000.0000
2019-12-20,2019-12-20,211.7100,0.0040,10040.3111
2019-12-23,2019-12-23,211.8100,0.0005,10045.0536
2019-12-24,2019-12-24,211.9200,0.0005,10050.2703
2019-12-26,2019-12-26,213.7900,0.0088,10138.9548
...,...,...,...,...
2024-04-24,2024-04-24,426.5100,0.0034,20227.1649
2024-04-25,2024-04-25,424.4500,-0.0048,20129.4698
2024-04-26,2024-04-26,431.0000,0.0154,20440.1024
2024-04-29,2024-04-29,432.7500,0.0041,20523.0959
