In [1]:
import os
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import requests
from datetime import datetime as dt

In [2]:
ALPHAVANTAGE_KEY = os.getenv('ALPHAVANTAGE_KEY')
assert ALPHAVANTAGE_KEY is not None, "You must export ALPHAVANTAGE_KEY..."
ALPHAVANTAGE_KEY

'032HZXCB7T21N7SC'

In [3]:
SAMPLE_START_DATE = dt.fromisoformat('2019-12-19').date()
SAMPLE_END_DATE = dt.fromisoformat('2024-05-31').date()
(SAMPLE_START_DATE, SAMPLE_END_DATE)

(datetime.date(2019, 12, 19), datetime.date(2024, 5, 31))

In [4]:
DIGITAL_CURRENCY_URL = 'https://www.alphavantage.co/query?function=DIGITAL_CURRENCY_DAILY&symbol=BTC&market=USD&apikey={}&datatype=csv'.format(ALPHAVANTAGE_KEY)
SPY_URL = 'https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol={symbol}&outputsize=full&apikey={apikey}&datatype=csv'.format(symbol='SPY', apikey=ALPHAVANTAGE_KEY)
QQQ_URL = 'https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol={symbol}&outputsize=full&apikey={apikey}&datatype=csv'.format(symbol='QQQ', apikey=ALPHAVANTAGE_KEY)
(DIGITAL_CURRENCY_URL, SPY_URL, QQQ_URL)

('https://www.alphavantage.co/query?function=DIGITAL_CURRENCY_DAILY&symbol=BTC&market=USD&apikey=032HZXCB7T21N7SC&datatype=csv',
 'https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol=SPY&outputsize=full&apikey=032HZXCB7T21N7SC&datatype=csv',
 'https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol=QQQ&outputsize=full&apikey=032HZXCB7T21N7SC&datatype=csv')

In [5]:
%matplotlib inline
pd.options.display.float_format = '{:,.4f}'.format
sns.set_style("whitegrid")

mpl.rcParams.update({
                     'text.color' : "white",
                     'lines.linewidth': 3,
                     'font.size': 16.0,
                     'legend.facecolor': "182742",
                     'legend.edgecolor': 'CFB023',
                     'legend.fancybox': True,
                     'legend.framealpha': 1.,
                     'legend.loc': 'right',
                     'xtick.color': 'white',
                     'ytick.color': 'white',
                     'ytick.minor.visible': True,
                     'axes.facecolor': '#182742',
                     'axes.edgecolor': 'white',
#                      'axes.grid.axis': 'y',
#                      'axes.grid.which': 'minor',
                     'axes.labelcolor' : "white",
                     'axes.labelsize': "24",
                     'axes.labelweight': "bold",
                     'axes.titlesize': 28,
                     'axes.titleweight': 'bold',
                     'axes.titlepad': 20,
                     'figure.facecolor': '#182742',
#                      'xtick.bottom': False,
                    }
                   )

In [6]:
def total_return(prices):
  return prices.iloc[-1] / prices.iloc[0] - 1

# Download Bitcoin Historical Data

In [7]:
# TODO: Figure out how to add error checking
# r = requests.get(URL)
# data = r.json()
# assert 'Error Message' not in data.keys(), "Error Received. {}".format(data['Error Message'])

df = pd.read_csv(DIGITAL_CURRENCY_URL)
df['timestamp'] = pd.to_datetime(df.timestamp)
df.index = df.timestamp.dt.date
df = df[::-1]
print(df.columns)
df.rename(columns={"close": "btc_close"}, inplace=True)
# df = df.drop(['open (USD)', 'high (USD)', 'low (USD)', 'open (USD).1', 'high (USD).1', 'low (USD).1', 'close (USD).1'], axis=1)
df['pct_change'] = df['btc_close'].pct_change()
df[['timestamp', 'btc_close', 'pct_change']].to_csv('bitcoin_daily_returns.csv')
df[['timestamp', 'btc_close', 'pct_change']]
df

Index(['timestamp', 'open', 'high', 'low', 'close', 'volume'], dtype='object')


Unnamed: 0_level_0,timestamp,open,high,low,btc_close,volume,pct_change
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2023-08-11,2023-08-11,29424.0400,29534.1400,29213.5900,29400.4500,5961.2174,
2023-08-12,2023-08-12,29400.4500,29477.3100,29349.9800,29416.9600,1906.1708,0.0006
2023-08-13,2023-08-13,29417.1900,29447.6000,29247.1500,29275.9400,2128.3744,-0.0048
2023-08-14,2023-08-14,29275.7800,29665.2700,29072.9600,29405.4900,7063.4681,0.0044
2023-08-15,2023-08-15,29405.4900,29464.6200,29046.5800,29170.1400,5815.3223,-0.0080
...,...,...,...,...,...,...,...
2024-06-01,2024-06-01,67473.0700,67837.3300,67371.2800,67719.2900,1949.5701,0.0037
2024-06-02,2024-06-02,67719.2900,68411.0000,67260.0000,67735.5200,2997.1524,0.0002
2024-06-03,2024-06-03,67731.1400,70295.7800,67568.0000,68791.0000,11884.0773,0.0156
2024-06-04,2024-06-04,68791.0400,71108.9000,68542.3700,70542.3200,18042.1940,0.0255


In [8]:
btc_monthly_returns = df.groupby([df.timestamp.dt.year, df.timestamp.dt.month])['btc_close'].apply(total_return)
btc_monthly_returns.to_csv("bitcoin_monthly_returns.csv")
btc_monthly_returns

timestamp  timestamp
2023       8           -0.1180
           9            0.0452
           10           0.2379
           11           0.0647
           12           0.0926
2024       1           -0.0378
           2            0.4202
           3            0.1418
           4           -0.1300
           5            0.1580
           6            0.0437
Name: btc_close, dtype: float64

In [9]:

def read_equities(url, start_date, end_date, outfile):
    df = pd.read_csv(url)
    df['timestamp'] = pd.to_datetime(df.timestamp)
    df.index = df.timestamp.dt.date
    df = df[::-1]
    sample = df[start_date:end_date]
    # df.rename(columns={"close (USD)": "btc_close"}, inplace=True)
    # df = df.drop(['open (USD)', 'high (USD)', 'low (USD)', 'open (USD).1', 'high (USD).1', 'low (USD).1', 'close (USD).1'], axis=1)
    sample['pct_change'] = sample['close'].pct_change().copy()
    sample['growth_of_10k'] = (sample['pct_change'].iloc[1:] + 1.).cumprod() * 10000.
    sample['growth_of_10k'].iloc[0] = 10000.
#     sample.insert(4, 'growth_of_10k', 0.)
#     sample['growth_of_10k'].iloc[0] = 10000.0
# #     print(sample.iloc[1:]['growth_of_10k'])
#     print(sample['growth_of_10k'].shift(1))
#     print((1 + sample['pct_change']))
#     sample['growth_of_10k'] = sample['growth_of_10k'].shift(1) * (1 + sample['pct_change'])
    sample[['timestamp', 'close', 'pct_change', 'growth_of_10k']].to_csv(outfile)
    return sample[['timestamp', 'close', 'pct_change', 'growth_of_10k']]

In [10]:
spy_history = read_equities(SPY_URL, SAMPLE_START_DATE, SAMPLE_END_DATE, 'spy_history.csv')
spy_history

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sample['pct_change'] = sample['close'].pct_change().copy()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sample['growth_of_10k'] = (sample['pct_change'].iloc[1:] + 1.).cumprod() * 10000.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sample['growth_of_10k'].iloc[0] = 10000.


Unnamed: 0_level_0,timestamp,close,pct_change,growth_of_10k
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-12-19,2019-12-19,320.9000,,10000.0000
2019-12-20,2019-12-20,320.7300,-0.0005,9994.7024
2019-12-23,2019-12-23,321.2200,0.0015,10009.9720
2019-12-24,2019-12-24,321.2300,0.0000,10010.2836
2019-12-26,2019-12-26,322.9400,0.0053,10063.5712
...,...,...,...,...
2024-05-24,2024-05-24,529.4400,0.0066,16498.5977
2024-05-28,2024-05-28,529.8100,0.0007,16510.1278
2024-05-29,2024-05-29,526.1000,-0.0070,16394.5154
2024-05-30,2024-05-30,522.6100,-0.0066,16285.7588


In [11]:
qqq_history = read_equities(QQQ_URL, SAMPLE_START_DATE, SAMPLE_END_DATE, 'qqq_history.csv')
qqq_history

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sample['pct_change'] = sample['close'].pct_change().copy()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sample['growth_of_10k'] = (sample['pct_change'].iloc[1:] + 1.).cumprod() * 10000.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sample['growth_of_10k'].iloc[0] = 10000.


Unnamed: 0_level_0,timestamp,close,pct_change,growth_of_10k
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-12-19,2019-12-19,210.8600,,10000.0000
2019-12-20,2019-12-20,211.7100,0.0040,10040.3111
2019-12-23,2019-12-23,211.8100,0.0005,10045.0536
2019-12-24,2019-12-24,211.9200,0.0005,10050.2703
2019-12-26,2019-12-26,213.7900,0.0088,10138.9548
...,...,...,...,...
2024-05-24,2024-05-24,457.9500,0.0095,21718.2017
2024-05-28,2024-05-28,459.6800,0.0038,21800.2466
2024-05-29,2024-05-29,456.4400,-0.0070,21646.5902
2024-05-30,2024-05-30,451.5500,-0.0107,21414.6827
