In [130]:
import pandas as pd
import requests
from datetime import date

## construct api endpoint

In [131]:
# Assemble the CoinDesk historical-close request URL for the whole date window.
base = 'https://api.coindesk.com/v1/bpi/historical/close.json?'
start_date = '2011-01-01'  # 2011 is the first year with a full year of data
end_date = date.today().isoformat()  # today's date rendered as YYYY-MM-DD
url = f'{base}start={start_date}&end={end_date}'
print(url)

https://api.coindesk.com/v1/bpi/historical/close.json?start=2011-01-01&end=2020-12-25


## make api call

In [132]:
# Download the JSON document at `url` and parse it into a DataFrame.
# NOTE(review): this hits the network on every run of the cell. The formatting
# cell that follows expects the result to carry `bpi`, `disclaimer` and `time`
# columns, with the dates in the index.
df = pd.read_json(url)

## formatting

In [133]:
# Reshape the raw API frame into a tidy (date, price) table.
# Built as one method chain that returns a fresh frame: this avoids
# `inplace=True` mutation and the SettingWithCopyWarning the previous
# "filter, then assign a column" sequence could raise.
df = (
    df.reset_index()                                        # index labels -> 'index' column
      .drop(columns=['disclaimer', 'time'])                 # metadata columns, not prices
      .rename(columns={'index': 'date', 'bpi': 'price'})
      .dropna(subset=['price'])                             # keep only rows that carry a price
      .assign(date=lambda frame: pd.to_datetime(frame['date']))
)

In [134]:
# Derive calendar year and month from the date column, then fix the column order.
date_index = pd.DatetimeIndex(df['date'])  # built once, reused for both parts

cols = ['date','year','month','price']
df = df.assign(year=date_index.year, month=date_index.month)[cols]

In [135]:
# Show the first rows of the formatted frame as a sanity check on the earliest dates.
df.head()

Unnamed: 0,date,year,month,price
0,2011-01-01,2011,1,0.3
1,2011-01-02,2011,1,0.3
2,2011-01-03,2011,1,0.295
3,2011-01-04,2011,1,0.299
4,2011-01-05,2011,1,0.299


In [136]:
# Show the last rows of the formatted frame as a sanity check on the most recent dates.
df.tail()

Unnamed: 0,date,year,month,price
3641,2020-12-20,2020,12,23458.9967
3642,2020-12-21,2020,12,22718.225
3643,2020-12-22,2020,12,23817.3067
3644,2020-12-23,2020,12,23229.0133
3645,2020-12-24,2020,12,23729.6483


## create summary table by year
* confirm data quality: there is one price record for every day in each year (except 2020, which is still ongoing)
* observation: since 2015, each successive year has had a higher min and median price than the year before

In [137]:
# Metrics to report for bitcoin's price in each year.
metrics = ['count','max','min','mean','median']

# Build the per-year summary in a single pass:
#   * one groupby/agg computes every metric at once (no string-built code run
#     through eval, no repeated merges);
#   * sort=False keeps years in order of first appearance, matching the row
#     order previously obtained from df['year'].unique();
#   * values are rendered as whole-number strings with thousands separators by
#     mapping each column to a new one, rather than writing strings into a
#     numeric column in place (an incompatible-dtype assignment that newer
#     pandas versions deprecate).
df_summary = (
    df.groupby('year', sort=False)['price']
      .agg(metrics)
      .add_prefix('price_')
      .apply(lambda column: column.map('{:,.0f}'.format))
      .reset_index()
)

# show summary table
df_summary

Unnamed: 0,year,price_count,price_max,price_min,price_mean,price_median
0,2011,365,30,0,6,3
1,2012,366,14,4,8,7
2,2013,365,1147,13,189,112
3,2014,365,951,310,527,502
4,2015,365,466,177,272,248
5,2016,366,978,359,567,582
6,2017,365,19343,776,3990,2594
7,2018,365,17136,3214,7529,6896
8,2019,365,12907,3386,7380,7807
9,2020,359,23835,4914,10799,9668
