In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import requests
import json

In [2]:
# function courtesy of CryptoDataDownload.com
def fetch_daily_data(symbol):
    pair_split = symbol.split('/')  # symbol must be in format XXX/XXX ie. BTC/EUR
    symbol = pair_split[0] + '-' + pair_split[1]
    url = f'https://api.pro.coinbase.com/products/{symbol}/candles?granularity=86400'
    response = requests.get(url)
    if response.status_code == 200:  # check to make sure the response from server is good
        data = pd.DataFrame(json.loads(response.text), columns=['unix', 'low', 'high', 'open', 'close', 'volume'])
        data['date'] = pd.to_datetime(data['unix'], unit='s')  # convert to a readable date
        data['vol_fiat'] = data['volume'] * data['close']      # multiply the BTC volume by closing price to approximate fiat volume
                            
        # if we failed to get any data, print an error...otherwise write the file
        if data is None:
            print("Did not return any data from Coinbase for this symbol")
        else:
            data.to_csv(f'cb_{pair_split[0] + pair_split[1]}_daily.csv', index=False)
    else:
        print("Did not receieve OK response from Coinbase API")

In [3]:
fetch_daily_data('BTC/USD')

In [4]:
btc = pd.read_csv('cb_BTCUSD_daily.csv', index_col='date', parse_dates=[0])

In [5]:
btc.head()

Unnamed: 0_level_0,unix,low,high,open,close,volume,vol_fiat
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-01-11,1610323200,30100.0,38273.88,38168.89,31333.79,84503.23934,2647807000.0
2021-01-10,1610236800,34444.0,41452.12,40257.43,38171.57,43736.570316,1669494000.0
2021-01-09,1610150400,38800.0,41406.94,40642.15,40257.43,27152.971029,1093109000.0
2021-01-08,1610064000,36565.08,41986.37,39510.55,40665.15,48522.484903,1973174000.0
2021-01-07,1609977600,36200.0,40425.0,36859.26,39505.56,50346.305691,1988959000.0


In [6]:
btc.drop('unix', axis=1, inplace=True)

In [7]:
btc.info()

<class 'pandas.core.frame.DataFrame'>
Index: 300 entries, 2021-01-11 to 2020-03-18
Data columns (total 6 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   low       300 non-null    float64
 1   high      300 non-null    float64
 2   open      300 non-null    float64
 3   close     300 non-null    float64
 4   volume    300 non-null    float64
 5   vol_fiat  300 non-null    float64
dtypes: float64(6)
memory usage: 16.4+ KB


In [8]:
btc.index = pd.to_datetime(btc.index)

In [9]:
btc.head()

Unnamed: 0_level_0,low,high,open,close,volume,vol_fiat
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-01-11,30100.0,38273.88,38168.89,31333.79,84503.23934,2647807000.0
2021-01-10,34444.0,41452.12,40257.43,38171.57,43736.570316,1669494000.0
2021-01-09,38800.0,41406.94,40642.15,40257.43,27152.971029,1093109000.0
2021-01-08,36565.08,41986.37,39510.55,40665.15,48522.484903,1973174000.0
2021-01-07,36200.0,40425.0,36859.26,39505.56,50346.305691,1988959000.0


In [10]:
btc.head()

Unnamed: 0_level_0,low,high,open,close,volume,vol_fiat
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-01-11,30100.0,38273.88,38168.89,31333.79,84503.23934,2647807000.0
2021-01-10,34444.0,41452.12,40257.43,38171.57,43736.570316,1669494000.0
2021-01-09,38800.0,41406.94,40642.15,40257.43,27152.971029,1093109000.0
2021-01-08,36565.08,41986.37,39510.55,40665.15,48522.484903,1973174000.0
2021-01-07,36200.0,40425.0,36859.26,39505.56,50346.305691,1988959000.0


In [11]:
btc = btc.sort_values('date')

In [12]:
def make_features(data):
    data['month'] = data.index.month
    data['dayofweek'] = data.index.dayofweek
    data['range'] = data['high'] - data['low']
    data['week_low_mean'] = data['low'].shift().rolling(7).mean()
    data['week_high_mean'] = data['high'].shift().rolling(7).mean()
    data['month_low_mean'] = data['low'].shift().rolling(30).mean()
    data['month_high_mean'] = data['high'].shift().rolling(30).mean()

In [13]:
make_features(btc)

In [14]:
btc.tail(10)

Unnamed: 0_level_0,low,high,open,close,volume,vol_fiat,month,dayofweek,range,week_low_mean,week_high_mean,month_low_mean,month_high_mean
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2021-01-02,29039.0,33300.0,29413.29,32225.91,46675.246521,1504152000.0,1,5,4261.0,26599.172857,28301.687143,21593.519,22838.525333
2021-01-03,32008.62,34810.0,32222.88,33080.66,36951.716506,1222387000.0,1,6,2801.38,27249.028571,29227.115714,21931.985667,23294.337333
2021-01-04,27678.0,33666.99,33082.84,32019.99,46045.389685,1474373000.0,1,0,5988.99,28139.895714,30144.687143,22379.738,23803.122
2021-01-05,29891.13,34499.67,32020.22,34030.64,42282.5692,1438903000.0,1,1,4608.54,28367.297143,31028.971429,22685.638,24285.689
2021-01-06,33352.54,37000.0,34043.91,36859.26,45744.1032,1686094000.0,1,2,3647.46,28946.925714,32043.671429,23052.742333,24788.003
2021-01-07,36200.0,40425.0,36859.26,39505.56,50346.305691,1988959000.0,1,3,4225.0,29809.898571,33183.92,23534.460333,25373.584
2021-01-08,36565.08,41986.37,39510.55,40665.15,48522.484903,1973174000.0,1,4,5421.29,30981.327143,34770.077143,24134.460333,26077.767
2021-01-09,38800.0,41406.94,40642.15,40257.43,27152.971029,1093109000.0,1,5,2606.94,32104.91,36526.861429,24765.329667,26855.735
2021-01-10,34444.0,41452.12,40257.43,38171.57,43736.570316,1669494000.0,1,6,7008.12,33499.338571,37684.995714,25461.829667,27617.419
2021-01-11,30100.0,38273.88,38168.89,31333.79,84503.23934,2647807000.0,1,0,8173.88,33847.25,38633.87,26023.963,28389.387
