In [18]:
import numpy as np
import pandas as pd
import os


In [2]:
data = pd.read_csv("data/btc-usd-max.csv")
data.head()

Unnamed: 0,snapped_at,price,market_cap,total_volume
0,2013-04-28 00:00:00 UTC,135.3,1500518000.0,0.0
1,2013-04-29 00:00:00 UTC,141.96,1575032000.0,0.0
2,2013-04-30 00:00:00 UTC,135.3,1501657000.0,0.0
3,2013-05-01 00:00:00 UTC,117.0,1298952000.0,0.0
4,2013-05-02 00:00:00 UTC,103.43,1148668000.0,0.0


In [6]:
! pip install fire

Collecting fire
  Downloading fire-0.5.0.tar.gz (88 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m88.3/88.3 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Collecting termcolor (from fire)
  Using cached termcolor-2.3.0-py3-none-any.whl (6.9 kB)
Building wheels for collected packages: fire
  Building wheel for fire (setup.py) ... [?25ldone
[?25h  Created wheel for fire: filename=fire-0.5.0-py2.py3-none-any.whl size=116931 sha256=b71eee16f72d8bbefb33c354cf9eadcf52c5551bf58b286a9476ac8d21839843
  Stored in directory: /Users/brunostordeur/Library/Caches/pip/wheels/f7/f1/89/b9ea2bf8f80ec027a88fef1d354b3816b4d3d29530988972f6
Successfully built fire
Installing collected packages: termcolor, fire
Successfully installed fire-0.5.0 termcolor-2.3.0


In [24]:
from typing import Optional
from pathlib import Path

import pandas as pd
import requests
import fire

DATA_DIR = "data"

def download_ohlc_data_from_coinbase(
    product_id: Optional[str] = "BTC-USD",
    from_day: Optional[str] = "2022-01-01",
    to_day: Optional[str] = "2023-06-01",
) -> Path:
    """
    Downloads historical candles from Coinbase API and saves data to disk
    Reference: https://docs.cloud.coinbase.com/exchange/reference/exchangerestapi_getproductcandles
    """
    # create list of days as strings
    days = pd.date_range(start=from_day, end=to_day, freq="1D")
    days = [day.strftime("%Y-%m-%d") for day in days]

    # create empty dataframe
    data = pd.DataFrame()

    # create download dir folder if it doesn't exist
    if not (Path(DATA_DIR + '/downloads')).exists():
        os.mkdir(Path(DATA_DIR + '/downloads'))
    
    for day in days:

        # download file if it doesn't exist
        file_name = DATA_DIR + '/downloads' + "/" + f'{day}.parquet'
        if Path(file_name).exists():
            data_one_day = pd.read_parquet(file_name)
        else:

            data_one_day = download_data_for_one_day(product_id, day)
            data_one_day.to_parquet(file_name, index=False)
    
        # combine today's file with the rest of the data
        data = pd.concat([data, data_one_day])

    # save data to disk   
    # data.to_parquet(DATA_DIR / f"ohlc_from_{from_day}_to_{to_day}.parquet", index=False)
    data.to_parquet(Path(DATA_DIR + "/" + f"ohlc_data.parquet"), index=False)

    return DATA_DIR +"/" + f"ohlc_data.parquet"

def download_data_for_one_day(product_id: str, day: str) -> pd.DataFrame:
    """
    Downloads one day of data and returns pandas Dataframe
    """
    # create start end end date strings
    start = f'{day}T00:00:00'
    from datetime import datetime, timedelta
    end = (datetime.strptime(day, "%Y-%m-%d") + timedelta(days=1)).strftime("%Y-%m-%d")
    end = f'{end}T00:00:00'

    # call API
    URL = f'https://api.exchange.coinbase.com/products/{product_id}/candles?start={start}&end={end}&granularity=3600'
    r = requests.get(URL)
    data = r.json()

    # transform list of lists to pandas dataframe and return
    return pd.DataFrame(data, columns=['time', 'low', 'high', 'open', 'close', 'volume'])

if __name__== '__main__':
   download_ohlc_data_from_coinbase()

In [26]:
data = pd.read_parquet("data/downloads/2022-01-01.parquet")

In [27]:
data.head()

Unnamed: 0,time,low,high,open,close,volume
0,1641081600,47400.0,47770.37,47733.43,47637.43,455.548116
1,1641078000,47295.97,47823.25,47455.8,47733.43,352.513245
2,1641074400,47245.84,47500.0,47334.12,47455.8,240.044214
3,1641070800,47288.18,47550.0,47413.98,47335.57,377.381806
4,1641067200,47265.78,47579.24,47347.22,47413.98,306.432304
