In [29]:
import json
import os
import time
from datetime import datetime, timedelta, timezone

import pandas as pd
import requests
from dotenv import load_dotenv

In [30]:
# Load variables from a local .env file (if present) into the process
# environment so the API key never has to be hardcoded in the notebook.
load_dotenv()

# Constants
COINAPI_KEY = os.getenv('COIN_API_KEY')
if not COINAPI_KEY:
    # Fail here with an actionable message instead of later with an opaque
    # HTTP authentication error on the first request.
    raise RuntimeError(
        "COIN_API_KEY is not set. Add it to your environment or .env file "
        "before running this notebook."
    )

SYMBOL_ID = "COINBASE_SPOT_ETH_USD"
GRANULARITY = 3600   # candle length in seconds (1 hour)
TOTAL_HOURS = 10000  # total number of hourly candles to fetch
CHUNK_SIZE = 100     # candles requested per call (sent as the `limit` parameter)

# Headers sent with every CoinAPI request made by the functions below.
headers = {
    'Accept': 'text/plain',
    'X-CoinAPI-Key': COINAPI_KEY,
}

In [31]:
# Candle length in seconds -> CoinAPI `period_id` string.
_COINAPI_PERIOD_IDS = {
    60: '1MIN',
    300: '5MIN',
    900: '15MIN',
    1800: '30MIN',
    3600: '1HRS',
    7200: '2HRS',
    14400: '4HRS',
    21600: '6HRS',
    43200: '12HRS',
    86400: '1DAY',
}

# Columns (and order) of the frame returned by get_historic_candles.
_CANDLE_COLUMNS = ['time', 'low', 'high', 'open', 'close', 'volume']


def get_historic_candles(symbol_id, granularity, total_hours, chunk_size):
    """Download historical OHLCV candles from CoinAPI, walking backwards from now.

    The requested span is split into consecutive windows of at most
    ``chunk_size`` candles; each window is fetched with one GET request that
    uses the module-level ``headers`` dict for authentication.

    Parameters
    ----------
    symbol_id : str
        CoinAPI symbol identifier, inserted into the request URL.
    granularity : int
        Candle length in seconds. Must be one of the keys of
        ``_COINAPI_PERIOD_IDS`` so the request's ``period_id`` matches the
        window arithmetic.
    total_hours : int
        Number of candles to fetch in total (equals hours when
        ``granularity`` is 3600).
    chunk_size : int
        Maximum number of candles per request; sent as ``limit``.

    Returns
    -------
    pandas.DataFrame
        Columns ``time, low, high, open, close, volume``, sorted ascending by
        ``time`` with duplicate timestamps removed. An empty frame with the
        same columns is returned when no rows were fetched.

    Raises
    ------
    ValueError
        If ``granularity`` has no known ``period_id`` or ``chunk_size`` is
        not positive.
    requests.HTTPError
        For any non-429 error status (via ``raise_for_status``).
    Exception
        If a window is still rate limited (HTTP 429) after three attempts.
    """
    if granularity not in _COINAPI_PERIOD_IDS:
        raise ValueError(
            f"Unsupported granularity {granularity!r}; expected one of "
            f"{sorted(_COINAPI_PERIOD_IDS)} seconds."
        )
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer.")

    period_id = _COINAPI_PERIOD_IDS[granularity]
    url = f"https://rest.coinapi.io/v1/ohlcv/{symbol_id}/history"
    max_attempts = 3

    # datetime.utcnow() is deprecated. Take an aware UTC "now" and drop the
    # tzinfo so isoformat() stays offset-free and the literal 'Z' suffix
    # appended below remains correct.
    now = datetime.now(timezone.utc).replace(tzinfo=None, microsecond=0)

    all_data = []
    for i in range(0, total_hours, chunk_size):
        # The final window may be shorter than chunk_size.
        n_periods = min(chunk_size, total_hours - i)
        end_time = now - timedelta(seconds=granularity * i)
        start_time = end_time - timedelta(seconds=granularity * n_periods)

        params = {
            'period_id': period_id,
            'time_start': start_time.isoformat() + 'Z',
            'time_end': end_time.isoformat() + 'Z',
            'limit': n_periods
        }

        for attempt in range(max_attempts):
            # A timeout keeps a stalled connection from hanging the notebook.
            r = requests.get(url, headers=headers, params=params, timeout=30)
            if r.status_code == 429:
                # Only wait when another attempt will actually follow.
                if attempt < max_attempts - 1:
                    print("Rate limit hit. Waiting 60 seconds...")
                    time.sleep(60)
                continue
            r.raise_for_status()
            data = r.json()
            all_data.extend(data)
            print(f"Fetched {len(data)} rows from {start_time} to {end_time}")
            break
        else:
            raise Exception("Failed to fetch after multiple attempts.")

        time.sleep(1)  # polite delay to avoid spamming API

    if not all_data:
        # Nothing fetched: return a well-formed empty frame instead of
        # failing on the missing 'time_period_start' column.
        return pd.DataFrame(columns=_CANDLE_COLUMNS)

    df = pd.DataFrame(all_data)
    df = df.rename(columns={
        'time_period_start': 'time',
        'price_low': 'low',
        'price_high': 'high',
        'price_open': 'open',
        'price_close': 'close',
        'volume_traded': 'volume'
    })
    df['time'] = pd.to_datetime(df['time'])
    df = df[_CANDLE_COLUMNS]
    # Adjacent windows share a boundary timestamp, so guard against the same
    # candle being returned twice.
    df = (
        df.sort_values('time')
          .drop_duplicates(subset='time')
          .reset_index(drop=True)
    )
    return df

In [32]:
def _to_coinapi_utc(ts):
    """Format a datetime-like value as a second-resolution UTC string ending in 'Z'."""
    if ts.tzinfo is not None:
        # Convert to UTC before formatting; merely dropping the tzinfo would
        # send a non-UTC wall-clock time labelled as UTC.
        ts = ts.astimezone(timezone.utc)
    # strftime discards sub-second precision, so no fractional digits are sent.
    return ts.strftime('%Y-%m-%dT%H:%M:%SZ')


def get_order_book_raw(symbol_id, start_time, end_time):
    """Fetch raw historical order book data from CoinAPI for a time range.

    Parameters
    ----------
    symbol_id : str
        CoinAPI symbol identifier, inserted into the request URL.
    start_time, end_time : datetime.datetime or pandas.Timestamp
        Range bounds. Timezone-aware values are converted to UTC; naive
        values are treated as already being in UTC. Sub-second precision is
        discarded.

    Returns
    -------
    list
        The decoded JSON response body, or an empty list when the API answers
        with HTTP 400 (treated as "no order book history for this request").

    Raises
    ------
    requests.HTTPError
        For any error status other than 400 (via ``raise_for_status``).
    """
    url = f"https://rest.coinapi.io/v1/orderbooks/{symbol_id}/history"
    params = {
        'time_start': _to_coinapi_utc(start_time),
        'time_end': _to_coinapi_utc(end_time)
    }
    # A timeout keeps a stalled connection from hanging the notebook.
    r = requests.get(url, headers=headers, params=params, timeout=30)
    if r.status_code == 400:
        print("Order book history not available for this range or symbol. Skipping...")
        return []
    r.raise_for_status()
    return r.json()

In [33]:
# Download TOTAL_HOURS hourly candles for SYMBOL_ID, CHUNK_SIZE candles per request.
# With the configured values (10000 / 100) this issues 100 requests, each
# followed by a 1-second pause, so the cell takes a couple of minutes to run.
candles_df = get_historic_candles(SYMBOL_ID, GRANULARITY, TOTAL_HOURS, CHUNK_SIZE)

  now = datetime.utcnow()


Fetched 100 rows from 2025-05-10 10:58:21 to 2025-05-14 14:58:21
Fetched 100 rows from 2025-05-06 06:58:21 to 2025-05-10 10:58:21
Fetched 100 rows from 2025-05-02 02:58:21 to 2025-05-06 06:58:21
Fetched 100 rows from 2025-04-27 22:58:21 to 2025-05-02 02:58:21
Fetched 100 rows from 2025-04-23 18:58:21 to 2025-04-27 22:58:21
Fetched 100 rows from 2025-04-19 14:58:21 to 2025-04-23 18:58:21
Fetched 100 rows from 2025-04-15 10:58:21 to 2025-04-19 14:58:21
Fetched 100 rows from 2025-04-11 06:58:21 to 2025-04-15 10:58:21
Fetched 100 rows from 2025-04-07 02:58:21 to 2025-04-11 06:58:21
Fetched 100 rows from 2025-04-02 22:58:21 to 2025-04-07 02:58:21
Fetched 100 rows from 2025-03-29 18:58:21 to 2025-04-02 22:58:21
Fetched 100 rows from 2025-03-25 14:58:21 to 2025-03-29 18:58:21
Fetched 100 rows from 2025-03-21 10:58:21 to 2025-03-25 14:58:21
Fetched 100 rows from 2025-03-17 06:58:21 to 2025-03-21 10:58:21
Fetched 100 rows from 2025-03-13 02:58:21 to 2025-03-17 06:58:21
Fetched 100 rows from 202

In [34]:
# Request the raw order book history over the same span the candles cover
# (candles_df is sorted ascending by time, so first/last rows are the bounds).
start_time = candles_df['time'].iloc[0]
end_time = candles_df['time'].iloc[-1]
order_book_raw = get_order_book_raw(SYMBOL_ID, start_time, end_time)

# Candles go to a fixed CSV path (overwritten on every run); the raw order
# book goes to a JSON file stamped with the current UTC time.
candles_filename = "eth_hourly_data.csv"
# datetime.utcnow() is deprecated; an aware UTC "now" formats identically here.
orderbook_filename = f"orderbook_raw_{datetime.now(timezone.utc).strftime('%Y%m%d%H%M')}.json"

candles_df.to_csv(candles_filename, index=False)
with open(orderbook_filename, 'w') as f:
    json.dump(order_book_raw, f, indent=2)

print(f"Saved candles: {candles_filename}")
print(f"Saved raw order book: {orderbook_filename}")

Saved candles: eth_hourly_data.csv
Saved raw order book: orderbook_raw_202505141505.json


  orderbook_filename = f"orderbook_raw_{datetime.utcnow().strftime('%Y%m%d%H%M')}.json"
