In [None]:
import time
from datetime import datetime, timezone
from pathlib import Path

import pandas as pd
import plotly.express as px
import requests

# Directory (relative to the notebook's working directory) where the per-symbol
# CSV files are written by the download cell and read back by the plotting cell.
dataDir = Path("../data")

In [None]:
def fetch_ohlcv_data(symbol, interval, start_date, end_date, timeout=10):
    """
    Fetches OHLCV candles from the Binance klines endpoint for a symbol, interval, and date range.

    The range is walked in pages of at most 1000 candles: each request starts
    1 ms after the open time of the last candle already received, and paging
    stops when a page comes back empty or shorter than the page limit.

    Parameters:
    - symbol: str (e.g., 'BTCUSDT'); upper-cased before being sent
    - interval: str (e.g., '1m' for minute, '1h' for hour, '1d' for day)
    - start_date: datetime; a naive value is interpreted as UTC
    - end_date: datetime; a naive value is interpreted as UTC
    - timeout: seconds each HTTP request may take before it is aborted (default 10)

    Returns:
    - On success: list of dicts with keys 'open_time' (UTC string formatted as
      '%Y-%m-%d %H:%M:%S'), 'open', 'high', 'low', 'close', 'volume' (floats),
      in the order the API returned them (expected oldest first).
    - On failure: {"error": message} when start is not before end, when a
      request fails (including timeouts and bad HTTP status codes), or when a
      ValueError is raised while decoding or parsing the response.
    """
    all_candles = []
    max_per_call = 1000

    try:
        # Ensure datetime objects are timezone-aware (naive values are taken as UTC)
        if start_date.tzinfo is None:
            start_date = start_date.replace(tzinfo=timezone.utc)
        if end_date.tzinfo is None:
            end_date = end_date.replace(tzinfo=timezone.utc)

        # The API works with millisecond epoch timestamps
        start_time = int(start_date.timestamp() * 1000)
        end_time = int(end_date.timestamp() * 1000)

        if start_time >= end_time:
            return {"error": "Start date must be before end date."}

        current_start = start_time

        while current_start < end_time:
            params = {
                "symbol": symbol.upper(),
                "interval": interval,
                "limit": max_per_call,
                "startTime": current_start,
                "endTime": end_time,
            }

            # Let requests build and URL-encode the query string, and bound the
            # wait so a stalled connection cannot hang the notebook forever.
            response = requests.get(
                "https://api.binance.com/api/v3/klines",
                params=params,
                timeout=timeout,
            )
            response.raise_for_status()  # Raise an error for bad status codes
            data = response.json()

            if not data:
                break

            all_candles.extend(data)  # Extend with raw candle lists

            # A page shorter than the limit means the range is exhausted, so
            # the extra request (and its delay) can be skipped.
            if len(data) < max_per_call:
                break

            # Next page starts 1 ms after the open time of the last candle
            current_start = data[-1][0] + 1

            # Short delay to respect rate limits
            time.sleep(0.1)

        # Parse raw candles (lists) into dicts, keeping the API's order
        return [
            {
                "open_time": datetime.fromtimestamp(
                    candle[0] / 1000, tz=timezone.utc
                ).strftime("%Y-%m-%d %H:%M:%S"),
                "open": float(candle[1]),
                "high": float(candle[2]),
                "low": float(candle[3]),
                "close": float(candle[4]),
                "volume": float(candle[5]),
            }
            for candle in all_candles
        ]

    except (requests.exceptions.RequestException, ValueError) as e:
        return {"error": str(e)}


# List of symbols to fetch (e.g., BTCUSDT for Bitcoin/USDT)
symbols = ["BTCUSDT", "ETHUSDT", "SOLUSDT"]

# Candle interval passed to the API, e.g. '1m', '1h', '1d'
interval = "1m"

start_date = datetime(2025, 9, 9, 0, 0, 0, tzinfo=timezone.utc)
end_date = datetime(2025, 9, 18, 23, 59, 59, tzinfo=timezone.utc)

# Column order of the CSV files; also guarantees a header row when no candles
# were returned for a symbol.
ohlcv_columns = ["open_time", "open", "high", "low", "close", "volume"]

# to_csv does not create missing directories, so make sure the target exists.
dataDir.mkdir(parents=True, exist_ok=True)

for symbol in symbols:
    data = fetch_ohlcv_data(symbol, interval, start_date, end_date)

    # fetch_ohlcv_data reports failures as {"error": ...} instead of raising.
    # Iterating that dict as if it were candles would fail with an unrelated
    # TypeError, so surface the real reason here.
    if isinstance(data, dict):
        raise RuntimeError(f"Failed to fetch {symbol}: {data.get('error')}")

    # One CSV per symbol, one row per candle.
    df = pd.DataFrame(data, columns=ohlcv_columns)
    df.to_csv(dataDir / f"{symbol}.csv", index=False)

In [None]:
# Load the BTCUSDT candles written by the download cell. open_time is stored as
# a "%Y-%m-%d %H:%M:%S" string (UTC), so parse it to get a real time axis.
btc = pd.read_csv(dataDir / "BTCUSDT.csv", parse_dates=["open_time"])

px.line(
    btc,
    x="open_time",
    y="close",
    title="BTCUSDT close price",
    labels={"open_time": "Open time (UTC)", "close": "Close"},
)