In [1]:
import requests
import zipfile
import io
import os
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime
from tqdm.notebook import tqdm



In [2]:
def get_perpetual_symbols(timeout=30):
    """Return the symbols of all perpetual contracts currently trading.

    Queries the Binance futures ``exchangeInfo`` endpoint and keeps the
    entries whose ``contractType`` is ``'PERPETUAL'`` and whose ``status`` is
    ``'TRADING'``.

    Args:
        timeout: Seconds to wait for the HTTP request before giving up.
            Defaults to 30, matching the timeout used for the kline
            downloads, so a stalled connection cannot hang the caller
            forever.

    Returns:
        list[str]: Symbol names in the order the endpoint lists them.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status
            (via ``raise_for_status``).
        requests.RequestException: On connection problems or timeout.
    """
    url = "https://fapi.binance.com/fapi/v1/exchangeInfo"
    resp = requests.get(url, timeout=timeout)
    resp.raise_for_status()
    data = resp.json()
    symbols = [
        s['symbol']
        for s in data['symbols']
        if s['contractType'] == 'PERPETUAL' and s['status'] == 'TRADING'
    ]
    return symbols

def download_and_extract(symbol, interval, year, month, out_dir):
    """Download one monthly kline archive and extract its CSV.

    The archive URL is built from ``symbol``, ``interval``, ``year`` and the
    zero-padded ``month``. The CSV is stored as
    ``<out_dir>/<symbol>/<symbol>-<interval>-<year>-<MM>.csv``.

    The function never raises for download/extraction problems; every
    outcome is reported through the returned message so it can be run in a
    thread pool without per-future error handling.

    Args:
        symbol: Contract symbol, e.g. ``"BTCUSDT"``.
        interval: Kline interval string, e.g. ``"1m"``.
        year: Four-digit year.
        month: Month number (1-12); formatted with two digits.
        out_dir: Root output directory; a per-symbol subdirectory is created.

    Returns:
        str: One of
            ``"Already exists: <csv_path>"`` - CSV already on disk, no request made;
            ``"Downloaded: <csv_path>"`` - CSV extracted successfully;
            ``"Not found: <url>"`` - server answered 404;
            ``"HTTP <status> for <url>"`` - any other non-200 status;
            ``"Error for <symbol> <year>-<MM>: <detail>"`` - request, archive
            or filesystem failure.
    """
    month_str = f"{month:02d}"
    url = (
        f"https://data.binance.vision/data/futures/um/monthly/klines/"
        f"{symbol}/{interval}/{symbol}-{interval}-{year}-{month_str}.zip"
    )
    extract_path = os.path.join(out_dir, symbol)
    os.makedirs(extract_path, exist_ok=True)
    csv_filename = f"{symbol}-{interval}-{year}-{month_str}.csv"
    csv_path = os.path.join(extract_path, csv_filename)
    if os.path.exists(csv_path):
        return f"Already exists: {csv_path}"

    # Extract to a temporary name first so an interrupted run never leaves a
    # truncated file at csv_path (which the exists-check above would then
    # treat as complete on the next run).
    partial_path = csv_path + ".part"
    try:
        resp = requests.get(url, timeout=30)
        if resp.status_code == 404:
            return f"Not found: {url}"
        if resp.status_code != 200:
            # Rate limiting / server errors are not the same as a missing month.
            return f"HTTP {resp.status_code} for {url}"
        with zipfile.ZipFile(io.BytesIO(resp.content)) as z:
            if csv_filename not in z.namelist():
                return (
                    f"Error for {symbol} {year}-{month_str}: "
                    f"{csv_filename} missing from archive"
                )
            with z.open(csv_filename) as src, open(partial_path, "wb") as dst:
                for chunk in iter(lambda: src.read(1 << 20), b""):
                    dst.write(chunk)
        # Atomic rename: csv_path only ever appears fully written.
        os.replace(partial_path, csv_path)
        return f"Downloaded: {csv_path}"
    except Exception as e:
        # Deliberate catch-all: callers rely on a status string, not an exception.
        return f"Error for {symbol} {year}-{month_str}: {e}"
    finally:
        # Remove a leftover temp file after a failed extraction; after a
        # successful rename it no longer exists and this is a no-op.
        try:
            os.remove(partial_path)
        except OSError:
            pass

def generate_tasks(symbols, interval, start_year, start_month, end_year, end_month):
    """Build the list of (symbol, interval, year, month) download tasks.

    For each symbol, every calendar month from ``start_year``/``start_month``
    through ``end_year``/``end_month`` (both inclusive) yields one tuple.
    Tuples are ordered by symbol, then year, then month.

    Returns:
        list[tuple]: ``(symbol, interval, year, month)`` entries; empty when
        ``symbols`` is empty or the start lies after the end.
    """
    first = (start_year, start_month)
    last = (end_year, end_month)
    return [
        (sym, interval, yr, mo)
        for sym in symbols
        for yr in range(start_year, end_year + 1)
        for mo in range(1, 13)
        # (year, month) tuples compare chronologically.
        if first <= (yr, mo) <= last
    ]

In [3]:
def main(
    interval="1m",
    out_dir="binance_klines",
    start_year=2016,
    start_month=1,
    end_year=2024,
    end_month=12,
    max_workers=16,
    symbols=None,
):
    """Download monthly kline CSVs for a set of symbols in parallel.

    Called with no arguments this performs the same run as before: every
    trading perpetual symbol, 1-minute klines, 2016-01 through 2024-12,
    16 worker threads, output under ``binance_klines``.

    Args:
        interval: Kline interval passed to each download task.
        out_dir: Root directory for the extracted CSV files.
        start_year, start_month: First month (inclusive) to request.
        end_year, end_month: Last month (inclusive) to request.
        max_workers: Thread pool size; adjust based on bandwidth/CPU.
        symbols: Optional iterable of symbols to download. When ``None``
            the list is fetched with ``get_perpetual_symbols()``.

    Returns:
        None. Progress is printed, one line per completed task.

    Note:
        The captured run of this notebook shows BTCUSDT months in 2016-2017
        coming back as "Not found"; passing a later start avoids issuing
        those requests. The defaults are left unchanged for compatibility.
    """
    if symbols is None:
        print("Fetching perpetual symbols...")
        symbols = get_perpetual_symbols()
    else:
        # Materialise so len() works for any iterable.
        symbols = list(symbols)
    print(f"Found {len(symbols)} perpetual symbols.")

    print("Generating download tasks...")
    tasks = generate_tasks(symbols, interval, start_year, start_month, end_year, end_month)
    print(f"Total download tasks: {len(tasks)}")

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Distinct loop names: the task's own interval must not shadow the
        # `interval` argument of this function.
        future_to_task = {
            executor.submit(
                download_and_extract, task_symbol, task_interval, year, month, out_dir
            ): (task_symbol, year, month)
            for (task_symbol, task_interval, year, month) in tasks
        }
        total = len(future_to_task)
        for i, future in enumerate(as_completed(future_to_task), 1):
            # download_and_extract reports failures as a message string, so
            # result() is not expected to raise for download problems.
            result = future.result()
            print(f"[{i}/{total}] {result}")

# Entry point: start the download run when this file/cell is executed directly
# rather than imported as a module.
if __name__ == "__main__":
    main()


Fetching perpetual symbols...
Found 480 perpetual symbols.
Generating download tasks...
Total download tasks: 51840
[1/51840] Not found: https://data.binance.vision/data/futures/um/monthly/klines/BTCUSDT/1m/BTCUSDT-1m-2016-11.zip
[2/51840] Not found: https://data.binance.vision/data/futures/um/monthly/klines/BTCUSDT/1m/BTCUSDT-1m-2017-02.zip
[3/51840] Not found: https://data.binance.vision/data/futures/um/monthly/klines/BTCUSDT/1m/BTCUSDT-1m-2016-10.zip
[4/51840] Not found: https://data.binance.vision/data/futures/um/monthly/klines/BTCUSDT/1m/BTCUSDT-1m-2016-12.zip
[5/51840] Not found: https://data.binance.vision/data/futures/um/monthly/klines/BTCUSDT/1m/BTCUSDT-1m-2016-02.zip
[6/51840] Not found: https://data.binance.vision/data/futures/um/monthly/klines/BTCUSDT/1m/BTCUSDT-1m-2016-07.zip
[7/51840] Not found: https://data.binance.vision/data/futures/um/monthly/klines/BTCUSDT/1m/BTCUSDT-1m-2016-09.zip
[8/51840] Not found: https://data.binance.vision/data/futures/um/monthly/klines/BTCUSD