In [1]:
import polars as pl
import numpy as np
import os
import datetime
import urllib.request
from tqdm import tqdm
import zipfile

In [2]:
def get_klines_data(trading_pair: str, from_date: datetime.date, number_of_days: int, download_dir: str):
    """Download Binance USD-M futures 1-minute index-price klines into one frame.

    One daily archive is fetched from data.binance.vision for each of the
    ``number_of_days`` days preceding ``from_date`` (``from_date`` itself is
    not included). Each archive is unpacked into ``download_dir`` and parsed;
    the archive and the extracted CSV are deleted afterwards, whether or not
    the day could be loaded.

    Args:
        trading_pair: Symbol such as ``"ETHUSDT"``; upper-cased before use.
        from_date: Exclusive upper bound of the date range.
        number_of_days: Number of daily files to fetch, counting backwards
            from the day before ``from_date``.
        download_dir: Scratch directory for the temporary files; created if
            it does not exist. May be relative or absolute.

    Returns:
        A polars DataFrame with the columns ``time`` (the kline close time as
        a microsecond-precision datetime), ``open``, ``high``, ``low`` and
        ``close``. Days are stacked in download order, i.e. newest day first,
        and rows within a day keep the order of the source file; sort by
        ``time`` if strictly chronological data is required. An empty
        DataFrame is returned when no day could be loaded.

    A day that fails for any reason (missing file, network error, unexpected
    CSV layout) is reported with ``print`` and skipped.
    """
    trading_pair = trading_pair.upper()
    date_list = [from_date - datetime.timedelta(days=x) for x in range(1, number_of_days + 1)]
    os.makedirs(download_dir, exist_ok=True)
    base_url = f"https://data.binance.vision/data/futures/um/daily/indexPriceKlines/{trading_pair}/1m"
    # Collect per-day frames and concatenate once at the end instead of
    # re-copying the accumulated data on every iteration.
    daily_frames = []
    print(f"DOWNLOADING {number_of_days} FILES TO {download_dir} AND GENERATING index price klines")
    for current_date in tqdm(date_list):
        day = current_date.strftime('%Y-%m-%d')
        # os.path.join keeps these paths consistent with the directory passed
        # to makedirs/extractall, including when download_dir is absolute.
        zip_file_path = os.path.join(download_dir, f"{trading_pair}_{day}")
        csv_file_path = os.path.join(download_dir, f"{trading_pair}-1m-{day}.csv")
        try:
            urllib.request.urlretrieve(f"{base_url}/{trading_pair}-1m-{day}.zip", zip_file_path)
            with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
                zip_ref.extractall(download_dir)
            downloaded = pl.read_csv(csv_file_path)
            daily_frames.append(
                downloaded.select([
                    # close_time is epoch milliseconds; convert it, then widen
                    # to microsecond precision. The unit is passed positionally
                    # because its keyword name differs between polars releases.
                    pl.from_epoch("close_time", "ms").cast(pl.Datetime("us")).alias("time"),
                    pl.col(["open", "high", "low", "close"]),
                ])
            )
        except Exception as e:
            # Best effort: report the failing day and carry on with the rest.
            print(f"Something went wrong with {trading_pair} {current_date}: {e}")
        finally:
            # Never leave temporary files behind, even when a step above failed.
            for leftover in (zip_file_path, csv_file_path):
                try:
                    os.remove(leftover)
                except OSError:
                    pass  # file was never created or is already gone
    if not daily_frames:
        return pl.DataFrame()
    return pl.concat(daily_frames)

In [3]:
# Run parameters for this download.
trading_pair = "ETHUSDT"
from_date = datetime.date(2023, 3, 23)  # exclusive: the newest day fetched is the day before
number_of_days = 136
# Scratch directory for the temporary zip/CSV files; they are deleted again
# once each day has been parsed.
download_dir = f"{trading_pair}_trades"
df = get_klines_data(trading_pair, from_date, number_of_days, download_dir)

DOWNLOADING 136 FILES TO ETHUSDT_trades AND GENERATING index price klines


100%|██████████| 136/136 [03:07<00:00,  1.38s/it]


In [4]:
# Sanity check: 136 days x 1440 one-minute bars per day = 195,840 rows,
# with the 5 columns time/open/high/low/close.
df.shape

(195840, 5)

In [5]:
# Preview the first rows. Days are fetched newest-first, so the frame starts
# with the day before from_date (2023-03-22), not with the oldest day.
df.head(30)

time,open,high,low,close
datetime[μs],f64,f64,f64,f64
2023-03-22 00:00:59.999,1801.387179,1801.849231,1800.702564,1801.545128
2023-03-22 00:01:59.999,1801.545128,1801.97359,1800.793333,1800.950513
2023-03-22 00:02:59.999,1800.955641,1801.485128,1800.498205,1800.769487
2023-03-22 00:03:59.999,1800.764359,1801.505641,1800.764359,1801.050769
2023-03-22 00:04:59.999,1801.073846,1801.26359,1800.798462,1801.144872
2023-03-22 00:05:59.999,1801.144872,1801.180256,1800.404872,1800.555897
2023-03-22 00:06:59.999,1800.561026,1800.594103,1799.745641,1800.388718
2023-03-22 00:07:59.999,1800.378462,1800.391026,1799.582564,1799.713333
2023-03-22 00:08:59.999,1799.907179,1800.967436,1799.907179,1800.303846
2023-03-22 00:09:59.999,1800.303846,1800.308974,1799.734872,1799.734872


In [6]:
# Persist the combined klines as a Parquet file in the working directory.
parquet_path = "./ETHUSDT_INDEX_PRICE_136_23032023.parquet"
df.write_parquet(parquet_path)