In [1]:
import polars as pl
import numpy as np
import os
import datetime
import urllib.request
from tqdm import tqdm
import zipfile

In [2]:
def get_klines_data(trading_pair: str, from_date: datetime.date, number_of_days: int, download_dir: str) -> "pl.DataFrame":
    """Download daily 1-minute mark-price klines from data.binance.vision.

    One archive per day is fetched for the ``number_of_days`` days strictly
    before ``from_date`` (``from_date`` itself is not downloaded). Each archive
    is extracted into ``download_dir``, its CSV is read, and both the archive
    and the CSV are deleted again, so ``download_dir`` is only used as scratch
    space.

    Parameters
    ----------
    trading_pair : str
        Symbol used in the download URL; upper-cased before use.
    from_date : datetime.date
        Exclusive upper bound of the date range.
    number_of_days : int
        How many days before ``from_date`` to download.
    download_dir : str
        Directory for temporary files (relative or absolute); created if missing.

    Returns
    -------
    pl.DataFrame
        Columns ``time`` (the kline ``close_time`` as ``Datetime("us")``),
        ``open``, ``high``, ``low``, ``close``. Days are concatenated in the
        order they are downloaded, i.e. newest day first, so the frame is NOT
        globally sorted by ``time``. An empty, schema-less frame is returned if
        no day could be loaded.

    Notes
    -----
    Failures for an individual day are reported with ``print`` and skipped
    (best effort); they do not abort the remaining downloads.
    """
    trading_pair = trading_pair.upper()
    date_list = [from_date - datetime.timedelta(days=x) for x in range(1, number_of_days + 1)]
    os.makedirs(download_dir, exist_ok=True)
    # Collect per-day frames and concatenate once at the end instead of
    # re-copying the accumulated frame on every iteration.
    daily_frames = []
    print(f"DOWNLOADING {number_of_days} FILES TO {download_dir} AND GENERATING mark price klines")
    for current_date in tqdm(date_list):
        day = current_date.strftime('%Y-%m-%d')
        # os.path.join keeps these paths inside the directory that was created
        # and extracted into above, for absolute download_dir values as well.
        zip_file_path = os.path.join(download_dir, f"{trading_pair}_{day}")
        csv_file_path = os.path.join(download_dir, f"{trading_pair}-1m-{day}.csv")
        try:
            urllib.request.urlretrieve(f"https://data.binance.vision/data/futures/um/daily/markPriceKlines/{trading_pair}/1m/{trading_pair}-1m-{day}.zip", zip_file_path)
            with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
                zip_ref.extractall(download_dir)
            downloaded = (
                pl.read_csv(csv_file_path)
                .select(pl.col(["close_time", "open", "high", "low", "close"]))
                .rename({"close_time": "time"})
                # close_time is epoch milliseconds; store it with microsecond precision.
                .with_columns(pl.from_epoch("time", unit='ms').cast(pl.Datetime("us")))
            )
            daily_frames.append(downloaded)
        except Exception as e:
            # Best effort: report the failed day and carry on with the rest.
            print(f"Something went wrong with {trading_pair} {current_date}: {e}")
        finally:
            # Never leave the temporary archive/CSV behind, even on failure.
            for leftover in (zip_file_path, csv_file_path):
                if os.path.exists(leftover):
                    os.remove(leftover)
    if not daily_frames:
        # pl.concat rejects an empty list; keep returning an empty frame instead.
        return pl.DataFrame()
    return pl.concat(daily_frames)

In [3]:
# Inputs for the download: symbol, exclusive end date and how many days back to fetch.
trading_pair = "ETHUSDT".upper()
from_date = datetime.datetime.strptime('23032023', "%d%m%Y").date()
number_of_days = 136
# Scratch directory for the temporary zip/CSV files, named after the pair and date.
download_dir = f"./{trading_pair}_daily_trades_data_{from_date.strftime('%Y-%m-%d')}"
# Pass the computed directory instead of an unrelated hard-coded "SOLUSDT_trades" folder.
df = get_klines_data(trading_pair, from_date, number_of_days, download_dir)

DOWNLOADING 136 FILES TO SOLUSDT_trades AND GENERATING mark price klines


100%|██████████| 136/136 [03:02<00:00,  1.34s/it]


In [4]:
# Sanity-check the size of the assembled frame as (rows, columns).
df.shape

(195840, 5)

In [5]:
# Preview the first rows (up to 30) of the assembled frame.
df.head(30)

time,open,high,low,close
datetime[μs],f64,f64,f64,f64
2023-03-22 00:00:59.999,1800.385763,1801.08,1799.695763,1800.52166
2023-03-22 00:01:59.999,1800.52166,1801.36,1799.94,1800.13
2023-03-22 00:02:59.999,1800.13,1800.99,1799.84,1800.06
2023-03-22 00:03:59.999,1800.06,1801.0,1800.05,1800.53
2023-03-22 00:04:59.999,1800.53,1800.77,1800.024079,1800.44
2023-03-22 00:05:59.999,1800.44,1800.61,1799.657776,1799.805028
2023-03-22 00:06:59.999,1799.805028,1799.92,1798.980513,1799.640564
2023-03-22 00:07:59.999,1799.630308,1799.73,1798.790436,1799.08
2023-03-22 00:08:59.999,1799.14,1800.45,1799.14,1799.535925
2023-03-22 00:09:59.999,1799.500891,1799.506019,1798.902733,1798.902733


In [6]:
# Persist the assembled klines to a Parquet file in the working directory.
# NOTE(review): the file name hard-codes the pair, day count and date used in the
# download cell — keep it in sync if those inputs change.
df.write_parquet("./ETHUSDT_MARK_PRICE_136_23032023.parquet")