In [1]:
import polars as pl
import numpy as np
import os
import datetime
import urllib.request
from tqdm import tqdm
import zipfile

In [2]:
def get_klines_data(trading_pair: str, from_date: datetime.date, number_of_days: int, download_dir: str):
    """Download daily 1-minute premium index klines from data.binance.vision.

    One zip archive is fetched for each of the ``number_of_days`` days that
    precede ``from_date`` (``from_date`` itself is not downloaded). Each archive
    is extracted into ``download_dir``, parsed, and then removed again, so the
    directory only serves as scratch space.

    Args:
        trading_pair: Symbol such as ``"ETHUSDT"``; upper-cased before use.
        from_date: Exclusive upper bound of the date range.
        number_of_days: How many days before ``from_date`` to download.
        download_dir: Directory used for the temporary zip/csv files. It is
            created if it does not exist.

    Returns:
        A polars DataFrame with the columns ``time`` (the kline close time as
        ``Datetime[us]``), ``open``, ``high``, ``low`` and ``close``. Days are
        concatenated in download order, i.e. most recent day first, while rows
        within a day keep the order of the source file. An empty DataFrame is
        returned when no day could be loaded.

    Days that fail to download or parse are reported on stdout and skipped
    (best effort); no exception is propagated for them.
    """
    trading_pair = trading_pair.upper()
    date_list = [from_date - datetime.timedelta(days=x) for x in range(1, number_of_days + 1)]
    os.makedirs(download_dir, exist_ok=True)
    print(f"DOWNLOADING {number_of_days} FILES TO {download_dir} AND GENERATING premium index klines")

    # Collect the per-day frames and concatenate once at the end instead of
    # re-copying the growing frame on every iteration.
    daily_frames = []
    for current_date in tqdm(date_list):
        day = current_date.strftime('%Y-%m-%d')
        # os.path.join keeps absolute and "./"-prefixed directories intact.
        zip_file_path = os.path.join(download_dir, f"{trading_pair}_{day}")
        csv_file_path = os.path.join(download_dir, f"{trading_pair}-1m-{day}.csv")
        try:
            urllib.request.urlretrieve(
                f"https://data.binance.vision/data/futures/um/daily/premiumIndexKlines/{trading_pair}/1m/{trading_pair}-1m-{day}.zip",
                zip_file_path,
            )
            with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
                zip_ref.extractall(download_dir)
            downloaded = (
                pl.read_csv(csv_file_path)
                .select(pl.col(["close_time", "open", "high", "low", "close"]))
                .rename({"close_time": "time"})
                .with_columns(
                    # close_time is epoch milliseconds; store it as Datetime[us].
                    pl.from_epoch("time", "ms").cast(pl.Datetime("us")),
                    # Force a uniform schema so one oddly-inferred day cannot
                    # break the final concatenation.
                    pl.col(["open", "high", "low", "close"]).cast(pl.Float64),
                )
            )
            daily_frames.append(downloaded)
        except Exception as e:
            print(f"Something went wrong with {trading_pair} {current_date}: {e}")
        finally:
            # Remove the scratch files even when the download or parsing failed.
            for leftover in (zip_file_path, csv_file_path):
                if os.path.exists(leftover):
                    os.remove(leftover)

    if not daily_frames:
        return pl.DataFrame()
    return pl.concat(daily_frames)

In [3]:
# Download parameters: the symbol, the (exclusive) end date and how many days
# before that date to fetch.
trading_pair = "ETHUSDT".upper()
from_date = datetime.datetime.strptime('23032023', "%d%m%Y").date()
number_of_days = 136

# Scratch directory for the temporary zip/csv files. It is derived from the
# symbol and date so it always matches the data being downloaded (previously an
# unrelated hard-coded "SOLUSDT_trades" directory was passed instead).
download_dir = f"./{trading_pair}_daily_trades_data_{from_date.strftime('%Y-%m-%d')}"
df = get_klines_data(trading_pair, from_date, number_of_days, download_dir)

DOWNLOADING 136 FILES TO SOLUSDT_trades AND GENERATING premium index klines


100%|██████████| 136/136 [02:57<00:00,  1.31s/it]


In [4]:
# Sanity check on the combined frame: (rows, columns). A complete day of
# 1-minute klines has 1440 rows, so a full download is expected to contain
# number_of_days * 1440 rows and the 5 selected columns.
df.shape

(195840, 5)

In [5]:
# Preview the first rows to confirm the schema: a `time` datetime column
# (kline close time) followed by the open/high/low/close values.
df.head(30)

time,open,high,low,close
datetime[μs],f64,f64,f64,f64
2023-03-22 00:00:59.999,-0.000692,-0.000384,-0.000791,-0.000616
2023-03-22 00:01:59.999,-0.000597,-0.000199,-0.000597,-0.000472
2023-03-22 00:02:59.999,-0.000337,-0.000174,-0.000482,-0.000306
2023-03-22 00:03:59.999,-0.000394,-0.000133,-0.000496,-0.000229
2023-03-22 00:04:59.999,-0.000284,-0.000207,-0.000583,-0.00022
2023-03-22 00:05:59.999,-0.000385,-0.000344,-0.000591,-0.000495
2023-03-22 00:06:59.999,-0.000542,-0.00005,-0.000672,-0.00005
2023-03-22 00:07:59.999,-0.000549,-0.000386,-0.000638,-0.000599
2023-03-22 00:08:59.999,-0.000172,-0.000172,-0.000497,-0.000313
2023-03-22 00:09:59.999,-0.000496,-0.000433,-0.000647,-0.0005


In [6]:
# Persist the combined klines. The symbol and date in the file name are taken
# from the download parameters so the name cannot drift from the data; the
# literal 136 is the number of days requested in the download cell and must be
# kept in sync with it.
parquet_path = f"./{trading_pair}_PREMIUM_INDEX_136_{from_date.strftime('%d%m%Y')}.parquet"
df.write_parquet(parquet_path)