In [1]:
import pandas as pd
import numpy as np
import glob
from pathlib import Path
from tqdm.auto import tqdm  

from gym_trading_env.downloader import download
import datetime

In [2]:
# --- Locations -------------------------------------------------------------
# Folder the downloader writes raw pickles into (also globbed by the
# processing step).
download_dir = "./data/raw"
# Folder for resampled frames; same path the processing step writes to.
processed_dir = "./data/processed/"

# --- Bar widths ------------------------------------------------------------
# Candle width requested from the exchange.
download_timeframe = "5m"
# Bar width targeted by the processing step; dividing it by the raw bar width
# gives the number of offset variants produced per symbol.
TARGET_TIMEFRAME = pd.Timedelta(minutes=30)

# Download

In [5]:
# Pull BTC/USDT and ETH/USDT candles from Binance at `download_timeframe`
# resolution; the helper stores one pickle per pair under `download_dir`.
# For a quicker run, move the start date forward, e.g. to
# datetime.datetime(2023, 6, 1).
history_start = datetime.datetime(2019, 1, 1)

download(
    exchange_names=["binance"],
    symbols=["BTC/USDT", "ETH/USDT"],
    timeframe=download_timeframe,
    dir=download_dir,
    since=history_start,
)

BTC/USDT downloaded from binance and stored at ./data/raw/binance-BTCUSDT-5m.pkl
ETH/USDT downloaded from binance and stored at ./data/raw/binance-ETHUSDT-5m.pkl


# Process

In [4]:
# Resample every raw pickle to TARGET_TIMEFRAME bars. One output file is
# written per phase shift of the raw bar width, so a 5-minute source and a
# 30-minute target yield six differently aligned frames per symbol.

# Built-in aggregations replace the per-bin Python lambdas: they are
# vectorised and avoid integer-position `x[0]` / `x[-1]` lookups on a
# datetime-indexed Series (deprecated in pandas 2.1). Empty bins come back as
# NaN/NaT in the first/last/min/max columns and are removed by `dropna()`.
# NOTE(review): unlike the lambdas, these skip NaN values inside a non-empty
# bin instead of propagating them - confirm the raw data has no gaps of that
# kind if the distinction matters.
OHLCV_AGG = {
    "date_close": "last",
    "open": "first",
    "high": "max",
    "low": "min",
    "close": "last",
    "volume": "sum",
}

# `to_pickle` does not create missing folders, so make sure the target exists.
output_dir = Path(processed_dir)
output_dir.mkdir(parents=True, exist_ok=True)

pathes = glob.glob(f"{download_dir}/*pkl")
for path in tqdm(pathes):
    name = Path(path).name.split(".")[0]
    df = pd.read_pickle(path)

    # Infer the raw bar width as the most frequent gap between timestamps.
    bar_gaps = df.index.to_series().diff().value_counts()
    if bar_gaps.empty:
        # Fewer than two rows: no bar width can be inferred, nothing to resample.
        continue
    timeframe = bar_gaps.index[0]

    for offset in range(TARGET_TIMEFRAME // timeframe):
        process_df = (
            df.resample(TARGET_TIMEFRAME, offset=offset * timeframe)
            .agg(OHLCV_AGG)
            .iloc[1:-1]  # drop the first and last bins, which may be partial
            .dropna()
        )
        process_df.to_pickle(output_dir / f"{name}-{offset}.pkl")

  0%|          | 0/2 [00:00<?, ?it/s]