In [1]:
# 1) Import the downloader.
# Try a plain import first; if the module cannot be imported, add the
# repo-relative module directory to sys.path and retry once. The fallback path
# is relative to the current working directory, so it only helps when the
# kernel was started from the repository root.
try:
    from data_minute import download_minute_for_symbols
except ImportError:
    # Only a failed import triggers the fallback. Any other exception raised
    # while data_minute itself executes is a real error and propagates as-is
    # instead of being masked by a second import attempt.
    import sys

    _module_dir = "simplest_ml/w1"
    if _module_dir not in sys.path:
        # Re-running this cell must not pile up duplicate sys.path entries.
        sys.path.append(_module_dir)
    from data_minute import download_minute_for_symbols

# 2) Resolve output directory relative to current notebook CWD.
# Candidates are tried in order and the first one that can be created (or that
# already exists) becomes out_dir. Because os.makedirs creates missing parents,
# a later candidate is only reached when an earlier one fails with an OS error.
import os

candidates = [
    "data/minute",                 # when running inside simplest_ml/w1
    # "simplest_ml/w1/data/minute",  # when running from repo root
]
out_dir = None
_last_error = None  # most recent filesystem failure, chained into the final error
for candidate in candidates:
    try:
        os.makedirs(candidate, exist_ok=True)
    except OSError as exc:
        # Remember why this candidate failed instead of silently discarding it.
        _last_error = exc
        continue
    out_dir = candidate
    break
if out_dir is None:
    # Same exception type and message as before; the underlying OSError (if any)
    # is attached as the cause so the traceback shows what actually went wrong.
    raise RuntimeError("Cannot create output dir in any candidate path.") from _last_error

print(f"Saving 1m shards to: {out_dir}")

# 3) Download universe and date window.
# Every pair is quoted in USDT, so the list is built from the base tickers.
symbols = [
    f"{base}USDT"
    for base in (
        "BTC", "BNB", "SOL", "ETH", "AVAX",
        "TRX", "ARB", "SUI", "MATIC",
    )
]

# Window bounds as YYYY-MM-DD strings: start is inclusive, end is described as
# exclusive (API window end).
# NOTE(review): the logged shard sizes are one row larger than the number of
# minutes in each window (e.g. 2,881 rows for 2023-10-30 → 2023-11-01), so the
# boundary minute may actually be included — confirm in data_minute.
start, end = "2023-10-30", "2025-10-30"

# 4) Run the download for every configured symbol.
# With verbose=True the downloader prints per-month progress. The original
# author notes the cell is safe to re-run; presumably overwrite=False makes
# the downloader skip months that already exist — confirm in data_minute.
download_options = dict(
    start=start,
    end=end,
    out_dir=out_dir,
    interval="1m",
    overwrite=False,   # switch to True to re-pull months that already exist
    sleep=0.25,        # pause between requests: be polite to the API, fewer 429s
    verbose=True,
)
download_minute_for_symbols(symbols, **download_options)


Saving 1m shards to: data/minute
[minute] BTCUSDT: downloading 1m from 2023-10-30 to 2025-10-30 ...
[fetch] BTCUSDT 1m 202310: 2023-10-30 → 2023-11-01
  -> rows=2,881 saved: data/minute/binance_btcusdt_1m_202310.parquet
[fetch] BTCUSDT 1m 202311: 2023-11-01 → 2023-12-01
  -> rows=43,201 saved: data/minute/binance_btcusdt_1m_202311.parquet
[fetch] BTCUSDT 1m 202312: 2023-12-01 → 2024-01-01
  -> rows=44,641 saved: data/minute/binance_btcusdt_1m_202312.parquet
[fetch] BTCUSDT 1m 202401: 2024-01-01 → 2024-02-01
  -> rows=44,641 saved: data/minute/binance_btcusdt_1m_202401.parquet
[fetch] BTCUSDT 1m 202402: 2024-02-01 → 2024-03-01
  -> rows=41,761 saved: data/minute/binance_btcusdt_1m_202402.parquet
[fetch] BTCUSDT 1m 202403: 2024-03-01 → 2024-04-01
  -> rows=44,641 saved: data/minute/binance_btcusdt_1m_202403.parquet
[fetch] BTCUSDT 1m 202404: 2024-04-01 → 2024-05-01
  -> rows=43,201 saved: data/minute/binance_btcusdt_1m_202404.parquet
[fetch] BTCUSDT 1m 202405: 2024-05-01 → 2024-06-01
  -