In [1]:
import datetime as dt
import lakeapi
import cudf
import pandas as pd
import numpy as np
import os, re, datetime as dt
import cudf, cupy as cp

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# # # ─── 1. PARAMÉTEREK ────────────────────────────────────────────────────────────
# start_date = dt.datetime(2025, 2, 15)
# end_date   = dt.datetime(2025, 2, 28)
# symbol     = "BTC-USDT"
# exchange   = "BINANCE"

### API

In [3]:
# df_book = lakeapi.load_data(
#     table     = "book",
#     start     = start_date,
#     end       = end_date,
#     symbols   = [symbol],
#     exchanges = [exchange],
# )

# # # ─── 3. DINAMIKUS FÁJLNÉV ──────────────────────────────────────────────────────
# file_name = (
#     f"data/book_{symbol.lower().replace('-', '_')}_"
#     f"{start_date:%Y%m%d}_{end_date:%Y%m%d}.parquet"
# )

# # # ─── 4. MENTÉS PARQUET-BE cudf használatával ─────────────────────────────────────────
# df_book.to_parquet(
#     file_name,
#     engine="pyarrow",    # cuDF támogatja a pyarrow és fastparquet engine-t
#     compression="snappy",
# )

# print(f"Mentés kész: {file_name}")


In [4]:
def quality_report(
        symbol: str,
        start_date: str | dt.datetime,
        end_date:   str | dt.datetime,
        data_dir: str = "./data",
        cols: tuple[str,...] = ("origin_time", "received_time"),
):
    """Fájl-streamelt min/max + gap statisztika – max. 1 fájl a GPU-n"""

    # ---- előkészítés -------------------------------------------------------
    sym_pat = symbol.lower().replace("-", "_")
    rex = re.compile(rf"book_{sym_pat}_(\d{{8}})_(\d{{8}})\.parquet$")
    sd = pd.to_datetime(start_date)
    ed = pd.to_datetime(end_date)

    # ---- globális statok (inkrementálisan frissítjük) ----------------------
    min_o = max_o = min_r = max_r = None
    rec_cnt = 0
    largest_gap = pd.Timedelta(0)
    largest_gap_start = None
    missing_secs = set()

    # ---- stream feldolgozás fájlról fájlra ---------------------------------
    for fn in sorted(os.listdir(data_dir)):
        m = rex.match(fn)
        if not m:
            continue
        f_sd = pd.to_datetime(m.group(1))
        f_ed = pd.to_datetime(m.group(2))
        if f_ed < sd or f_sd > ed:
            continue

        fp = os.path.join(data_dir, fn)
        df = cudf.read_parquet(fp, columns=cols, engine="cudf")

        # ↓↓↓   csak a kért dátumtartomány
        df = df[
            (df["received_time"] >= sd) &
            (df["received_time"] <= ed)
        ]

        if df.empty:
            continue

        # ------- min / max frissítés ---------------------------------------
        o_min, o_max = df["origin_time"].min(),   df["origin_time"].max()
        r_min, r_max = df["received_time"].min(), df["received_time"].max()

        min_o = o_min if min_o is None else min(min_o, o_min)
        max_o = o_max if max_o is None else max(max_o, o_max)
        min_r = r_min if min_r is None else min(min_r, r_min)
        max_r = r_max if max_r is None else max(max_r, r_max)

        # ------- gaps a received_time alapján ------------------------------
        recv_sorted = df["received_time"].sort_values().to_pandas()
        gaps = recv_sorted.diff().dropna()
        if not gaps.empty and gaps.max() > largest_gap:
            largest_gap = gaps.max()
            largest_gap_start = recv_sorted.iloc[gaps.argmax()]

        # ------- hiányzó másodpercek (set-union) ---------------------------
        secs = recv_sorted.dt.floor("s").unique()
        missing_secs.update(
            pd.date_range(secs.min(), secs.max(), freq="s").difference(secs)
        )

        rec_cnt += len(df)

        # ------- memória felszabadítás GPU-n -------------------------------
        del df
        cp.get_default_memory_pool().free_all_blocks()

    # ---- report ------------------------------------------------------------
    dur_r = (max_r - min_r) / np.timedelta64(1, "s") if rec_cnt else 0
    print(f"""
[GPU stream report]   {symbol}
    idősáv: {sd}  →  {ed}

Origin_time:   {min_o}  →  {max_o}
Received_time: {min_r}  →  {max_r}
Records: {rec_cnt:,}
Avg rec/s: {rec_cnt/dur_r:.2f}

Largest gap: {largest_gap/np.timedelta64(1,'s'):.2f} s
Gap start  : {largest_gap_start}

Missing whole seconds: {len(missing_secs):,}
""")


In [5]:
# Quality check of the locally stored BTC-USDT book files for the study period.
quality_report(
    "BTC-USDT",
    dt.datetime(2024, 11, 15),
    dt.datetime(2025, 2, 28),
    data_dir="./data",
    cols=["received_time", "origin_time"],
)


[GPU stream report]   BTC-USDT
    idősáv: 2024-11-15 00:00:00  →  2025-02-28 00:00:00

Origin_time:   1970-01-01T00:00:00.000000000  →  2025-02-27T23:59:59.914000128
Received_time: 2024-11-15T00:00:00.017741312  →  2025-02-27T23:59:59.916363776
Records: 55,555,373
Avg rec/s: 6.12

Largest gap: 164.30 s
Gap start  : 2024-11-25 16:36:30.217062912

Missing whole seconds: 1,678



### cudf-el hosszabb időszakokra

In [6]:
def load_fair_mid_imbalance(
        symbol: str,
        start_date: dt.date | dt.datetime | str,
        end_date:   dt.date | dt.datetime | str,
        data_dir:   str = "./data",
        N:          int = 20,        # number of book levels used per side
        row_batch:  int = 500_000,   # rows handed to one GPU calculation
) -> cudf.DataFrame:
    """
    Build a cuDF frame that holds only the derived features
        time | fair | mid | imb
    for every tick whose ``received_time`` lies in [start_date, end_date].

    Files named ``book_<symbol>_<YYYYMMDD>_<YYYYMMDD>.parquet`` in *data_dir*
    are read one at a time; files whose date range does not intersect the
    requested interval are skipped. Per row:

    * fair – size-weighted average price over the first N bid and ask levels
    * mid  – (bid_0_price + ask_0_price) / 2
    * imb  – (bid depth − ask depth) / total depth over the first N levels,
             with a zero total depth replaced by NaN before dividing

    An empty frame with the four column names is returned when nothing matches.
    """

    # --- normalise the interval bounds to datetime --------------------------
    def _to_dt(value):
        if isinstance(value, str):
            return dt.datetime.fromisoformat(value)
        is_plain_date = isinstance(value, dt.date) and not isinstance(value, dt.datetime)
        # a bare date means midnight of that day; anything else passes through
        return dt.datetime.combine(value, dt.time.min) if is_plain_date else value

    sd = _to_dt(start_date)
    ed = _to_dt(end_date)

    sym = symbol.lower().replace("-", "_")
    rex = re.compile(rf"book_{sym}_(\d{{8}})_(\d{{8}})\.parquet$")

    # column order: all bid levels first, then all ask levels
    price_cols, size_cols = [], []
    for side in ("bid", "ask"):
        for level in range(N):
            price_cols.append(f"{side}_{level}_price")
            size_cols.append(f"{side}_{level}_size")
    keep_cols = ["received_time", *price_cols, *size_cols]

    # -------- per-batch feature calculation ---------------------------------
    def _calc(frame: cudf.DataFrame) -> cudf.DataFrame:
        # fair: sum(price * size) / sum(size) over every kept level
        weighted_sum, size_sum = 0.0, 0.0
        for price_name, size_name in zip(price_cols, size_cols):
            weighted_sum += frame[price_name] * frame[size_name]
            size_sum += frame[size_name]
        fair = weighted_sum / size_sum

        mid = (frame["bid_0_price"] + frame["ask_0_price"]) / 2

        # imbalance: (sum of bid sizes − sum of ask sizes) / total depth
        bid_depth = sum(frame[f"bid_{level}_size"] for level in range(N))
        ask_depth = sum(frame[f"ask_{level}_size"] for level in range(N))
        book_depth = bid_depth + ask_depth
        # a zero total depth becomes NaN so the division cannot hit 0
        imb = (bid_depth - ask_depth) / book_depth.replace(0, cp.nan)

        features = {
            "time": frame["received_time"],
            "fair": fair.astype("float32"),
            "mid" : mid.astype("float32"),
            "imb" : imb.astype("float32"),
        }
        return cudf.DataFrame(features)

    chunks = []

    # -------- stream over the files -----------------------------------------
    for fn in sorted(os.listdir(data_dir)):
        m = rex.match(fn)
        if m is None:                   # name does not follow the pattern
            continue

        f_sd = dt.datetime.strptime(m.group(1), "%Y%m%d")
        f_ed = dt.datetime.strptime(m.group(2), "%Y%m%d")
        if f_ed < sd or f_sd > ed:      # no overlap with the requested interval
            continue

        gdf = cudf.read_parquet(
            os.path.join(data_dir, fn),
            columns=keep_cols,
            engine="cudf",
        )

        # restrict to the requested time window
        in_window = (gdf["received_time"] >= sd) & (gdf["received_time"] <= ed)
        gdf = gdf[in_window]
        if gdf.empty:
            continue

        # downcast every price / size column to float32
        for col_name in price_cols + size_cols:
            gdf[col_name] = gdf[col_name].astype("float32")
        cp.get_default_memory_pool().free_all_blocks()

        # frames larger than row_batch are processed slice by slice,
        # smaller ones in a single pass (slices are produced lazily)
        n_rows = len(gdf)
        if n_rows > row_batch:
            pieces = (gdf.iloc[start:start + row_batch]
                      for start in range(0, n_rows, row_batch))
        else:
            pieces = iter((gdf,))

        for piece in pieces:
            chunks.append(_calc(piece))
            cp.get_default_memory_pool().free_all_blocks()

    if not chunks:
        return cudf.DataFrame(columns=["time","fair","mid","imb"])
    return cudf.concat(chunks, ignore_index=True)


In [7]:
# Build the tick-level fair / mid / imbalance frame for the whole study period.
fm_cu = load_fair_mid_imbalance(
    "BTC-USDT",
    dt.datetime(2024, 11, 15),
    dt.datetime(2025, 2, 28),
    data_dir="./data",
    N=20,
    row_batch=400_000,
)

# quick preview; columns: time | fair | mid | imb
print(fm_cu.head())


                           time          fair           mid       imb
0 2024-11-15 00:00:00.017741312  87325.171875  87325.593750  0.025857
1 2024-11-15 00:00:01.967001600  87326.820312  87325.593750 -0.225982
2 2024-11-15 00:00:02.339866624  87320.484375  87321.625000 -0.018794
3 2024-11-15 00:00:02.517839360  87320.203125  87318.164062 -0.611143
4 2024-11-15 00:00:02.805629952  87309.242188  87307.890625 -0.265398


### Szórás alapú threshold - elévült

In [13]:
# import numpy as np, pandas as pd, cudf

# def trading_pnl_realtime(
#         fm_cu,                   # fair–mid DataFrame (cuDF vagy pandas)
#         book_df,                 # teljes order-book  (pandas)
#         window="24h",
#         price_col="bid_0_price",
#         return_trade_log=False
# ):
#     # ---- 0) normalizálás pandasra, rendezés ---------------------------------
#     fm = fm_cu.to_pandas() if isinstance(fm_cu, cudf.DataFrame) else fm_cu.copy()
#     fm["time"]   = pd.to_datetime(fm["time"], errors="coerce")
#     fm = fm.dropna(subset=["time", "fair", "mid"]).sort_values("time")
#     fm["signal"] = fm["fair"] - fm["mid"]

#     # ---- 1) SOR-alapú rolling σ  -------------------------------------------
#     N = int(6 * pd.Timedelta(window).total_seconds())      # ≈ 6 tick/sec * window
#     fm["thr"] = (
#         fm["signal"]
#           .rolling(window=N, min_periods=N, closed="left")
#           .std(ddof=0)
#     )

#     # ---- 2) Benchmark return ugyanebből az időtartományból -----------------
#     trading_start = fm["time"].min() + pd.Timedelta(window)
#     trading_end   = fm["time"].max()

#     bench_slice = book_df[book_df["received_time"] >= trading_start]
#     bench_ret   = np.nan if bench_slice.empty else (
#         bench_slice[price_col].iloc[-1] / bench_slice[price_col].iloc[0]
#     )

#     # ---- 3) Pozíció-séta ----------------------------------------------------
#     pos = 0; entry = None; cum_ret = 1.0; trades = 0; log = []
#     for ts, row in fm.iterrows():
#         thr   = row.thr
#         if np.isnan(thr) or thr == 0:
#             continue

#         sig, price = row.signal, row.mid

#         if pos == 0:                               # flat  →  entry
#             if sig >  thr:
#                 pos, entry, trades =  1, price, trades+1
#                 log.append((ts, "enter_long",  price, np.nan))
#             elif sig < -thr:
#                 pos, entry, trades = -1, price, trades+1
#                 log.append((ts, "enter_short", price, np.nan))

#         elif pos == 1 and sig < -thr:              # long → flat
#             ret       = (price-entry)/entry
#             cum_ret  *= 1 + ret
#             trades   += 1
#             log.append((ts, "exit_long",  price, ret))
#             pos, entry = 0, None

#         elif pos == -1 and sig >  thr:             # short → flat
#             ret       = (entry-price)/entry
#             cum_ret  *= 1 + ret
#             trades   += 1
#             log.append((ts, "exit_short", price, ret))
#             pos, entry = 0, None

#     trade_log = (
#         pd.DataFrame(log, columns=["time", "action", "price", "ret"])
#         if return_trade_log else None
#     )

#     # ---- 4) sigma_df most már thr és signal oszloppal ----------------------
#     sigma_df = fm[["time", "thr", "signal"]].copy()

#     # ▼────────────────────────  DIAG: thr NaN-arány  ────────────────────────
#     period_mask = (sigma_df["time"] >= trading_start) & (sigma_df["time"] <= trading_end)
#     nan_ratio   = sigma_df.loc[period_mask, "thr"].isna().mean()
#     print(f"thr NaN aránya a kereskedési időszakban: {nan_ratio:6.2%}")
#     # ▲────────────────────────────────────────────────────────────────────────

#     return cum_ret, trades, bench_ret, trade_log, sigma_df


In [14]:
# cum_ret, n_trades, bench_ret, trade_log, sigma_df = trading_pnl_realtime(
#     fm, df_book, window="24h", return_trade_log=True
# )

# print(f"Stratégia hozam:     {cum_ret: .4f}×")
# print(f"Benchmark (bid₀):    {bench_ret: .4f}×")
# print(f"Trade-ek száma:      {n_trades}")


### Szórás alapú threshold ellenőrzése

In [15]:
# # 1) Extrém ret-ek listája
# extreme = trade_log[trade_log.ret.abs() > 0.2]
# print("Extrém ret-ek száma:", len(extreme))
# display(extreme.head())

# # 2) Trades / másodperc
# trade_log['time'] = pd.to_datetime(trade_log['time'])
# trade_log['sec']  = trade_log['time'].dt.floor('S')
# print("Átlag trades/sec:", trade_log.groupby('sec').size().mean())

# # 3) Threshold stat
# thr_stats = sigma_df['thr'].describe()
# print("\n--- Threshold statisztika ---")
# print(thr_stats)

# # 4) Jelek mekkora része lépi át közvetlenül a thr-t?
# #   (itt nincs 2*thr, csak abs(signal)>thr)
# df_active = sigma_df.dropna(subset=['thr','signal'])
# ratio    = (df_active['signal'].abs() > df_active['thr']).mean()
# print(f"\nA jel {ratio:.2%}-ában lépi át a thr-t")

# # Példa: feltételezzük, hogy sigma_df létezik és tartalmazza a 'time' és a 'thr' oszlopokat.

# # 1) Perc alapú csoportosítás: lekerekítjük a timestampeket percre
# sigma_df['minute'] = sigma_df['time'].dt.floor('T')

# # 2) Kiszűrjük azokat a perceket, ahol a thr nem NaN
# valid_minutes = (
#     sigma_df.loc[sigma_df['thr'].notna(), 'minute']
#     .drop_duplicates()
#     .sort_values()
#     .reset_index(drop=True)
# )

# # 3) Keresünk folyamatos futamokat (egymást követő percek)
# intervals = []
# if not valid_minutes.empty:
#     start = valid_minutes.iloc[0]
#     prev  = start
#     for current in valid_minutes.iloc[1:]:
#         if current - prev == pd.Timedelta('1T'):
#             prev = current
#         else:
#             intervals.append((start, prev))
#             start = current
#             prev  = current
#     intervals.append((start, prev))

# # 4) Kiíratás
# print("Threshold aktív intervallumok (percre aggregálva):")
# for s, e in intervals:
#     print(f"  • {s} → {e}")


In [8]:
import cudf, cupy as cp, pandas as pd

def extract_extremes_ticks(
        fm_cu,                    # cuDF or pandas frame with 'time' + a price column
        window="24h",             # length of the rolling window
        q=0.99,                   # upper quantile (e.g. 0.995 → top 0.5 % / bottom 0.5 %)
        price_col="fair",
        tick_per_sec=6.0          # average ticks per second (pass the measured value if known)
):
    """
    Return the tick-level extreme points (top / bottom) of a price series.

    The time window is converted into a fixed number of rows,
    ``N = window_seconds * tick_per_sec``, so it is a tick-count window that
    only approximates *window* in wall-clock time. A tick is labelled

    * ``'top'``    when its price is >= the rolling q-quantile,
    * ``'bottom'`` when its price is <= the rolling (1 - q)-quantile
      (this label wins if both conditions hold).

    The first N - 1 ticks have no full window and are never labelled.

    Parameters
    ----------
    fm_cu : cudf.DataFrame | pandas.DataFrame
        Must contain ``'time'`` and *price_col*.
    window : str
        Anything ``pd.Timedelta`` accepts.
    q : float
        Upper quantile; the lower one is ``1 - q``.
    price_col : str
        Column that holds the price.
    tick_per_sec : float
        Average tick rate used to size the window.

    Returns
    -------
    pandas.DataFrame
        Columns ``['time', 'price', 'zone']`` with zone ∈ {'top', 'bottom'}.

    Warns
    -----
    RuntimeWarning
        When the GPU rolling quantile fails and the pandas fallback is used.
    """
    import warnings  # local import keeps this cell runnable on its own

    # -- 0) move to cuDF, keep the needed columns, sort by time --------------
    gdf = fm_cu if isinstance(fm_cu, cudf.DataFrame) else cudf.from_pandas(fm_cu)
    gdf = gdf[["time", price_col]].copy()
    gdf["time"] = cudf.to_datetime(gdf["time"])
    gdf         = gdf.sort_values("time")

    # -- 1) rolling quantile on tick level -----------------------------------
    N = int(pd.Timedelta(window).total_seconds() * tick_per_sec)

    def _q(arr, qq):
        return cp.nanquantile(cp.asarray(arr, dtype=cp.float32), qq)

    try:   # ***** GPU *****
        # NOTE(review): cuDF documents the extra args/kwargs of Rolling.apply
        # as unsupported, so this branch may always end in the fallback – confirm.
        gdf["top"] = (
            gdf[price_col].rolling(window=N, min_periods=N)
                           .apply(_q, raw=True, args=(q,))
        )
        gdf["bot"] = (
            gdf[price_col].rolling(window=N, min_periods=N)
                           .apply(_q, raw=True, args=(1-q,))
        )
    except Exception as exc:
        # Best-effort fallback is intentional, but it must not be silent: the
        # original error is reported so real problems (bad column, out of
        # memory, …) are not mistaken for a mere missing GPU feature.
        warnings.warn(
            f"GPU rolling quantile failed ({type(exc).__name__}: {exc}); "
            "falling back to pandas on the CPU.",
            RuntimeWarning,
            stacklevel=2,
        )
        # ***** CPU fallback *****
        pdf = gdf.to_pandas()
        pdf["top"] = pdf[price_col].rolling(N, min_periods=N).quantile(q)
        pdf["bot"] = pdf[price_col].rolling(N, min_periods=N).quantile(1-q)
        gdf = cudf.from_pandas(pdf)

    # -- 2) zone labels + filtering ------------------------------------------
    gdf["zone"] = "mid"
    m_top       = gdf[price_col] >= gdf["top"]
    m_bot       = gdf[price_col] <= gdf["bot"]
    gdf.loc[m_top, "zone"] = "top"
    gdf.loc[m_bot, "zone"] = "bottom"   # assigned last, so it overrides 'top'

    extremes = gdf.loc[m_top | m_bot, ["time", price_col, "zone"]]  \
                  .rename(columns={price_col: "price"})

    return extremes.to_pandas().reset_index(drop=True)


In [9]:
# Tick-level extremes of the fair price over a rolling ~24h tick window.
# q = 0.99998 keeps only the top 0.002 % / bottom 0.002 % of each window.
ext = extract_extremes_ticks(
    fm_cu=fm_cu,            # tick-level fair/mid cuDF frame
    price_col="fair",
    window="24h",
    q=0.99998,
    tick_per_sec=6.12,      # average records/s printed by quality_report
)

print(ext.head())


                           time         price zone
0 2024-11-15 20:49:27.616990720  91215.757812  top
1 2024-11-15 20:49:27.718318080  91220.875000  top
2 2024-11-15 20:49:27.816460288  91221.257812  top
3 2024-11-15 20:49:28.019159296  91223.093750  top
4 2024-11-15 20:49:28.116102912  91222.773438  top


In [10]:
# --- 0) Put the ticks in chronological order (no-op if already sorted) --
fm_cu = fm_cu.sort_values(by="time")

# --- 1) spread = mid − fair, stored as float32 to save GPU memory ------
fm_cu["spread"] = (fm_cu["mid"] - fm_cu["fair"]).astype("float32")

# --- 2) rolling σ of the spread over roughly one minute -----------------
#     60 s × ~6 ticks/s  →  a 360-tick window
N = 6 * 60
spread_window = fm_cu["spread"].rolling(window=N, min_periods=N)
fm_cu["sigma"] = spread_window.std(ddof=0).astype("float32")   # population σ (ddof=0)
del spread_window   # helper only; not meant to stay in the notebook namespace

# --- 3) dist_to_fair = spread / σ ----------------------------------------
#     Missing values (e.g. the warm-up rows without a full window) and the
#     ±inf produced by a zero σ are both mapped to 0.
fm_cu["dist_to_fair"] = (
    (fm_cu["spread"] / fm_cu["sigma"])
      .fillna(0)
      .replace([cp.inf, -cp.inf], 0)
)

# --- 4) 'imb' is already part of the frame; nothing to convert -----------

# (optional) the temporary 'spread' column can be dropped afterwards:
# fm_cu = fm_cu.drop(columns=["spread"])


In [16]:
def columnwise_minmax_pct_cu(df_cu: cudf.DataFrame,
                             p_low: float = 0.01,
                             p_high: float = 0.99,
                             sample: int | None = None) -> cudf.DataFrame:
    """
    Per-column summary computed with cuDF:
        column | missing_% | min | p<low> | p<high> | max

    Parameters
    ----------
    df_cu : cudf.DataFrame
        Frame to summarise.
    p_low, p_high : float
        Quantiles in [0, 1]. They also name the two percentile columns, e.g.
        0.01 → ``p1``, 0.99 → ``p99``, 0.005 → ``p0.5``.
    sample : int | None
        If given and a column has more rows than this, the two quantiles are
        estimated on ``.sample(sample, random_state=0)``; missing_%, min and
        max always use every row.

    Returns
    -------
    cudf.DataFrame
        One row per input column. Non-numeric columns only get ``missing_%``;
        their other statistics are null.
    """
    def _pct_label(p: float) -> str:
        # round() removes float noise (0.29 * 100 == 28.999999999999996) and
        # ":g" keeps sub-percent quantiles apart; int() used to truncate both.
        return f"p{round(p * 100, 6):g}"

    rows, cols = [], []
    for col in df_cu.columns:
        s = df_cu[col]
        miss = float(s.isna().mean() * 100)

        if cudf.api.types.is_numeric_dtype(s.dtype):
            # optional sub-sampling, used for the quantiles only
            s_q = s.sample(sample, random_state=0) if sample and len(s) > sample else s

            q = s_q.quantile([p_low, p_high])      # two-element cuDF Series
            lo, hi = float(q.iloc[0]), float(q.iloc[1])

            mn = float(s.min())
            mx = float(s.max())
            row = [miss, mn, lo, hi, mx]
        else:
            row = [miss, None, None, None, None]

        rows.append(row)
        cols.append(col)

        # release GPU memory between columns
        del s
        cp.get_default_memory_pool().free_all_blocks()

    out = cudf.DataFrame(rows,
            columns=["missing_%", "min",
                     _pct_label(p_low), _pct_label(p_high), "max"])
    out.insert(0, "column", cols)          # carry the source column names
    return out


# --- run the report on the feature frame ----------------------------------
# The two quantiles are estimated on a 1M-row sample of each numeric column.
report_cu = columnwise_minmax_pct_cu(
    fm_cu,
    sample=1_000_000,
    p_low=0.01,
    p_high=0.99,
)

print(report_cu)         # still a cuDF frame, stays on the GPU
# to look at it on the CPU instead:
# print(report_cu.to_pandas().head())


         column  missing_%           min            p1          p99  \
0          time   0.000000          <NA>          <NA>         <NA>   
1          fair   0.000000   82258.15625   86436.82617    106628.06   
2           mid   0.000000   82256.01563      86436.78     106628.0   
3           imb   0.000000  -0.999823332  -0.986059854  0.986911363   
4        spread   0.000000   -903.546875     -2.609375    2.6015625   
5         sigma   0.000646   0.004667363    0.03304407  2.636027441   
6  dist_to_fair   0.000000   -101.365921  -3.490539446  3.527764509   

           max  
0         <NA>  
1  109583.5156  
2  109574.7656  
3  0.999962807  
4    765.15625  
5  155.2344666  
6  76.98890686  


In [12]:
import pandas as pd
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import cudf

# ―――――――――――――――――――――――――――――――――――――――――――――――――
# 0) make sure fm and ext are pandas DataFrames
if isinstance(fm_cu, cudf.DataFrame):
    fm_pdf = fm_cu.to_pandas()
else:
    fm_pdf = fm_cu.copy()

if isinstance(ext, cudf.DataFrame):
    ext_pdf = ext.to_pandas()
else:
    ext_pdf = ext.copy()

# ―――――――――――――――――――――――――――――――――――――――――――――――――
# 1) per-minute mean of the fair price (price_min)
price_min = (
    fm_pdf[["time", "fair"]]
      .assign(time=lambda df: pd.to_datetime(df["time"]).dt.floor("min"))
      .groupby("time", as_index=False)["fair"].mean()
      .rename(columns={"fair": "price"})
      .sort_values("time")
)

# ―――――――――――――――――――――――――――――――――――――――――――――――――
# 2) extremes floored to the minute (ext_min); when a minute holds several
#    extreme ticks, the first one decides the zone of that minute
ext_min = (
    ext_pdf[["time", "zone"]]
      .assign(time=lambda df: pd.to_datetime(df["time"]).dt.floor("min"))
      .drop_duplicates("time")
)

# ―――――――――――――――――――――――――――――――――――――――――――――――――
# 3) merge; minutes without an extreme get the default zone 'mid'
price_min = price_min.merge(ext_min, on="time", how="left")
price_min["zone"] = price_min["zone"].fillna("mid")

# ―――――――――――――――――――――――――――――――――――――――――――――――――
# 4) Plotly figure
fig = make_subplots(rows=1, cols=1, shared_xaxes=True)

# 4.a) background: the full per-minute price series
fig.add_trace(go.Scatter(
    x=price_min["time"], y=price_min["price"],
    mode="lines",
    line=dict(color="rgba(130,130,130,0.35)", width=1),
    hoverinfo="skip", showlegend=False
))

# 4.b) extreme stretches: top = green, bottom = red
cmap      = {"top": "seagreen", "bottom": "firebrick"}
is_extrem = price_min["zone"] != "mid"
# A new run starts whenever the zone label changes. Grouping on the label
# (not only on extreme / non-extreme) keeps a 'top' run that is directly
# followed by a 'bottom' run from being drawn as one trace in one colour.
grp_idx   = (price_min["zone"] != price_min["zone"].shift()).cumsum()

for _, seg in price_min[is_extrem].groupby(grp_idx):
    z = seg["zone"].iat[0]
    # a one-point run has no line to draw, so it is shown as a marker instead
    fig.add_trace(go.Scatter(
        x=seg["time"], y=seg["price"],
        mode="lines" if len(seg) > 1 else "markers",
        line=dict(color=cmap[z], width=1.6),
        marker=dict(color=cmap[z], size=4),
        showlegend=False
    ))

# 5) axes and layout
fig.update_yaxes(title_text="Price")
fig.update_layout(
    height=600, width=950,
    hovermode="x unified",
    template="simple_white"
)

# ―――――――――――――――――――――――――――――――――――――――――――――――――
# 6) derive the covered date range and save the figure as HTML under that name
start_str = price_min["time"].min().strftime("%Y%m%d")
end_str   = price_min["time"].max().strftime("%Y%m%d")
filename  = f"price_extremes_{start_str}_{end_str}.html"

fig.write_html(filename, include_plotlyjs="cdn")
print(f"✔ {filename} elkészült")


✔ price_extremes_20241115_20250227.html elkészült
