Prerequisites
- Load the trained model weights (RL policy).

Steps
- Map policy actions to target exposures: long-spread / short-spread / flat.
- Translate each action into orders for the long and short legs, using an order template (limit or market).

In [None]:
from collections import deque
from decimal import Decimal
import math
import time


try:
# Hummingbot Script base (older/ScriptStrategyBase style). Adjust import path if needed.
    from hummingbot.strategy.script_strategy_base import ScriptStrategyBase
except Exception as e:
# Fallback name in some builds
    from hummingbot.strategy.script_base import ScriptStrategyBase as ScriptStrategyBase

class DummyPolicy:
    """
    Placeholder for your trained RL policy. Replace with your real model.

    Expect a method `predict(state: dict) -> dict` returning target exposures.
    Example action: {"spread": "long" | "short" | "flat", "size_usd": 100}
    """

    def __init__(self, path: str = None):
        """Remember where the model weights live.

        Args:
            path: Optional location of the trained weights. The placeholder
                only stores it; nothing is loaded.
        """
        self.path = path
        # load weights here if needed

    def predict(self, state: dict) -> dict:
        """Map a state dict to a target spread exposure.

        Args:
            state: Reads ``"z_score"`` (default 0.0) and ``"base_size_usd"``
                (default 100); any other keys are ignored.

        Returns:
            ``{"spread": ..., "size_usd": ...}``: short the spread when the
            z-score is above 2.0, long when it is below -2.0, otherwise flat
            with a size of 0.
        """
        z = state.get("z_score", 0.0)
        size = state.get("base_size_usd", 100)
        # Simple threshold policy as placeholder
        if z > 2.0:
            return {"spread": "short", "size_usd": size}
        if z < -2.0:
            return {"spread": "long", "size_usd": size}
        return {"spread": "flat", "size_usd": 0}

class RLPairsScript(ScriptStrategyBase):
    def _now(self) -> float:
        return time.time()


    def _zscore(self, x: float) -> float:
        self.residuals.append(x)
        if len(self.residuals) < 30: # warmup
            return 0.0
        mean = sum(self.residuals) / len(self.residuals)
        var = sum((r - mean) ** 2 for r in self.residuals) / max(1, len(self.residuals) - 1)
        std = math.sqrt(var) if var > 0 else 1e-9
        return (x - mean) / std


    def _usd_to_amount(self, trading_pair: str, usd: float, price: float) -> Decimal:
    # Convert USD notionals to base asset amounts; respect min increments externally if needed
        base = usd / max(price, 1e-9)
        return Decimal(str(base))


    def _place(self, tp: str, is_buy: bool, amount: Decimal, price: float = None):
        """Submit one order on the configured connector.

        Args:
            tp: Trading pair to trade.
            is_buy: True calls the connector's ``buy``, False its ``sell``.
            amount: Order size in base-asset units.
            price: Limit price. Ignored when ``self.use_market`` is set; when
                it is None a market order is sent regardless.

        Returns:
            Whatever the connector's ``buy``/``sell`` call returns.
        """
        # Function-scope import, as in the original limit branch.
        from hummingbot.core.data_type.common import OrderType

        conn = self.connectors[self.exchange]
        submit = conn.buy if is_buy else conn.sell

        if self.use_market or price is None:
            # State the order type explicitly: the default order_type of
            # buy()/sell() is not the same on every connector base, so
            # omitting it may not produce a market order.
            return submit(tp, amount, order_type=OrderType.MARKET)

        limit_price = Decimal(str(price))
        return submit(tp, amount, order_type=OrderType.LIMIT, price=limit_price)


    def _cancel_all(self):
        try:
            self.cancel_all_orders()
        except Exception:
            pass


    def _net_exposure_check(self):
    # Placeholder: implement position size checks via position trackers if using perps
        return True


    def _daily_loss_breached(self) -> bool:
    # Simplified PnL check; ideally use connector position PnL
        if self.daily_pnl_start is None:
            self.daily_pnl_start = 0.0
        current_pnl = float(self.performance_metrics.get("total_pnl", 0.0))
        dd = self.daily_pnl_start - current_pnl
        return dd >= self.max_daily_loss_usd



# ---- CORE TICK ----
def on_tick(self):
    if not self._initialized:
    self.logger().info("RLPairsScript starting. Params snapshot: %s", {
    "exchange": self.exchange,
    "pair_a": self.pair_a,
    "pair_b": self.pair_b,
    "beta": self.beta,
    "window": self.window,
    "leverage": self.leverage,
    "use_market": self.use_market,
    })
    self._initialized = True


# Safety checks
if self._daily_loss_breached():
self.logger().warning("Daily loss limit breached. %s", {
"limit": self.max_daily_loss_usd
})
if self.kill_on_breach:
    if self.flatten_on_stop:
        self._cancel_all()
        # TODO: close positions via connector-specific calls if perps
        self.stop()
    return


now = self._now()
if now - self.last_action_ts < self.cooldown_sec:
    return


# 1) Build state
pa = self._mid_price(self.pair_a)
pb = self._mid_price(self.pair_b)
spread = pa - self.beta * pb
z = self._zscore(spread)


state = {
"price_a": pa,
"price_b": pb,
"spread": spread,
"z_score": z,
"base_size_usd": self.base_trade_usd,
}


# 2) Policy -> action
action = self.policy.predict(state)
side = action.get("spread", "flat")
notional = float(action.get("size_usd", 0.0))


if notional > self.max_leg_usd:
    notional = self.max_leg_usd


# 3) Translate to leg orders
# Long spread: +A, -B | Short spread: -A, +B
# Requires perps to short; for spot-only, set side="flat".
if side == "flat" or notional <= 0:
# Optionally cancel stale orders


IndentationError: expected an indented block (1386398066.py, line 9)