# Hull Tactical Market Prediction

### Import Libralies

In [202]:
# Utilities
import os
from pathlib import Path
import pandas as pd
import numpy as np

# Visualization
from colorama import Fore, Style
from IPython.display import display, HTML
import matplotlib.pyplot as plt
from dataclasses import dataclass, asdict

# Models
import lightgbm as lgb

# Submission
import polars as pl
import kaggle_evaluation.default_inference_server

In [203]:
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

In [204]:
INNER_VAL_LEN = 180
TRADING_DAYS_PER_YR = 252

### Definition

In [205]:
# ============ RETURNS TO SIGNAL CONFIGS ============
MIN_SIGNAL: float = 0.0                         # Minimum value for the daily signal 
MAX_SIGNAL: float = 2.0                         # Maximum value for the daily signal 
SIGNAL_MULTIPLIER: float = 7.5                 # Multiplier of the OLS market forward excess returns predictions to signal 

@dataclass(frozen=True)
class RetToSignalParameters:
    signal_multiplier: float 
    min_signal : float = MIN_SIGNAL
    max_signal : float = MAX_SIGNAL

ret_signal_params = RetToSignalParameters(
    signal_multiplier= SIGNAL_MULTIPLIER
)

def convert_ret_to_signal(
    ret_arr: np.ndarray,
    params: RetToSignalParameters,
    signal_multiplier=None
) -> np.ndarray:
    """
    Converts raw model predictions (expected returns) into a trading signal.

    Args:
        ret_arr (np.ndarray): The array of predicted returns.
        params (RetToSignalParameters): Parameters for scaling and clipping the signal.

    Returns:
        np.ndarray: The resulting trading signal, clipped between min and max values.
    """
    
    # 予測値を基準に，投資戦略シグナルに変換
    # ret * signal_multiplier + 1 を min_signal ~ max_signal の範囲にクリップ
    if signal_multiplier is None:
        multi = params.signal_multiplier    
    else:
        multi = signal_multiplier
    
    ret = np.clip(
        ret_arr * multi + 1,
        params.min_signal, 
        params.max_signal
    )

    if ret.size < 20:
        print(f"Strategy:")
        for i, value in enumerate(ret): print(f'  {i}: {value:.4f}')

    return ret

In [206]:
# convert_ret_to_signalの動作確認
# 20個の乱数(0~1)
hoge = convert_ret_to_signal(np.array([5, 0.1, 0.3, -0.2, 1.3]), ret_signal_params)

Strategy:
  0: 2.0000
  1: 1.7500
  2: 2.0000
  3: 0.0000
  4: 2.0000


In [207]:
# ============ LOAD DATA ============
# プラットフォームがkaggleかローカルかで分岐
if os.getenv('KAGGLE_KERNEL_RUN_TYPE') is not None:
    # Kaggle上
    DATA_PATH: Path = Path('/kaggle/input/hull-tactical-market-prediction/')
else:
    BASE_PATH = Path.cwd()
    DATA_PATH: Path = BASE_PATH / 'data'


train = pd.read_csv(DATA_PATH / "train.csv")
test = pd.read_csv(DATA_PATH / "test.csv")

### Scoreing

In [208]:
class ParticipantVisibleError(Exception):
    # Custom error to show messages to participants
    pass

def score(solution: pd.DataFrame, submission: pd.DataFrame, row_id_column_name: str, intermediate_res:list = []) -> float:
    """
    Calculates a custom evaluation metric (volatility-adjusted Sharpe ratio).

    This metric penalizes strategies that take on significantly more volatility
    than the underlying market.

    Returns:
        float: The calculated adjusted Sharpe ratio.
    """
    solution = solution.copy().reset_index(drop=True)
    submission = submission.copy().reset_index(drop=True)
    solution['position'] = submission['prediction']

    # ありえない値を除外する (0 <= position <= 2)
        # 0 means that we don't invest in S & P at all but get only the risk-free rate.
        # 1 means that we invest all our money in S & P.
        # 2 means that we invest twice our capital in S & P while taking a credit at the risk-free rate.
        # -> つまり，普通に預金するか，S&Pに投資するか，S&Pに2倍レバレッジで投資するか（借金）の割合
    if solution['position'].max() > MAX_SIGNAL:
        raise ParticipantVisibleError(f'Position of {solution["position"].max()} exceeds maximum of {MAX_SIGNAL}')
    if solution['position'].min() < MIN_SIGNAL:
        raise ParticipantVisibleError(f'Position of {solution["position"].min()} below minimum of {MIN_SIGNAL}')

    # Calculate strategy returns
    # フェデラルファンド金利(利息) * (1-予測値) + 予測値 * S&P500の翌日のリターン = 戦略のリターン(割合)
    solution['strategy_returns'] = solution['risk_free_rate'] * (1 - solution['position']) + solution['position'] * solution['forward_returns']

    # Calculate strategy's Sharpe ratio
    # リターンとその標準偏差を用いてシャープレシオ（リスクあたりの効率）を計算
    strategy_excess_returns = solution['strategy_returns'] - solution['risk_free_rate'] # 超過リターン -> 今回の戦略で得た割合から，リスクフリー時の割合を引いた分
    strategy_excess_cumulative = (1 + strategy_excess_returns).prod() # 累積超過リターン -> 全期間の超過リターンをかけ合わせた分(1+で倍率に変換)
    strategy_mean_excess_return = (strategy_excess_cumulative) ** (1 / len(solution)) - 1 # 平均超過リターン -> 複利は幾何平均で求める． また，倍率から割合に戻してる
    strategy_std = solution['strategy_returns'].std() # リターンの標準偏差

    trading_days_per_yr = 252 # 1年あたりの取引日数(固定値)
    if strategy_std == 0:
        raise ZeroDivisionError
    sharpe = strategy_mean_excess_return / strategy_std * np.sqrt(trading_days_per_yr) # 年率換算したシャープレシオ. sqrt(252)をかけることで年率換算している（統計的な性質らしい）
    strategy_volatility = float(strategy_std * np.sqrt(trading_days_per_yr) * 100)  # 年率換算したボラティリティ(価格変動率)

    # Calculate market return and volatility
    # S&P500に投資し続けた場合のリターンとボラティリティを計算
    market_excess_returns = solution['forward_returns'] - solution['risk_free_rate'] # S&P500が利息を上回る割合
    market_excess_cumulative = (1 + market_excess_returns).prod() # ↑の累積
    market_mean_excess_return = (market_excess_cumulative) ** (1 / len(solution)) - 1 # train: 0.0003066067595838273 幾何平均，割合化
    market_std = solution['forward_returns'].std() # S&P500のリターンの標準偏差
    
    market_volatility = float(market_std * np.sqrt(trading_days_per_yr) * 100) # train: 16.748459963166347 %
    
    # Calculate the volatility penalty
    # ボラティリティペナルティを計算
    # -> 市場のボラティリティの1.2倍を超える場合のペナルティ
    excess_vol = max(0, strategy_volatility / market_volatility - 1.2) if market_volatility > 0 else 0
    vol_penalty = 1 + excess_vol

    # Calculate the return penalty
    # リターンペナルティを計算
    # -> 市場のリターンを下回る場合のペナルティ
    return_gap = max(
        0,
        (market_mean_excess_return - strategy_mean_excess_return) * 100 * trading_days_per_yr,
    )
    return_penalty = 1 + (return_gap**2) / 100

    # Adjust the Sharpe ratio by the volatility and return penalty
    # ペナルティ値の反映
    adjusted_sharpe = sharpe / (vol_penalty * return_penalty)

    # print("strategy_excess_returns NaN数:", solution['strategy_returns'].isna().sum())
    # print("strategy_std:", strategy_std)
    # print("strategy_excess_cumulative:", strategy_excess_cumulative)
    # print("market_excess_cumulative:", market_excess_cumulative)
    # print("adjusted_sharpe:", adjusted_sharpe)
    try:
        intermediate_res.append((strategy_mean_excess_return, strategy_std, sharpe, vol_penalty, return_penalty)) # 各値を記録(debug)
        return min(float(adjusted_sharpe), 1_000_000), intermediate_res # float変換，上限100万
    except NameError:
        return min(float(adjusted_sharpe), 1_000_000) # float変換，上限100万

### Training

In [209]:
# ====== ユーティリティ ======
def _annualize(sigma_daily: np.ndarray) -> np.ndarray:
    return sigma_daily * np.sqrt(252.0)

def _rolling_vol_no_leak(fr: pd.Series, window_size: int) -> np.ndarray:
    # center=False で未来不参照。序盤は expanding で埋める（過去のみ）
    roll = fr.rolling(window=window_size, min_periods=window_size, center=False).std()
    expd = fr.expanding(min_periods=2).std()
    vol = roll.combine_first(expd).bfill(limit=0)
    return vol.to_numpy()

def _ewma_vol_series(fr: pd.Series, lam: float = 0.94, min_periods: int = 2) -> np.ndarray:
    # adjust=False で逐次（未来を見ない）
    var = (fr**2).ewm(alpha=1 - lam, adjust=False, min_periods=min_periods).mean()
    return np.sqrt(var).to_numpy()

def _make_sigma_for_period(fr_all: np.ndarray, start: int, end: int,
                           mode="ewma", lam=0.94, window=30) -> np.ndarray:
    """[start, end) 用 σ_t を直前の過去を使って作る（リークなし）。"""
    warm = max(window, 20)
    prefix_start = max(0, start - warm)
    fr_prefix = fr_all[prefix_start:start]
    fr_period  = fr_all[start:end]
    fr_concat  = np.concatenate([fr_prefix, fr_period])
    s = pd.Series(fr_concat)
    if mode == "rolling":
        sigma_all = _rolling_vol_no_leak(s, window_size=window)
    elif mode == "ewma":
        sigma_all = _ewma_vol_series(s, lam=lam, min_periods=max(2, window//4))
    else:
        raise ValueError("mode must be 'rolling' or 'ewma'")
    return sigma_all[-(end - start):]

# ====== Phase1: 静的校正（z化）＋ soft-clip ======
def compute_z_calibration(y_pred_inner: np.ndarray, std_scale: float = 2.0) -> tuple[float, float]:
    """z 正規化に使う (b, T)。b: 中央値, T: std_scale×標準偏差（ゼロ除算ガード）"""
    y = np.asarray(y_pred_inner).reshape(-1)
    b = float(np.median(y))
    s = float(np.std(y, ddof=1))
    T = max(s * std_scale, 1e-8)
    return b, T

def choose_m_soft_by_clip(z_inner: np.ndarray,
                          T_soft: float = 1.0,
                          target_clip: float = 0.22,
                          m_bounds: tuple[float, float] = (0.5, 5.0)) -> float:
    """
    inner の z 分布から m を自動選定。
    1 + m*tanh(z/T_soft) の 0/2 クリップ率が target_clip に近い m を選ぶ。
    """
    z = np.asarray(z_inner).reshape(-1)

    def clip_rate_for(m: float) -> float:
        # 端貼り付き条件: m * tanh(|z|/T_soft) >= 1
        if m <= 1.0:
            return 0.0
        thr = T_soft * np.arctanh(1.0 / m)
        return float(np.mean(np.abs(z) >= thr))

    lo, hi = m_bounds
    grid = np.linspace(lo, hi, 20)
    vals = np.array([clip_rate_for(m) for m in grid])
    m0 = float(grid[np.argmin(np.abs(vals - target_clip))])

    m_lo, m_hi = max(lo, m0 - 0.5), min(hi, m0 + 0.5)
    for _ in range(12):  # 小さな二分探索
        m_mid = 0.5 * (m_lo + m_hi)
        cr = clip_rate_for(m_mid)
        if cr > target_clip:
            m_lo = m_mid
        else:
            m_hi = m_mid
    return float(np.clip(0.5 * (m_lo + m_hi), *m_bounds))

def soft_clip_from_z(z: np.ndarray, m: float, T_soft: float,
                     min_signal: float, max_signal: float) -> np.ndarray:
    """p = clip( 1 + m * tanh(z / T_soft), [min_signal, max_signal] )"""
    raw = 1.0 + m * np.tanh(z / T_soft)
    return np.clip(raw, min_signal, max_signal)

# ====== Phase2: 信頼度連動レバ（|z|） ======
def conf_m_from_z_abs(z: np.ndarray, m_lo: float = 1.0, m_hi: float = 3.0, Tm: float = 1.2) -> np.ndarray:
    """|z| が小さいとき m→m_lo, 大きいと m→m_hi"""
    return m_lo + (m_hi - m_lo) * np.tanh(np.abs(z) / Tm)

# ====== Phase4: ボラ・ターゲティング ======
def vol_target_scaler(sigma_daily: np.ndarray, gamma: float = 1.10, lev_cap: float = 2.0) -> np.ndarray:
    """ℓ_t = min(lev_cap, σ* / (σ_t^ann + eps)), σ* = gamma * median(σ^ann)"""
    sigma_ann = _annualize(sigma_daily)
    sigma_ref = np.median(sigma_ann)
    sigma_star = gamma * sigma_ref
    return np.minimum(lev_cap, sigma_star / (sigma_ann + 1e-6))

# ====== 推論パイプ（z → soft-clip → confidence → vol target） ======
def make_allocation_from_predictions(
    y_pred: np.ndarray,
    *,
    b_z: float,
    T_z: float,
    T_soft: float,
    m_soft: float,
    conf_params: tuple[float, float, float],  # (m_lo, m_hi, Tm)
    sigma_daily: np.ndarray | None,
    gamma: float,
    lev_cap: float,
    min_signal: float,
    max_signal: float
) -> np.ndarray:
    """1本のパイプで allocation を作る（リークしない前提の σ を渡すこと）。"""
    # z 化
    z = (y_pred - b_z) / T_z
    # 基本 soft-clip（中心1.0）
    p_soft = soft_clip_from_z(z, m=m_soft, T_soft=T_soft,
                              min_signal=min_signal, max_signal=max_signal)
    # 信頼度連動レバ（弱めから開始）
    m_lo, m_hi, Tm = conf_params
    m_conf = conf_m_from_z_abs(z, m_lo=m_lo, m_hi=m_hi, Tm=Tm)
    p_conf = np.clip(1.0 + (m_conf * np.tanh(z / T_soft)), min_signal, max_signal)

    # 2段構えにしたい場合は p_soft と p_conf を混合しても良いが、まずは p_conf を採用
    p = p_conf

    # ボラ・ターゲティング（σが渡されない場合はスキップ）
    if sigma_daily is not None:
        ell_t = vol_target_scaler(sigma_daily, gamma=gamma, lev_cap=lev_cap)
        p = np.clip((p - 1.0) * ell_t + 1.0, min_signal, max_signal)

    return p


In [210]:
# ====== クロスバリデーション（時系列CV + 最後180の診断） ======
score_list_dict = {}

def cross_validate(
    allocation_model,
    label: str = "",
    min_train_size: int = 3000,
    test_size: int = 180,
    *,
    inner_len: int = 720,          # INNER_VAL_LEN 相当
    vol_mode: str = "ewma",
    lambda_ewma: float = 0.94,
    window_size: int = 30,
    std_scale: float = 2.0,
    T_soft_init: float = 1.0,
    target_clip: float = 0.22,
    conf_params: tuple = (1.0, 3.0, 1.2),  # (m_lo, m_hi, Tm)
    gamma: float = 1.10,
    lev_cap: float = 2.0
):
    """
    時系列を考慮したCV。
    - inner から (b,T) と m_soft を決定（リークなし）
    - test & 最後180 を同一パイプで評価
    """
    n = len(train)
    oof = np.full(n, np.nan, dtype=float)
    score_list, val_list, intermediate_res = [], [], []

    # 特徴列
    drop_cols = ["date_id", "forward_returns", "risk_free_rate", "market_forward_excess_returns"]
    feature_cols = [c for c in train.columns if c not in drop_cols]
    X_all = train[feature_cols]
    y_all = train["forward_returns"].to_numpy()
    rfr_all = train["risk_free_rate"].to_numpy()

    # 最後180（LB）
    val_start = max(0, n - 180)
    X_val = X_all.iloc[val_start:]
    v_sol = pd.DataFrame({
        "forward_returns": y_all[val_start:],
        "risk_free_rate":  rfr_all[val_start:],
    }).reset_index(drop=True)

    for fold, test_start in enumerate(range(n - test_size, min_train_size, -test_size)):
        print(Fore.CYAN + f"=== Fold {fold} Test start at {test_start} ===" + Style.RESET_ALL)
        test_end = test_start + test_size

        # ---- split ----
        X_train = X_all.iloc[:test_start]
        X_test  = X_all.iloc[test_start:test_end]
        y_test  = y_all[test_start:test_end]   # debug用途
        sol_test = pd.DataFrame({
            "forward_returns": y_all[test_start:test_end],
            "risk_free_rate":  rfr_all[test_start:test_end],
        }).reset_index(drop=True)

        inner_start = max(0, test_start - inner_len)
        X_inner = X_all.iloc[inner_start:test_start]
        y_inner = y_all[inner_start:test_start]
        sol_inner = pd.DataFrame({
            "forward_returns": y_all[inner_start:test_start],
            "risk_free_rate":  rfr_all[inner_start:test_start],
        }).reset_index(drop=True)

        # ---- fit ----
        allocation_model.fit(X_train, y_all[:test_start])

        # ---- inner: z校正 & m_soft 決定 ----
        y_pred_inner = allocation_model.predict(X_inner)
        b_z, T_z = compute_z_calibration(y_pred_inner, std_scale=std_scale)
        z_inner = (y_pred_inner - b_z) / T_z
        T_SOFT = T_soft_init
        M_SOFT = choose_m_soft_by_clip(z_inner, T_soft=T_SOFT, target_clip=target_clip)

        # ---- test: 予測 → 配分 ----
        y_pred_test = allocation_model.predict(X_test)
        sigma_test_daily = _make_sigma_for_period(y_all, test_start, test_end,
                                                  mode=vol_mode, lam=lambda_ewma, window=window_size)
        allocation_list = make_allocation_from_predictions(
            y_pred_test,
            b_z=b_z, T_z=T_z,
            T_soft=T_SOFT, m_soft=M_SOFT,
            conf_params=conf_params,
            sigma_daily=sigma_test_daily,
            gamma=gamma, lev_cap=lev_cap,
            min_signal=ret_signal_params.min_signal,
            max_signal=ret_signal_params.max_signal,
        )

        # ---- 評価（test fold）----
        sub_test = pd.DataFrame({"prediction": allocation_list}).reset_index(drop=True)
        val_score, intermediate_res = score(sol_test, sub_test, "", intermediate_res)

        # ---- 最後180（LB診断）----
        y_pred_val = allocation_model.predict(X_val)
        sigma_val_daily = _make_sigma_for_period(y_all, val_start, n,
                                                 mode=vol_mode, lam=lambda_ewma, window=window_size)
        val_allocation = make_allocation_from_predictions(
            y_pred_val,
            b_z=b_z, T_z=T_z,
            T_soft=T_SOFT, m_soft=M_SOFT,
            conf_params=conf_params,
            sigma_daily=sigma_val_daily,
            gamma=gamma, lev_cap=lev_cap,
            min_signal=ret_signal_params.min_signal,
            max_signal=ret_signal_params.max_signal,
        )
        sub_val = pd.DataFrame({"prediction": val_allocation}).reset_index(drop=True)
        lb_score, inter2 = score(v_sol, sub_val, "", intermediate_res)

        # ---- ログ ----
        clip0 = np.mean(allocation_list <= ret_signal_params.min_signal) * 100
        clip2 = np.mean(allocation_list >= ret_signal_params.max_signal) * 100
        if inter2:
            strat_mu, strat_std, sharpe, vol_pen, ret_pen = inter2[-1]
            print(f"[last180] sharpe={sharpe:.3f} vol_pen={vol_pen:.2f} ret_pen={ret_pen:.2f}")
            lo = np.mean(val_allocation <= ret_signal_params.min_signal)
            hi = np.mean(val_allocation >= ret_signal_params.max_signal)
            print(f"[last180] clip@0={lo:.2%}, clip@2={hi:.2%}")
        print(f"[inner] M_SOFT={M_SOFT:.3f}, T_SOFT={T_SOFT:.2f}, target clip≈{int(target_clip*100)}%")
        print(f"[test ] clip@0={clip0:.2f}% clip@2={clip2:.2f}%")

        display(HTML(
            f"<p style='color: orange'>"
            f"train(:{test_start:4}) test({test_start:4}:{test_end:4})<br>"
            f"val_score: {val_score:6.3f}<br>"
            f"score(submission): {lb_score:.6f}<br>"
            f"z-calib: b={b_z:.3e}, T={T_z:.3e}, M_SOFT={M_SOFT:.3f}, T_SOFT={T_SOFT:.2f}"
            f"</p>"
        ))

        oof[test_start:test_end] = allocation_list
        score_list.append(val_score)
        val_list.append(lb_score)

    # ===== 集計表示 =====
    submit_model = allocation_model
    display(HTML('<h2 style="text-align:center;color:orange">======== Result ========</h2>'))
    avg_score = float(np.nanmean(score_list)) if len(score_list) else np.nan
    print(f"{label} Average Validation Score: {avg_score:.6f}")

    mask = np.isfinite(oof)
    if np.any(mask):
        solution_all = pd.DataFrame({
            "forward_returns": y_all[mask],
            "risk_free_rate":  rfr_all[mask],
        }).reset_index(drop=True)
        submission_all = pd.DataFrame({'prediction': oof[mask]}).reset_index(drop=True)
        overall_score, inter_all = score(solution_all, submission_all, '', [])
        vol_penalty = inter_all[-1][3] if inter_all else np.nan
        return_penalty = inter_all[-1][4] if inter_all else np.nan
        print(f"{label} Overall Validation Score: {overall_score:.6f} vol_penalty={vol_penalty:.2f} return_penalty={return_penalty:.2f}")
    else:
        print(f"{label} Overall Validation Score: NaN (no valid OOF)")

    score_list_dict[label] = score_list
    if score_list:
        print(f"{label} First(Test) Fold Validation Score: {score_list[0]:.6f}")
    if val_list:
        print(Fore.YELLOW + f"All(Test) Fold Validation Score : {(sum(val_list)/len(val_list)):6.3f}" + Style.RESET_ALL)

    # ヒスト
    if np.any(mask):
        vals = oof[mask]
        vmin, vmax = float(np.min(vals)), float(np.max(vals))
        if vmin == vmax: vmax = vmin + 1e-6
        bins = np.linspace(vmin, vmax, 50)
        plt.figure(figsize=(6, 2))
        plt.hist(vals, bins=bins, density=False, color='c', edgecolor='k', linewidth=0.5)
        plt.title(f'Allocation histogram of {label}')
        plt.gca().get_yaxis().set_visible(False)
        plt.xlim(vmin, vmax)
        plt.show()
        print(f"Range of predictions: [{vmin:.6f}, {vmax:.6f}]")

    return submit_model

In [211]:
# 単純なLightGBMモデルで試す

allocation_model = lgb.LGBMRegressor(
    n_estimators=1000,
    learning_rate=0.05,
    num_leaves=31,
    colsample_bytree=0.8,
    subsample=0.8,
    random_state=42,
)

submit_model = cross_validate(allocation_model, label="LightGBM Model")

[36m=== Fold 0 Test start at 8810 ===[0m
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002600 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 21575
[LightGBM] [Info] Number of data points in the train set: 8810, number of used features: 94
[LightGBM] [Info] Start training from score 0.000468
[last180] sharpe=0.375 vol_pen=1.04 ret_pen=1.00
[last180] clip@0=0.00%, clip@2=0.00%
[inner] M_SOFT=1.421, T_SOFT=1.00, target clip≈22%
[test ] clip@0=0.00% clip@2=0.00%


[36m=== Fold 1 Test start at 8630 ===[0m
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002162 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 21573
[LightGBM] [Info] Number of data points in the train set: 8630, number of used features: 94
[LightGBM] [Info] Start training from score 0.000460
[last180] sharpe=0.149 vol_pen=1.00 ret_pen=1.25
[last180] clip@0=0.00%, clip@2=0.00%
[inner] M_SOFT=1.421, T_SOFT=1.00, target clip≈22%
[test ] clip@0=0.00% clip@2=0.00%


[36m=== Fold 2 Test start at 8450 ===[0m
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001443 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 21575
[LightGBM] [Info] Number of data points in the train set: 8450, number of used features: 94
[LightGBM] [Info] Start training from score 0.000453


KeyboardInterrupt: 

### Submission
- time-series streaming形式
- Kaggle サーバーから1batchずつ送られるデータからsubmission.parquetを返す
- 返り値検証があるため，指定された形式で返す
- 指定形式
  - 

In [None]:
def predict(test: pl.DataFrame) -> float:
    """Replace this function with your inference code."""
    test_pd = test.to_pandas()
    # display(test_pd.info())
    if len(test_pd.columns) > 94:
        test_pd = test_pd.drop(
            ["date_id", "is_scored", "lagged_forward_returns", "lagged_risk_free_rate", "lagged_market_forward_excess_returns"], 
            axis = 1)
    
    preds = submit_model.predict(test_pd)
    raw_pred: float = float(preds[0])
    print(f"predict:{raw_pred}")
    
    # --- 出力（float or ndarray）---
    # KaggleのAPI仕様上、float単体かSeries/DataFrameで返す必要あり float(preds[0]) if len(preds) == 1 else preds　
    return convert_ret_to_signal(raw_pred, ret_signal_params)

In [None]:
# サーバー上でpredict(test_batch)を動かす
inference_server = kaggle_evaluation.default_inference_server.DefaultInferenceServer(predict)

if os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
    inference_server.serve()
else:
    inference_server.run_local_gateway(('/kaggle/input/hull-tactical-market-prediction/',))

GatewayRuntimeError: (<GatewayRuntimeErrorType.GATEWAY_RAISED_EXCEPTION: 5>, 'Traceback (most recent call last):\n  File "/home/masa1357/Dockerdata/kaggle/Kaggle_Hull-Tactical---Market-Prediction/kaggle_evaluation/core/base_gateway.py", line 134, in run\n    predictions, row_ids = self.get_all_predictions()\n  File "/home/masa1357/Dockerdata/kaggle/Kaggle_Hull-Tactical---Market-Prediction/kaggle_evaluation/core/base_gateway.py", line 109, in get_all_predictions\n    for data_batch, row_ids in self.generate_data_batches():\n  File "/home/masa1357/Dockerdata/kaggle/Kaggle_Hull-Tactical---Market-Prediction/kaggle_evaluation/default_gateway.py", line 29, in generate_data_batches\n    test = pl.read_csv(self.competition_data_dir / \'test.csv\')\n  File "/usr/local/lib/python3.10/dist-packages/polars/_utils/deprecation.py", line 128, in wrapper\n    return function(*args, **kwargs)\n  File "/usr/local/lib/python3.10/dist-packages/polars/_utils/deprecation.py", line 128, in wrapper\n    return function(*args, **kwargs)\n  File "/usr/local/lib/python3.10/dist-packages/polars/_utils/deprecation.py", line 128, in wrapper\n    return function(*args, **kwargs)\n  File "/usr/local/lib/python3.10/dist-packages/polars/io/csv/functions.py", line 549, in read_csv\n    df = _read_csv_impl(\n  File "/usr/local/lib/python3.10/dist-packages/polars/io/csv/functions.py", line 697, in _read_csv_impl\n    pydf = PyDataFrame.read_csv(\nFileNotFoundError: No such file or directory (os error 2): /kaggle/input/hull-tactical-market-prediction/test.csv\n')