In [1]:
# Cell 1: Imports and Setup for Live Backtest

import sys
from pathlib import Path
import os
import subprocess # To run the feature engineering script

# --- Add 'random/backtest' to sys.path for imports ---
# Assuming this notebook is in random/backtest/
current_notebook_dir = Path.cwd()
if str(current_notebook_dir) not in sys.path:
    sys.path.insert(0, str(current_notebook_dir))

# --- Also add project root if needed for other modules (e.g. if .env is there for fetch.py) ---
project_root = current_notebook_dir.parent.parent # Kalshi/ 
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))

import pandas as pd
import logging
import matplotlib.pyplot as plt

# Import from our live backtest suite
import live_backtest_config as live_config
import live_backtest_utils as live_utils
from live_backtest_engine import run_live_backtest

# --- Logging Setup for the Notebook ---
notebook_logger = logging.getLogger("live_backtest_notebook")
if not notebook_logger.handlers:
    notebook_logger.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(name)s - %(message)s')
    ch = logging.StreamHandler(sys.stdout) # Log to notebook output
    ch.setFormatter(formatter)
    notebook_logger.addHandler(ch)
else:
    # Clear existing handlers if re-running cell to avoid duplicate logs
    for handler in notebook_logger.handlers[:]:
        notebook_logger.removeHandler(handler)
    notebook_logger.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(name)s - %(message)s')
    ch = logging.StreamHandler(sys.stdout)
    ch.setFormatter(formatter)
    notebook_logger.addHandler(ch)

notebook_logger.info(f"Live Backtesting Notebook Setup Complete.")
notebook_logger.info(f"Live Kalshi Data Dir: {live_config.LIVE_KALSHI_DATA_DIR}")
notebook_logger.info(f"Live Binance Data Dir: {live_config.LIVE_BINANCE_DATA_DIR}")
notebook_logger.info(f"Live Features Output Dir: {live_config.LIVE_FEATURES_DIR}")
notebook_logger.info(f"Trade Logs Dir: {live_config.LOG_DIR}")
notebook_logger.info(f"Using Model from: {live_config.MODEL_PATH.parent}")

2025-05-23 02:06:41,328 - INFO - live_backtest_config - Live market outcomes expected from: /Users/omarabul-hassan/Desktop/projects/kalshi/random/backtest/live_sessions_market_outcomes.csv
2025-05-23 02:06:41,328 - INFO - live_backtest_config - Loaded 18 session-to-Binance file mappings.
2025-05-23 02:06:41,329 - INFO - live_backtest_config - Using NEW model from directory: /Users/omarabul-hassan/Desktop/projects/kalshi/notebooks/trained_models/logreg_per_minute_no_vol_oi
2025-05-23 02:06:41,329 - INFO - live_backtest_config - Model path: /Users/omarabul-hassan/Desktop/projects/kalshi/notebooks/trained_models/logreg_per_minute_no_vol_oi/logreg_per_minute_no_vol_oi_model.joblib
2025-05-23 02:06:41,329 - INFO - live_backtest_config - Scaler path: /Users/omarabul-hassan/Desktop/projects/kalshi/notebooks/trained_models/logreg_per_minute_no_vol_oi/logreg_per_minute_no_vol_oi_scaler.joblib
2025-05-23 02:06:41,329 - INFO - live_backtest_config - Feature names path: /Users/omarabul-hassan/Desk

2025-05-23 02:06:41,351 - INFO - live_backtest_notebook - Live Backtesting Notebook Setup Complete.


2025-05-23 02:06:41,351 - INFO - live_backtest_notebook - Live Backtesting Notebook Setup Complete.


2025-05-23 02:06:41,352 - INFO - live_backtest_notebook - Live Kalshi Data Dir: /Users/omarabul-hassan/Desktop/projects/kalshi/random/market_data_logs


2025-05-23 02:06:41,352 - INFO - live_backtest_notebook - Live Kalshi Data Dir: /Users/omarabul-hassan/Desktop/projects/kalshi/random/market_data_logs


2025-05-23 02:06:41,352 - INFO - live_backtest_notebook - Live Binance Data Dir: /Users/omarabul-hassan/Desktop/projects/kalshi/random/binance_market_data_logs


2025-05-23 02:06:41,352 - INFO - live_backtest_notebook - Live Binance Data Dir: /Users/omarabul-hassan/Desktop/projects/kalshi/random/binance_market_data_logs


2025-05-23 02:06:41,352 - INFO - live_backtest_notebook - Live Features Output Dir: /Users/omarabul-hassan/Desktop/projects/kalshi/random/backtest/features_live


2025-05-23 02:06:41,352 - INFO - live_backtest_notebook - Live Features Output Dir: /Users/omarabul-hassan/Desktop/projects/kalshi/random/backtest/features_live


2025-05-23 02:06:41,353 - INFO - live_backtest_notebook - Trade Logs Dir: /Users/omarabul-hassan/Desktop/projects/kalshi/random/backtest/logs


2025-05-23 02:06:41,353 - INFO - live_backtest_notebook - Trade Logs Dir: /Users/omarabul-hassan/Desktop/projects/kalshi/random/backtest/logs


2025-05-23 02:06:41,353 - INFO - live_backtest_notebook - Using Model from: /Users/omarabul-hassan/Desktop/projects/kalshi/notebooks/trained_models/logreg_per_minute_no_vol_oi


2025-05-23 02:06:41,353 - INFO - live_backtest_notebook - Using Model from: /Users/omarabul-hassan/Desktop/projects/kalshi/notebooks/trained_models/logreg_per_minute_no_vol_oi


In [None]:
# Cell 2: Run Live Feature Engineering Script
# This script (live_feature_engineering.py) processes your live data files
# and creates a consolidated feature CSV.

notebook_logger.info("Starting live feature engineering script...")
feature_engineering_script_path = Path.cwd() / "live_feature_engineering.py"

if not feature_engineering_script_path.exists():
    notebook_logger.critical(f"CRITICAL: live_feature_engineering.py not found at {feature_engineering_script_path}")
    raise FileNotFoundError(f"live_feature_engineering.py not found.")

try:
    # Ensure Python executable is correctly identified (e.g., from current environment)
    python_executable = sys.executable 
    result = subprocess.run(
        [python_executable, str(feature_engineering_script_path)], 
        capture_output=True, text=True, check=True, timeout=1800 # 30 min timeout, adjust as needed
    )
    notebook_logger.info("live_feature_engineering.py script completed successfully.")
    notebook_logger.info("Script STDOUT:")
    print(result.stdout)
    if result.stderr:
        notebook_logger.warning("Script STDERR:")
        print(result.stderr)
except subprocess.CalledProcessError as e:
    notebook_logger.error(f"live_feature_engineering.py script failed with exit code {e.returncode}.")
    notebook_logger.error("Script STDOUT:")
    print(e.stdout)
    notebook_logger.error("Script STDERR:")
    print(e.stderr)
    raise RuntimeError("Feature engineering script failed.") from e
except subprocess.TimeoutExpired as e:
    notebook_logger.error(f"live_feature_engineering.py script timed out.")
    notebook_logger.error("Script STDOUT (if any):")
    print(e.stdout.decode() if e.stdout else 'N/A')
    notebook_logger.error("Script STDERR (if any):")
    print(e.stderr.decode() if e.stderr else 'N/A')
    raise RuntimeError("Feature engineering script timed out.") from e
except Exception as e:
    notebook_logger.error(f"An unexpected error occurred while trying to run live_feature_engineering.py: {e}", exc_info=True)
    raise

In [2]:
# Cell 3: Load Model and Live Features Data

try:
    notebook_logger.info("Loading pre-trained model, scaler, and feature names...")
    model, scaler, model_feature_names = live_utils.load_model_and_dependencies()
    notebook_logger.info("Model components loaded successfully.")

    notebook_logger.info("Loading features engineered from LIVE data...")
    all_live_features_df = live_utils.load_live_features_for_backtest()
    notebook_logger.info(f"Live features data loaded successfully with {len(all_live_features_df)} decision points.")
    
    if all_live_features_df.empty:
        notebook_logger.critical("Live Features DataFrame is empty. Aborting backtest.")
        # Consider using 'raise SystemExit("Live Features DataFrame empty.")' to halt execution
    else:
        # Quick check of loaded live features
        print("Head of loaded live features DataFrame:")
        display(all_live_features_df.head())
        
        # Check for NaNs in the columns the model will use
        if model_feature_names: # Ensure model_feature_names is loaded
            nan_check_model_cols = all_live_features_df[model_feature_names].isnull().sum()
            if nan_check_model_cols.sum() > 0:
                notebook_logger.warning(f"NaNs found in model input columns of loaded live features data:\n{nan_check_model_cols[nan_check_model_cols > 0]}")
                notebook_logger.warning("The backtest engine might skip these rows or impute. Ensure feature engineering handles NaNs appropriately.")
            else:
                notebook_logger.info("No NaNs found in model input columns of loaded live features data.")
        else:
            notebook_logger.warning("model_feature_names not loaded, skipping NaN check for model columns.")


except FileNotFoundError:
    notebook_logger.critical("Essential file not found (model, scaler, features, or live data features). Aborting backtest.")
    # raise # Uncomment to stop execution if files are missing
except Exception as e:
    notebook_logger.critical(f"An unexpected error occurred during loading: {e}", exc_info=True)
    # raise

2025-05-23 02:07:15,031 - INFO - live_backtest_notebook - Loading pre-trained model, scaler, and feature names...


2025-05-23 02:07:15,031 - INFO - live_backtest_notebook - Loading pre-trained model, scaler, and feature names...
2025-05-23 02:07:15,634 - INFO - root - Successfully loaded model from /Users/omarabul-hassan/Desktop/projects/kalshi/notebooks/trained_models/logreg_per_minute_no_vol_oi/logreg_per_minute_no_vol_oi_model.joblib, scaler, and 19 feature names.


2025-05-23 02:07:15,634 - INFO - live_backtest_notebook - Model components loaded successfully.


2025-05-23 02:07:15,634 - INFO - live_backtest_notebook - Model components loaded successfully.


2025-05-23 02:07:15,635 - INFO - live_backtest_notebook - Loading features engineered from LIVE data...


2025-05-23 02:07:15,635 - INFO - live_backtest_notebook - Loading features engineered from LIVE data...
2025-05-23 02:07:15,635 - INFO - root - Loading LIVE features for backtest from: /Users/omarabul-hassan/Desktop/projects/kalshi/random/backtest/features_live/kalshi_live_decision_features_SESSION_25MAY2015_20250523_020710.csv
2025-05-23 02:07:15,699 - INFO - root - Loaded 38716 rows from live-engineered feature data.


2025-05-23 02:07:15,699 - INFO - live_backtest_notebook - Live features data loaded successfully with 38716 decision points.


2025-05-23 02:07:15,699 - INFO - live_backtest_notebook - Live features data loaded successfully with 38716 decision points.


Head of loaded live features DataFrame:


Unnamed: 0,market_ticker,decision_timestamp_s,resolution_time_ts,strike_price,target,time_to_resolution_minutes,current_btc_price,current_dist_strike_abs,current_dist_strike_pct,btc_price_change_pct_1m,...,btc_volatility_5m,btc_volatility_15m,btc_volatility_30m,current_kalshi_yes_bid,current_kalshi_yes_ask,current_kalshi_mid_price,current_kalshi_spread_abs,current_kalshi_spread_pct,current_kalshi_volume,current_kalshi_oi
0,KXBTCD-25MAY2015-T106249.99,1747764064,1747767600,106249.99,0,58.93,,,,,...,,,,0.44,0.5,0.47,0.06,0.12766,,
1,KXBTCD-25MAY2015-T106249.99,1747764064,1747767600,106249.99,0,58.93,,,,,...,,,,0.44,0.5,0.47,0.06,0.12766,,
2,KXBTCD-25MAY2015-T106249.99,1747764064,1747767600,106249.99,0,58.93,,,,,...,,,,0.44,0.5,0.47,0.06,0.12766,,
3,KXBTCD-25MAY2015-T106249.99,1747764064,1747767600,106249.99,0,58.93,,,,,...,,,,0.44,0.5,0.47,0.06,0.12766,,
4,KXBTCD-25MAY2015-T106249.99,1747764064,1747767600,106249.99,0,58.93,,,,,...,,,,0.44,0.5,0.47,0.06,0.12766,,


current_btc_price              980
current_dist_strike_abs        980
current_dist_strike_pct        980
btc_price_change_pct_1m       1845
btc_price_change_pct_3m       3627
btc_price_change_pct_5m       5778
btc_price_change_pct_10m     11004
btc_price_change_pct_15m     16186
btc_price_change_pct_30m     31338
btc_volatility_5m             1845
btc_volatility_15m            1845
btc_volatility_30m            1845
current_kalshi_yes_bid         922
current_kalshi_mid_price       922
current_kalshi_spread_abs      922
current_kalshi_spread_pct      922
dtype: int64


current_btc_price              980
current_dist_strike_abs        980
current_dist_strike_pct        980
btc_price_change_pct_1m       1845
btc_price_change_pct_3m       3627
btc_price_change_pct_5m       5778
btc_price_change_pct_10m     11004
btc_price_change_pct_15m     16186
btc_price_change_pct_30m     31338
btc_volatility_5m             1845
btc_volatility_15m            1845
btc_volatility_30m            1845
current_kalshi_yes_bid         922
current_kalshi_mid_price       922
current_kalshi_spread_abs      922
current_kalshi_spread_pct      922
dtype: int64






In [3]:
# Cell 4: Run the Live Backtest

total_pnl_cents = 0
total_trades = 0

if 'model' in locals() and 'scaler' in locals() and 'model_feature_names' in locals() and \
   'all_live_features_df' in locals() and not all_live_features_df.empty:
    notebook_logger.info("Proceeding to run the backtest engine with LIVE data features...")
    try:
        total_pnl_cents, total_trades = run_live_backtest(
            all_live_features_df, 
            model, 
            scaler, 
            model_feature_names
        )
        
        notebook_logger.info("--- Main Live Backtest Execution Finished ---")
        notebook_logger.info(f"Overall P&L from live backtest engine: {total_pnl_cents / 100.0 :.2f} USD")
        notebook_logger.info(f"Total trades made by engine: {total_trades}")
        notebook_logger.info(f"Daily trade logs for this run are in: {live_config.LOG_DIR}")

    except Exception as e:
        notebook_logger.error(f"An error occurred during run_live_backtest: {e}", exc_info=True)
else:
    notebook_logger.error("Model or live features data not loaded, or features DataFrame is empty. Cannot run live backtest.")

2025-05-23 02:07:27,216 - INFO - live_backtest_notebook - Proceeding to run the backtest engine with LIVE data features...


2025-05-23 02:07:27,216 - INFO - live_backtest_notebook - Proceeding to run the backtest engine with LIVE data features...
2025-05-23 02:07:27,218 - INFO - live_backtest_engine_logger - Starting LIVE backtest engine...
2025-05-23 02:07:27,225 - INFO - live_backtest_engine_logger - Processing 38716 decision points from live data.
2025-05-23 02:07:32,011 - INFO - live_backtest_engine_logger - 
--- DETAILED TRADE DEBUG (BUY_NO) ---
2025-05-23 02:07:32,011 - INFO - live_backtest_engine_logger - Market Ticker: KXBTCD-25MAY2015-T106249.99, Decision Timestamp: 1747765921 (2025-05-20T18:32:01+00:00)
2025-05-23 02:07:32,011 - INFO - live_backtest_engine_logger - Raw Features Used (19):
strike_price                   106249.99
time_to_resolution_minutes         27.98
current_btc_price              105772.73
current_dist_strike_abs          -477.26
current_dist_strike_pct        -0.004492
btc_price_change_pct_1m         0.000215
btc_price_change_pct_3m        -0.000221
btc_price_change_pct_5m    

2025-05-23 02:07:35,485 - INFO - live_backtest_notebook - --- Main Live Backtest Execution Finished ---


2025-05-23 02:07:35,485 - INFO - live_backtest_notebook - --- Main Live Backtest Execution Finished ---


2025-05-23 02:07:35,485 - INFO - live_backtest_notebook - Overall P&L from live backtest engine: 1373.97 USD


2025-05-23 02:07:35,485 - INFO - live_backtest_notebook - Overall P&L from live backtest engine: 1373.97 USD


2025-05-23 02:07:35,485 - INFO - live_backtest_notebook - Total trades made by engine: 6489


2025-05-23 02:07:35,485 - INFO - live_backtest_notebook - Total trades made by engine: 6489


2025-05-23 02:07:35,486 - INFO - live_backtest_notebook - Daily trade logs for this run are in: /Users/omarabul-hassan/Desktop/projects/kalshi/random/backtest/logs


2025-05-23 02:07:35,486 - INFO - live_backtest_notebook - Daily trade logs for this run are in: /Users/omarabul-hassan/Desktop/projects/kalshi/random/backtest/logs
