In [None]:
"""
Idea: Train neural net to buy/sell contracts during a game to capitalize on midgame volatility
Signals: Pregame price action, open price, open odds, open spread, current price, orderbook spread, live spread, live odds, game score, team, opponent
    Structure:
        For each market, take note of: price of contracts at market open,
        For each timestamp, gather: Lowest ask price (entry), Highest bid price (exit), orderbook spread, price and book spread for game point spread, prices of points o/u contracts
Output: Best time/price to buy/sell contracts to make money - reward consistently profitable trades over big swings (but maybe re-weight reward function later)
"""

In [5]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, classification_report

class ContractSwingModel:
    def __init__(self):
        # Using a Random Forest as a robust baseline
        self.model = RandomForestClassifier(
            n_estimators=100,
            max_depth=7,
            min_samples_leaf=5,
            class_weight='balanced',
            random_state=42
        )

    def feature_engineering(self, df):
        """
        Generates microstructure features using the specific OHLC columns provided.
        """
        data = df.copy()

        # 1. Price Definition (Using Close prices for indicators)
        # 'yes_ask_close' is what we pay to enter NOW.
        # 'yes_bid_close' is what we get if we sell NOW.
        data['mid_price'] = (data['yes_bid_close'] + data['yes_ask_close']) / 2

        # Use provided spread or calculate implied spread
        if 'close_spread' in data.columns:
            data['spread'] = data['close_spread']
        else:
            data['spread'] = data['yes_ask_close'] - data['yes_bid_close']

        # 2. Volatility (Standard deviation of mid_price)
        data['volatility_short'] = data['mid_price'].rolling(window=5).std()
        data['volatility_long'] = data['mid_price'].rolling(window=20).std()

        # Volatility Ratio: Detects breakouts (high short-term vol relative to baseline)
        data['vol_ratio'] = data['volatility_short'] / (data['volatility_long'] + 1e-9)

        # 3. Candle Range (Intra-candle volatility)
        # High-Low difference indicates 'fighting' in the order book
        data['candle_range'] = data['high'] - data['low']

        # 4. Momentum & Reversion
        # ROC: Speed of price change over 5 candles
        data['velocity_5'] = data['mid_price'].diff(5)

        # Distance from Moving Average (Mean Reversion signal)
        data['ma_20'] = data['mid_price'].rolling(window=20).mean()
        data['dist_from_ma'] = data['mid_price'] - data['ma_20']

        # 5. Volume Pressure
        # Volume * Direction. (If price up & vol high = bullish pressure)
        data['vol_pressure'] = data['volume'] * np.sign(data['velocity_5'])

        # Drop NaN values created by rolling windows
        return data.dropna()

    def label_data(self, df, lookforward=12, profit_target=8, stop_loss=5):
        """
        Labels data for training.
        Returns 1 if we hit profit_target BEFORE hitting stop_loss within 'lookforward' window.

        Parameters:
        - lookforward: How many candles into the future to check (e.g., 12 * 5min = 1 hour).
        - profit_target: Cents of profit (e.g., Buy at 20, sell at 28).
        - stop_loss: Cents of loss allowed.
        """
        data = df.reset_index(drop=True) # Ensure clean index for lookahead
        targets = []

        # Convert columns to numpy arrays for speed
        ask_entry = data['yes_ask_close'].values  # We buy at the ASK

        # Future lookup arrays
        bid_highs = data['yes_bid_high'].values   # We sell at the BID (Best case in candle)
        bid_lows = data['yes_bid_low'].values     # We sell at the BID (Worst case in candle)

        n = len(data)

        for i in range(n):
            # If we run out of data at the end, mark 0
            if i + lookforward >= n:
                targets.append(0)
                continue

            entry_price = ask_entry[i]

            # Get the window of future candles
            future_highs = bid_highs[i+1 : i+1+lookforward]
            future_lows = bid_lows[i+1 : i+1+lookforward]

            # Did we hit the profit target? (Did the BID go high enough?)
            # We assume if the High Bid > Target, we could have filled a limit order there.
            hit_profit_mask = future_highs >= (entry_price + profit_target)

            # Did we hit the stop loss? (Did the BID drop too low?)
            hit_stop_mask = future_lows <= (entry_price - stop_loss)

            # Find indices where events happened
            profit_indices = np.where(hit_profit_mask)[0]
            stop_indices = np.where(hit_stop_mask)[0]

            if len(profit_indices) > 0:
                first_profit = profit_indices[0]

                # Check if stop loss happened BEFORE profit
                if len(stop_indices) > 0:
                    first_stop = stop_indices[0]
                    if first_stop < first_profit:
                        targets.append(0) # Stopped out first
                    else:
                        targets.append(1) # Profited first
                else:
                    targets.append(1) # Profited, never stopped
            else:
                targets.append(0) # Never hit profit target

        data['target'] = targets
        return data

    def train(self, df):
        # 1. Feature Engineering
        print("Generatng features...")
        df_features = self.feature_engineering(df)

        # 2. Labeling
        # Adjust 'profit_target' based on your asset volatility (e.g., 8 cents for NBA)
        print("Labeling data...")
        df_labeled = self.label_data(df_features, profit_target=8, stop_loss=5)

        # 3. Selection
        feature_cols = [
            'spread', 'volatility_short', 'vol_ratio', 'candle_range',
            'velocity_5', 'dist_from_ma', 'vol_pressure', 'volume'
        ]

        X = df_labeled[feature_cols]
        y = df_labeled['target']

        # 4. Split & Train
        # Shuffle=False preserves time order (train on past, test on future)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

        print(f"Training on {len(X_train)} candles, Testing on {len(X_test)}...")
        self.model.fit(X_train, y_train)

        # 5. Evaluate
        preds = self.model.predict(X_test)

        # Filter for only where model said BUY (1)
        buy_signals = preds == 1
        actual_outcomes = y_test[buy_signals]

        if len(actual_outcomes) > 0:
            win_rate = actual_outcomes.mean()
            print("\n--- Results ---")
            print(f"Buy Signals Generated: {sum(buy_signals)}")
            print(f"Win Rate on Signals: {win_rate:.2%}")
        else:
            print("\nModel generated NO buy signals in test set. Try lowering profit_target.")

        # Feature Importance
        print("\nFeature Importance:")
        importances = pd.Series(self.model.feature_importances_, index=feature_cols)
        print(importances.sort_values(ascending=False).head(5))

        return self.model

model = ContractSwingModel()
trained_model = model.train(pd.read_csv("nba_candles.csv"))

Generatng features...
Labeling data...
Training on 562842 candles, Testing on 140711...

--- Results ---
Buy Signals Generated: 34453
Win Rate on Signals: 14.60%

Feature Importance:
volume              0.277615
volatility_short    0.205965
candle_range        0.187624
dist_from_ma        0.125349
vol_ratio           0.109094
dtype: float64


In [3]:
df = pd.read_csv('nba_candles.csv')
df.head()

Unnamed: 0,series_ticker,market_ticker,market_status,time_since_open_ts,open,high,low,close,volume,yes_bid_open,yes_bid_low,yes_bid_high,yes_bid_close,yes_ask_open,yes_ask_low,yes_ask_high,yes_ask_close,open_spread,close_spread
0,KXNBAGAME,KXNBAGAME-25DEC03MIADAL-MIA,finalized,32220,,,,,0,65,65,65,65,74,73,74,73,9,8
1,KXNBAGAME,KXNBAGAME-25DEC03MIADAL-MIA,finalized,32340,,,,,0,65,65,65,65,73,73,73,73,8,8
2,KXNBAGAME,KXNBAGAME-25DEC03MIADAL-MIA,finalized,32400,73.0,73.0,73.0,73.0,39,65,65,73,73,73,73,74,74,8,1
3,KXNBAGAME,KXNBAGAME-25DEC03MIADAL-MIA,finalized,32460,73.0,73.0,73.0,73.0,8,73,73,73,73,74,74,74,74,1,1
4,KXNBAGAME,KXNBAGAME-25DEC03MIADAL-MIA,finalized,32520,73.0,74.0,73.0,73.0,39,73,73,73,73,74,74,74,74,1,1


In [4]:
df.columns

Index(['series_ticker', 'market_ticker', 'market_status', 'time_since_open_ts',
       'open', 'high', 'low', 'close', 'volume', 'yes_bid_open', 'yes_bid_low',
       'yes_bid_high', 'yes_bid_close', 'yes_ask_open', 'yes_ask_low',
       'yes_ask_high', 'yes_ask_close', 'open_spread', 'close_spread'],
      dtype='object')