In [45]:
# 10-Year Treasury Yield Prediction Model
# Target: Friday, December 12, 2025

import pandas as pd
import numpy as np
import requests
import json
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error
import warnings
warnings.filterwarnings('ignore')

print("Libraries loaded successfully!")


Libraries loaded successfully!


# Step 1: Kalshi API Setup

First, we need to authenticate with the Kalshi API. You'll need:
- API Key ID
- API Secret Key

Get these from your Kalshi account settings.

**Important:** If you get a `TypeError` about `get_kalshi_headers()`, re-run the Kalshi API configuration cell directly below (the one that defines `get_kalshi_headers`) so the updated function definition is loaded into the kernel.


In [46]:
# Kalshi API Configuration
#
# Credentials are read from the environment so that they never live in the
# notebook, its saved outputs, or version control. Set these before starting
# the kernel:
#   KALSHI_API_KEY_ID  - the API key ID from the Kalshi account settings
#   KALSHI_API_SECRET  - the RSA private key (full PEM text, or just its
#                        base64 body without the BEGIN/END lines)
# SECURITY: any key pair that was ever pasted into this notebook must be
# treated as leaked and rotated in the Kalshi account settings.
import os

KALSHI_API_KEY_ID = os.environ.get("KALSHI_API_KEY_ID", "")
KALSHI_API_SECRET = os.environ.get("KALSHI_API_SECRET", "")

if not KALSHI_API_KEY_ID or not KALSHI_API_SECRET:
    print("Warning: KALSHI_API_KEY_ID / KALSHI_API_SECRET are not set in the environment")

# Try both base URLs - Kalshi may use different endpoints
KALSHI_BASE_URL = "https://api.elections.kalshi.com/trade-api/v2"
# Alternative: "https://trading-api.kalshi.com/v1" or "https://api.kalshi.com/trade-api/v2"

# Market ticker used for the Kalshi lookups in this notebook.
# NOTE(review): the notebook's stated target is Dec 12, 2025, but this ticker is
# the 25DEC31 contract (its market title reads "... below 3.93 on Dec 31, 2025").
# Confirm which contract is intended before relying on its prices.
TARGET_MARKET_TICKER = "KXTNOTED-25DEC31-T3.93"

# Base64 body of the key with ALL whitespace removed (spaces, tabs, CR/LF), so
# it can be re-wrapped in PEM markers when the secret was supplied without them.
KALSHI_API_SECRET_CLEANED = "".join(KALSHI_API_SECRET.split())

def get_kalshi_headers(method, path, body=""):
    """
    Build the headers Kalshi expects on an authenticated request.

    Kalshi uses three headers: KALSHI-ACCESS-KEY, KALSHI-ACCESS-TIMESTAMP and
    KALSHI-ACCESS-SIGNATURE. Per the Kalshi API documentation the signature is
    RSA-PSS / SHA-256 (salt length = digest length) over the concatenation
    ``timestamp + METHOD + path``, where ``path`` is the full request path
    (including the API prefix such as ``/trade-api/v2``) WITHOUT the query string.

    Args:
        method: HTTP verb, in any case (upper-cased before signing).
        path: Request path. May be relative to KALSHI_BASE_URL and may carry a
            query string; both are normalised before signing.
        body: Not part of the signed message; accepted only so existing
            callers that pass it keep working.

    Returns:
        dict: Signed headers. If the key cannot be parsed or signing fails, the
        error and traceback are printed and an unsigned fallback (access key and
        content type only) is returned instead of raising.
    """
    import base64
    import time
    import traceback
    from urllib.parse import urlparse

    from cryptography.hazmat.backends import default_backend
    from cryptography.hazmat.primitives import hashes, serialization
    from cryptography.hazmat.primitives.asymmetric import padding

    # Current timestamp in milliseconds
    timestamp = str(int(time.time() * 1000))

    # Sign the full path (API prefix included), never the query string.
    api_prefix = urlparse(KALSHI_BASE_URL).path.rstrip("/")
    signed_path = path.split("?", 1)[0]
    if api_prefix and not signed_path.startswith(api_prefix):
        signed_path = f"{api_prefix}{signed_path}"
    string_to_sign = f"{timestamp}{method.upper()}{signed_path}"

    def _wrap_pem(label):
        # Re-wrap the bare base64 body at 64 characters per line inside PEM markers.
        wrapped = "\n".join(
            KALSHI_API_SECRET_CLEANED[i:i + 64]
            for i in range(0, len(KALSHI_API_SECRET_CLEANED), 64)
        )
        return f"-----BEGIN {label}-----\n{wrapped}\n-----END {label}-----\n"

    try:
        # Candidate encodings of the secret, tried in order.
        candidates = []
        if "BEGIN" in KALSHI_API_SECRET:
            # Already PEM: only strip indentation that a pasted block may carry.
            dedented = "\n".join(
                line.strip() for line in KALSHI_API_SECRET.strip().splitlines()
            )
            candidates.append(("PEM as provided", dedented))
        candidates.append(("PKCS8", _wrap_pem("PRIVATE KEY")))
        candidates.append(("RSA", _wrap_pem("RSA PRIVATE KEY")))

        private_key = None
        parse_errors = []
        for label, pem_text in candidates:
            try:
                private_key = serialization.load_pem_private_key(
                    pem_text.encode(),
                    password=None,
                    backend=default_backend()
                )
                break
            except Exception as exc:
                # Remember why each format failed instead of silently discarding it.
                parse_errors.append(f"{label} error: {exc}")

        if private_key is None:
            raise ValueError("Failed to parse private key. " + ", ".join(parse_errors))

        # RSA-PSS with SHA-256; salt length equals the digest length (Kalshi docs).
        signature = private_key.sign(
            string_to_sign.encode("utf-8"),
            padding.PSS(
                mgf=padding.MGF1(hashes.SHA256()),
                salt_length=padding.PSS.DIGEST_LENGTH
            ),
            hashes.SHA256()
        )

        return {
            "KALSHI-ACCESS-KEY": KALSHI_API_KEY_ID,
            "KALSHI-ACCESS-TIMESTAMP": timestamp,
            "KALSHI-ACCESS-SIGNATURE": base64.b64encode(signature).decode("utf-8"),
            "Content-Type": "application/json"
        }
    except Exception as e:
        print(f"Error generating signature: {e}")
        traceback.print_exc()
        # Best-effort fallback: unsigned headers. Endpoints that require
        # authentication will reject these.
        return {
            "KALSHI-ACCESS-KEY": KALSHI_API_KEY_ID,
            "Content-Type": "application/json"
        }

print("Kalshi API configuration set up")


Kalshi API configuration set up


In [47]:
# Function to fetch market data from Kalshi
def get_kalshi_market_data(ticker, limit=1000):
    """
    Fetch data for one Kalshi market, trying historical trades first and the
    current orderbook second.

    Args:
        ticker: Market ticker, e.g. "KXTNOTED-25DEC31-T3.93".
        limit: Maximum number of trades requested from the trades endpoint.

    Returns:
        pandas.DataFrame: Rows from the first endpoint that answers with HTTP 200
        and a non-empty result. For "trades" this is the list under the
        "trades" key; for "orderbook" it is a single row holding the raw
        response. An empty DataFrame is returned when every endpoint fails.
    """
    import urllib.parse

    encoded_ticker = urllib.parse.quote(ticker, safe="")

    # (label, path, query parameters) for each endpoint, in priority order
    endpoints_to_try = [
        ("trades", "/markets/trades", {"ticker": ticker, "limit": limit}),
        ("orderbook", f"/markets/{encoded_ticker}/orderbook", {}),
    ]

    for endpoint_name, path, params in endpoints_to_try:
        query_string = urllib.parse.urlencode(params)
        path_with_query = f"{path}?{query_string}" if query_string else path

        url = f"{KALSHI_BASE_URL}{path_with_query}"
        headers = get_kalshi_headers("GET", path_with_query)

        print(f"Trying {endpoint_name} endpoint for market: {ticker}")
        print("URL:", url)

        try:
            # Without a timeout a stalled connection would hang the cell forever.
            resp = requests.get(url, headers=headers, timeout=30)
        except requests.RequestException as e:
            print(f"{endpoint_name} request failed:", e)
            continue

        if resp.status_code != 200:
            print(f"✗ {endpoint_name} endpoint returned {resp.status_code}")
            continue

        try:
            data = resp.json()
        except ValueError as e:
            # A 200 with a non-JSON body should fall through to the next endpoint.
            print(f"{endpoint_name} request failed:", e)
            continue

        if endpoint_name == "trades":
            records = data.get("trades", [])
        else:  # orderbook: keep the raw payload as a single row
            records = [data]

        df = pd.DataFrame(records)
        if not df.empty:
            return df  # success, stop trying others

    # If all endpoints fail:
    return pd.DataFrame()


# Function to get market orderbook (for probability distribution)
def get_kalshi_orderbook(ticker):
    """
    Fetch the current orderbook for one Kalshi market.

    Args:
        ticker: Market ticker; it is URL-encoded before being placed in the path.

    Returns:
        The decoded JSON response on HTTP 200, otherwise None. Failures are
        printed (status and response text, or the exception with traceback)
        rather than raised.
    """
    import traceback
    import urllib.parse

    encoded_ticker = urllib.parse.quote(ticker, safe='')
    path = f"/markets/{encoded_ticker}/orderbook"
    url = f"{KALSHI_BASE_URL}{path}"

    headers = get_kalshi_headers("GET", path)

    try:
        print(f"Fetching orderbook for market: {ticker}")
        print(f"URL: {url}")
        # Bounded wait so a stalled connection cannot hang the notebook.
        response = requests.get(url, headers=headers, timeout=30)

        if response.status_code != 200:
            print(f"Error fetching orderbook: {response.status_code}")
            print(f"Response: {response.text}")
            return None

        data = response.json()
        print("Successfully fetched orderbook")
        return data
    except Exception as e:
        print(f"Exception fetching orderbook: {e}")
        traceback.print_exc()
        return None

# Function to search for markets (to find the correct ticker)
def search_kalshi_markets(query=None, limit=100):
    """
    List Kalshi markets, optionally filtered by a keyword.

    Args:
        query: Optional search text, sent as the ``keyword`` query parameter.
            NOTE(review): confirm that the /markets endpoint honours a
            ``keyword`` filter; nothing visible here verifies it.
        limit: Maximum number of markets requested.

    Returns:
        pandas.DataFrame: One row per entry under the response's "markets" key;
        an empty DataFrame on a non-200 status or on any exception (both are
        printed, not raised).
    """
    import traceback
    import urllib.parse

    path = "/markets"
    params = {
        "limit": limit
    }
    if query:
        params["keyword"] = query

    query_string = urllib.parse.urlencode(params)
    path_with_query = f"{path}?{query_string}" if query_string else path

    url = f"{KALSHI_BASE_URL}{path_with_query}"
    headers = get_kalshi_headers("GET", path_with_query)

    try:
        # The query string is already part of `url`; also passing params=params
        # would append every parameter a second time.
        response = requests.get(url, headers=headers, timeout=30)
        if response.status_code != 200:
            print(f"Error searching markets: {response.status_code}")
            print(f"Response: {response.text}")
            return pd.DataFrame()

        markets = response.json().get('markets', [])
        return pd.DataFrame(markets)
    except Exception as e:
        print(f"Exception searching markets: {e}")
        traceback.print_exc()
        return pd.DataFrame()

# Function to get a specific market by ticker
def get_kalshi_market(ticker):
    """
    Fetch the metadata of a single Kalshi market.

    Args:
        ticker: Market ticker; it is URL-encoded before being placed in the path.

    Returns:
        The value under the response's "market" key when present, otherwise the
        whole decoded response. None on a non-200 status or on any exception
        (both are printed, not raised).
    """
    import traceback
    import urllib.parse

    encoded_ticker = urllib.parse.quote(ticker, safe='')
    path = f"/markets/{encoded_ticker}"
    url = f"{KALSHI_BASE_URL}{path}"

    headers = get_kalshi_headers("GET", path)

    try:
        print(f"Fetching market info for: {ticker}")
        # Bounded wait so a stalled connection cannot hang the notebook.
        response = requests.get(url, headers=headers, timeout=30)

        if response.status_code != 200:
            print(f"Error fetching market: {response.status_code}")
            print(f"Response: {response.text}")
            return None

        data = response.json()
        print("Successfully fetched market info")
        return data.get('market', data)
    except Exception as e:
        print(f"Exception fetching market: {e}")
        traceback.print_exc()
        return None

print("Kalshi data fetching functions defined")


Kalshi data fetching functions defined


# Step 2: Fetch External Data from FRED

FRED (Federal Reserve Economic Data) is free and has excellent Treasury yield data.
You'll need a free API key from: https://fred.stlouisfed.org/docs/api/api_key.html


In [48]:
# FRED API Configuration
# The key is read from the FRED_API_KEY environment variable so it is never
# stored in the notebook. Get a free key from
# https://fred.stlouisfed.org/docs/api/api_key.html
# SECURITY: a key that was ever committed in this notebook should be regenerated.
import os

FRED_API_KEY = os.environ.get("FRED_API_KEY", "")
if not FRED_API_KEY:
    print("Warning: FRED_API_KEY is not set in the environment")

FRED_BASE_URL = "https://api.stlouisfed.org/fred"

def get_fred_data(series_id, start_date=None, end_date=None):
    """
    Fetch one series from the FRED ``series/observations`` endpoint.

    Key series IDs for Treasury yields:
    - DGS10: 10-Year Treasury Constant Maturity Rate
    - DGS2: 2-Year Treasury Constant Maturity Rate
    - DGS30: 30-Year Treasury Constant Maturity Rate
    - DFF: Federal Funds Rate
    - CPIAUCSL: Consumer Price Index
    - UNRATE: Unemployment Rate

    Args:
        series_id: FRED series identifier; also the name of the value column.
        start_date: Optional ``observation_start`` (e.g. "2020-01-01").
        end_date: Optional ``observation_end``.

    Returns:
        pandas.DataFrame: Columns ``date`` (datetime) and ``<series_id>``
        (numeric), sorted ascending by the API. Observations whose value is not
        numeric are dropped. On any failure, or when the series has no
        observations, an EMPTY frame with those same two columns is returned
        and the problem is printed.
    """
    url = f"{FRED_BASE_URL}/series/observations"
    params = {
        "series_id": series_id,
        "api_key": FRED_API_KEY,
        "file_type": "json",
        "sort_order": "asc"
    }

    if start_date:
        params["observation_start"] = start_date
    if end_date:
        params["observation_end"] = end_date

    # Typed empty result so callers can always rely on both columns existing.
    empty = pd.DataFrame({
        'date': pd.Series(dtype='datetime64[ns]'),
        series_id: pd.Series(dtype='float64'),
    })

    try:
        # Bounded wait so a stalled connection cannot hang the notebook.
        response = requests.get(url, params=params, timeout=30)
        if response.status_code != 200:
            print(f"Error fetching FRED data: {response.status_code}")
            return empty

        observations = response.json().get('observations', [])
        df = pd.DataFrame(observations)
        if df.empty:
            # Nothing to select from: selecting ['date', series_id] would raise.
            return empty

        df['date'] = pd.to_datetime(df['date'])
        df['value'] = pd.to_numeric(df['value'], errors='coerce')
        df = df.dropna(subset=['value'])
        df = df.rename(columns={'value': series_id})
        return df[['date', series_id]]
    except Exception as e:
        # Request errors can embed the full URL, which contains the API key.
        message = str(e)
        if FRED_API_KEY:
            message = message.replace(FRED_API_KEY, "***")
        print(f"Exception fetching FRED data: {message}")
        return empty

print("FRED data fetching functions defined")


FRED data fetching functions defined


# Step 3: Data Collection

Let's fetch the historical data we need for modeling.


In [49]:
# Pull every FRED series the model uses, all starting from the same date.
FRED_START_DATE = "2020-01-01"

# Primary external dataset: the 10-year Treasury yield itself
print("Fetching 10-Year Treasury Yield data from FRED...")
df_10y = get_fred_data("DGS10", start_date=FRED_START_DATE)
print(f"Fetched {len(df_10y)} observations")

# Supporting indicators, fetched in this order:
#   DGS2 (2-year yield), DGS30 (30-year yield), DFF (Fed Funds), CPIAUCSL (CPI)
print("\nFetching additional economic indicators...")
df_2y, df_30y, df_ffr, df_cpi = (
    get_fred_data(series_name, start_date=FRED_START_DATE)
    for series_name in ("DGS2", "DGS30", "DFF", "CPIAUCSL")
)

print("Data fetching complete!")


Fetching 10-Year Treasury Yield data from FRED...
Fetched 1481 observations

Fetching additional economic indicators...
Data fetching complete!


In [50]:
# Merge all FRED data on date
print("Merging external datasets...")
df_external = df_10y.copy()
for extra in [df_2y, df_30y, df_ffr, df_cpi]:
    if not extra.empty:
        df_external = pd.merge(df_external, extra, on='date', how='outer')

df_external = df_external.sort_values('date').reset_index(drop=True)

# Calculate additional features
if 'DGS2' in df_external.columns and 'DGS10' in df_external.columns:
    df_external['yield_spread_10y_2y'] = df_external['DGS10'] - df_external['DGS2']
if 'DGS30' in df_external.columns and 'DGS10' in df_external.columns:
    df_external['yield_spread_30y_10y'] = df_external['DGS30'] - df_external['DGS10']
if 'CPIAUCSL' in df_external.columns:
    # CPIAUCSL is monthly while the merged frame is daily. Year-over-year must
    # look back 12 CPI observations, not 12 rows of the daily frame (which
    # would compare values only 12 calendar days apart).
    cpi_obs = df_external['CPIAUCSL'].dropna()
    cpi_yoy = cpi_obs.pct_change(12) * 100
    # Carry each monthly reading forward onto the following daily rows.
    # NOTE(review): readings are dated on the reference month, not the release
    # date, so this still assumes CPI is known earlier than it is published.
    df_external['cpi_yoy'] = cpi_yoy.reindex(df_external.index).ffill()

print(f"External dataset shape: {df_external.shape}")
print(f"Date range: {df_external['date'].min()} to {df_external['date'].max()}")
print("\nFirst few rows:")
print(df_external.head())


Merging external datasets...
External dataset shape: (2164, 9)
Date range: 2020-01-01 00:00:00 to 2025-12-03 00:00:00

First few rows:
        date  DGS10  DGS2  DGS30   DFF  CPIAUCSL  yield_spread_10y_2y  \
0 2020-01-01    NaN   NaN    NaN  1.55   259.127                  NaN   
1 2020-01-02   1.88  1.58   2.33  1.55       NaN                 0.30   
2 2020-01-03   1.80  1.53   2.26  1.55       NaN                 0.27   
3 2020-01-04    NaN   NaN    NaN  1.55       NaN                  NaN   
4 2020-01-05    NaN   NaN    NaN  1.55       NaN                  NaN   

   yield_spread_30y_10y  cpi_yoy  
0                   NaN      NaN  
1                  0.45      NaN  
2                  0.46      NaN  
3                   NaN      NaN  
4                   NaN      NaN  


# Step 3b: Test Kalshi API Connection and Find Market

Let's test the API connection and verify the market ticker exists.


In [51]:
# Smoke-test the Kalshi connection by looking up the configured market.
print("Testing Kalshi API connection...\n")

market_info = get_kalshi_market(TARGET_MARKET_TICKER)

if not market_info:
    # Lookup failed: fall back to a keyword search to help find the right ticker
    print(f"✗ Market '{TARGET_MARKET_TICKER}' not found or error accessing it")
    print("\nTrying to search for Treasury-related markets...")
    treasury_markets = search_kalshi_markets(query="treasury", limit=20)
    if treasury_markets.empty:
        print("No Treasury markets found. Check your API credentials and base URL.")
    else:
        print(f"\nFound {len(treasury_markets)} Treasury-related markets:")
        print(treasury_markets[['ticker', 'title', 'status']].head(10))
else:
    # Lookup succeeded: echo the fields that identify the market
    market_title = market_info.get('title', 'N/A')
    market_ticker = market_info.get('ticker', TARGET_MARKET_TICKER)
    market_status = market_info.get('status', 'N/A')
    print(f"✓ Market found: {market_title}")
    print(f"  Ticker: {market_ticker}")
    print(f"  Status: {market_status}")


Testing Kalshi API connection...

Fetching market info for: KXTNOTED-25DEC31-T3.93
Successfully fetched market info
✓ Market found: Will the yield of 10-year U.S. treasury notes be below 3.93 on Dec 31, 2025?
  Ticker: KXTNOTED-25DEC31-T3.93
  Status: active


In [52]:
# Pull Kalshi data for the configured market. get_kalshi_market_data tries the
# trades endpoint first and falls back to the orderbook, so the row count below
# is a trade count only when the trades endpoint answered.
print("Attempting to fetch Kalshi market data...")
kalshi_data = get_kalshi_market_data(TARGET_MARKET_TICKER)
kalshi_row_count = len(kalshi_data)
print(f"Fetched {kalshi_row_count} trades from Kalshi")

# The model below is built on FRED data; the Kalshi-based probability
# distribution is simulated in a later cell.
print("Using FRED data as primary dataset for now")


Attempting to fetch Kalshi market data...
Trying trades endpoint for market: KXTNOTED-25DEC31-T3.93
URL: https://api.elections.kalshi.com/trade-api/v2/markets/trades?ticker=KXTNOTED-25DEC31-T3.93&limit=1000
Fetched 267 trades from Kalshi
Using FRED data as primary dataset for now


# Step 4: Feature Engineering

Create features for the predictive model.


In [None]:
# Prepare features for modeling
def create_features(df):
    """
    Build calendar, lag, rolling and momentum features for the DGS10 target.

    Rows without a DGS10 observation (weekends/holidays introduced by merging
    daily-calendar series) are dropped first, so every lag and window below is
    counted in trading days rather than calendar rows. Rolling statistics and
    momentum are computed from the series shifted by one row, so no feature at
    row t contains the target value of row t.

    Args:
        df: DataFrame with a datetime ``date`` column and a ``DGS10`` column.

    Returns:
        pandas.DataFrame: A new frame (the input is not modified), sorted by
        date with a fresh 0..n-1 index, containing only rows where DGS10 is
        present, plus the feature columns. If ``DGS10`` is missing a warning is
        printed and an unmodified copy is returned.
    """
    df = df.copy()

    # Ensure we have the target variable
    if 'DGS10' not in df.columns:
        print("Warning: DGS10 not found in dataframe")
        return df

    # Keep observed-target rows only; otherwise shift()/rolling() would step
    # over NaN rows and most windows would evaluate to NaN.
    df = df.dropna(subset=['DGS10']).sort_values('date').reset_index(drop=True)

    # Time features
    df['year'] = df['date'].dt.year
    df['month'] = df['date'].dt.month
    df['day_of_week'] = df['date'].dt.dayofweek
    df['day_of_year'] = df['date'].dt.dayofyear

    # Lagged features (previous trading-day values)
    for lag in [1, 5, 10, 20, 30]:
        df[f'DGS10_lag_{lag}'] = df['DGS10'].shift(lag)

    # Everything below is derived from yesterday's series, so the current-day
    # target never enters its own features.
    prior = df['DGS10'].shift(1)

    # Rolling statistics
    for window in [5, 10, 20, 30, 60]:
        rolling = prior.rolling(window=window)
        df[f'DGS10_ma_{window}'] = rolling.mean()
        df[f'DGS10_std_{window}'] = rolling.std()

    # Momentum features
    df['DGS10_change_1d'] = prior.diff(1)
    df['DGS10_change_5d'] = prior.diff(5)
    df['DGS10_change_20d'] = prior.diff(20)

    # Volatility (same definition as DGS10_std_20; kept for existing consumers)
    df['DGS10_volatility'] = prior.rolling(window=20).std()

    return df

# Build the feature table from the merged external data
df_features = create_features(df_external)
feature_count = df_features.shape[1] - 2  # every column except 'date' and 'DGS10'
print(f"Feature dataset shape: {df_features.shape}")
print(f"Number of features: {feature_count}")


# Step 5: Build Predictive Model

We'll use a Random Forest model for its balance of accuracy and interpretability.


In [None]:
# Prepare data for modeling
# Keep only rows that have a target value
df_model = df_features.dropna(subset=['DGS10']).copy()

# Select feature columns (exclude date and target) that are at least half populated
feature_cols = [col for col in df_model.columns
                if col not in ['date', 'DGS10'] and df_model[col].notna().sum() > len(df_model) * 0.5]

# Same-day market inputs must not be used as-is: e.g. DGS2 together with
# yield_spread_10y_2y (= DGS10 - DGS2) reproduces the target exactly. Use the
# previous row's value instead. Calendar columns are known in advance, and
# DGS10_* columns are left exactly as create_features built them.
CALENDAR_COLS = ['year', 'month', 'day_of_week', 'day_of_year']
same_day_cols = [col for col in feature_cols
                 if col not in CALENDAR_COLS and not col.startswith('DGS10_')]
if same_day_cols:
    df_model[same_day_cols] = df_model[same_day_cols].ffill().shift(1)

# Fill gaps with the last known value only. Back-filling would copy future
# observations into earlier rows, so leading gaps are dropped instead.
df_model[feature_cols] = df_model[feature_cols].ffill()

# Final cleanup - remove rows that still have missing features or target
df_model = df_model.dropna(subset=feature_cols + ['DGS10'])

print(f"Final dataset shape: {df_model.shape}")
print(f"Using {len(feature_cols)} features")
print(f"\nFeature list:")
for i, col in enumerate(feature_cols, 1):
    print(f"{i}. {col}")


In [None]:
# Chronological split: the earliest 80% of rows train the model, the rest test it
TRAIN_FRACTION = 0.8
split_idx = int(len(df_model) * TRAIN_FRACTION)
df_train, df_test = df_model.iloc[:split_idx].copy(), df_model.iloc[split_idx:].copy()

X_train, y_train = df_train[feature_cols], df_train['DGS10']
X_test, y_test = df_test[feature_cols], df_test['DGS10']

for split_label, split_X in (("Training set", X_train), ("Test set", X_test)):
    print(f"{split_label}: {len(split_X)} samples")
for split_label, split_df in (("Training", df_train), ("Test", df_test)):
    print(f"{split_label} date range: {split_df['date'].min()} to {split_df['date'].max()}")


In [None]:
# Fit the Random Forest and report in-sample / out-of-sample error
print("Training Random Forest model...")
rf_params = dict(
    n_estimators=100,
    max_depth=10,
    min_samples_split=5,
    min_samples_leaf=2,
    random_state=42,  # fixed seed so reruns give the same forest
    n_jobs=-1,
)
model = RandomForestRegressor(**rf_params)
model.fit(X_train, y_train)

# Predictions on both splits
y_train_pred = model.predict(X_train)
y_test_pred = model.predict(X_test)

# Error metrics (RMSE is the square root of the mean squared error)
train_mae = mean_absolute_error(y_train, y_train_pred)
test_mae = mean_absolute_error(y_test, y_test_pred)
train_rmse = np.sqrt(mean_squared_error(y_train, y_train_pred))
test_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))

print(f"\nModel Performance:")
print(f"Training MAE: {train_mae:.4f}%")
print(f"Test MAE: {test_mae:.4f}%")
print(f"Training RMSE: {train_rmse:.4f}%")
print(f"Test RMSE: {test_rmse:.4f}%")

# Feature importance, most important first
importance_table = pd.DataFrame({
    'feature': feature_cols,
    'importance': model.feature_importances_
})
feature_importance = importance_table.sort_values('importance', ascending=False)

print(f"\nTop 10 Most Important Features:")
print(feature_importance.head(10))


# Step 6: Make Prediction for December 12, 2025

Now we'll use the model to predict the 10-year Treasury yield on the target date.


In [None]:
# Create prediction for December 12, 2025
target_date = datetime(2025, 12, 12)

# Get the most recent data point to use as base
latest_data = df_model.iloc[-1].copy()

# Take the last row as a one-row DataFrame. Unlike pulling it out as a Series,
# this keeps each column's numeric dtype and the exact column order the model
# was trained on.
X_pred = df_model[feature_cols].iloc[[-1]].copy()

# Update time features for the target date, but only those the model actually
# uses; adding a column that is not in feature_cols would break predict().
target_time_features = {
    'year': target_date.year,
    'month': target_date.month,
    'day_of_week': target_date.weekday(),
    'day_of_year': target_date.timetuple().tm_yday,
}
for col, value in target_time_features.items():
    if col in X_pred.columns:
        X_pred[col] = value

# Kept as a Series for any later cell that inspects the prediction inputs
pred_row = X_pred.iloc[0]

# NOTE(review): the model is trained with features and target taken from the
# same row, so reusing the latest feature values with a future calendar date is
# closer to carrying the latest level forward than to a true multi-day forecast.
predicted_yield = model.predict(X_pred)[0]

print(f"Predicted 10-Year Treasury Yield on {target_date.strftime('%B %d, %Y')}: {predicted_yield:.4f}%")
print(f"\nNote: This is a point estimate. We'll create a probability distribution next.")


# Step 7: Extract Probability Distribution from Kalshi Ladder Prices

The requirement is to produce a probability distribution based on Kalshi ladder prices.
This function will extract probabilities from the orderbook.


In [None]:
def _best_kalshi_bid_cents(levels):
    """Return the highest price among ``[price, quantity]`` levels, or None if there are none."""
    prices = [level[0] for level in (levels or []) if level]
    return max(prices) if prices else None


def extract_probability_distribution_from_kalshi(orderbook_data):
    """
    Turn a Kalshi orderbook payload into implied probabilities.

    Three input shapes are recognised, checked in this order:

    1. ``{"ladder": [{"strike"/"outcome": ..., "yes_bid"/"yes_price": ...}, ...]}``
       - one row per rung, probability = the rung's yes price as given.
    2. ``{"yes_bids": [{"price": ...}], "yes_asks": [{"price": ...}]}``
       - one row, probability = mid of the first bid and first ask as given.
    3. ``{"orderbook": {"yes": [[price, qty], ...], "no": [[price, qty], ...]}}``
       - one row for a single binary market. Both sides list bids; the implied
       yes ask is ``100 - best no bid``. The probability is the yes mid divided
       by 100 (or the one available quote when only one side has bids).
       NOTE(review): this assumes prices are quoted in cents (0-100), as in
       Kalshi's orderbook endpoint documentation; confirm against a live
       response.

    Args:
        orderbook_data: Decoded orderbook JSON, or None.

    Returns:
        pandas.DataFrame with columns ``yield_level`` and ``probability``
        (empty when no recognised structure or no quotes are present), or None
        when ``orderbook_data`` is None or extraction raises.
    """
    if orderbook_data is None:
        print("No orderbook data available")
        return None

    probabilities = []

    try:
        # If orderbook has ladder structure
        if 'ladder' in orderbook_data:
            for rung in orderbook_data['ladder']:
                strike = rung.get('strike', rung.get('outcome', None))
                yes_price = rung.get('yes_bid', rung.get('yes_price', None))
                if yes_price is not None:
                    probabilities.append({
                        'yield_level': strike,
                        'probability': yes_price
                    })

        # Alternative: if orderbook has separate yes/no markets
        elif 'yes_bids' in orderbook_data or 'yes_asks' in orderbook_data:
            # Calculate mid-price as probability estimate
            yes_bid = orderbook_data.get('yes_bids', [{}])[0].get('price', 0) if orderbook_data.get('yes_bids') else 0
            yes_ask = orderbook_data.get('yes_asks', [{}])[0].get('price', 1) if orderbook_data.get('yes_asks') else 1
            prob = (yes_bid + yes_ask) / 2
            probabilities.append({
                'yield_level': 'target',
                'probability': prob
            })

        # Payload shape of the /markets/{ticker}/orderbook endpoint
        elif isinstance(orderbook_data.get('orderbook'), dict):
            book = orderbook_data['orderbook']
            yes_bid = _best_kalshi_bid_cents(book.get('yes'))
            no_bid = _best_kalshi_bid_cents(book.get('no'))
            # A bid to buy NO at p is an offer to sell YES at 100 - p
            yes_ask = None if no_bid is None else 100 - no_bid
            quotes = [q for q in (yes_bid, yes_ask) if q is not None]
            if quotes:
                probabilities.append({
                    'yield_level': 'target',
                    'probability': sum(quotes) / len(quotes) / 100
                })

        return pd.DataFrame(probabilities)

    except Exception as e:
        print(f"Error extracting probabilities: {e}")
        return None

# If you have Kalshi orderbook data, uncomment:
# orderbook = get_kalshi_orderbook(TARGET_MARKET_TICKER)
# prob_dist = extract_probability_distribution_from_kalshi(orderbook)
# print(prob_dist)

print("Probability distribution extraction function defined")


In [None]:
# Create a simulated probability distribution based on model uncertainty
# In practice, you'll replace this with actual Kalshi ladder prices

def create_probability_distribution_from_model(predicted_yield, uncertainty=0.5):
    """
    Build a discrete, bell-shaped probability table centred on a point forecast.

    This is a stand-in for market-implied probabilities; in production the
    distribution should come from Kalshi ladder prices.

    Args:
        predicted_yield: Centre of the distribution.
        uncertainty: Scale (standard-deviation-like width) of the bell curve.

    Returns:
        pandas.DataFrame with columns ``yield_level`` (a grid in steps of 0.1
        from ``predicted_yield - 2`` up to, but excluding, ``predicted_yield + 2``)
        and ``probability`` (Gaussian weights rescaled to sum to 1).
    """
    grid = np.arange(predicted_yield - 2, predicted_yield + 2, 0.1)

    # Unnormalised Gaussian weight of each grid point
    z_scores = (grid - predicted_yield) / uncertainty
    weights = np.exp(-0.5 * z_scores ** 2)

    return pd.DataFrame({
        'yield_level': grid,
        'probability': weights / weights.sum()
    })

# Simulated distribution around the model's point forecast
prob_dist = create_probability_distribution_from_model(predicted_yield, uncertainty=0.3)

print("Probability Distribution (simulated - replace with Kalshi data):")
print(prob_dist.head(10))

# Probability-weighted mean, and spread measured around the point forecast
dist_levels = prob_dist['yield_level']
dist_weights = prob_dist['probability']
dist_expected = (dist_levels * dist_weights).sum()
dist_std = np.sqrt(((dist_levels - predicted_yield)**2 * dist_weights).sum())
print(f"\nExpected value: {dist_expected:.4f}%")
print(f"Standard deviation: {dist_std:.4f}%")


In [None]:
# Visualize the probability distribution
plt.figure(figsize=(12, 6))
plt.plot(prob_dist['yield_level'], prob_dist['probability'], linewidth=2, label='Probability Distribution')
plt.axvline(predicted_yield, color='r', linestyle='--', linewidth=2, label=f'Predicted: {predicted_yield:.2f}%')
plt.xlabel('10-Year Treasury Yield (%)', fontsize=12)
plt.ylabel('Probability', fontsize=12)
plt.title('Probability Distribution for 10-Year Treasury Yield on Dec 12, 2025', fontsize=14)
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()


# Step 8: Model Evaluation and Visualization


In [None]:
# Two stacked panels: test-set time series on top, actual-vs-predicted scatter below
fig, axes = plt.subplots(2, 1, figsize=(14, 10))
ax1, ax2 = axes

# Panel 1: actual and predicted yields over the test period
for series_values, series_label in ((y_test, 'Actual'), (y_test_pred, 'Predicted')):
    ax1.plot(df_test['date'], series_values, label=series_label, linewidth=2, alpha=0.7)
ax1.set_xlabel('Date', fontsize=12)
ax1.set_ylabel('10-Year Treasury Yield (%)', fontsize=12)
ax1.set_title('Model Performance: Actual vs Predicted (Test Set)', fontsize=14)
ax1.legend()
ax1.grid(True, alpha=0.3)

# Panel 2: scatter against the y = x reference line (a perfect model sits on it)
diagonal = [y_test.min(), y_test.max()]
ax2.scatter(y_test, y_test_pred, alpha=0.5)
ax2.plot(diagonal, diagonal, 'r--', linewidth=2)
ax2.set_xlabel('Actual Yield (%)', fontsize=12)
ax2.set_ylabel('Predicted Yield (%)', fontsize=12)
ax2.set_title('Prediction Accuracy', fontsize=14)
ax2.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()


# Step 9: Final Summary

## Key Results:
1. **Model Type**: Random Forest Regressor
2. **Primary External Data**: FRED (10-year, 2-year, 30-year yields, Fed Funds Rate, CPI)
3. **Predicted Yield**: [See output above]
4. **Probability Distribution**: [See visualization above]

## Next Steps:
1. **Get Kalshi API credentials** and update the authentication
2. **Find the exact market ticker** for "Treasury 10-Year Yield on Friday, December 12, 2025"
3. **Fetch actual Kalshi market data** and replace simulated probability distribution
4. **Refine model** with additional features or different algorithms if needed
5. **Write 1-page methodology report**

## To Improve Accuracy:
- Add more external datasets (CME futures, credit spreads, etc.)
- Include macroeconomic forecasts
- Use ensemble methods
- Incorporate Kalshi market prices as features


In [None]:
# Collect the headline numbers for the report
results = dict(
    target_date='2025-12-12',
    predicted_yield=float(predicted_yield),
    model_type='RandomForestRegressor',
    test_mae=float(test_mae),
    test_rmse=float(test_rmse),
    top_features=feature_importance.head(10).to_dict('records'),
    probability_distribution=prob_dist.to_dict('records'),
)

# Persist as JSON in the working directory for easy access
with open('model_results.json', 'w') as results_file:
    results_file.write(json.dumps(results, indent=2))

print("Results saved to model_results.json")
print("\n=== FINAL PREDICTION ===")
print(f"10-Year Treasury Yield on December 12, 2025: {predicted_yield:.4f}%")
print(f"Model Test MAE: {test_mae:.4f}%")
print(f"Model Test RMSE: {test_rmse:.4f}%")
