In [1]:
import joblib
import pandas as pd
import numpy as np

### --- Step A: Load Both Models (FIXED) ---
print("Loading models...")
# FIX 1: Load the correct HMM model file
hmm_model = joblib.load('hmm_regime_classifier_v1.joblib')

# Load the "Forecaster"
xgb_vol_model = joblib.load('volatility_xgb_no_persistence_v1.joblib')

print("‚úÖ Models loaded successfully.")

### --- Step B: Create Sample New Data (FIXED) ---
# FIX 2: Added all missing columns that the xgb_vol_model 
# pipeline needs to see (even the ones it will drop).
new_data = pd.DataFrame({
    # --- HMM features ---
    'Return_Pct': [0.1],
    'Realized_Vol_10d': [1.4],
    
    # --- "Lazy" features (pipeline will drop) ---
    'Realized_Vol_20d': [1.5],
    'Volatility_Ratio': [1.0],
    
    # --- Secondary features (pipeline will use) ---
    'VIX': [19.0],
    'Price_ZScore': [0.5],
    'Return_Pct_lag_1': [0.1],
    'Return_Pct_lag_2': [-0.2],
    'Return_Pct_lag_3': [0.3],
    'Ticker': ['AAPL'],
    
    # --- All other columns from the previous error ---
    'CPI': [250.0],
    'Unemployment_Rate': [3.5],
    'Close': [152.0],
    'Momentum_Ratio_S_M': [1.1],
    'Volume_MA_20': [50000000.0],
    'Price_Range_Pct': [1.2],
    'Volume_Ratio': [0.9],
    'Momentum_5d': [2.5],
    'MA_20': [148.0],
    'Fed_Funds_Rate': [0.25],
    'Volume': [60000000.0],
    'Momentum_Ratio_M_L': [1.0],
    'Open': [151.5],
    'High': [153.0],
    'MA_200': [140.0],
    'GDP': [20000.0],
    'MA_50': [150.2],
    'Yield_Curve_10Y_2Y': [0.15],
    'Price_Range': [1.5],
    'Low': [151.0]
})

print("\nSample Data (with all columns):")
print(new_data.head())

### --- Step C: Run the 2-Step Prediction ---

# 1. Use HMM to get the 'regime'
#    FIX 3: The real HMM model wants a NumPy array, not a DataFrame.
hmm_features = new_data[['Return_Pct', 'Realized_Vol_10d']].values
predicted_regimes = hmm_model.predict(hmm_features)

# 2. Add the new 'regime' feature to your data
new_data['regime'] = predicted_regimes
print(f"\nAdded 'regime' feature: {predicted_regimes}")

# 3. Predict Volatility
#    This will now work. The pipeline will:
#    a) Drop the 'lazy' vol features ('Realized_Vol_10d', etc.)
#    b) Keep the secondary features ('VIX', 'Price_ZScore', etc.)
#    c) OneHotEncode 'Ticker'
#    d) Drop the new 'regime' column (because 'remainder=drop'
#       was set in your pipeline and it wasn't trained on it)
vol_prediction = xgb_vol_model.predict(new_data)

print("\n--- üìà Final Volatility Prediction ---")
for i, ticker in enumerate(new_data['Ticker']):
    print(f"  {ticker}: Predicted Volatility = {vol_prediction[i]:.4f}")

Loading models...
‚úÖ Models loaded successfully.

Sample Data (with all columns):
   Return_Pct  Realized_Vol_10d  Realized_Vol_20d  Volatility_Ratio   VIX  \
0         0.1               1.4               1.5               1.0  19.0   

   Price_ZScore  Return_Pct_lag_1  Return_Pct_lag_2  Return_Pct_lag_3 Ticker  \
0           0.5               0.1              -0.2               0.3   AAPL   

   ...      Volume  Momentum_Ratio_M_L   Open   High  MA_200      GDP  MA_50  \
0  ...  60000000.0                 1.0  151.5  153.0   140.0  20000.0  150.2   

   Yield_Curve_10Y_2Y  Price_Range    Low  
0                0.15          1.5  151.0  

[1 rows x 30 columns]

Added 'regime' feature: [0]

--- üìà Final Volatility Prediction ---
  AAPL: Predicted Volatility = 1.5371


In [2]:
import joblib
import pandas as pd
import numpy as np

### --- Step A: Load the Model ---
print("Loading model...")
returns_model = joblib.load('returns_xgb_global_tuned_v1.joblib')
print("‚úÖ Model loaded successfully.")

### --- Step B: Create Sample New Data (FIXED) ---
# This data MUST have *all* the columns the model was trained on.
# The error message listed all the missing ones.

new_data_returns = pd.DataFrame({
    # --- Original 6 Columns ---
    'Return_Pct_lag_1': [0.1],
    'Return_Pct_lag_2': [-0.2],
    'VIX': [19.0],
    'MA_50': [150.2],
    'MA_200': [140.0],
    'Ticker': ['AAPL'],
    
    # --- Columns from the ValueError (with dummy values) ---
    'Volatility_Ratio': [1.0],
    'CPI': [250.0],
    'Unemployment_Rate': [3.5],
    'Close': [152.0],
    'Momentum_Ratio_S_M': [1.1],
    'Return_Pct': [0.1], 
    'Realized_Vol_20d': [1.5],
    'Volume_MA_20': [50000000.0],
    'Price_Range_Pct': [1.2],
    'Volume_Ratio': [0.9],
    'Momentum_5d': [2.5],
    'MA_20': [148.0],
    'Fed_Funds_Rate': [0.25],
    'Realized_Vol_10d': [1.4],
    'Volume': [60000000.0],
    'Return_Pct_lag_3': [0.3],
    'Momentum_Ratio_M_L': [1.0],
    'Price_ZScore': [0.5],
    'Open': [151.5],
    'High': [153.0],
    'GDP': [20000.0],
    'Yield_Curve_10Y_2Y': [0.15],
    'Price_Range': [1.5],
    'Low': [151.0]
    
    # NOTE: Your training code excluded 'Return_MA_20',
    # so we correctly leave it out of this list.
})

print("\nSample Data (with all columns):")
print(new_data_returns.head())

### --- Step C: Run the Prediction ---
# This will now work
return_prediction = returns_model.predict(new_data_returns)

print("\n--- üí∏ Final Returns Prediction ---")
for i, ticker in enumerate(new_data_returns['Ticker']):
    prediction = return_prediction[i]
    direction = "Up" if prediction > 0 else "Down/Flat"
    print(f"  {ticker}: Raw Score = {prediction:.4f}  (Predicted: {direction})")

Loading model...
‚úÖ Model loaded successfully.

Sample Data (with all columns):
   Return_Pct_lag_1  Return_Pct_lag_2   VIX  MA_50  MA_200 Ticker  \
0               0.1              -0.2  19.0  150.2   140.0   AAPL   

   Volatility_Ratio    CPI  Unemployment_Rate  Close  ...      Volume  \
0               1.0  250.0                3.5  152.0  ...  60000000.0   

   Return_Pct_lag_3  Momentum_Ratio_M_L  Price_ZScore   Open   High      GDP  \
0               0.3                 1.0           0.5  151.5  153.0  20000.0   

   Yield_Curve_10Y_2Y  Price_Range    Low  
0                0.15          1.5  151.0  

[1 rows x 30 columns]

--- üí∏ Final Returns Prediction ---
  AAPL: Raw Score = 0.0713  (Predicted: Up)


In [3]:
import joblib
import pandas as pd
import numpy as np

### --- Step A: Load the Model ---
print("Loading HMM model...")
hmm_model = joblib.load('hmm_regime_classifier_v1.joblib')
print("‚úÖ Model loaded successfully.")

### --- Step B: Create Sample Data ---
# This model *only* wants the 2 features it was trained on.
hmm_features = np.array([
    [0.1, 1.2],  # Day 1: Low return, low vol
    [-2.5, 3.0], # Day 2: High return, high vol
    [0.2, 1.0],  # Day 3: Low return, low vol
    [1.5, 2.8]   # Day 4: High return, high vol
])

print("\nSample Data:")
print(hmm_features)

### --- Step C: Run the Prediction ---
# The output is an array of 0s and 1s.
predicted_regimes = hmm_model.predict(hmm_features)

print("\n--- üå¶Ô∏è Final Regime Predictions ---")
print(predicted_regimes)
# (Assuming 0=Low-Vol, 1=High-Vol)
# Output: [0 1 0 1]

Loading HMM model...
‚úÖ Model loaded successfully.

Sample Data:
[[ 0.1  1.2]
 [-2.5  3. ]
 [ 0.2  1. ]
 [ 1.5  2.8]]

--- üå¶Ô∏è Final Regime Predictions ---
[0 1 1 1]


In [2]:
import joblib
import pandas as pd
import numpy as np
import warnings

# Suppress warnings from scikit-learn/xgboost about feature names
warnings.filterwarnings('ignore', category=UserWarning)

### --- Step A: Load All Three Models (Your NEW, FIXED models) ---
print("--- Loading NEWLY-TRAINED, FIXED MODELS ---")
try:
    hmm_model = joblib.load('hmm_regime_classifier_v1.joblib')
    xgb_vol_model = joblib.load('volatility_xgb_no_persistence_v1.joblib')
    xgb_ret_model = joblib.load('returns_xgb_global_tuned_v1.joblib')
    print("‚úÖ Models loaded successfully.")
except FileNotFoundError as e:
    print(f"‚ùå FAILED TO LOAD MODEL: {e}")
    print("Make sure your .joblib files are in the same directory as this script.")
    exit()

### --- Step B: Create Sample New Data (Using the FIXED scale) ---
# This data is now on the *correct* decimal scale,
# matching your new training data.
new_data = pd.DataFrame({
    # --- HMM features (correct scale) ---
    'Return_Pct': [0.0011],         # +0.11%
    'Realized_Vol_10d': [0.0108],   # 1.08% daily vol
    
    # --- "Lazy" features (pipeline will drop) ---
    'Realized_Vol_20d': [0.0113],
    'Volatility_Ratio': [0.9540],
    
    # --- Secondary features (pipeline will use) ---
    'VIX': [19.8200],
    'Price_ZScore': [-5.0000], # This was -5.0 in your data, let's use it
    'Return_Pct_lag_1': [np.nan], # Use NaN to simulate start
    'Return_Pct_lag_2': [np.nan],
    'Return_Pct_lag_3': [np.nan],
    'Ticker': ['GOOGL'],
    
    # --- All other columns (from your 2011-03-08 GOOGL row) ---
    'CPI': [223.0460],
    'Unemployment_Rate': [9.0000],
    'Close': [14.7210],
    'Momentum_Ratio_S_M': [1.0008],
    'Volume_MA_20': [97045457.4],
    'Price_Range_Pct': [0.0131],
    'Volume_Ratio': [0.9408],
    'Momentum_5d': [-0.0141],
    'MA_20': [15.2317],
    'Fed_Funds_Rate': [0.1400],
    'Volume': [91304604.0],
    'Momentum_Ratio_M_L': [1.0111],
    'Open': [14.7364],
    'High': [14.8620],
    'MA_200': [15.0527],
    'GDP': [15351.4480],
    'MA_50': [15.2203],
    'Yield_Curve_10Y_2Y': [2.8300],
    'Price_Range': [0.1934],
    'Low': [14.6686],
    'year': [2011],
    'month': [3],
    'day_of_week': [1],
    'quarter': [1]
})

print("\n--- Sample Data (Correctly Scaled) ---")

### --- Step C: Run the Full Prediction Sequence ---
try:
    # 1. Use HMM to get the 'regime'
    hmm_features = new_data[['Return_Pct', 'Realized_Vol_10d']].values
    # Handle potential NaNs for the first row
    hmm_features = np.nan_to_num(hmm_features) 
    predicted_regimes = hmm_model.predict(hmm_features)

    # 2. Add the new 'regime' feature to your data
    new_data['regime'] = predicted_regimes
    print(f"\n[Step 1] Predicted 'regime': {predicted_regimes[0]}")

    # 3. Predict Volatility
    vol_prediction = xgb_vol_model.predict(new_data)
    
    # 4. Predict Returns
    ret_prediction = xgb_ret_model.predict(new_data)

    print("\n--- üìà Final SANE Predictions ---")
    
    # --- Check Volatility Prediction ---
    # The raw number should be a small decimal (like 0.0107)
    vol_pred_raw = vol_prediction[0]
    print(f"  Volatility Forecast (Raw Decimal): {vol_pred_raw:.6f}")
    if abs(vol_pred_raw) < 0.1: # Check for a sane decimal
        print("  -> ‚úÖ SANITY CHECK PASSED: Volatility is realistic.")
    else:
        print("  -> ‚ùå SANITY CHECK FAILED: Volatility is still a large number.")

    # --- Check Returns Prediction ---
    # The raw number should be a small decimal (like -0.0009)
    ret_pred_raw = ret_prediction[0]
    ret_pct = ret_pred_raw * 100 # Convert to percentage for display
    
    print(f"  Next-Day Return Forecast: {ret_pct:+.2f}% (Raw: {ret_pred_raw:.6f})")
    if abs(ret_pct) < 10: # A 10% move in *one day* is insane
        print("  -> ‚úÖ SANITY CHECK PASSED: Return forecast is realistic.")
    else:
        print("  -> ‚ùå SANITY CHECK FAILED: Return forecast is still unrealistic.")

except Exception as e:
    print(f"\n--- üí• PREDICTION FAILED ---")
    print(f"Error: {e}")
    print("\nThis likely means the 'new_data' is missing a column.")
    import traceback
    traceback.print_exc()

--- Loading NEWLY-TRAINED, FIXED MODELS ---
‚úÖ Models loaded successfully.

--- Sample Data (Correctly Scaled) ---

[Step 1] Predicted 'regime': 0

--- üìà Final SANE Predictions ---
  Volatility Forecast (Raw Decimal): 0.034191
  -> ‚úÖ SANITY CHECK PASSED: Volatility is realistic.
  Next-Day Return Forecast: -0.08% (Raw: -0.000793)
  -> ‚úÖ SANITY CHECK PASSED: Return forecast is realistic.
