<a href="https://colab.research.google.com/github/semunyujunior/WhatsAppcode/blob/main/NFPpredictor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install pandas numpy scikit-learn xgboost lightgbm shap matplotlib seaborn



In [None]:
# 🚀 PRO NFP MODEL - CLASSIFICATION PIPELINE
# Built with Victor - Fund-grade NFP signal
# Split version: CLASSIFICATION

# ==========================================
# STEP 1 — IMPORT LIBRARIES
# ==========================================

# Standard library
import warnings

# Core
import pandas as pd
import numpy as np

# ML
from sklearn.model_selection import train_test_split, RandomizedSearchCV, KFold
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.ensemble import StackingRegressor
from sklearn.ensemble import StackingClassifier
from xgboost import XGBRegressor
from xgboost import XGBClassifier
from lightgbm import LGBMRegressor
from lightgbm import LGBMClassifier

# Explainability
import shap
import matplotlib.pyplot as plt
import seaborn as sns

# Misc
warnings.filterwarnings('ignore')

print("✅ Libraries loaded 🚀")


# ==========================================
# STEP 2 — LOAD DATA
# ==========================================
# 👉 Point DATA_PATH at the CSV that holds your pre-engineered feature dataset.
# The file must contain every column listed in `feature_columns` plus the
# classification target column named by `target_class`.
DATA_PATH = 'YOUR_FEATURE_DATASET.csv'  # Replace with your feature dataset path!

# Model input columns. X is built by selecting these names from df, so this
# list also fixes the column order the models are trained on.
feature_columns = [
    'Delta_Prev_Forecast',
    'Momentum',
    'Month',
    'Quarter',
    'Lag1_NFP',
    'Lag2_NFP',
    'Rolling_Vol_3M',
    'Rolling_Vol_6M',
    'ADP_Pct_Change',
    'Jobless_4Wk_Avg',
    'ISM_PMI_1M_Change',
    'ISM_PMI_3M_Avg',
    'Yield_10Y_Change',
]

# Target: Classification → NFP Higher (1) / Lower (0)
target_class = 'Target_Classification'  # Your target column

# Load your dataset:
df = pd.read_csv(DATA_PATH)

# Fail early with one readable message listing every absent column, instead
# of a pandas indexing error that surfaces only at the first bad selection.
missing_columns = [
    column for column in [*feature_columns, target_class]
    if column not in df.columns
]
if missing_columns:
    raise KeyError(
        f"Dataset {DATA_PATH!r} is missing required columns: {missing_columns}"
    )

# Features & Target
X = df[feature_columns]
y = df[target_class]

print("✅ Data loaded — Shape:", X.shape)


# ==========================================
# STEP 3 — TRAIN/TEST SPLIT
# ==========================================
# Hold out 30% of the rows for evaluation. The split is stratified on y and
# uses a fixed seed so it is repeatable from run to run.
# NOTE(review): train_test_split shuffles rows by default. The features include
# lagged/rolling values, so if the rows are chronological this may let later
# months inform earlier ones — confirm whether a time-ordered split is needed.

split_options = {'test_size': 0.3, 'random_state': 42, 'stratify': y}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

print(f"✅ Split done — Train size: {X_train.shape[0]}, Test size: {X_test.shape[0]}")



# ==========================================
# STEP 4 — XGBOOST CLASSIFIER + TUNING
# ==========================================
# Randomized hyper-parameter search (20 sampled candidates, 5-fold CV, scored
# on accuracy) over an XGBoost classifier, fitted on the training split only.

# `use_label_encoder` is intentionally not passed: it is a deprecated argument
# that current XGBoost releases ignore (with a warning).
xgb_clf = XGBClassifier(random_state=42, eval_metric='logloss')

# Candidate values sampled by the search.
xgb_param_grid = {
    'max_depth': [3, 5, 7],
    'n_estimators': [100, 200, 300],
    'learning_rate': [0.01, 0.05, 0.1, 0.2],
    'subsample': [0.8, 1],
    'colsample_bytree': [0.8, 1]
}

xgb_search = RandomizedSearchCV(
    estimator=xgb_clf,
    param_distributions=xgb_param_grid,
    n_iter=20,
    cv=5,
    scoring='accuracy',
    n_jobs=-1,
    verbose=1,
    random_state=42
)

xgb_search.fit(X_train, y_train)

# Best candidate found by the search; reused by the stacking and SHAP steps.
best_xgb = xgb_search.best_estimator_
print("✅ XGBoost Classifier tuned — Best params:", xgb_search.best_params_)


# ==========================================
# STEP 5 — LIGHTGBM CLASSIFIER + TUNING
# ==========================================
# Randomized hyper-parameter search (20 sampled candidates, 5-fold CV, scored
# on accuracy) over a LightGBM classifier, fitted on the training split only.

# subsample_freq=1 turns row bagging on at every boosting iteration. LightGBM
# ignores `subsample` while subsample_freq is 0 (its default), which would make
# the `subsample` values in the grid below have no effect on the model.
lgbm_clf = LGBMClassifier(random_state=42, subsample_freq=1)

# Candidate values sampled by the search.
lgbm_param_grid = {
    'max_depth': [3, 5, 7],
    'n_estimators': [100, 200, 300],
    'learning_rate': [0.01, 0.05, 0.1, 0.2],
    'subsample': [0.8, 1],
    'colsample_bytree': [0.8, 1]
}

lgbm_search = RandomizedSearchCV(
    estimator=lgbm_clf,
    param_distributions=lgbm_param_grid,
    n_iter=20,
    cv=5,
    scoring='accuracy',
    n_jobs=-1,
    verbose=1,
    random_state=42
)

lgbm_search.fit(X_train, y_train)

# Best candidate found by the search; reused by the stacking step.
best_lgbm = lgbm_search.best_estimator_
print("✅ LightGBM Classifier tuned — Best params:", lgbm_search.best_params_)


# ==========================================
# STEP 6 — STACKED CLASSIFIER
# ==========================================
# Combine the two tuned base models under an XGBoost meta-learner. The stack
# is fitted on the training split with 5-fold internal cross-validation.

# `use_label_encoder` is intentionally not passed to the meta-learner: it is a
# deprecated argument that current XGBoost releases ignore (with a warning).
stacked_clf = StackingClassifier(
    estimators=[
        ('xgb', best_xgb),
        ('lgbm', best_lgbm)
    ],
    final_estimator=XGBClassifier(random_state=42, eval_metric='logloss'),
    cv=5,
    n_jobs=-1
)

stacked_clf.fit(X_train, y_train)

print("✅ Stacked Classifier trained 🚀")



# ==========================================
# STEP 7 — EVALUATION
# ==========================================
# Score the stacked model on the held-out test split and visualise its errors.

# Hard class predictions for the test rows
y_pred = stacked_clf.predict(X_test)

# Summary metrics computed from true vs. predicted labels
acc = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

print("🎯 STACKED CLASSIFIER PERFORMANCE:")
print("Accuracy:", acc)
print(report)

# Confusion matrix drawn as an annotated heatmap on an explicit Axes
fig, ax = plt.subplots(figsize=(6, 5))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', ax=ax)
ax.set_title('Confusion Matrix — Stacked Classifier')
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
plt.show()



# ==========================================
# STEP 8 — SHAP ANALYSIS (Explainability)
# ==========================================
# Explain the tuned XGBoost base model (not the full stack): the explainer is
# built from the model plus the training features, then applied to the test rows.
explainer = shap.Explainer(model=best_xgb, masker=X_train)
shap_values = explainer(X_test)

# Bar-style summary plot of the SHAP values for the test rows
shap.summary_plot(shap_values, features=X_test, plot_type="bar")

# Optional (big): the full summary plot —
# shap.summary_plot(shap_values, X_test)

# ==========================================
# STEP 9 — FINAL CELL → MANUAL INPUT FOR MONTHLY PREDICTION
# ==========================================

# 👉 You enter latest macro values → model predicts HIGHER or LOWER 🚀

# Example — replace with YOUR latest data:
manual_values = {
    'Delta_Prev_Forecast': -5000,
    'Momentum': 3000,
    'Month': 6,
    'Quarter': 2,
    'Lag1_NFP': 250000,
    'Lag2_NFP': 240000,
    'Rolling_Vol_3M': 18000,
    'Rolling_Vol_6M': 22000,
    'ADP_Pct_Change': 0.05,
    'Jobless_4Wk_Avg': 215000,
    'ISM_PMI_1M_Change': 1.2,
    'ISM_PMI_3M_Avg': 53.5,
    'Yield_10Y_Change': 0.10,
}

# Select by `feature_columns` so the single-row frame carries exactly the
# training columns in training order, regardless of how the dict above is
# edited. A key missing from the dict raises KeyError here, before predicting.
manual_input = pd.DataFrame([manual_values])[feature_columns]

# Predict
manual_pred = stacked_clf.predict(manual_input)
manual_prob = stacked_clf.predict_proba(manual_input)

# Output
# NOTE(review): the "Lower / Higher" label assumes the fitted classes are
# ordered [0, 1] — confirm against stacked_clf.classes_.
print("🚀 FINAL MONTHLY PREDICTION:")
print("Predicted Class (1 = Higher, 0 = Lower):", manual_pred[0])
print("Probability (Lower / Higher):", manual_prob[0])




✅ Libraries loaded 🚀


FileNotFoundError: [Errno 2] No such file or directory: 'YOUR_FEATURE_DATASET.csv'