# Complete Quantitative Trading System - Main Execution

This notebook executes the complete quantitative trading pipeline from data acquisition to backtesting.

**Pipeline:**
1. Data Acquisition
2. Feature Engineering (EMA + Technical Indicators)
3. Outlier Detection and Handling
4. HMM Regime Detection
5. ML Data Preparation (target creation and train/validation/test split)
6. ML Model Training (XGBoost, LightGBM, Neural Network)
7. Signal Generation
8. Backtesting
9. Performance Visualization and Final Summary

**Author:** Senior Quantitative Researcher  
**Date:** January 2026  
**Market:** Indian Equity Markets (NIFTY)

In [None]:
# Import libraries
import sys
sys.path.append('..')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Import custom modules
from src.data_acquisition import DataAcquisition
from src.feature_engineering import FeatureEngineer
from src.outlier_detection import OutlierDetector
from src.hmm_regime import HMMRegimeDetector
from src.ml_models import MLModelTrainer
from src.backtesting import BacktestEngine
from src.visualization import Visualizer
from src.utils import (
    load_config, setup_logging, split_train_test_by_date,
    save_dataframe, load_dataframe
)

# Notebook-wide display, plotting, and logging configuration
pd.set_option('display.max_columns', None)  # show every column of wide frames
plt.style.use('seaborn-v0_8-darkgrid')
logger = setup_logging(log_level='INFO')

# Startup banner, emitted in one call
banner_rule = "=" * 60
print("\n".join([
    banner_rule,
    "QUANTITATIVE TRADING SYSTEM",
    "Production-Grade Implementation for Indian Markets",
    banner_rule,
    "\nAll modules loaded successfully!\n",
]))

## 1. Configuration and Setup

In [None]:
# Load the configuration file; later cells read their parameters from `config`
config = load_config('../configs/config.yaml')

# Pull out the values that are referenced by name in several cells
SYMBOL = config['data']['symbol']
START_DATE = config['data']['start_date']
END_DATE = config['data']['end_date']
INITIAL_CAPITAL = config['backtesting']['initial_capital']

print("Configuration Loaded:")
print(f"  Symbol: {SYMBOL}")
print(f"  Period: {START_DATE} to {END_DATE}")
# Rupee sign is U+20B9; the notebook must be saved as UTF-8 for it to survive
print(f"  Initial Capital: ₹{INITIAL_CAPITAL:,.0f}")
print(f"  HMM States: {config['hmm']['n_states']}")
print(f"  Strategy: {config['strategy']['method']}")

## 2. Data Acquisition

In [None]:
print("\n" + "="*60)
print("STEP 1: DATA ACQUISITION")
print("="*60)

# Build the data-acquisition object for the configured symbol, window, and source
da = DataAcquisition(
    symbol=SYMBOL,
    start_date=START_DATE,
    end_date=END_DATE,
    source=config['data']['source']
)

# Fetch the raw data
df_raw = da.fetch_data()

# Fail fast with a readable message: the date-range line below indexes the
# first and last rows and would otherwise raise a bare IndexError on empty data.
if len(df_raw) == 0:
    raise ValueError(
        f"No data returned for {SYMBOL} between {START_DATE} and {END_DATE}"
    )

print(f"\nData Shape: {df_raw.shape}")
print(f"Date Range: {df_raw.index[0].date()} to {df_raw.index[-1].date()}")
print(f"\nColumns: {list(df_raw.columns)}")

# Persist the untouched download so later stages can be re-run from disk
save_dataframe(df_raw, '../data/raw/nifty_raw.csv')
print("\n✓ Raw data saved")

# Last expression of the cell: rendered as the cell output
df_raw.tail()

## 3. Feature Engineering

In [None]:
print("\n" + "="*60)
print("STEP 2: FEATURE ENGINEERING")
print("="*60)

# Initialize the feature engineer on the raw data
fe = FeatureEngineer(df_raw)

# Create all features, using the EMA periods from the configuration
df_features = fe.create_all_features(
    ema_periods=config['features']['ema_periods'],
    add_lagged=True
)

print("\nFeature Engineering Complete!")
print(f"Original columns: {len(df_raw.columns)}")
print(f"Total features: {len(df_features.columns)}")
print(f"Data shape: {df_features.shape}")

# Feature count per category: (printed label, key passed to get_feature_names)
feature_categories = [
    ('EMA features', 'ema'),
    ('Momentum indicators', 'momentum'),
    ('Volatility indicators', 'volatility'),
    ('Volume features', 'volume'),
    ('Price features', 'price'),
]
print("\nFeature Categories:")
for category_label, category_key in feature_categories:
    print(f"  {category_label}: {len(fe.get_feature_names(category_key))}")

# Save features
save_dataframe(df_features, '../data/processed/nifty_features.csv')
print("\n✓ Features saved")

# Last expression of the cell: rendered as the cell output
df_features.tail()

## 4. Outlier Detection and Handling

In [None]:
print("\n" + "="*60)
print("STEP 3: OUTLIER DETECTION")
print("="*60)

# Initialize the outlier detector on the engineered features
outlier_detector = OutlierDetector(df_features)

# Run detection; the result is kept in `outliers` for interactive inspection
outliers = outlier_detector.detect_all()

# Display summary
print("\nOutlier Detection Summary:")
print(outlier_detector.get_summary())

# Handle outliers in the two listed columns using the configured method/bounds.
# NOTE(review): this runs on the full sample, before the train/validation/test
# split in the ML preparation cell. If the percentile bounds are estimated from
# the data, test-period values influence the training inputs - confirm.
df_clean = outlier_detector.handle_outliers(
    method=config['outliers']['handling']['method'],
    columns=['returns', 'volume'],
    percentile=tuple(config['outliers']['handling']['percentile'])
)

print(f"\nData shape after outlier handling: {df_clean.shape}")

# Save cleaned data
save_dataframe(df_clean, '../data/processed/nifty_clean.csv')
print("✓ Cleaned data saved")

## 5. HMM Regime Detection

In [None]:
print("\n" + "="*60)
print("STEP 4: HMM REGIME DETECTION")
print("="*60)

# Initialize the HMM with the configured state count, iterations, and seed
hmm_detector = HMMRegimeDetector(
    n_states=config['hmm']['n_states'],
    n_iter=config['hmm']['n_iter'],
    random_state=config['hmm']['random_state']
)

# Fit and predict regimes on the cleaned data.
# NOTE(review): the detector is given the whole of df_clean, i.e. before the
# train/validation/test split, and `regime_label` is later used to filter
# test-period signals. If fit_predict fits on every row passed in, the test
# regimes are informed by future data - confirm, or fit on the training window.
df_with_regime = hmm_detector.fit_predict(
    df_clean,
    feature_columns=config['hmm']['features']
)

print("\nRegime Detection Complete!")
print("\nRegime Distribution:")
print(df_with_regime['regime_label'].value_counts())

# Analyze transitions
print("\nRegime Transition Matrix:")
transitions = hmm_detector.get_regime_transitions(df_with_regime)
print(transitions)

# Current regime (also reported again in the final summary cell)
current_regime = hmm_detector.get_current_regime(df_with_regime)
print(f"\nCurrent Market Regime: {current_regime['regime_label']}")

# Save model and data
hmm_detector.save('../models/hmm_regime_model.pkl')
save_dataframe(df_with_regime, '../data/processed/nifty_with_regime.csv')
print("\n✓ HMM model and data saved")

## 6. Prepare ML Training Data

In [None]:
print("\n" + "="*60)
print("STEP 5: ML DATA PREPARATION")
print("="*60)

# Trainer instance used in this cell for target creation and feature extraction;
# the training cell also copies its `feature_columns`.
ml_trainer = MLModelTrainer(
    model_type='xgboost',
    task='classification',
    random_state=config['execution']['random_state']
)

# Attach the target column using the configured method and horizon
target_cfg = config['ml_models']['target']
df_ml = ml_trainer.create_target(
    df_with_regime,
    method=target_cfg['method'],
    horizon=target_cfg['horizon']
)

print(f"ML data shape: {df_ml.shape}")

# Partition into train / validation / test using the configured ratios
ml_cfg = config['ml_models']
train_df, val_df, test_df = split_train_test_by_date(
    df_ml,
    train_ratio=ml_cfg['train_ratio'],
    validation_ratio=ml_cfg['validation_ratio']
)

# One summary line per partition: row count plus first and last date
print(f"\nData Split:")
for split_name, split_df in (
    ("Training", train_df),
    ("Validation", val_df),
    ("Test", test_df),
):
    first_day = split_df.index[0].date()
    last_day = split_df.index[-1].date()
    print(f"  {split_name}: {len(split_df)} samples ({first_day} to {last_day})")

# Extract features and target for each partition, in train -> val -> test order
(X_train, y_train), (X_val, y_val), (X_test, y_test) = [
    ml_trainer.prepare_data(split_df, 'target')
    for split_df in (train_df, val_df, test_df)
]

print(f"\nFeature shape: {X_train.shape}")
print(f"Number of features: {len(ml_trainer.feature_columns)}")

## 7. Train ML Models

In [None]:
print("\n" + "="*60)
print("STEP 6: ML MODEL TRAINING")
print("="*60)


def train_and_save(model_type, display_name, hyperparameters, model_path):
    """Train one classifier on the prepared splits, print its metrics, and save it.

    Args:
        model_type: Model identifier passed to MLModelTrainer (e.g. 'xgboost').
        display_name: Human-readable name used in the progress messages.
        hyperparameters: Hyperparameter mapping forwarded to ``train``.
        model_path: Destination passed to the trainer's ``save`` method.

    Returns:
        Tuple ``(trainer, metrics)``: the trainer and whatever ``train`` returned.
    """
    print(f"\nTraining {display_name}...")
    trainer = MLModelTrainer(
        model_type,
        'classification',
        # Same seed source as the data-preparation trainer, not a literal 42,
        # so changing the configured seed affects every model consistently.
        random_state=config['execution']['random_state']
    )
    # Reuse the feature list recorded by the data-preparation trainer
    trainer.feature_columns = ml_trainer.feature_columns
    metrics = trainer.train(
        X_train, y_train, X_val, y_val,
        hyperparameters=hyperparameters
    )
    print(f"{display_name} Metrics: {metrics}")
    trainer.save(model_path)
    return trainer, metrics


# Train the three models; the resulting names are used by the later cells
xgb_trainer, xgb_metrics = train_and_save(
    'xgboost', 'XGBoost',
    config['ml_models']['xgboost'],
    '../models/xgboost_model.pkl'
)
lgb_trainer, lgb_metrics = train_and_save(
    'lightgbm', 'LightGBM',
    config['ml_models']['lightgbm'],
    '../models/lightgbm_model.pkl'
)
nn_trainer, nn_metrics = train_and_save(
    'neural_network', 'Neural Network',
    config['ml_models']['neural_network'],
    '../models/nn_model.pkl'
)

# Compare models side by side
print("\n" + "="*60)
print("MODEL COMPARISON")
print("="*60)
all_metrics = [xgb_metrics, lgb_metrics, nn_metrics]
# Table column -> key in each metrics mapping (insertion order = column order)
metric_columns = {
    'Accuracy': 'accuracy',
    'Precision': 'precision',
    'Recall': 'recall',
    'F1-Score': 'f1',
    'Val_Accuracy': 'val_accuracy',
}
comparison = pd.DataFrame({
    'Model': ['XGBoost', 'LightGBM', 'Neural Network'],
    **{
        column: [model_metrics[key] for model_metrics in all_metrics]
        for column, key in metric_columns.items()
    }
})
print(comparison)

# Feature importance (XGBoost)
print("\nTop 10 Important Features (XGBoost):")
print(xgb_trainer.get_feature_importance(top_n=10))

print("\n✓ All models trained and saved")

## 8. Generate Trading Signals

In [None]:
print("\n" + "="*60)
print("STEP 7: SIGNAL GENERATION")
print("="*60)


def print_signal_distribution(signal_series):
    """Print how many rows carry a buy (1), sell (-1), or hold (0) signal."""
    print(f"  Buy signals: {(signal_series == 1).sum()}")
    print(f"  Sell signals: {(signal_series == -1).sum()}")
    print(f"  Hold signals: {(signal_series == 0).sum()}")


# Per-model predictions for the test set.
# NOTE(review): the code below uses each result as one probability per row of
# test_df (it is thresholded and stored as a single column) - confirm that
# predict_proba returns a 1-D positive-class probability.
xgb_pred = xgb_trainer.predict_proba(X_test)
lgb_pred = lgb_trainer.predict_proba(X_test)
nn_pred = nn_trainer.predict_proba(X_test)

# Ensemble predictions (weighted average)
weights = config['ml_models']['ensemble']['weights']
weight_total = (
    weights['xgboost'] + weights['lightgbm'] + weights['neural_network']
)
if weight_total <= 0:
    raise ValueError(
        f"Ensemble weights must sum to a positive value, got {weight_total}"
    )
ensemble_pred = (
    weights['xgboost'] * xgb_pred +
    weights['lightgbm'] * lgb_pred +
    weights['neural_network'] * nn_pred
)
# Rescale only when the configured weights do not already sum to 1, so the
# result stays comparable with the probability threshold below.
if not np.isclose(weight_total, 1.0):
    ensemble_pred = ensemble_pred / weight_total

# The masks below are applied to a Series indexed like test_df, so the
# predictions must line up with it row for row.
if len(ensemble_pred) != len(test_df):
    raise ValueError(
        f"Got {len(ensemble_pred)} predictions for {len(test_df)} test rows"
    )

# Create signals based on probability threshold
threshold = config['strategy']['thresholds']['ml_probability']
# Below 0.5 the buy band (> threshold) and the sell band (< 1 - threshold)
# overlap, and the sell assignment would silently overwrite buys.
if not 0.5 <= threshold <= 1:
    raise ValueError(
        f"ml_probability threshold must be within [0.5, 1], got {threshold}"
    )
signals = pd.Series(0, index=test_df.index)
signals[ensemble_pred > threshold] = 1  # Buy
signals[ensemble_pred < (1 - threshold)] = -1  # Sell

print("\nSignal Distribution:")
print_signal_distribution(signals)

# Apply regime filter if configured: outside the allowed regimes, force hold
if config['strategy']['use_regime_filter']:
    allowed_regimes = config['strategy']['trade_regimes']
    regime_mask = test_df['regime_label'].isin(allowed_regimes)
    signals[~regime_mask] = 0
    print("\nAfter regime filter:")
    print_signal_distribution(signals)

# Save signals together with the probabilities that produced them
signals_df = pd.DataFrame({
    'signal': signals,
    'xgb_prob': xgb_pred,
    'lgb_prob': lgb_pred,
    'nn_prob': nn_pred,
    'ensemble_prob': ensemble_pred
}, index=test_df.index)
save_dataframe(signals_df, '../results/trading_signals.csv')
print("\n✓ Signals saved")

## 9. Backtesting

In [None]:
print("\n" + "="*60)
print("STEP 8: BACKTESTING")
print("="*60)

# Initialize the backtest engine from the `backtesting` configuration section
backtest_cfg = config['backtesting']
backtest_engine = BacktestEngine(
    initial_capital=backtest_cfg['initial_capital'],
    transaction_cost=backtest_cfg['transaction_cost'],
    slippage=backtest_cfg['slippage'],
    position_size=backtest_cfg['position_size']
)

# Run the backtest on the test partition with the generated signals.
# NOTE(review): each signal is indexed on the same row as the `close` it is
# traded against; confirm run_backtest executes after the signal bar so the
# strategy does not trade on information from the bar it is filled at.
backtest_results = backtest_engine.run_backtest(
    test_df,
    signals,
    price_column='close'
)

# Print summary
backtest_engine.print_summary()

# Save the equity curve and the trade log
save_dataframe(backtest_results['equity_curve'], '../results/equity_curve.csv')
save_dataframe(backtest_results['trades'], '../results/trade_log.csv')

# Save metrics as a one-row table
metrics_df = pd.DataFrame([backtest_results['metrics']])
save_dataframe(metrics_df, '../results/backtest_metrics.csv')

print("\n✓ Backtest results saved")

## 10. Performance Visualization

In [None]:
print("\n" + "="*60)
print("STEP 9: VISUALIZATION")
print("="*60)

# Values that were previously repeated as bare literals in the calls below
TOP_FEATURES_TO_PLOT = 15
EMA_PLOT_WINDOW_ROWS = 252  # the original comment labels this slice "Last year"

# Initialize the visualizer; '../plots' is passed as its output directory
viz = Visualizer(output_dir='../plots')

# Create the comprehensive backtest report
viz.create_backtest_report(
    backtest_results,
    test_df,
    prefix='final_backtest'
)

# Additional visualizations
viz.plot_regime_analysis(
    df_with_regime,
    title='HMM Market Regime Analysis',
    filename='regime_analysis.png'
)

viz.plot_feature_importance(
    xgb_trainer.get_feature_importance(top_n=TOP_FEATURES_TO_PLOT),
    top_n=TOP_FEATURES_TO_PLOT,
    title='Top 15 Features (XGBoost)',
    filename='feature_importance.png'
)

viz.plot_ema_analysis(
    df_features.iloc[-EMA_PLOT_WINDOW_ROWS:],
    ema_periods=[20, 50, 200],
    title='EMA Analysis (Last Year)',
    filename='ema_analysis.png'
)

print("\n✓ All visualizations created")

## 11. Final Summary and Conclusions

In [None]:
print("\n" + "="*70)
print(" " * 15 + "QUANTITATIVE TRADING SYSTEM")
print(" " * 20 + "FINAL SUMMARY")
print("="*70)

print("\n📊 DATA")
print(f"  Symbol: {SYMBOL}")
print(f"  Period: {START_DATE} to {END_DATE}")
print(f"  Total Days: {len(df_raw)}")
print(f"  Features Created: {len(df_features.columns)}")

print("\n🔍 REGIME DETECTION")
print(f"  States: {config['hmm']['n_states']}")
print(f"  Current Regime: {current_regime['regime_label']}")

print("\n🤖 MACHINE LEARNING")
print("  Models Trained: XGBoost, LightGBM, Neural Network")
# The figure reported here is the highest validation accuracy of the three models
best_val_accuracy = max(
    xgb_metrics['val_accuracy'],
    lgb_metrics['val_accuracy'],
    nn_metrics['val_accuracy']
)
print(f"  Best Model Accuracy: {best_val_accuracy:.2%}")

print("\n📈 BACKTESTING PERFORMANCE")
metrics = backtest_results['metrics']
print(f"  Initial Capital: ₹{metrics['initial_capital']:,.0f}")
print(f"  Final Value: ₹{metrics['final_value']:,.0f}")
print(f"  Total Return: {metrics['total_return_pct']:.2f}%")
print(f"  Annualized Return: {metrics['annualized_return_pct']:.2f}%")
print(f"  Sharpe Ratio: {metrics['sharpe_ratio']:.2f}")
print(f"  Max Drawdown: {metrics['max_drawdown_pct']:.2f}%")
# Win rate and trade count are read with a default of 0 when absent
print(f"  Win Rate: {metrics.get('win_rate_pct', 0):.2f}%")
print(f"  Total Trades: {metrics.get('total_trades', 0):.0f}")

print("\n💰 COMPARISON")
print(f"  Buy & Hold Return: {metrics['buy_hold_return_pct']:.2f}%")
print(f"  Strategy Excess Return: {metrics['excess_return_pct']:.2f}%")

# Decide the verdict from the same key that is printed above; the previous
# lookup of 'excess_return' did not match any other key read in this cell.
outperformed = metrics['excess_return_pct'] > 0
outperformance = "✓" if outperformed else "✗"
print(f"\n{outperformance} Strategy {'OUTPERFORMED' if outperformed else 'UNDERPERFORMED'} Buy & Hold")

print("\n📁 OUTPUT FILES")
print("  Data: data/processed/")
print("  Models: models/")
print("  Plots: plots/")
print("  Results: results/")

print("\n" + "="*70)
print(" " * 22 + "EXECUTION COMPLETE")
print("="*70 + "\n")

print("✓ All steps completed successfully!")
print("✓ System ready for production deployment")
print("\n⚠️  DISCLAIMER: Past performance does not guarantee future results.")
print("    Always perform thorough testing before live trading.")