# 🏦 Enterprise Credit Approval ML Pipeline (V3.5)

--- 

**Clean Architecture + Professional MLOps Hybrid Framework**

This notebook runs an end-to-end credit approval system using **modular Python packages (`src/`)**. The project calculates not only technical metrics (AUC/F1) but also critical financial values for enterprise decisions such as **ROI**, **NPV**, and **Amortization**.

---

## 1️⃣ [CELL 1] Environment & Infrastructure Setup

In this cell, the runtime environment (Google Colab or local) is detected, the project working directory is configured, and the necessary libraries are installed.

In [None]:
import sys
import os
import warnings
from pathlib import Path
warnings.filterwarnings('ignore')

print("üîÑ Checking system...")

# Colab is detected by checking whether the `google.colab` package is already loaded.
IN_COLAB = 'google.colab' in sys.modules

if not IN_COLAB:
    # Local run: use the current working directory as the project root.
    print("üíª Local Environment Detected.")
    PROJECT_PATH = os.getcwd()
    print(f"‚úÖ Working directory: {PROJECT_PATH}")
else:
    print("üåê Google Colab Environment Detected.")
    from google.colab import drive
    drive.mount('/content/drive', force_remount=True)

    # ENTER YOUR PROJECT PATH HERE (Folder name on Drive)
    PROJECT_PATH = '/content/drive/MyDrive/credit-approval'

    if not os.path.exists(PROJECT_PATH):
        print(f"‚ö†Ô∏è ERROR: {PROJECT_PATH} not found! Please ensure it is uploaded to Drive.")
    else:
        # Run from the project folder and make it importable.
        os.chdir(PROJECT_PATH)
        if PROJECT_PATH not in sys.path:
            sys.path.append(PROJECT_PATH)
        print(f"‚úÖ Working directory: {PROJECT_PATH}")

In [None]:
# Install Dependencies
if IN_COLAB:
    print("üì¶ Installing libraries (requirements.txt)...")
    %pip install -r requirements.txt
else:
    print("‚ÑπÔ∏è Local run: pip install skipped.")

## 2️⃣ [CELL 2-8] Enterprise ML Pipeline Execution

The pipeline goes through these stages:
1. **Data Loading & Validation**
2. **Feature Engineering**
3. **Model Training & Optimization**
4. **Statistical Validation (Friedman Test)**
5. **Intelligent Model Selection & Validation**
6. **Business Impact Analysis (ROI/NPV)**
7. **Automated Offline A/B Simulation** (Inside the pipeline)

In [None]:
from src.core.config import get_config
from src.core.logger import setup_logger
from src.pipelines.training_pipeline import TrainingPipeline

# 1. Load Configuration
# NOTE(review): reload=True presumably forces a fresh read of the config; confirm in src.core.config.
config = get_config(reload=True)
config.optuna_trials = 30  # Reduced trials for fast demo

# 2. Prepare Logger
logger = setup_logger().logger

# 3. Start Pipeline
pipeline = TrainingPipeline(config=config, logger=logger)

# Only the pipeline run itself is guarded, so the error message below always
# refers to a pipeline failure and nothing else.
try:
    results = pipeline.run()
except Exception as e:
    print(f"‚ùå Execution error: {e}")
    # Re-raise so the full traceback is shown and execution stops here.
    # Swallowing the error would leave `results` undefined and make the
    # Stage 8 cell (which indexes into `results`) fail with an unrelated NameError.
    raise
else:
    print(f"\nüèÜ PIPELINE COMPLETED! Selected Model: {results['best_model']}")

## 3️⃣ [STAGE 8] Manual Offline A/B Simulation & Validation

This stage simulates A/B testing between the best model (Challenger) and a baseline model (Champion) to validate deployment decisions using bootstrap resampling.

In [None]:
from src.evaluation.ab_testing import ABTestSimulator
from sklearn.linear_model import LogisticRegression
from pathlib import Path

print("=" * 80, "STAGE 8: A/B TESTING SIMULATION", "=" * 80, sep="\n")

# Pull the selected model and the data splits out of the pipeline results.
best_model_name = results['best_model']
best_model = results['training_results'][best_model_name]['model']
X_train_processed, y_train = results['splits']['X_train'], results['splits']['y_train']
X_test_processed, y_test = results['splits']['X_test'], results['splits']['y_test']

# Output locations configured for the run.
output_dir = Path(config.output_dir)
plots_dir = output_dir / config.plots_dir

print(
    "\nüìä Setting up A/B Test...",
    "   Champion Model: Logistic Regression (Baseline)",
    f"   Challenger Model: {best_model_name}",
    sep="\n",
)

# Champion: a plain logistic regression fitted on the same training split.
champion_model = LogisticRegression(
    random_state=config.random_state,
    max_iter=1000,
)
champion_model.fit(X_train_processed, y_train)

# Challenger: the model selected by the pipeline.
challenger_model = best_model

# Simulator comparing both models on the held-out test split.
ab_simulator = ABTestSimulator(
    champion_model=champion_model,
    challenger_model=challenger_model,
    X_test=X_test_processed,
    y_test=y_test,
    n_iterations=1000,      # Bootstrap iterations
    confidence_level=0.95,  # 95% confidence
    random_state=config.random_state,
    config=config,
)

# Run the simulation with traffic split evenly between both models.
ab_results_sim = ab_simulator.run_simulation(traffic_split=0.5, verbose=True)

# Print the banner before building the report so output order is unchanged
# should report generation emit anything itself.
print("\n" + "=" * 80, "A/B TEST RESULTS", "=" * 80, sep="\n")

report = ab_simulator.generate_report(ab_results_sim)
print(report)

# Save report to file
ab_report_txt_path = output_dir / config.results_dir / "ab_test_report.txt"
# open() does not create missing directories, so make sure the target exists.
ab_report_txt_path.parent.mkdir(parents=True, exist_ok=True)
# Write as UTF-8 explicitly: the reporting cell reads this file back with
# encoding='utf-8', and the platform default encoding may differ.
with open(ab_report_txt_path, 'w', encoding='utf-8') as f:
    f.write(report)

print(f"\n‚úÖ A/B Test report saved to: {ab_report_txt_path}")

# Visualize Results
print("\nüìä Generating A/B Test Dashboard...")
# NOTE(review): the results section displays "ab_test_dashboard.png", while this
# cell writes "08_ab_testing_dashboard.png"; confirm which file the pipeline produces.
ab_plot_path = plots_dir / "08_ab_testing_dashboard.png"
plots_dir.mkdir(parents=True, exist_ok=True)
ab_simulator.plot_results(ab_results_sim, save_path=str(ab_plot_path))

print(f"‚úÖ A/B Test dashboard saved to: {ab_plot_path}")

# --- DECISION RECOMMENDATION ---
print("\n" + "=" * 80, "DEPLOYMENT DECISION", "=" * 80, sep="\n")

if ab_results_sim.winner == 'Challenger':
    print("‚úÖ RECOMMENDATION: DEPLOY CHALLENGER MODEL")

    # Relative improvement and p-value per metric, as reported by the simulator.
    print("\n   Key Improvements:")
    for metric in ('accuracy', 'f1', 'auc'):
        metric_stats = ab_results_sim.statistical_tests[metric]
        improvement = metric_stats['relative_improvement']
        p_value = metric_stats['p_value']
        print(f"   ‚Ä¢ {metric.upper()}: {improvement:+.2f}% (p={p_value:.6f})")

    print("\n   Financial Impact:")
    annual_impact = ab_results_sim.business_impact['annual_financial_impact']
    print(f"   ‚Ä¢ Annual ROI Increase: ${annual_impact:+,.0f}")

elif ab_results_sim.winner == 'Champion':
    print("‚ö†Ô∏è  RECOMMENDATION: KEEP CURRENT MODEL")
    print("\n   The baseline model performs equally well or better.")

else:
    # Any other winner value is treated as inconclusive.
    print("‚öñÔ∏è  RECOMMENDATION: FURTHER INVESTIGATION NEEDED")
    print("\n   No statistically significant difference detected.")

print("=" * 80)

# Headline numbers for the write-up, taken from the simulator and its result object.
print("\nüìù Key Metrics for Reporting:")
print(
    f"   ‚Ä¢ Simulations Run: {ab_simulator.n_iterations:,}",
    f"   ‚Ä¢ Statistical Confidence: {ab_simulator.confidence_level*100:.0f}%",
    f"   ‚Ä¢ Effect Size (Cohen's d): {ab_results_sim.effect_size:.4f}",
    f"   ‚Ä¢ Winner: {ab_results_sim.winner}",
    sep="\n",
)

# Improvement figures are only reported when the challenger won.
if ab_results_sim.winner == 'Challenger':
    f1_improvement = ab_results_sim.statistical_tests['f1']['relative_improvement']
    roi_improvement = ab_results_sim.business_impact['roi_improvement_pct']
    print(
        f"   ‚Ä¢ F1 Score Improvement: {f1_improvement:+.2f}%",
        f"   ‚Ä¢ ROI Improvement: {roi_improvement:+.2f}%",
        sep="\n",
    )

print(
    "\n‚úÖ Stage 8 Complete: A/B Testing validated deployment decision!",
    "=" * 80,
    sep="\n",
)

## 4️⃣ [RESULTS] Advanced Dashboards & Stakeholder Reports

Below are the **enterprise analysis dashboards** and reports automatically generated by the pipeline.

In [None]:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from pathlib import Path

def display_dashboard(title, filename):
    """Show a saved dashboard image inline, or print a notice if it is missing.

    Args:
        title: Heading drawn above the image.
        filename: Image file name, resolved under
            ``config.output_dir / config.plots_dir``.
    """
    image_path = Path(config.output_dir) / config.plots_dir / filename
    if not image_path.exists():
        print(f"‚ö†Ô∏è Visual not found: {filename}")
        return

    # Render the image on its own figure with the axes hidden.
    plt.figure(figsize=(15, 12))
    plt.imshow(mpimg.imread(str(image_path)))
    plt.axis('off')
    plt.title(title, fontsize=14, fontweight='bold')
    plt.show()

# Dashboards to show, in display order: (title, image file name).
for _title, _image_name in (
    ("1. Training Results Dashboard", "training_results_dashboard.png"),
    ("2. Advanced Model Selection Dashboard", "model_selection_dashboard.png"),
    ("3. 12-Panel Business Impact Analysis", "business_impact_extended.png"),
    ("4. Offline A/B Simulation Dashboard", "ab_test_dashboard.png"),
):
    display_dashboard(_title, _image_name)

### 📜 Executive Summary, Business Case & A/B Results

Automatically generated text-based reports and A/B test findings are below:

In [None]:
def print_report(filename):
    """Print a saved text report, or a notice if the file does not exist.

    Args:
        filename: Report file name, resolved under
            ``config.output_dir / config.results_dir``.
    """
    report_path = Path(config.output_dir) / config.results_dir / filename
    if not report_path.exists():
        print(f"‚ö†Ô∏è Report not found: {filename}")
        return

    # Header first, then the file contents decoded as UTF-8.
    print(f"\n--- [ {filename} ] ---\n")
    print(report_path.read_text(encoding='utf-8'))

# Text reports to print, in order.
for _report_name in ("business_case.txt", "ab_test_report.txt", "ab_test_report.json"):
    print_report(_report_name)

## 🧹 Memory Optimization

You can run the cell below to clear memory.

In [None]:
import gc

# torch is treated as optional here: local runs skip the dependency install,
# so a missing torch should not stop the garbage collection below.
try:
    import torch
except ImportError:
    torch = None

gc.collect()
# Release cached GPU memory only when torch is importable and CUDA is available.
if torch is not None and torch.cuda.is_available():
    torch.cuda.empty_cache()
print("üßπ Memory and GPU cache cleared.")