In [12]:
import pandas as pd
import joblib
from xgboost import XGBClassifier
from sklearn.preprocessing import StandardScaler
import numpy as np

class CreditRiskPipeline:
    """
    Load the persisted model artifacts and score loan applications.

    Bundles the classifier, the fitted scaler and the saved feature list so
    that preprocessing and prediction always happen in one consistent step.

    Attributes:
        model: Object loaded from ``<model_dir>/xgb_model.pkl``; must expose ``predict_proba``.
        scaler: Object loaded from ``<model_dir>/scaler.pkl``; must expose ``transform``.
        features_list: Ordered column names loaded from ``<model_dir>/features_list.pkl``.
        threshold: Probability strictly above which an application is flagged as risky.
    """

    # Single source of truth for the default decision cut-off.
    DEFAULT_THRESHOLD = 0.30

    def __init__(self, model_dir: str = "models", threshold: float = DEFAULT_THRESHOLD):
        """
        Load the model, scaler and feature list from ``model_dir``.

        Args:
            model_dir: Directory containing the three ``.pkl`` artifacts. The default
                relative path assumes execution from the root project directory
                (credit-risk-scorecard).
            threshold: Default-probability cut-off used by ``predict``.

        Raises:
            FileNotFoundError: If any artifact is missing (re-raised after logging).
        """
        try:
            # joblib.load unpickles its input, which can execute arbitrary code:
            # only point model_dir at artifacts from a trusted source.
            self.model = joblib.load(f"{model_dir}/xgb_model.pkl")
            self.scaler = joblib.load(f"{model_dir}/scaler.pkl")
            self.features_list = joblib.load(f"{model_dir}/features_list.pkl")
            print("Pipeline initialized: Model, Scaler, and Features loaded successfully.")
        except FileNotFoundError as e:
            # Report which file is missing, then let the caller decide how to fail.
            print(f"ERROR: Model file missing at startup: {e}. Check '{model_dir}/' directory.")
            raise
        self.threshold = threshold

    def preprocess(self, df: pd.DataFrame) -> np.ndarray:
        """
        Turn raw application rows into the scaled matrix the model consumes.

        Missing values are replaced by 0, columns are aligned to ``features_list``
        (absent features are created as 0.0, unknown columns are dropped), and the
        result is passed through the scaler. The caller's frame is not modified.

        Args:
            df: Raw input rows, one application per row.

        Returns:
            Whatever ``self.scaler.transform`` returns for the aligned frame
            (presumably a 2-D ``numpy.ndarray`` -- depends on the saved scaler).
        """
        # fillna returns a new frame, so the caller's data stays untouched.
        filled = df.fillna(0)

        # reindex orders the columns, drops extras and adds absent features in one step.
        aligned = filled.reindex(columns=self.features_list, fill_value=0.0)

        return self.scaler.transform(aligned)

    def predict(self, df: pd.DataFrame) -> tuple[float, int]:
        """
        Score the first row of ``df``.

        Args:
            df: Raw input rows; only the first row's score is returned.

        Returns:
            ``(probability, flag)`` as built-in ``float`` and ``int``: the predicted
            probability of class 1 (default) and 1 if that probability is strictly
            greater than ``self.threshold``, else 0.

        Raises:
            ValueError: If ``df`` has no rows.
        """
        if len(df) == 0:
            raise ValueError("predict() requires a DataFrame with at least one row.")

        X = self.preprocess(df)
        # Column 1 of predict_proba is the probability of class 1 (default).
        raw_prob = self.model.predict_proba(X)[:, 1][0]
        # Compare before converting so the decision uses the model's own precision.
        pred = int(raw_prob > self.threshold)
        # Built-in types match the annotation and are JSON-serializable.
        return float(raw_prob), pred

# --- TESTING CODE: smoke test that loads the pipeline and scores one sample applicant ---
if __name__ == "__main__":
    # One hand-written applicant; the keys have to be features the model knows about.
    sample_data = {
        feature: [value]
        for feature, value in (
            ('AMT_INCOME_TOTAL', 145000.0),
            ('AMT_CREDIT', 520000.0),
            ('AGE', 42),
            ('YEARS_EMPLOYED', 8.0),
            ('EXT_SOURCE_1', 0.61),
            ('EXT_SOURCE_2', 0.42),
            ('EXT_SOURCE_3', 0.68),
            ('DAYS_LATE_EMI_RATIO', 0.04),
            ('CREDIT_UTILIZATION', 0.29),
        )
    }
    test_df = pd.DataFrame.from_dict(sample_data)

    print("\n--- Pipeline Test Starting ---")

    try:
        # Constructing the pipeline reads the saved artifacts from disk.
        risk_pipeline = CreditRiskPipeline()

        # Score the single sample row.
        result = risk_pipeline.predict(test_df)
        probability, prediction = result

        # Report the outcome.
        print("\nPrediction Result:")
        print(f"Default Probability (Class 1): {probability:.4f}")
        print(f"Risk Flag (Prediction): {prediction} (1=High Risk)")

        print("\nTest Complete: Pipeline executed successfully.")

    except Exception as err:
        # Any failure (missing files, bad input, ...) is reported rather than raised.
        print(f"\nTEST FAILED: An error occurred during execution. Check model files. Error: {err}")


--- Pipeline Test Starting ---
ERROR: Model file missing at startup: [Errno 2] No such file or directory: 'models/xgb_model.pkl'. Check 'models/' directory.

TEST FAILED: An error occurred during execution. Check model files. Error: [Errno 2] No such file or directory: 'models/xgb_model.pkl'


In [10]:
# Save this file as app.py

import os

import pandas as pd
import uvicorn
from fastapi import FastAPI
from fastapi.responses import JSONResponse
from pydantic import BaseModel

from pipeline import CreditRiskPipeline

# --- Pydantic Model for Request Body Validation ---
# Schema of the JSON body accepted by the prediction endpoint. Each field name
# becomes a DataFrame column handed to the pipeline, so the names presumably have
# to match entries of the model's saved feature list (verify against features_list.pkl).
class LoanData(BaseModel):
    # Required fields: no default is declared, so a request must supply all of them.
    AMT_INCOME_TOTAL: float
    AMT_CREDIT: float
    AGE: int
    YEARS_EMPLOYED: float
    EXT_SOURCE_1: float
    EXT_SOURCE_2: float
    EXT_SOURCE_3: float
    # Optional fields: fall back to 0.0 when the request omits them.
    DAYS_LATE_EMI_RATIO: float = 0.0  # optional/derived feature, defaults to 0.0
    CREDIT_UTILIZATION: float = 0.0 # optional/derived feature, defaults to 0.0


# --- FastAPI Application Setup ---
app = FastAPI(
    description="Real-time prediction service using XGBoost pipeline.",
    title="Credit Risk Scoring API",
)

# Build the pipeline once, at import time, so every request reuses the same
# in-memory model, scaler and feature list.
try:
    pipeline = CreditRiskPipeline()
except Exception as init_error:
    print(f"FATAL: Failed to initialize pipeline. Check models directory. Error: {init_error}")
    # Re-raise so the application never starts without a working pipeline.
    raise


# --- API Endpoint ---
@app.post("/predict")
def predict_risk(data: LoanData):
    """
    Accepts loan application data (JSON) and returns default probability and risk flag.
    Risk Flag is 1 if probability > 0.30.

    On a processing failure the same JSON error body is returned with HTTP status 500.
    """
    try:
        # The pipeline works on DataFrames: wrap the validated payload in a
        # one-row frame (model_dump() turns the Pydantic model into a plain dict).
        df = pd.DataFrame([data.model_dump()])

        # Score the single application.
        prob, pred = pipeline.predict(df)

        return {
            "status": "success",
            "default_probability": float(prob),
            "risk_flag": int(pred) # 1 = High Risk (Default predicted), 0 = Low Risk
        }
    except Exception as e:
        # The request body was already validated, so a failure here is a
        # server-side problem. Keep the established error body, but signal the
        # failure with a 5xx status so clients and monitoring do not mistake it
        # for a successful prediction.
        return JSONResponse(
            status_code=500,
            content={
                "status": "error",
                "message": f"Prediction failed due to processing error: {e}"
            },
        )


# --- Local Run Command ---
if __name__ == "__main__":
    # Lets the service be started with `python app.py`; uvicorn imports the
    # `app` object from the `app` module and serves it on port 8000 of all interfaces.
    uvicorn.run("app:app", port=8000, host="0.0.0.0")

ModuleNotFoundError: No module named 'pipeline'

In [4]:
!pip install fastapi uvicorn pandas pydantic

Defaulting to user installation because normal site-packages is not writeable
Collecting fastapi
  Downloading fastapi-0.122.0-py3-none-any.whl.metadata (30 kB)
Collecting uvicorn
  Using cached uvicorn-0.38.0-py3-none-any.whl.metadata (6.8 kB)
Collecting starlette<0.51.0,>=0.40.0 (from fastapi)
  Downloading starlette-0.50.0-py3-none-any.whl.metadata (6.3 kB)
Collecting annotated-doc>=0.0.2 (from fastapi)
  Using cached annotated_doc-0.0.4-py3-none-any.whl.metadata (6.6 kB)
Downloading fastapi-0.122.0-py3-none-any.whl (110 kB)
Using cached uvicorn-0.38.0-py3-none-any.whl (68 kB)
Using cached annotated_doc-0.0.4-py3-none-any.whl (5.3 kB)
Downloading starlette-0.50.0-py3-none-any.whl (74 kB)
Installing collected packages: annotated-doc, uvicorn, starlette, fastapi
Successfully installed annotated-doc-0.0.4 fastapi-0.122.0 starlette-0.50.0 uvicorn-0.38.0


