In [0]:
# ✅ Package installation
# Probes for each third-party package this notebook imports and installs the
# ones that are missing. If anything was installed, the Python process is
# restarted via dbutils at the end of the cell.
# NOTE(review): the installs below carry no version pins, so the resolved
# versions may differ between runs — consider pinning.

# Set to True as soon as any package had to be installed in this cell.
restart_required = False

# Databricks Feature Engineering client
try:
    import databricks.feature_engineering
except ImportError:
    %pip install databricks-feature-engineering
    restart_required = True

# XGBoost
try:
    import xgboost
except ImportError:
    %pip install xgboost
    restart_required = True

# CatBoost
try:
    import catboost
except ImportError:
    %pip install catboost
    restart_required = True

# LightGBM
try:
    import lightgbm
except ImportError:
    %pip install lightgbm
    restart_required = True


# Restart only when at least one install ran; otherwise just report success.
if restart_required:
    dbutils.library.restartPython()
else:
    print("✅ All required packages are already installed")


In [0]:
# ✅ Imports
import pandas as pd
from databricks.feature_engineering import FeatureEngineeringClient, FeatureLookup
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from lightgbm import LGBMClassifier
from sklearn.ensemble import GradientBoostingClassifier
from catboost import CatBoostClassifier
from xgboost import XGBClassifier
import mlflow
import mlflow.sklearn
import warnings


In [0]:
# Silence every Python warning for the rest of the notebook session
warnings.filterwarnings("ignore")

# ✅ Build the training data: labels from CSV, features from the feature store
label_col = "IsDefault"   # target column
key_col = "LoanID"        # key used to look up features for each label row

fe = FeatureEngineeringClient()

# Labels: read the CSV, keep only key + label, and drop rows without a label
file_path = "file:/Workspace/Users/yasodhashree91@gmail.com/oms-databricks/04_AI_ML/model_evaluation/classification/data_files/housing_loan.csv"
raw_loans_df = spark.read.csv(file_path, header=True, inferSchema=True)
labels_df = (
    raw_loans_df
    .select(key_col, label_col)
    .dropna(subset=[label_col])
)

# Features: a single lookup against the loan feature table, matched on the key
loan_feature_lookup = FeatureLookup(
    table_name="realestate.ml.loan_features",
    lookup_key=key_col,
)
feature_lookups = [loan_feature_lookup]

# Combine labels and looked-up features; the key itself is excluded from the result
training_set = fe.create_training_set(
    df=labels_df,
    feature_lookups=feature_lookups,
    label=label_col,
    exclude_columns=[key_col],
)

# Materialise as pandas and separate features (X) from the label (y)
data = training_set.load_df().toPandas()
y = data[label_col]
X = data.drop(columns=[label_col])

In [0]:
# ✅ Split Features (X) and Label (y) into training and testing sets (80% train, 20% test)
# A single seed is shared by the split and by the scikit-learn estimators so that
# a fresh "Run All" reproduces the same split and the same fitted models.
RANDOM_STATE = 42

# stratify=y keeps the class ratio of the label the same in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE, stratify=y
)


# ✅ Model setup and configurations
# Display name -> unfitted estimator. The display name is reused for the MLflow
# run name and (lower-cased, spaces replaced by underscores) for the artifact path.
models = {
    "Random Forest": RandomForestClassifier(n_estimators=50, max_depth=8, class_weight='balanced', n_jobs=-1, random_state=RANDOM_STATE),
    # random_state added: without it scikit-learn draws a fresh seed on every fit.
    "Gradient Boosting": GradientBoostingClassifier(n_estimators=100, max_depth=6, random_state=RANDOM_STATE),
    "CatBoost": CatBoostClassifier(iterations=100, depth=6, verbose=False, allow_writing_files=False),
    # NOTE(review): `use_label_encoder` is deprecated in recent XGBoost releases —
    # confirm against the installed version before removing it.
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='logloss'),
    "KNN": KNeighborsClassifier(n_neighbors=3, weights='distance', n_jobs=-1),
    "LightGBM": LGBMClassifier(n_estimators=100, max_depth=6, verbosity=-1),
}

# ✅ Training + evaluation
# Each model gets its own MLflow run: fit on the training split, score on the
# held-out split, print and log the metrics, then log the model through the
# feature engineering client together with the training set it was built from.
for model_name, model_instance in models.items():
    with mlflow.start_run(run_name=f"classifier_{model_name}"):
        model_instance.fit(X_train, y_train)

        predictions = model_instance.predict(X_test)

        # zero_division=0 reports 0 instead of warning when a model predicts no positives.
        accuracy = accuracy_score(y_test, predictions)
        precision = precision_score(y_test, predictions, zero_division=0)
        recall = recall_score(y_test, predictions, zero_division=0)
        f1 = f1_score(y_test, predictions, zero_division=0)
        conf_matrix = confusion_matrix(y_test, predictions)

        # Record the scores on the run so models can be compared in the MLflow UI,
        # not only in this cell's printed output.
        mlflow.log_metrics(
            {
                "accuracy": float(accuracy),
                "precision": float(precision),
                "recall": float(recall),
                "f1_score": float(f1),
            }
        )

        print(f"Model: {model_name}")
        print(f"Accuracy     : {accuracy:.4f}")
        print(f"Precision    : {precision:.4f}")
        print(f"Recall       : {recall:.4f}")
        print(f"F1 Score     : {f1:.4f}")
        print("Confusion Matrix:")
        print(conf_matrix)
        print("=" * 80)

        # Log model
        # NOTE(review): every model in the loop is registered under the same
        # registered model name, so the most recent registration is simply the
        # last entry of `models`, not necessarily the best-scoring one — confirm
        # this is intended.
        fe.log_model(
            model=model_instance,
            artifact_path=model_name.replace(" ", "_").lower(),
            flavor=mlflow.sklearn,
            training_set=training_set,
            registered_model_name="realestate.ml.loan_default_classifier"
        )

### 📊 Classification Metrics Overview

| **Metric**           | **Good When** | **What It Tells You**                                        |
|----------------------|---------------|---------------------------------------------------------------|
| **Accuracy**         | High          | Overall correctness (can be misleading when classes are imbalanced) |
| **Precision**        | High          | Model is good at avoiding false positives                     |
| **Recall (Sensitivity)** | High      | Model is good at catching actual positives (few false negatives) |
| **F1 Score**         | High          | Balance between precision and recall                          |
| **Confusion Matrix** | —             | Exact count of True Positives, False Positives, True Negatives, and False Negatives |


### ✅ Understanding Confusion Matrix

For binary classification, scikit-learn's `confusion_matrix` returns the counts in this layout (rows = actual class, columns = predicted class):  
`[[TN FP]`  
&nbsp;&nbsp;`[FN TP]]`  

Example:  
`[[24068    23]`  
&nbsp;&nbsp;`[   45  1090]]`



This means:  
- **24068** → True Negatives (TN)  
- **23**   → False Positives (FP)  
- **45**  → False Negatives (FN)  
- **1090**   → True Positives (TP)  

| **Term**            | **What It Means**                                                                                      | **Example (Loan Default Prediction)**                            |
|---------------------|------------------------------------------------------------------------------------------------------|------------------------------------------------------------------|
| **True Positive (TP)**  | Number of times model predicted **Yes**, and it was actually **Yes**                                 | Model said a person will default, and they **did**.               |
| **False Positive (FP)** | Number of times model predicted **Yes**, but it was actually **No**                                  | Model said a person will default, but they **didn't** (False alarm). |
| **True Negative (TN)**  | Number of times model predicted **No**, and it was actually **No**                                   | Model said a person won't default, and they **didn't**.           |
| **False Negative (FN)** | Number of times model predicted **No**, but it was actually **Yes**                                  | Model said a person won't default, but they **did** (Missed case). |


> 📌 Think of TRUE values (True Positives and True Negatives) as correct predictions, and FALSE values (False Positives and False Negatives) as incorrect predictions. A model with many True Positives and True Negatives is likely performing well, while many False Positives and False Negatives suggest poor performance.
