# Modeling and interpretability

In [1]:
from sklearn.compose import ColumnTransformer 
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import QuantileTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.metrics import accuracy_score, classification_report
from imblearn.pipeline import Pipeline as ImbPipeline
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns 
import numpy as np
import plotly.express as px

In [2]:

df = pd.read_csv("customer_booking.csv", encoding="ISO-8859-1").copy()

# Lead bins (create as 'lead_bin' to match cat_cols)
lead_bins = [-np.inf, 3, 7, 14, 30, 90, np.inf]
lead_bins_label = ['0-3','4-7','8-14','15-30','31-90','90+']
df['lead_bin'] = pd.cut(df['purchase_lead'].clip(lower=0),
                        bins=lead_bins, labels=lead_bins_label, include_lowest=True)

# Length of stay bins (already named 'stay_bin')
stay_bins = [-np.inf, 3, 7, 14, 30, 90, np.inf]
stay_bins_label = ['0-3','4-7','8-14','15-30','31-90','90+']
df['stay_bin'] = pd.cut(df['length_of_stay'].clip(lower=0),
                        bins=stay_bins, labels=stay_bins_label, include_lowest=True)

# Part of day (consistent label case helps later one-hot)
def day_part(h):
    if 0 <= h <= 5:   return 'Night'
    if 6 <= h <= 11:  return 'Morning'
    if 12 <= h <= 17: return 'Afternoon'
    return 'Evening'
df['daypart'] = df['flight_hour'].apply(day_part)

target = 'booking_complete'

cat_cols = [
    'lead_bin','stay_bin','daypart','sales_channel','trip_type','flight_day',
    'route','booking_origin','wants_extra_baggage','wants_preferred_seat','wants_in_flight_meals'
]
num_cols = ['purchase_lead','length_of_stay','flight_hour','flight_duration','num_passengers']


# Optional safety: strip spaces and check existence
df.columns = df.columns.str.strip()

missing = [c for c in cat_cols + num_cols if c not in df.columns]
if missing:
    raise KeyError(f"Missing columns: {missing}")

X = df[cat_cols + num_cols]
y = df[target].astype(int)


## train/test(stratified)

In [3]:

X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42,stratify=y)



In [4]:

from imblearn.over_sampling import RandomOverSampler,SMOTE
ros=RandomOverSampler(random_state=42)
X_resampled,y_resampled=ros.fit_resample(X_train,y_train)
print(y_resampled.value_counts())

booking_complete
0    34018
1    34018
Name: count, dtype: int64


##  encoding

In [5]:
preprocessor=ColumnTransformer(
    transformers=[
        ("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols),
        ("num",QuantileTransformer(), num_cols)
    ]
)
X=df.drop(columns=[target])
y=df['booking_complete'].astype(int)




## Baseline: Logistic Regression

In [6]:
model=LogisticRegression(max_iter=1000,random_state=42,solver='saga')
pipe=Pipeline(
    [
        ("preprocessor", preprocessor),
        ("model", model)
    ]
)
mod=GridSearchCV(
    estimator=pipe,
    param_grid={
        'model__max_iter':[100,200,300,400,500,600,700,800,900,1000,2000],
        'model__solver':['saga','liblinear']
    },
    cv=5,
    scoring='accuracy',
    n_jobs=-1
)
mod.fit(X_resampled,y_resampled)
y_pred=mod.predict(X_test)
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.94      0.70      0.80      8504
           1       0.31      0.75      0.44      1496

    accuracy                           0.71     10000
   macro avg       0.62      0.73      0.62     10000
weighted avg       0.85      0.71      0.75     10000



In [7]:
pd.DataFrame(mod.cv_results_).sort_values(by='rank_test_score')

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_model__max_iter,param_model__solver,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
14,3.928021,0.43485,0.108118,0.006623,800,saga,"{'model__max_iter': 800, 'model__solver': 'saga'}",0.745811,0.740869,0.746381,0.743441,0.748071,0.744914,0.002508,1
8,4.070715,0.241546,0.102596,0.014276,500,saga,"{'model__max_iter': 500, 'model__solver': 'saga'}",0.745517,0.741163,0.746307,0.743514,0.747924,0.744885,0.002341,2
17,2.052243,0.290384,0.098102,0.010194,900,liblinear,"{'model__max_iter': 900, 'model__solver': 'lib...",0.745444,0.741089,0.746234,0.743514,0.747997,0.744856,0.002371,3
9,2.014789,0.21709,0.10998,0.022735,500,liblinear,"{'model__max_iter': 500, 'model__solver': 'lib...",0.745664,0.741016,0.746234,0.743735,0.747556,0.744841,0.002274,4
11,2.089401,0.314176,0.108895,0.024393,600,liblinear,"{'model__max_iter': 600, 'model__solver': 'lib...",0.745444,0.740869,0.746307,0.743588,0.747924,0.744826,0.002423,5
20,3.7513,0.366411,0.106542,0.016443,2000,saga,"{'model__max_iter': 2000, 'model__solver': 'sa...",0.745591,0.740942,0.746234,0.743441,0.747924,0.744826,0.002415,6
4,4.141758,0.35028,0.116004,0.016138,300,saga,"{'model__max_iter': 300, 'model__solver': 'saga'}",0.745738,0.740795,0.746234,0.743441,0.747924,0.744826,0.002473,7
3,2.276183,0.320937,0.106376,0.007692,200,liblinear,"{'model__max_iter': 200, 'model__solver': 'lib...",0.745517,0.740869,0.746381,0.743514,0.747703,0.744797,0.00239,8
5,2.158369,0.304533,0.112927,0.012365,300,liblinear,"{'model__max_iter': 300, 'model__solver': 'lib...",0.745517,0.740648,0.746307,0.743588,0.747924,0.744797,0.0025,8
19,1.926031,0.233702,0.095719,0.008485,1000,liblinear,"{'model__max_iter': 1000, 'model__solver': 'li...",0.745517,0.740722,0.746234,0.743588,0.747924,0.744797,0.002467,8


In [8]:
pipe.get_params()

{'memory': None,
 'steps': [('preprocessor',
   ColumnTransformer(transformers=[('cat', OneHotEncoder(handle_unknown='ignore'),
                                    ['lead_bin', 'stay_bin', 'daypart',
                                     'sales_channel', 'trip_type', 'flight_day',
                                     'route', 'booking_origin',
                                     'wants_extra_baggage', 'wants_preferred_seat',
                                     'wants_in_flight_meals']),
                                   ('num', QuantileTransformer(),
                                    ['purchase_lead', 'length_of_stay',
                                     'flight_hour', 'flight_duration',
                                     'num_passengers'])])),
  ('model',
   LogisticRegression(max_iter=1000, random_state=42, solver='saga'))],
 'transform_input': None,
 'verbose': False,
 'preprocessor': ColumnTransformer(transformers=[('cat', OneHotEncoder(handle_unknown='ignore'),
            

In [9]:
mod.best_params_

{'model__max_iter': 800, 'model__solver': 'saga'}

In [10]:
mod.best_estimator_

0,1,2
,steps,"[('preprocessor', ...), ('model', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,transformers,"[('cat', ...), ('num', ...)]"
,remainder,'drop'
,sparse_threshold,0.3
,n_jobs,
,transformer_weights,
,verbose,False
,verbose_feature_names_out,True
,force_int_remainder_cols,'deprecated'

0,1,2
,categories,'auto'
,drop,
,sparse_output,True
,dtype,<class 'numpy.float64'>
,handle_unknown,'ignore'
,min_frequency,
,max_categories,
,feature_name_combiner,'concat'

0,1,2
,n_quantiles,1000
,output_distribution,'uniform'
,ignore_implicit_zeros,False
,subsample,10000
,random_state,
,copy,True

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,42
,solver,'saga'
,max_iter,800


In [11]:
from sklearn.metrics import roc_auc_score, f1_score, confusion_matrix

y_proba = mod.predict_proba(X_test)[:,1]
print("ROC-AUC:", roc_auc_score(y_test, y_proba))
print("F1:", f1_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

ROC-AUC: 0.7833956951997424
F1: 0.4353055286129971
Confusion Matrix:
 [[5967 2537]
 [ 374 1122]]


## Stronger Model: Random Forest/XGBoost

In [12]:
pipe.get_params()

{'memory': None,
 'steps': [('preprocessor',
   ColumnTransformer(transformers=[('cat', OneHotEncoder(handle_unknown='ignore'),
                                    ['lead_bin', 'stay_bin', 'daypart',
                                     'sales_channel', 'trip_type', 'flight_day',
                                     'route', 'booking_origin',
                                     'wants_extra_baggage', 'wants_preferred_seat',
                                     'wants_in_flight_meals']),
                                   ('num', QuantileTransformer(),
                                    ['purchase_lead', 'length_of_stay',
                                     'flight_hour', 'flight_duration',
                                     'num_passengers'])])),
  ('model',
   LogisticRegression(max_iter=1000, random_state=42, solver='saga'))],
 'transform_input': None,
 'verbose': False,
 'preprocessor': ColumnTransformer(transformers=[('cat', OneHotEncoder(handle_unknown='ignore'),
            

In [13]:

rf=RandomForestClassifier(
    random_state=42
)
rf_pipe=ImbPipeline( steps=
   [
       ("preprocessor",preprocessor),
       ("sampler",SMOTE()),
       ("model",rf)
   ]
)
param_grid = {
    'model__n_estimators': [100, 200],
    'model__max_depth': [10,  20],
    'model__min_samples_split': [2,5],
    'model__max_features': ['sqrt', 'log2'],
    'model__criterion': ['gini']
}

from sklearn.model_selection import RandomizedSearchCV

# More efficient alternative
mod = RandomizedSearchCV(
    estimator=rf_pipe,
    param_distributions=param_grid,  # Same param_grid
    n_iter=16,  
    cv=3,
    scoring='f1',
    n_jobs=-1,
    random_state=42
)
mod.fit(X_train,y_train)
y_pred=mod.predict(X_test)
print(classification_report(y_test,y_pred))


              precision    recall  f1-score   support

           0       0.92      0.75      0.83      8504
           1       0.31      0.64      0.42      1496

    accuracy                           0.73     10000
   macro avg       0.62      0.69      0.62     10000
weighted avg       0.83      0.73      0.77     10000



In [14]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import make_scorer,roc_auc_score,precision_score, recall_score
model=RandomForestClassifier(
    class_weight={0:1,1:2},random_state=42
)
pipe=Pipeline( 
   [
       ("preprocessor",preprocessor),
       ("model",model)
   ]
)
coarse_grid={
    'model__n_estimators': [50, 100, 200],
    'model__max_depth': [10, 20],
    'model__max_features':['sqrt','log2'],
    'model__class_weight':[{0:1,1:x} for x in np.linspace(1,20,1)]
    
}
coarse_model=RandomizedSearchCV(
    estimator=pipe,
    param_distributions=coarse_grid,
    
    n_iter=10,
    cv=2,

    verbose=1,
    scoring='roc_auc',
    refit='f1_score',
    n_jobs=-1,
    random_state=42
)
coarse_model.fit(X_resampled,y_resampled)
y_pred=coarse_model.predict(X_test)
print(classification_report(y_test,y_pred))


Fitting 2 folds for each of 10 candidates, totalling 20 fits
              precision    recall  f1-score   support

           0       0.93      0.74      0.82      8504
           1       0.32      0.71      0.44      1496

    accuracy                           0.73     10000
   macro avg       0.63      0.72      0.63     10000
weighted avg       0.84      0.73      0.77     10000



In [15]:
from sklearn.ensemble import RandomForestClassifier
from imblearn.pipeline import Pipeline
from imblearn.over_sampling import SMOTE, RandomOverSampler

# Sampler (you can switch between SMOTE or ROS)
sampler = SMOTE(random_state=42)

# Model
rf = RandomForestClassifier(
    random_state=42,
    class_weight="balanced_subsample"  # helps with imbalance inside trees
)

# Pipeline
rf_pipe = Pipeline(steps=[
    ("preprocessor", preprocessor),
    ("sampler", sampler),   # resample only on training folds
    ("model", rf)
])

# Param grid for search
rf_params = {
    "model__n_estimators": [100, 200],
    "model__max_depth": [10, 20, None],
    "model__max_features": ["sqrt", "log2"]
}

from sklearn.model_selection import RandomizedSearchCV

rf_search = RandomizedSearchCV(
    estimator=rf_pipe,
    param_distributions=rf_params,
    n_iter=10,  # random subset of configs
    cv=3,
    scoring="f1",   # better for imbalance than accuracy
    n_jobs=-1,
    random_state=42
)

rf_search.fit(X_train, y_train)


0,1,2
,estimator,Pipeline(step...m_state=42))])
,param_distributions,"{'model__max_depth': [10, 20, ...], 'model__max_features': ['sqrt', 'log2'], 'model__n_estimators': [100, 200]}"
,n_iter,10
,scoring,'f1'
,n_jobs,-1
,refit,True
,cv,3
,verbose,0
,pre_dispatch,'2*n_jobs'
,random_state,42

0,1,2
,transformers,"[('cat', ...), ('num', ...)]"
,remainder,'drop'
,sparse_threshold,0.3
,n_jobs,
,transformer_weights,
,verbose,False
,verbose_feature_names_out,True
,force_int_remainder_cols,'deprecated'

0,1,2
,categories,'auto'
,drop,
,sparse_output,True
,dtype,<class 'numpy.float64'>
,handle_unknown,'ignore'
,min_frequency,
,max_categories,
,feature_name_combiner,'concat'

0,1,2
,n_quantiles,1000
,output_distribution,'uniform'
,ignore_implicit_zeros,False
,subsample,10000
,random_state,
,copy,True

0,1,2
,sampling_strategy,'auto'
,random_state,42
,k_neighbors,5

0,1,2
,n_estimators,200
,criterion,'gini'
,max_depth,10
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [16]:
pd.DataFrame(coarse_model.cv_results_).sort_values(by='rank_test_score')

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_model__n_estimators,param_model__max_features,param_model__max_depth,param_model__class_weight,params,split0_test_score,split1_test_score,mean_test_score,std_test_score,rank_test_score
3,31.655366,0.044095,1.201481,4.9e-05,200,sqrt,20,"{0: 1, 1: 1.0}","{'model__n_estimators': 200, 'model__max_featu...",0.862857,0.871199,0.867028,0.004171,1
9,14.881245,0.135143,0.717639,0.000999,100,sqrt,20,"{0: 1, 1: 1.0}","{'model__n_estimators': 100, 'model__max_featu...",0.861684,0.869382,0.865533,0.003849,2
0,7.750992,0.238928,0.807643,0.00805,100,log2,20,"{0: 1, 1: 1.0}","{'model__n_estimators': 100, 'model__max_featu...",0.852905,0.85436,0.853632,0.000728,3
7,13.994172,0.306039,1.221684,0.013987,200,log2,20,"{0: 1, 1: 1.0}","{'model__n_estimators': 200, 'model__max_featu...",0.85087,0.853511,0.85219,0.001321,4
1,4.194893,0.045122,0.525506,0.006288,50,log2,20,"{0: 1, 1: 1.0}","{'model__n_estimators': 50, 'model__max_featur...",0.848167,0.845295,0.846731,0.001436,5
8,2.913774,0.08037,0.666214,0.035254,100,log2,10,"{0: 1, 1: 1.0}","{'model__n_estimators': 100, 'model__max_featu...",0.792132,0.795238,0.793685,0.001553,6
4,5.469851,0.171538,1.283536,0.074018,200,log2,10,"{0: 1, 1: 1.0}","{'model__n_estimators': 200, 'model__max_featu...",0.793824,0.793208,0.793516,0.000308,7
5,8.621422,0.307433,1.166748,0.003604,200,sqrt,10,"{0: 1, 1: 1.0}","{'model__n_estimators': 200, 'model__max_featu...",0.784981,0.78892,0.78695,0.001969,8
6,4.737646,0.03335,0.691757,0.063108,100,sqrt,10,"{0: 1, 1: 1.0}","{'model__n_estimators': 100, 'model__max_featu...",0.782515,0.788772,0.785643,0.003128,9
2,2.594317,0.232683,0.564181,0.029487,50,sqrt,10,"{0: 1, 1: 1.0}","{'model__n_estimators': 50, 'model__max_featur...",0.780823,0.786887,0.783855,0.003032,10


In [17]:
mod.best_params_

{'model__n_estimators': 200,
 'model__min_samples_split': 5,
 'model__max_features': 'log2',
 'model__max_depth': 10,
 'model__criterion': 'gini'}

In [18]:
mod.best_estimator_

0,1,2
,steps,"[('preprocessor', ...), ('sampler', ...), ...]"
,transform_input,
,memory,
,verbose,False

0,1,2
,transformers,"[('cat', ...), ('num', ...)]"
,remainder,'drop'
,sparse_threshold,0.3
,n_jobs,
,transformer_weights,
,verbose,False
,verbose_feature_names_out,True
,force_int_remainder_cols,'deprecated'

0,1,2
,categories,'auto'
,drop,
,sparse_output,True
,dtype,<class 'numpy.float64'>
,handle_unknown,'ignore'
,min_frequency,
,max_categories,
,feature_name_combiner,'concat'

0,1,2
,n_quantiles,1000
,output_distribution,'uniform'
,ignore_implicit_zeros,False
,subsample,10000
,random_state,
,copy,True

0,1,2
,sampling_strategy,'auto'
,random_state,
,k_neighbors,5

0,1,2
,n_estimators,200
,criterion,'gini'
,max_depth,10
,min_samples_split,5
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'log2'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [19]:
pd.DataFrame(mod.cv_results_).sort_values(by='rank_test_score')

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_model__n_estimators,param_model__min_samples_split,param_model__max_features,param_model__max_depth,param_model__criterion,params,split0_test_score,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score
7,8.081139,0.375015,0.599749,0.105606,200,5,log2,10,gini,"{'model__n_estimators': 200, 'model__min_sampl...",0.41488,0.406871,0.401798,0.40785,0.005386,1
5,9.393741,0.27554,0.493131,0.01556,200,2,log2,10,gini,"{'model__n_estimators': 200, 'model__min_sampl...",0.409816,0.407368,0.402044,0.406409,0.003245,2
3,17.324061,0.546618,0.48448,0.017068,200,5,sqrt,10,gini,"{'model__n_estimators': 200, 'model__min_sampl...",0.414905,0.39986,0.400221,0.404995,0.007009,3
1,16.962746,0.313989,0.501728,0.002738,200,2,sqrt,10,gini,"{'model__n_estimators': 200, 'model__min_sampl...",0.412429,0.403367,0.397711,0.404503,0.006062,4
14,11.739645,0.057632,0.387739,0.01607,100,5,log2,20,gini,"{'model__n_estimators': 100, 'model__min_sampl...",0.408124,0.405234,0.39833,0.403896,0.004109,5
15,15.774118,0.807962,0.503159,0.058811,200,5,log2,20,gini,"{'model__n_estimators': 200, 'model__min_sampl...",0.405318,0.403838,0.4024,0.403852,0.001191,6
4,5.372017,0.140344,0.298893,0.02046,100,2,log2,10,gini,"{'model__n_estimators': 100, 'model__min_sampl...",0.400068,0.408948,0.401927,0.403648,0.003824,7
12,12.96193,0.72054,0.345928,0.015849,100,2,log2,20,gini,"{'model__n_estimators': 100, 'model__min_sampl...",0.407729,0.399198,0.403119,0.403349,0.003487,8
6,4.963914,0.053931,0.27825,0.012054,100,5,log2,10,gini,"{'model__n_estimators': 100, 'model__min_sampl...",0.402985,0.403679,0.402332,0.402999,0.00055,9
2,9.088432,0.572576,0.357001,0.070977,100,5,sqrt,10,gini,"{'model__n_estimators': 100, 'model__min_sampl...",0.406097,0.40007,0.401895,0.402687,0.002523,10


In [None]:
from sklearn.experimental import enable_halving_search_cv
from sklearn.model_selection import HalvingGridSearchCV
model=RandomForestClassifier()
pipe=Pipeline( 
   [
       ("preprocessor",preprocessor),
       ("model",model)
   ]
)
param_grid = {
    'model__n_estimators': [100, 200],
    'model__max_depth': [None,  20],
    'model__min_samples_split': [2, 10],
    'model__max_features': ['sqrt', 'log2', None],
    'model__criterion': ['gini', 'entropy']
}

halving_search = HalvingGridSearchCV(
    estimator=pipe,
    param_grid=param_grid,
    cv=5,
    scoring='f1',
    n_jobs=-1,
    random_state=42,
    factor=2  # Halves candidates each round
)
halving_search.fit(X_train,y_train)
y_pred=halving_search.predict(X_test)
print(classification_report(y_test,y_pred))


In [None]:
pd.DataFrame(halving_search.cv_results_).sort_values(by='rank_test_score')

Unnamed: 0,iter,n_resources,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_model__criterion,param_model__max_depth,param_model__max_features,param_model__min_samples_split,...,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,split3_train_score,split4_train_score,mean_train_score,std_train_score
18,0,1250,0.667906,0.064930,0.047168,0.006058,gini,20,log2,10,...,0.8592,0.017417,1,0.8370,0.8350,0.857,0.8640,0.8560,0.8498,0.011617
19,0,1250,1.154541,0.074112,0.068070,0.012918,gini,20,log2,10,...,0.8592,0.017417,1,0.8370,0.8340,0.855,0.8640,0.8540,0.8488,0.011444
43,0,1250,1.132383,0.053901,0.062557,0.003049,entropy,20,log2,10,...,0.8592,0.017417,1,0.8370,0.8350,0.855,0.8640,0.8550,0.8492,0.011285
42,0,1250,0.590408,0.035077,0.045740,0.003126,entropy,20,log2,10,...,0.8592,0.017417,1,0.8380,0.8350,0.856,0.8660,0.8560,0.8502,0.011805
17,0,1250,1.438332,0.070335,0.067300,0.007412,gini,20,log2,2,...,0.8584,0.017817,5,0.8770,0.8610,0.889,0.9060,0.8960,0.8858,0.015587
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48,1,2500,8.730350,0.432294,0.067252,0.017294,entropy,,,2,...,0.8420,0.018719,90,1.0000,1.0000,1.000,1.0000,1.0000,1.0000,0.000000
60,1,2500,13.734163,0.353122,0.079998,0.013221,entropy,20,,10,...,0.8416,0.018304,92,0.9375,0.9255,0.915,0.9205,0.9245,0.9246,0.007432
11,0,1250,8.884584,0.898478,0.064576,0.009728,gini,,,10,...,0.8384,0.026605,93,0.9330,0.9400,0.935,0.9530,0.9460,0.9414,0.007338
9,0,1250,9.528269,0.625291,0.058470,0.004707,gini,,,2,...,0.8344,0.027317,94,1.0000,1.0000,1.000,1.0000,1.0000,1.0000,0.000000


## Threshold Tuning for F1

## Feature Importance (LR + Tree)

## Top 5 Drivers + 1-line interpretations

## Business Actions

## Model Card (short)