In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix, classification_report
from sklearn.metrics import classification_report, roc_auc_score

In [12]:
# Shuffle every row (frac=1) with a fixed seed, then renumber the index from 0.
# NOTE(review): this rebinds the frame to a shuffled copy of itself, so running
# the cell a second time shuffles the already-shuffled rows again.
imported_credit_card_info = (
    imported_credit_card_info
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)
# Peek at the first rows to confirm the order is randomized; as a mid-cell
# expression this result is not rendered by the notebook.
imported_credit_card_info.head()

target_column = 'default payment next month'

# Lowercase `x` is deliberate, so it stays consistent with the lowercase `y`.
x = imported_credit_card_info.drop(columns=[target_column])  # features: every column except the target
y = imported_credit_card_info[target_column]  # target values: in default or not

# Hold out 20% of the rows for testing; the split is seeded so it repeats.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=43,
)

# I. 2nd Analysis, But With Boosting

### A. Boosting Tools

One boosting technique we can use is AdaBoost (name comes from "Adaptive Boosting").

###  B. Boosting Results
**90% recall** on deadbeats who are gonna default? That's wonderful, right? The problem is that we only had 28% precision, which means 72% of the customers we flagged were false alarms (we were crying wolf). Let's trial-and-error the threshold upwards until we get something with roughly 50% precision, and see how much recall we'd have to give up.

In [24]:
from sklearn.ensemble import AdaBoostClassifier

# Single source of truth for the cut-off: it drives both the predictions and
# the banner below, so the printed label cannot drift from the value used.
ada_threshold = 0.3

# Default AdaBoost, seeded so the fit is repeatable.
ada = AdaBoostClassifier(random_state=47)
ada.fit(x_train, y_train)

# Column 1 of predict_proba is the score for the second class (label 1 here).
y_proba = ada.predict_proba(x_test)[:, 1]
# Predict class 1 whenever the score reaches the cut-off.
y_pred = (y_proba >= ada_threshold).astype(int)

print(f"=== AdaBoost (threshold={ada_threshold}) ===")
# AUC is computed from the raw scores, so it does not depend on the cut-off.
print("AUC:", roc_auc_score(y_test, y_proba))
print(classification_report(y_test, y_pred))

=== AdaBoost (threshold=0.3) ===
AUC: 0.7664395307408565
              precision    recall  f1-score   support

           0       0.92      0.35      0.50      4685
           1       0.28      0.90      0.42      1315

    accuracy                           0.47      6000
   macro avg       0.60      0.62      0.46      6000
weighted avg       0.78      0.47      0.49      6000



###  C. Boosting Results After We Tone Down the Aggression

######  1. Recall at 56%
That is lower than our first go at AdaBoost (90% recall), but still stronger than what we concluded for Random Forest. Let's take a wait-and-see approach, i.e. do the stacking first.

######  2. Downplaying F1
The F1 metric is the harmonic mean of precision and recall: $F_1 = \dfrac{2 \cdot \text{precision} \cdot \text{recall}}{\text{precision} + \text{recall}}$. The score is punished by extremes, i.e. one of those metrics being really low. That may be important in other scenarios where there's a balance of priorities, but I'm arguing that failing to catch a deadbeat before they default (low recall) is more expensive than walking away from/irritating a happy customer (low precision).

In [25]:
# Reuses the `ada` model fitted in the previous AdaBoost cell. Re-creating and
# refitting it with the same seed on the same data only repeats that work
# (both runs print the same AUC); the only thing this cell changes is the
# cut-off.
ada_threshold = 0.385  # a bit higher/stricter than before, which was 0.30

# Column 1 of predict_proba is the score for the second class (label 1 here).
y_proba = ada.predict_proba(x_test)[:, 1]
# Predict class 1 whenever the score reaches the cut-off.
y_pred = (y_proba >= ada_threshold).astype(int)

print(f"=== AdaBoost (threshold={ada_threshold}) ===")
# AUC is computed from the raw scores, so it does not depend on the cut-off.
print("AUC:", roc_auc_score(y_test, y_proba))
print(classification_report(y_test, y_pred))

=== AdaBoost (threshold=0.385) ===
AUC: 0.7664395307408565
              precision    recall  f1-score   support

           0       0.87      0.84      0.86      4685
           1       0.50      0.56      0.53      1315

    accuracy                           0.78      6000
   macro avg       0.68      0.70      0.69      6000
weighted avg       0.79      0.78      0.78      6000



# I. 3rd Analysis, But With Stacking

### A. Stacking Tools
Now, I'm gonna put on my "coach" hat and see how 3 different players work independently of each other, and then with each other.

In [None]:
from sklearn.ensemble import StackingClassifier, RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, roc_auc_score

# Single source of truth for the cut-off: it drives both the predictions and
# the banner below, so the printed label cannot drift from the value used.
stack_threshold = 0.3

# Three seeded base models whose predictions feed a logistic-regression
# meta-model.
stack = StackingClassifier(
    estimators=[
        ('rf',  RandomForestClassifier(random_state=48, n_estimators=500)),
        ('gb',  GradientBoostingClassifier(random_state=48)),
        ('ada', AdaBoostClassifier(random_state=48)),
    ],
    # class_weight='balanced' re-weights the classes in the meta-model's loss.
    final_estimator=LogisticRegression(max_iter=1000, class_weight='balanced'),
    # passthrough=True: the meta-model sees the original features as well as
    # the base models' predictions.
    passthrough=True,
    # 5-fold cross-validated predictions are used to train the meta-model.
    cv=5,
    # Fit the base models in parallel on all available cores.
    n_jobs=-1
)

stack.fit(x_train, y_train)

# Column 1 of predict_proba is the score for the second class.
y_proba = stack.predict_proba(x_test)[:, 1]
# Predict class 1 whenever the score reaches the cut-off.
y_pred  = (y_proba >= stack_threshold).astype(int)

print(f"=== Stacking (threshold={stack_threshold}) ===")
# AUC is computed from the raw scores, so it does not depend on the cut-off.
print("AUC:", roc_auc_score(y_test, y_proba))
print(classification_report(y_test, y_pred))

###  B. Stacking Results
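> **TODO (review):** The stacking cell above has no recorded output, and the figures below are identical to the AdaBoost write-up at threshold 0.3. They appear to be carried over and should be replaced with the stacking numbers once that cell has been run.

**90% recall** on deadbeats who are gonna default? That's wonderful, right? Problem is we only had a 28% precision rate, which means we were crying wolf 72% of the time. Let's trial and error the threshold upwards to see if we get something with a 50% precision, and see how much recall we'd have to give up.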

# III. Actionable Advice
