# Practical Task for Ensemble Algorithms

Dataset: **german_data_creditcard.csv**

Note: The dataset is imbalanced, so I report **ROC-AUC**, **PR-AUC**, and **F1** instead of accuracy.

In [84]:
# Imports
import os
import pandas as pd

from sklearn.model_selection import train_test_split, GridSearchCV, RepeatedStratifiedKFold, cross_val_score
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import (
    classification_report,
    confusion_matrix,
    roc_auc_score,
    average_precision_score,
    f1_score,
 )

from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier

In [87]:
# Read the credit dataset from the working directory and preview the first rows
df = pd.read_csv(filepath_or_buffer="german_data_creditcard.csv")
df.head(n=5)

Unnamed: 0,Creditability,Acc_Bal_bin,Pay_Status_bin,Value_SavStock_bin,Length_Emp_bin,S&M_Status_bin,No_of_Credits_bin,Co_Credits_bin,Purpose_bin,Telephone_bin,Instalment_bin,Apartment_bin,Most_Val_Asset_bin,Duration_Credit,Credit_Amount,Age_bin
0,1,a. no account,c. no problem,"a. none, < 100 DM","a. Unemp, < 1 yr","a. M div, M single",a. 1,b. None,"a. used car, others",a. No,a. < 20,a. owner-occ / free,b. Savings / Car / Other,18,1049,a < 25 yr
1,1,a. no account,c. no problem,"a. none, < 100 DM",b. [1-4) years,"b. M mar, M wid, F",b. > 1,b. None,"a. used car, others",a. No,c. [25-35),a. owner-occ / free,c. No assets,9,2799,b. >= 25 yr
2,1,b. no balance,b. paid up,"a. none, < 100 DM",c. above 4 years,"a. M div, M single",a. 1,b. None,"a. used car, others",a. No,c. [25-35),a. owner-occ / free,c. No assets,12,841,a < 25 yr
3,1,a. no account,c. no problem,"a. none, < 100 DM",b. [1-4) years,"b. M mar, M wid, F",b. > 1,b. None,"a. used car, others",a. No,b. [20-25),a. owner-occ / free,c. No assets,12,2122,b. >= 25 yr
4,1,a. no account,c. no problem,"a. none, < 100 DM",b. [1-4) years,"b. M mar, M wid, F",b. > 1,a. Other Banks or Dept Stores,"a. used car, others",a. No,a. < 20,b. rented flat,b. Savings / Car / Other,12,2171,b. >= 25 yr


In [93]:
# Separate the label column from the predictors
target_col = "Creditability"
y = df[target_col]
X = df.drop(columns=target_col)

# Hold out 20% of the rows for testing; stratify on y so both parts keep
# the same class proportions, and fix the seed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.2,
    random_state=42,
)

print(X_train.shape, X_test.shape)

(800, 15) (200, 15)


In [89]:
# Metric printing helper
def report_metrics(y_true, y_pred, y_proba):
    """Print ranking and classification metrics for one set of predictions.

    Parameters
    ----------
    y_true : true labels.
    y_pred : predicted labels; used for F1, the confusion matrix and the
        classification report.
    y_proba : continuous scores; used for ROC-AUC and PR-AUC
        (e.g. ``model.predict_proba(X)[:, 1]``).

    Returns
    -------
    None. Everything is written to stdout.
    """
    # (label, metric function, estimate it needs) - evaluated lazily, in order,
    # so each metric is computed immediately before it is printed.
    metric_table = (
        ("ROC-AUC:", roc_auc_score, y_proba),
        ("PR-AUC:", average_precision_score, y_proba),
        ("F1:", f1_score, y_pred),
        ("Confusion Matrix:\n", confusion_matrix, y_pred),
        ("Classification Report:\n", classification_report, y_pred),
    )
    for label, metric_fn, estimate in metric_table:
        print(label, metric_fn(y_true, estimate))

## Task 1. Report the best hyperparameters used to run the Random Forest algorithm. Explain your results.

In [90]:
# Preprocess
# Pick the numeric columns explicitly and treat every other column as
# categorical. Selecting/excluding by "object" dtype emits a deprecation
# warning on pandas 3 (string columns no longer use the object dtype), while
# "number" selects the same columns on pandas 2 and pandas 3.
num_cols = X.select_dtypes(include="number").columns
cat_cols = X.columns.drop(num_cols)  # keeps the original column order

preprocess = ColumnTransformer(
    transformers=[
        # handle_unknown="ignore": categories unseen during fit encode as all zeros
        ("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols),
        ("num", StandardScaler(), num_cols),
    ]
)

# Fit the preprocessing on the training split only, then reuse it for the test
# split. These two matrices are also used by the boosting models further down.
X_train_prep = preprocess.fit_transform(X_train, y_train)
X_test_prep = preprocess.transform(X_test)

# Random Forest + GridSearchCV
rf = RandomForestClassifier(random_state=42, n_jobs=-1)

# 2 * 3 * 2 * 2 * 2 * 2 = 96 candidate settings
params = {
    'n_estimators': [200, 400],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5],
    'min_samples_leaf': [1, 2],
    'max_features': ['sqrt', 'log2'],
    'class_weight': [None, 'balanced'],
}

# 5-fold cross-validation, selecting the candidate with the best mean ROC-AUC
grid = GridSearchCV(
    rf,
    param_grid=params,
    cv=5,
    n_jobs=-1,
    scoring='roc_auc',
    verbose=1
)

grid.fit(X_train_prep, y_train)

print("Best Parameters:", grid.best_params_)
print("Best CV ROC-AUC:", grid.best_score_)

# Evaluate on test set
best_rf = grid.best_estimator_
rf_pred = best_rf.predict(X_test_prep)
# Column 1 of predict_proba is the score for the second class in classes_
rf_proba = best_rf.predict_proba(X_test_prep)[:, 1]

print("Test ROC-AUC:", roc_auc_score(y_test, rf_proba))
print("Test PR-AUC:", average_precision_score(y_test, rf_proba))
print("Test F1:", f1_score(y_test, rf_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, rf_pred))
print("Classification Report:\n", classification_report(y_test, rf_pred))

See https://pandas.pydata.org/docs/user_guide/migration-3-strings.html#string-migration-select-dtypes for details on how to write code that works with pandas 2 and 3.
  cat_cols = X.select_dtypes(include=["object"]).columns


Fitting 5 folds for each of 96 candidates, totalling 480 fits
Best Parameters: {'class_weight': 'balanced', 'max_depth': 10, 'max_features': 'sqrt', 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 200}
Best CV ROC-AUC: 0.7791059280855199
Test ROC-AUC: 0.7588359788359788
Test PR-AUC: 0.8499969713002635
Test F1: 0.8337236533957846
Confusion Matrix:
 [[ 51  39]
 [ 32 178]]
Classification Report:
               precision    recall  f1-score   support

           0       0.61      0.57      0.59        90
           1       0.82      0.85      0.83       210

    accuracy                           0.76       300
   macro avg       0.72      0.71      0.71       300
weighted avg       0.76      0.76      0.76       300



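#Best Parameters: {'class_weight': 'balanced', 'max_depth': 10, 'max_features': 'sqrt', 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 200}. `class_weight='balanced'` reweights the classes to handle the class imbalance, while `max_depth=10` and `min_samples_leaf=2` limit tree complexity and reduce overfitting.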

#Using ROC-AUC, PR-AUC and F1 is more appropriate than accuracy for this dataset because the dataset is imbalanced.

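#The confusion matrix shows a higher error rate for class 0 (39 of 90 misclassified, recall 0.57) than for class 1 (32 of 210 misclassified, recall 0.85), so the model is more sensitive to class 1.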

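#PR-AUC (0.850) and F1 (0.834) show strong performance for an imbalanced dataset. Note that both are computed for class 1, the majority class (210 of 300 test samples), so they should be read together with the weaker class-0 scores in the classification report.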

## Task 2. Run AdaBoost and Gradient Boosting (GB) on the given data and compare. Explain your results.


In [91]:
# Train and evaluate AdaBoost and Gradient Boosting
# (all estimator and metric names come from the import cell at the top of the
# notebook; they are not re-imported here)
def _fit_and_report(model, label):
    """Fit ``model`` on the preprocessed training data and print its test scores.

    Parameters
    ----------
    model : unfitted classifier exposing ``fit``, ``predict`` and ``predict_proba``.
    label : display name printed in the ``"<label> Results:"`` header.

    Returns
    -------
    (pred, proba) : predicted test labels and column 1 of ``predict_proba``
        on the test set.
    """
    model.fit(X_train_prep, y_train)
    pred = model.predict(X_test_prep)
    proba = model.predict_proba(X_test_prep)[:, 1]

    print(f"{label} Results:")
    print("ROC-AUC:", roc_auc_score(y_test, proba))
    print("PR-AUC:", average_precision_score(y_test, proba))
    print("F1:", f1_score(y_test, pred))
    return pred, proba


# Both models use default hyperparameters with a fixed seed for repeatability
ada = AdaBoostClassifier(random_state=42)
ada_pred, ada_proba = _fit_and_report(ada, "AdaBoost")

gb = GradientBoostingClassifier(random_state=42)
gb_pred, gb_proba = _fit_and_report(gb, "Gradient Boosting")

AdaBoost Results:
ROC-AUC: 0.7895238095238095
PR-AUC: 0.8874002523391424
F1: 0.849438202247191
Gradient Boosting Results:
ROC-AUC: 0.7778835978835978
PR-AUC: 0.8772232235324358
F1: 0.8493150684931506


In [92]:
# Compare results
# (pandas is already imported as pd in the import cell at the top of the notebook)

# Test-set scores for each boosting model
ada_roc = roc_auc_score(y_test, ada_proba)
ada_pr = average_precision_score(y_test, ada_proba)
ada_f1 = f1_score(y_test, ada_pred)

gb_roc = roc_auc_score(y_test, gb_proba)
gb_pr = average_precision_score(y_test, gb_proba)
gb_f1 = f1_score(y_test, gb_pred)

# One row per model, so each model's scores stay together and cannot be
# misaligned across separate per-column lists.
results = pd.DataFrame(
    [
        ('AdaBoost', ada_roc, ada_pr, ada_f1),
        ('Gradient Boosting', gb_roc, gb_pr, gb_f1),
    ],
    columns=['Model', 'ROC-AUC', 'PR-AUC', 'F1'],
)

print("Comparison Table:")
print(results.to_string(index=False))

Comparison Table:
            Model  ROC-AUC   PR-AUC       F1
         AdaBoost 0.789524 0.887400 0.849438
Gradient Boosting 0.777884 0.877223 0.849315


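#AdaBoost achieves higher ROC-AUC (0.790 vs 0.778) and PR-AUC (0.887 vs 0.877) than Gradient Boosting, while F1 is essentially tied (0.849 for both), so AdaBoost ranks cases slightly better on this imbalanced data.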

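#Both models give similar test scores. However, no cross-validation or training-set scores were computed for AdaBoost or Gradient Boosting in this notebook, so the stability of CV vs test scores and the absence of overfitting are not verified here.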

#AdaBoost is preferred for this credit-risk task, although its margin over Gradient Boosting is small.