# CreditCardApproval MachineLearning Diana Max

In [None]:
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.base import clone
from sklearn.compose import make_column_transformer
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import (
    LinearDiscriminantAnalysis,
    QuadraticDiscriminantAnalysis,
)
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    make_scorer,
    RocCurveDisplay,
    PrecisionRecallDisplay,
    DetCurveDisplay,
)
from sklearn.model_selection import (
    cross_validate,
    GridSearchCV,
    train_test_split,
    cross_validate,
)
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import (
    OneHotEncoder,
    MinMaxScaler,
    FunctionTransformer,
    TargetEncoder,
    label_binarize,
)
from ucimlrepo import fetch_ucirepo

# Global seed: seeds NumPy's RNG here and is reused as `random_state` further down.
seed = 444
np.random.seed(seed)


# Fetch UCI repository dataset id 27 through ucimlrepo.
credit_approval = fetch_ucirepo(id=27)

# Feature table, target table and the combined original table of that dataset.
X = credit_approval.data.features
y = credit_approval.data.targets
df = credit_approval.data.original

# preprocessing
feature engineering, fully numerical feature space

In [None]:
def _equals_indicator(col, value):
    """Return a 0/1 integer array flagging the entries of *col* equal to *value*."""
    return (np.asarray(col) == value).astype(int)


def _category_flag(value):
    """Build a pipeline that mode-imputes a categorical column, then flags *value*.

    An explicit equality test is used instead of
    ``label_binarize(col, classes=[value])``: with a single entry in ``classes``
    that function returns an all-zero column whenever the data it is given
    contains at most two distinct values, so the encoding silently depended on
    which rows were passed in.
    """
    return make_pipeline(
        SimpleImputer(strategy="most_frequent"),
        FunctionTransformer(_equals_indicator, kw_args={"value": value}),
    )


# Variant 1: fully numerical feature space, including A7.
column_tweaker_include_a7 = make_column_transformer(
    # binary categoricals: impute the mode, one-hot encode, drop the first level
    (
        make_pipeline(
            SimpleImputer(strategy="most_frequent"),
            OneHotEncoder(sparse_output=False, drop="first"),
        ),
        ["A1", "A9", "A10", "A12"],
    ),
    # multi-level categoricals collapsed to "is the chosen level" indicators
    (_category_flag("u"), ["A4"]),
    (_category_flag("g"), ["A5"]),
    # A6 is target-encoded; the imputer only catches anything left missing
    (make_pipeline(TargetEncoder(), SimpleImputer(strategy="median")), ["A6"]),
    (_category_flag("g"), ["A13"]),
    (_category_flag("v"), ["A7"]),
    # numeric columns: log1p transform, then median imputation
    (
        make_pipeline(FunctionTransformer(np.log1p), SimpleImputer(strategy="median")),
        ["A2", "A3", "A8", "A11", "A14", "A15"],
    ),
    # columns not listed above are dropped
    remainder="drop",
)

In [None]:
def _equals_indicator(col, value):
    """Return a 0/1 integer array flagging the entries of *col* equal to *value*."""
    return (np.asarray(col) == value).astype(int)


def _category_flag(value):
    """Build a pipeline that mode-imputes a categorical column, then flags *value*.

    An explicit equality test is used instead of
    ``label_binarize(col, classes=[value])``: with a single entry in ``classes``
    that function returns an all-zero column whenever the data it is given
    contains at most two distinct values, so the encoding silently depended on
    which rows were passed in.
    """
    return make_pipeline(
        SimpleImputer(strategy="most_frequent"),
        FunctionTransformer(_equals_indicator, kw_args={"value": value}),
    )


# variant 2: exclude A7
column_tweaker_exclude_a7 = make_column_transformer(
    # binary categoricals: impute the mode, one-hot encode, drop the first level
    (
        make_pipeline(
            SimpleImputer(strategy="most_frequent"),
            OneHotEncoder(sparse_output=False, drop="first"),
        ),
        ["A1", "A9", "A10", "A12"],
    ),
    # multi-level categoricals collapsed to "is the chosen level" indicators
    (_category_flag("u"), ["A4"]),
    (_category_flag("g"), ["A5"]),
    # A6 is target-encoded; the imputer only catches anything left missing
    (make_pipeline(TargetEncoder(), SimpleImputer(strategy="median")), ["A6"]),
    (_category_flag("g"), ["A13"]),
    # numeric columns: log1p transform, then median imputation
    (
        make_pipeline(FunctionTransformer(np.log1p), SimpleImputer(strategy="median")),
        ["A2", "A3", "A8", "A11", "A14", "A15"],
    ),
    # columns not listed above (including A7) are dropped
    remainder="drop",
)

In [None]:
# feature scaling: rescale every column with MinMaxScaler's default settings
column_scaler = MinMaxScaler()

# PCA: a fractional n_components keeps as many components as are needed to
# explain that share of the variance (here 99%)
column_pca = PCA(n_components=0.99, svd_solver="full")

## pipelines

Pipeline 1: exclude A7 <br>
Pipeline 2: include A7

In [None]:
# make_pipeline keeps the objects it is given, so the scaler and PCA are cloned
# here; otherwise both preprocessing pipelines would share (and overwrite) the
# same fitted scaler/PCA state.
preproc_exclA7 = make_pipeline(
    column_tweaker_exclude_a7, clone(column_scaler), clone(column_pca)
)
#preproc_exclA7

In [None]:
# make_pipeline keeps the objects it is given, so the scaler and PCA are cloned
# here; otherwise both preprocessing pipelines would share (and overwrite) the
# same fitted scaler/PCA state.
preproc_inclA7 = make_pipeline(
    column_tweaker_include_a7, clone(column_scaler), clone(column_pca)
)
#preproc_inclA7

## train-test-vali split
choose which pipeline to use here

In [None]:
# Encode the target as int8: "+" -> 1, "-" -> 0.
# The labels are rebuilt from the fetched dataset on every run, so the cell can
# be executed repeatedly and the source table is never modified in place.
y = (np.asarray(credit_approval.data.targets).ravel() == "+").astype(np.int8)


# TODO: CHOOSE PREPROCESSING PIPELINE
# NOTE(review): the pipeline (target encoding, scaling, PCA) is fitted on ALL rows
# before splitting, so the held-out rows influence the features — confirm that this
# leakage is acceptable for the comparison.
# NOTE(review): the comparison plots further down label results computed from this
# split as "excl. A7" although preproc_inclA7 is selected here — confirm.
X_preprocessed = preproc_inclA7.fit_transform(X=X, y=y)


# 90% train, 10% held back; the held-back part is halved into test and validation.
X_train, X_test_validate, y_train, y_test_validate = train_test_split(
    X_preprocessed, y, test_size=0.1, random_state=seed
)

# Fixed random_state so the test/validation halves are reproducible as well.
X_test, X_validate, y_test, y_validate = train_test_split(
    X_test_validate, y_test_validate, test_size=0.5, random_state=seed
)
#(X_test.shape, y_test.shape), (X_validate.shape, y_validate.shape)

# Train and test rows together are what the cross-validation cells operate on.
X_train_test = np.concatenate((X_train, X_test))
y_train_test = np.concatenate((y_train, y_test))

# train & optimise different models

On what to optimize ?

The task is to predict whether, given the different parameters, access to a credit card is granted or denied.

- The worst case would be to give access to a position that is obvious fraud (False Positive). Cost: The owner loses money.
- Rejecting access to a legit position (False Negative) would be inconvenient, since someone wouldn't be able to draw his money, but we consider it less of a problem than a False Positive.

- Obviously, a True Positive would be giving someone his money while a True Negative would be denying fraud.



In summary, we primarily want to keep the number of False Positives low, which is what precision measures.

Additional metrics:
- The dataset is balanced, therefore classification accuracy is a meaningful metric as well.

In [None]:
# Scorers shared by every cross_validate / GridSearchCV call in this notebook.
# The dictionary keys become the "test_<key>" entries of the CV results.
scoring = {
    "Precision": make_scorer(precision_score, pos_label=1),
    "Accuracy": make_scorer(accuracy_score),
}

How to compare classifiers ?
- Plot learning curves.
- Compare misclassified examples.
- Compare robustness in different CV-splits.

Next, we compare the performance of a variety of common ML classifiers. Since we are relatively naive as to which classifiers could work, we try a diverse set.

##### Plotting function

In [None]:
def plot_mean_std(mean, std, ploth, color):
    """Draw a mean marker with a capped +/- one-std bar at height *ploth*.

    Args:
        mean: x position of the marker.
        std: half-width of the horizontal bar around *mean*.
        ploth: y position at which marker and bar are drawn.
        color: matplotlib colour used for every element.
    """
    lower, upper = mean - std, mean + std
    cap_half_height = 0.1

    # marker at the mean
    plt.plot(mean, ploth, "o", color=color, markersize=10, label=f"Mean = {mean:.2f}")
    # horizontal bar spanning mean - std .. mean + std
    plt.plot(
        [lower, upper],
        [ploth, ploth],
        color=color,
        linewidth=1.5,
        label=f"STD = {std:.2f}",
    )
    # vertical end caps
    plt.vlines(
        [lower, upper],
        ymin=ploth - cap_half_height,
        ymax=ploth + cap_half_height,
        color=color,
        linewidth=1.5,
    )


def plot_hline(h):
    """Draw a thin grey horizontal guide line at y = *h* on the current axes."""
    guide_style = {"color": "grey", "linestyle": "-", "linewidth": 0.5}
    plt.axhline(y=h, **guide_style)

In [None]:
def plot_cv_scores(base, grid, offset=0):
    """Plot mean/std of the CV precision and accuracy for two result sets.

    Args:
        base: cross_validate result of the baseline estimator (drawn in black).
        grid: cross_validate result of the grid-search estimator (drawn in magenta).
        offset: vertical shift applied to both metric rows, so that several calls
            can share one figure.
    """
    # (result key, row height before the offset is applied)
    metric_rows = (("test_Precision", 0), ("test_Accuracy", 1))
    coloured_results = ((base, "black"), (grid, "magenta"))

    for key, row in metric_rows:
        h = row + offset
        for result, colour in coloured_results:
            fold_scores = result[key]
            plot_mean_std(fold_scores.mean(), fold_scores.std(), h, colour)
        plot_hline(h)

    # y axis: one tick per metric
    plt.ylabel("")
    plt.ylim(-0.8, 1.8)
    plt.yticks(ticks=[0, 1], labels=["Precision", "Accuracy"])

    # x axis: fixed score window
    plt.xlim(0.70, 0.95)
    plt.xlabel("")

    plt.suptitle("Mean and std of 10-fold CV")
    plt.title("baseline (black) vs estimator from GridSearch (magenta))")

In [None]:
def plot_cv_scores_2versions(base1, grid1, base2, grid2, label1=None, label2=None):
    """Plot two baseline/grid-search comparisons stacked in one figure.

    The first pair is drawn slightly below and the second slightly above each
    metric row. When both labels are given, a second y axis on the right names
    the rows.
    """
    shift = 0.2
    plot_cv_scores(base1, grid1, offset=-shift)
    plot_cv_scores(base2, grid2, offset=shift)

    # without both labels there is nothing to annotate
    if label1 is None or label2 is None:
        return

    right_axis = plt.gca().twinx()
    right_axis.set_ylim(-0.8, 1.8)
    right_axis.set_yticks([0 - shift, 0 + shift, 1 - shift, 1 + shift])
    right_axis.set_yticklabels([label1, label2, label1, label2])

## compare functionality

In [None]:
def train_estimator(
    pipe, X, y, estimator, param_grid, *, cv=10, test_size=0.1, refit="Accuracy"
):
    """Preprocess the data, then cross-validate a baseline and a tuned estimator.

    Args:
        pipe: preprocessing pipeline; it is fitted on *X* / *y* by this call.
        X: raw feature table.
        y: target labels.
        estimator: unfitted estimator used both as baseline and as grid-search base.
        param_grid: parameter grid handed to GridSearchCV.
        cv: number of cross-validation folds.
        test_size: share of rows held back and not used for training or CV.
        refit: name of the entry in ``scoring`` used to select the best estimator.

    Returns:
        Tuple ``(base_cv, grid_cv, grid)``: cross_validate results of the
        baseline, cross_validate results of the best grid-search estimator,
        and the fitted GridSearchCV object.
    """
    # NOTE(review): the pipeline is fitted on all rows before the split, so the
    # held-back rows and the CV test folds influence the features — confirm that
    # this leakage is acceptable.
    X = pipe.fit_transform(X=X, y=y)

    # Only the training part is needed here; the held-back part is discarded.
    X_train_test, _, y_train_test, _ = train_test_split(
        X, y, test_size=test_size, random_state=seed
    )

    base_cv = cross_validate(
        estimator, X_train_test, y_train_test, cv=cv, scoring=scoring
    )

    grid = GridSearchCV(
        estimator=estimator,
        param_grid=param_grid,
        cv=cv,
        scoring=scoring,
        refit=refit,
    )
    grid.fit(X_train_test, y_train_test)

    # Re-score the selected configuration with the same CV setup as the baseline.
    grid_cv = cross_validate(
        grid.best_estimator_, X_train_test, y_train_test, cv=cv, scoring=scoring
    )

    return base_cv, grid_cv, grid

## Linear Discriminant Analysis

Parameters:
'priors': By default, the class proportions are estimated from the training set. Since our dataset is relatively small (n < 1000), the class proportions might be slightly skewed, leading to a suboptimal estimate from the training data.

### Baseline

In [None]:
# Baseline: 10-fold CV of LDA with default hyperparameters on the train+test rows.
lda_base_cv = cross_validate(
    LinearDiscriminantAnalysis(), X_train_test, y_train_test, cv=10, scoring=scoring
)

### GridSearch

In [None]:
# Candidate class priors [p, 1 - p] for p = 0.05, 0.10, ..., 0.95.
# Built from exact fractions k/20 so the values equal the two-decimal literals.
param_grid = {"priors": [[k / 20, (20 - k) / 20] for k in range(1, 20)]}

# 10-fold grid search over the priors; the best candidate by accuracy is refitted.
lda_grid = GridSearchCV(
    LinearDiscriminantAnalysis(),
    param_grid,
    scoring=scoring,
    refit="Accuracy",
    cv=10,
)
lda_grid.fit(X_train_test, y_train_test)
lda_grid.best_estimator_

In [None]:
# Re-score the best LDA configuration with the same 10-fold CV as the baseline.
lda_grid_cv = cross_validate(
    lda_grid.best_estimator_, X_train_test, y_train_test, cv=10, scoring=scoring
)

### compare pipelines

In [None]:
# Repeat baseline + grid search on data preprocessed with preproc_inclA7 and plot
# both result pairs side by side.
# NOTE(review): lda_base_cv / lda_grid_cv are labelled "excl. A7" but come from
# X_train_test, which the split cell builds with preproc_inclA7 — confirm.
lda_base_cv2, lda_grid_cv2, lda_grid2 = train_estimator(
    preproc_inclA7, X, y, LinearDiscriminantAnalysis(), param_grid
)
plot_cv_scores_2versions(
    lda_base_cv, lda_grid_cv, lda_base_cv2, lda_grid_cv2, "excl. A7", "incl. A7"
)

## Quadratic Discriminant Analysis

### Baseline

In [None]:
# Baseline: 10-fold CV of QDA with default hyperparameters on the train+test rows.
qda_base_cv = cross_validate(
    QuadraticDiscriminantAnalysis(), X_train_test, y_train_test, cv=10, scoring=scoring
)

### GridSearch

In [None]:
# Candidate class priors [p, 1 - p] for p = 0.05, 0.10, ..., 0.95.
# Built from exact fractions k/20 so the values equal the two-decimal literals.
param_grid = {"priors": [[k / 20, (20 - k) / 20] for k in range(1, 20)]}

# 10-fold grid search over the priors; the best candidate by accuracy is refitted.
qda_grid = GridSearchCV(
    QuadraticDiscriminantAnalysis(),
    param_grid,
    scoring=scoring,
    refit="Accuracy",
    cv=10,
)
qda_grid.fit(X_train_test, y_train_test)
qda_grid.best_params_

In [None]:
# Re-score the best QDA configuration with the same 10-fold CV as the baseline.
qda_grid_cv = cross_validate(
    qda_grid.best_estimator_, X_train_test, y_train_test, cv=10, scoring=scoring
)

### compare pipelines

In [None]:
# Repeat baseline + grid search on data preprocessed with preproc_inclA7 and plot
# both result pairs side by side.
# NOTE(review): qda_base_cv / qda_grid_cv are labelled "excl. A7" but come from
# X_train_test, which the split cell builds with preproc_inclA7 — confirm.
qda_base_cv2, qda_grid_cv2, qda_grid2 = train_estimator(
    preproc_inclA7, X, y, QuadraticDiscriminantAnalysis(), param_grid
)
plot_cv_scores_2versions(
    qda_base_cv, qda_grid_cv, qda_base_cv2, qda_grid_cv2, "excl. A7", "incl. A7"
)

## Random Forest

#### Baseline

In [None]:
# Baseline: 10-fold CV of a random forest with default hyperparameters.
# NOTE(review): no random_state is passed to the forest, so scores can vary
# between runs — confirm whether that is intended.
rf_base_cv = cross_validate(
    RandomForestClassifier(), X_train_test, y_train_test, cv=10, scoring=scoring
)

#### GridSearchCV

In [None]:
# best values, so we don't need to run excessive GridSearch again
# (every list holds a single value, so the search evaluates one configuration)
param_grid = {
    "n_estimators": [300],
    "max_depth": [None],
    "min_samples_split": [10],
    "min_samples_leaf": [4],
    "max_features": ["sqrt"],
    "bootstrap": [True],
    "criterion": ["entropy"],
}

# Full grid that was searched originally (kept for reference).
# NOTE(review): 'auto' for max_features is not accepted by recent scikit-learn
# releases — drop it before re-running this grid.
# param_grid = {
#     'n_estimators': [100, 200, 300, 400, 500],
#     'max_depth': [None, 10, 20, 30, 40, 50],
#     'min_samples_split': [2, 5, 10, 15, 20],
#     'min_samples_leaf': [1, 2, 4, 6, 8, 10],
#     'max_features': ['auto', 'sqrt', 'log2'],
#     'bootstrap': [True, False],
#     'criterion': ['gini', 'entropy']
# }

# 10-fold grid search on all cores; the best candidate by accuracy is refitted.
rf_grid = GridSearchCV(
    estimator=RandomForestClassifier(),
    scoring=scoring,
    param_grid=param_grid,
    verbose=1,
    cv=10,
    refit="Accuracy",
    n_jobs=-1,
)

rf_grid.fit(X_train_test, y_train_test)

In [None]:
# Re-score the best random-forest configuration with the same 10-fold CV as the baseline.
rf_grid_cv = cross_validate(
    rf_grid.best_estimator_, X_train_test, y_train_test, cv=10, scoring=scoring
)

### compare pipelines

In [None]:
# Repeat baseline + grid search on data preprocessed with preproc_inclA7 and plot
# both result pairs side by side.
# NOTE(review): rf_base_cv / rf_grid_cv are labelled "excl. A7" but come from
# X_train_test, which the split cell builds with preproc_inclA7 — confirm.
rf_base_cv2, rf_grid_cv2, rf_grid2 = train_estimator(
    preproc_inclA7, X, y, RandomForestClassifier(), param_grid
)
plot_cv_scores_2versions(
    rf_base_cv, rf_grid_cv, rf_base_cv2, rf_grid_cv2, "excl. A7", "incl. A7"
)

## KNeighborsClassifier

#### Baseline

In [None]:
# Baseline: 10-fold CV of k-nearest-neighbours with default hyperparameters.
kn_base_cv = cross_validate(
    KNeighborsClassifier(), X_train_test, y_train_test, cv=10, scoring=scoring
)

#### GridSearch

In [None]:
# 3 x 2 x 4 x 2 = 48 candidate configurations.
param_grid = {
    "n_neighbors": [5, 10, 15],
    "weights": ["uniform", "distance"],
    "algorithm": ["auto", "ball_tree", "kd_tree", "brute"],
    "p": [1, 2],
}

# 10-fold grid search on all cores; the best candidate by accuracy is refitted.
kn_grid = GridSearchCV(
    estimator=KNeighborsClassifier(),
    scoring=scoring,
    param_grid=param_grid,
    verbose=1,
    cv=10,
    refit="Accuracy",
    n_jobs=-1,
)

kn_grid.fit(X_train_test, y_train_test)

In [None]:
# Re-score the best k-NN configuration with the same 10-fold CV as the baseline.
kn_grid_cv = cross_validate(
    kn_grid.best_estimator_, X_train_test, y_train_test, cv=10, scoring=scoring
)

### compare pipelines

In [None]:
# Repeat baseline + grid search on data preprocessed with preproc_inclA7 and plot
# both result pairs side by side.
# NOTE(review): kn_base_cv / kn_grid_cv are labelled "excl. A7" but come from
# X_train_test, which the split cell builds with preproc_inclA7 — confirm.
kn_base_cv2, kn_grid_cv2, kn_grid2 = train_estimator(
    preproc_inclA7, X, y, KNeighborsClassifier(), param_grid
)
plot_cv_scores_2versions(
    kn_base_cv, kn_grid_cv, kn_base_cv2, kn_grid_cv2, "excl. A7", "incl. A7"
)

## Adaboost
An ensemble method

#### Baseline

In [None]:
# Baseline: 10-fold CV of AdaBoost (100 estimators, SAMME, fixed random_state).
ada_base_cv = cross_validate(
    AdaBoostClassifier(n_estimators=100, algorithm="SAMME", random_state=0),
    X_train_test,
    y_train_test,
    cv=10,
    scoring=scoring,
)

### Grid Search

In [None]:
# 1 x 3 x 3 = 9 candidate configurations.
param_grid = {
    "algorithm": ["SAMME"],
    "n_estimators": [50, 100, 200],
    "learning_rate": [0.01, 0.1, 1.0],
}

# bigger version:
# NOTE(review): the nested keys below do not look like valid parameter names.
# Nested parameters use a double underscore after the estimator parameter, e.g.
# 'estimator__max_depth' / 'estimator__min_samples_split' /
# 'estimator__min_samples_leaf' (with an explicit tree passed as `estimator`) —
# verify against the installed scikit-learn version before enabling.
# param_grid = {
#     'n_estimators': [50, 100, 200],
#     'learning_rate': [0.01, 0.1, 1.0],
#     'algorithm': ['SAMME', 'SAMME.R'],
#     'base_estimator_max_depth': [1, 2, 3],
#     'base_estimator_min__samples_split': [2, 5],
#     'base_estimator_min__samples_leaf': [1, 2]
# }

# 10-fold grid search on all cores; the best candidate by accuracy is refitted.
# NOTE(review): unlike the baseline, no random_state is set here — confirm.
ada_grid = GridSearchCV(
    AdaBoostClassifier(),
    param_grid,
    cv=10,
    scoring=scoring,
    refit="Accuracy",
    n_jobs=-1,
)
ada_grid.fit(X_train_test, y_train_test)

In [None]:
# Re-score the best AdaBoost configuration with the same 10-fold CV as the baseline.
ada_grid_cv = cross_validate(
    ada_grid.best_estimator_, X_train_test, y_train_test, cv=10, scoring=scoring
)

### compare pipelines

In [None]:
# Repeat baseline + grid search on data preprocessed with preproc_inclA7 and plot
# both result pairs side by side.
# NOTE(review): ada_base_cv / ada_grid_cv are labelled "excl. A7" but come from
# X_train_test, which the split cell builds with preproc_inclA7 — confirm.
ada_base_cv2, ada_grid_cv2, ada_grid2 = train_estimator(
    preproc_inclA7, X, y, AdaBoostClassifier(), param_grid
)
plot_cv_scores_2versions(
    ada_base_cv, ada_grid_cv, ada_base_cv2, ada_grid_cv2, "excl. A7", "incl. A7"
)

# model comparison

## compare scores on validation set

In [None]:
# Tuned searches to be scored on the validation rows, keyed by display name.
validated_models = {
    "LDA": lda_grid2,
    "QDA": qda_grid2,
    "RandomForest": rf_grid2,
    "KNeighbors": kn_grid2,
    "AdaBoost": ada_grid2,
}

# One row per model; filled in below.
scores = pd.DataFrame(
    np.nan, index=tuple(validated_models), columns=("accuracy", "precision")
)

for model_name, model in validated_models.items():
    y_pred = model.best_estimator_.predict(X_validate)
    scores.loc[model_name, "accuracy"] = accuracy_score(y_validate, y_pred)
    scores.loc[model_name, "precision"] = precision_score(y_validate, y_pred)

# Two stacked bar charts sharing the model axis.
fig, axes = plt.subplots(2, 1, sharex=True)
accuracy_axis, precision_axis = axes

accuracy_axis.bar(scores.index, scores.accuracy, width=.33)
accuracy_axis.set_ylim(.75, .9)
accuracy_axis.set_title("Accuracy")

precision_axis.bar(scores.index, scores.precision, width=.6)
precision_axis.set_ylim(.75, .9)
precision_axis.set_title("Precision")


## ROC

In [None]:
# Overlay the validation ROC curves of the tuned models on one axes.
ax = plt.subplot()
#RocCurveDisplay.from_estimator(lda_grid.best_estimator_, X_validate, y_validate, ax=ax)
for tuned_search in (qda_grid2, rf_grid2, kn_grid2, ada_grid2):
    RocCurveDisplay.from_estimator(
        tuned_search.best_estimator_, X_validate, y_validate, ax=ax
    )
plt.title("ROC curve")

## Precision Recall Display

In [None]:
# Overlay the validation precision-recall curves of the tuned models on one axes.
ax = plt.subplot()
#PrecisionRecallDisplay.from_estimator(lda_grid.best_estimator_, X_validate, y_validate, ax=ax)
for tuned_search in (qda_grid2, rf_grid2, kn_grid2, ada_grid2):
    PrecisionRecallDisplay.from_estimator(
        tuned_search.best_estimator_, X_validate, y_validate, ax=ax
    )
plt.title("precision recall curve")

## DET curve

In [None]:
"""AdaBoost has best accuracy/precision, KN has best ROC-AUC, QDA has second best ROC-AUC"""

# Overlay the validation DET curves of the selected models on one axes.
# NOTE(review): this cell uses qda_grid / kn_grid / ada_grid, whereas the ROC and
# precision-recall cells use the *_grid2 searches — confirm which set is intended.
ax = plt.subplot()
#DetCurveDisplay.from_estimator(lda_grid.best_estimator_, X_validate, y_validate, ax=ax)
DetCurveDisplay.from_estimator(qda_grid.best_estimator_, X_validate, y_validate, ax=ax)
#DetCurveDisplay.from_estimator(rf_grid.best_estimator_, X_validate, y_validate, ax=ax)
DetCurveDisplay.from_estimator(kn_grid.best_estimator_, X_validate, y_validate, ax=ax)
DetCurveDisplay.from_estimator(ada_grid.best_estimator_, X_validate, y_validate, ax=ax);
ax.legend()

## old code

In [None]:
# NOTE(review): legacy cell. `search_rf` and `search_knn` are not defined anywhere
# in the visible notebook, and the labels are already converted from "+"/"-" to
# integers in the split cell, so the "+" comparisons below look stale — confirm
# before re-running.
y_num = np.where(y_train == "+", 1, 0)
y_validate_num = np.where(y_validate == "+", 1, 0)

# Refit the best estimators of the (legacy) searches on the numeric labels.
rf_num = search_rf.best_estimator_.fit(X_train, y_num)
knn_num = search_knn.best_estimator_.fit(X_train, y_num)

In [None]:
# DetCurveDisplay.from_predictions(y_validate, rf_best.predict)
# Get prediction probabilities
# NOTE(review): legacy cell; `search_rf` is not defined in the visible notebook.
rf_best = search_rf.best_estimator_
# probabilities = rf_best.predict_proba(X_validate)
# print("Prediction Probabilities:\n", probabilities)

In [None]:
# Get class predictions (optional)
# NOTE(review): legacy cell; relies on rf_num / knn_num from the legacy refit cell.
predictions_rf = rf_num.predict(X_validate)
print("Class Predictions:\n", predictions_rf)

predictions_knn = knn_num.predict(X_validate)
print("Class Predictions:\n", predictions_knn)

# DetCurveDisplay.from_predictions(y_validate_num, predictions_rf)
# DetCurveDisplay.from_predictions(y_validate_num, predictions_knn)

In [None]:
# Model evaluation

# Since classes are balanced, accuracy is the correct evaluation metric.

# Plot ROC for different hyperparameters

# above: plot full results of grid search to showcase effects of different hyperparameters

### old feature engineering pipe

feature engineering: mixed numerical categorical feature space (this is old code, use fully numerical feature space for now)

In [None]:
# Legacy mixed numerical/categorical feature pipeline, kept for reference only;
# the resulting transformer is not assigned to a name.
make_column_transformer(
    # A1: impute the mode, then one-hot encode
    (
        make_pipeline(
            SimpleImputer(strategy="most_frequent"), OneHotEncoder(drop="first")
        ),
        ["A1"],
    ),
    # A2: impute the median, then min-max scale
    (make_pipeline(SimpleImputer(strategy="median"), MinMaxScaler()), ["A2"]),
    # A4: collapse every value other than "u" to "non-u"
    (
        make_pipeline(
            FunctionTransformer(lambda col: col.mask(col != "u", "non-u")),
            SimpleImputer(strategy="most_frequent"),
            OneHotEncoder(drop="first"),
        ),
        ["A4"],
    ),
    # A7: keep "v" and "h", merge the listed remaining levels into "other"
    (
        make_pipeline(
            FunctionTransformer(
                lambda col: col.replace(
                    {
                        "v": "v",
                        "h": "h",
                        "bb": "other",
                        "ff": "other",
                        "j": "other",
                        "z": "other",
                        "dd": "other",
                        "n": "other",
                        "o": "other",
                    }
                )
            ),
            SimpleImputer(strategy="most_frequent"),
            OneHotEncoder(drop="first"),
        ),
        ["A7"],
    ),
    # A13: keep "g", merge "s" and "p" into "non-g"
    (
        make_pipeline(
            FunctionTransformer(
                lambda col: col.replace({"g": "g", "s": "non-g", "p": "non-g"})
            ),
            SimpleImputer(strategy="most_frequent"),
            OneHotEncoder(drop="first"),
        ),
        ["A13"],
    ),
    # A11, A14, A15: log1p, median imputation, min-max scaling.
    # make_column_transformer takes (transformer, columns) pairs and names the
    # steps itself, so the former leading "test" label was removed from this tuple.
    (
        make_pipeline(
            FunctionTransformer(np.log1p),
            SimpleImputer(strategy="median"),
            MinMaxScaler(),
        ),
        ["A11", "A14", "A15"],
    ),
    # continuous default
    (
        make_pipeline(
            MinMaxScaler(),
        ),
        ["A3", "A8"],
    ),
    # categorical default
    (
        make_pipeline(
            OneHotEncoder(drop="first"),
        ),
        ["A9", "A10", "A12"],
    ),
    # remove: A5, A6
    remainder="drop",
)

### AdaBoost GridSearch Parameter descriptions

##### 1. **Number of Estimators (`n_estimators`)**:
   - **Description**: This is the number of weak learners (or base estimators) to be used in the boosting process. Higher values can lead to better performance but also increase the risk of overfitting.
   - **Typical Range**: `[50, 100, 200, 300, 400, 500]`

##### 2. **Learning Rate (`learning_rate`)**:
   - **Description**: This shrinks the contribution of each weak learner by multiplying their weights. A lower learning rate requires a higher number of estimators.
   - **Typical Range**: `[0.001, 0.01, 0.1, 0.5, 1.0]`

##### 3. **Base Estimator (`base_estimator`)**:
   - **Description**: The weak learner to be used. Typically, a decision tree classifier is used, but this can be replaced by other classifiers.
   - **Typical Range**:
     - For Decision Trees: `DecisionTreeClassifier(max_depth=1)` (default), or vary `max_depth`, `min_samples_split`, and `min_samples_leaf`.
     - Other weak learners: `DecisionTreeClassifier`, `SVM`, etc.

##### 4. **Algorithm (`algorithm`)**:
   - **Description**: The algorithm used to choose the weights for the weak learners.
   - **Options**: `['SAMME', 'SAMME.R']`
   - **Explanation**:
     - `SAMME.R` uses the probability estimates and generally performs better, especially when `base_estimator` can output class probabilities.
     - `SAMME` is a more traditional approach.

##### 5. **Random State (`random_state`)**:
   - **Description**: Controls the random seed passed to each weak learner at each boosting iteration, so it only has an effect when the weak learner itself uses randomness. It’s important for reproducibility.
   - **Typical Range**: `[None, 42, 1]` (using fixed seeds like `42` can help reproduce results)

##### 6. **Max Depth of Base Estimator (`max_depth`)**:
   - **Description**: Depth of the decision tree if it is used as the base estimator. Shallow trees prevent overfitting.
   - **Typical Range**: `[1, 2, 3, 4, 5]`

##### 7. **Min Samples Split (`min_samples_split`)** (if decision tree is used as the base estimator):
   - **Description**: The minimum number of samples required to split an internal node.
   - **Typical Range**: `[2, 5, 10]`

##### 8. **Min Samples Leaf (`min_samples_leaf`)** (if decision tree is used as the base estimator):
   - **Description**: The minimum number of samples required to be at a leaf node.
   - **Typical Range**: `[1, 2, 5, 10]`