In [1]:
from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
import numpy as np

In [2]:
# One seeded RNG object is shared by every stochastic call in this notebook.
# Each consumer advances it, so the recorded numbers are reproduced only when
# the cells run top to bottom on a fresh kernel.
random_state = np.random.RandomState(0)

# Synthetic, imbalanced binary problem: 3000 samples with 10 features, only
# 2 of them informative, class weights 0.15 / 0.85 and flip_y=0.1 label noise.
X, y = make_classification(
    n_samples=3000, n_features=10, n_informative=2,
    flip_y=0.1, weights=[0.15, 0.85],
    random_state=random_state,
)

# Hold-out split: 20% of the samples are used for training, the remaining
# 80% for evaluation.
# NOTE(review): train_size=0.2 leaves only 600 training samples -- confirm
# this is intended rather than test_size=0.2.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.2, random_state=random_state,
)

In [3]:
# Gradient boosting over 200 depth-1 trees with a learning rate of 0.1.
# The model draws its randomness from the shared notebook RNG.
boost = GradientBoostingClassifier(
    n_estimators=200, learning_rate=0.1, max_depth=1, random_state=random_state
)
boost.fit(X_train, y_train)

# Score the held-out split and print per-class precision / recall / F1.
y_pred = boost.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.87      0.72      0.79       439
           1       0.94      0.98      0.96      1961

    accuracy                           0.93      2400
   macro avg       0.90      0.85      0.87      2400
weighted avg       0.93      0.93      0.93      2400



In [4]:
# Train a single shallow decision tree (depth 3) for comparison.
# class_weight='balanced' reweights the classes by inverse frequency so the
# minority class is not ignored on this imbalanced dataset.
tree = DecisionTreeClassifier(
    max_depth=3,
    class_weight='balanced',
    # scikit-learn permutes the features at every split, so ties between
    # equally good splits are otherwise broken differently from run to run.
    # A fixed integer seed makes the fit reproducible, and also idempotent
    # when this cell is re-run on its own.
    random_state=0,
).fit(X_train, y_train)

# Evaluate on the held-out split. Note that this rebinds y_pred, replacing
# the predictions stored by the gradient boosting cell.
y_pred = tree.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.49      0.71      0.58       439
           1       0.93      0.84      0.88      1961

    accuracy                           0.81      2400
   macro avg       0.71      0.77      0.73      2400
weighted avg       0.85      0.81      0.83      2400

