In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this is a blanket filter, so deprecation or numerical
# warnings from any library will also be hidden — confirm this is intended.
warnings.filterwarnings('ignore')

In [3]:
# gradient boosting classifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [5]:
# Synthetic two-class dataset: 1000 samples with 20 features, fixed seed.
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_classes=2,
    random_state=1,
)

In [6]:
# Hold out 30% of the samples for testing; the split is seeded.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=1,
)

In [7]:
# Baseline gradient boosting classifier with default hyperparameters.
# The seed is fixed so the fit is reproducible: an unseeded model can build
# slightly different trees (and therefore give different test predictions)
# from one run to the next, which makes comparisons between runs unreliable.
classifier = GradientBoostingClassifier(random_state=1)
classifier

In [8]:
# Train the baseline classifier on the training split.
classifier.fit(X=X_train, y=y_train)

In [10]:
# Predict class labels for the test split and display them.
y_pred = classifier.predict(X=X_test)
y_pred

array([0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1,
       0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1,
       0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1,
       0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0,
       1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1,
       1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
       1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0,
       0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0,
       1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0,
       0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1,
       0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1,
       1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1])

In [11]:
# Report the confusion matrix, per-class report and overall accuracy,
# one after another, each on its own line.
print(
    confusion_matrix(y_test, y_pred),
    classification_report(y_test, y_pred),
    accuracy_score(y_test, y_pred),
    sep="\n",
)

[[123  16]
 [ 25 136]]
              precision    recall  f1-score   support

           0       0.83      0.88      0.86       139
           1       0.89      0.84      0.87       161

    accuracy                           0.86       300
   macro avg       0.86      0.86      0.86       300
weighted avg       0.87      0.86      0.86       300

0.8633333333333333


In [13]:
# hyperparameter tuning
from sklearn.model_selection import GridSearchCV
# Candidate values explored by the grid search (4 x 3 = 12 combinations).
param_grid = dict(
    learning_rate=[0.01, 0.1, 0.05, 0.2],
    n_estimators=[100, 200, 300],
)

In [14]:
# Base estimator for the search. It is seeded so that every candidate's
# cross-validation scores and the final refit are reproducible across runs.
gbc = GradientBoostingClassifier(random_state=1)
# 5-fold cross-validated grid search over param_grid, logging each fit.
grid = GridSearchCV(estimator=gbc, param_grid=param_grid, cv=5, verbose=3)

In [15]:
# Display the grid-search object configured in the previous cell.
grid

In [16]:
# Run the cross-validated search on the training split.
grid.fit(X=X_train, y=y_train)

Fitting 5 folds for each of 12 candidates, totalling 60 fits
[CV 1/5] END learning_rate=0.01, n_estimators=100;, score=0.857 total time=   1.2s
[CV 2/5] END learning_rate=0.01, n_estimators=100;, score=0.907 total time=   1.3s
[CV 3/5] END learning_rate=0.01, n_estimators=100;, score=0.857 total time=   1.0s
[CV 4/5] END learning_rate=0.01, n_estimators=100;, score=0.850 total time=   0.8s
[CV 5/5] END learning_rate=0.01, n_estimators=100;, score=0.821 total time=   0.8s
[CV 1/5] END learning_rate=0.01, n_estimators=200;, score=0.857 total time=   1.6s
[CV 2/5] END learning_rate=0.01, n_estimators=200;, score=0.893 total time=   1.6s
[CV 3/5] END learning_rate=0.01, n_estimators=200;, score=0.864 total time=   1.6s
[CV 4/5] END learning_rate=0.01, n_estimators=200;, score=0.843 total time=   1.6s
[CV 5/5] END learning_rate=0.01, n_estimators=200;, score=0.829 total time=   1.5s
[CV 1/5] END learning_rate=0.01, n_estimators=300;, score=0.850 total time=   3.7s
[CV 2/5] END learning_rate

In [17]:
# Show the hyperparameter combination selected by the grid search.
grid.best_params_

{'learning_rate': 0.1, 'n_estimators': 100}

In [18]:
# Keep a handle on the estimator chosen by the grid search and display it.
best_model = grid.best_estimator_
best_model

In [19]:
# Predict class labels for the test split with the tuned model and display
# them. This rebinds y_pred, replacing the baseline model's predictions.
y_pred = best_model.predict(X=X_test)
y_pred

array([0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1,
       0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1,
       0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1,
       0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0,
       1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1,
       1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
       1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0,
       0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0,
       1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0,
       0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1,
       0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1,
       1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1])

In [20]:
# Report the confusion matrix, per-class report and overall accuracy for
# the tuned model's predictions, each on its own line.
print(
    confusion_matrix(y_test, y_pred),
    classification_report(y_test, y_pred),
    accuracy_score(y_test, y_pred),
    sep="\n",
)

[[123  16]
 [ 26 135]]
              precision    recall  f1-score   support

           0       0.83      0.88      0.85       139
           1       0.89      0.84      0.87       161

    accuracy                           0.86       300
   macro avg       0.86      0.86      0.86       300
weighted avg       0.86      0.86      0.86       300

0.86


In [23]:
# gradient boosting regressor
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

In [25]:
# Synthetic regression dataset: 1000 samples, 2 features, noise=10, fixed
# seed. This rebinds X and y, replacing the classification data.
X, y = make_regression(
    n_samples=1000,
    n_features=2,
    noise=10,
    random_state=1,
)

In [26]:
# Hold out 30% of the regression samples for testing; the split is seeded.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=1,
)

In [27]:
# Baseline gradient boosting regressor with default hyperparameters.
# The seed is fixed so that repeated runs of the notebook build the same
# model and report the same metrics.
regressor = GradientBoostingRegressor(random_state=1)
regressor

In [28]:
# Train the baseline regressor on the training split.
regressor.fit(X=X_train, y=y_train)

In [29]:
# Predict targets for the test split and display them.
y_pred = regressor.predict(X=X_test)
y_pred

array([ -38.71704331,  -99.03188122,  166.30134236,   18.89846687,
         76.37511873,   18.21085953,  -18.0033098 ,  141.97698958,
       -162.20045225,  112.46240389,   60.73101866,   22.03835327,
        -83.33620862,  104.32795803,   21.28684668, -167.54995977,
        156.68484004,   41.69251473,  -26.63576241,  -13.337158  ,
       -173.26147027,   15.16130797,   16.1878131 ,   47.45522906,
         37.76361391,   17.06717851,  -76.69619185,  -84.92598933,
        137.3420746 ,  218.51964418,  209.97376501,   28.02959162,
        102.0518223 ,   32.75883828,   64.08362821,   28.66421558,
        -76.69619185,  107.12759838, -194.94233623,  -19.04053587,
        145.64306184,  135.54889032,  -45.09933542, -199.21419469,
        -32.52354918,   12.17308091,  -41.62985082,   85.70441734,
        -69.55076928,  -49.27437037,   47.53579414,   40.7584331 ,
          5.52393416,  -48.78473399,   -4.14056909,  -15.67872212,
        142.45705894,   58.79998979,  -94.05629335,  136.05528

In [30]:
# Report R^2, mean absolute error and mean squared error on the test split,
# each on its own line.
print(
    r2_score(y_test, y_pred),
    mean_absolute_error(y_test, y_pred),
    mean_squared_error(y_test, y_pred),
    sep="\n",
)

0.9816687854683488
9.331730796598531
151.5162217371331


In [32]:
# hyperparameter tuning
from sklearn.model_selection import GridSearchCV
# Candidate values explored by the grid search (4 x 3 = 12 combinations).
param_grid = dict(
    learning_rate=[0.01, 0.1, 0.05, 0.2],
    n_estimators=[100, 200, 300],
)

In [33]:
# Base estimator for the search. It is seeded so that every candidate's
# cross-validation scores and the final refit are reproducible across runs.
model = GradientBoostingRegressor(random_state=1)
# 5-fold cross-validated grid search over param_grid, logging each fit.
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, verbose=3)

In [34]:
# Run the cross-validated search on the regression training split.
grid.fit(X=X_train, y=y_train)

Fitting 5 folds for each of 12 candidates, totalling 60 fits
[CV 1/5] END learning_rate=0.01, n_estimators=100;, score=0.796 total time=   0.2s
[CV 2/5] END learning_rate=0.01, n_estimators=100;, score=0.813 total time=   0.3s
[CV 3/5] END learning_rate=0.01, n_estimators=100;, score=0.843 total time=   0.2s
[CV 4/5] END learning_rate=0.01, n_estimators=100;, score=0.812 total time=   0.2s
[CV 5/5] END learning_rate=0.01, n_estimators=100;, score=0.821 total time=   0.2s
[CV 1/5] END learning_rate=0.01, n_estimators=200;, score=0.936 total time=   0.4s
[CV 2/5] END learning_rate=0.01, n_estimators=200;, score=0.946 total time=   0.4s
[CV 3/5] END learning_rate=0.01, n_estimators=200;, score=0.962 total time=   0.3s
[CV 4/5] END learning_rate=0.01, n_estimators=200;, score=0.947 total time=   0.4s
[CV 5/5] END learning_rate=0.01, n_estimators=200;, score=0.948 total time=   0.4s
[CV 1/5] END learning_rate=0.01, n_estimators=300;, score=0.966 total time=   0.5s
[CV 2/5] END learning_rate

In [35]:
# Keep a handle on the regressor chosen by the grid search and display it.
best_model = grid.best_estimator_
best_model

In [36]:
# Predict targets for the test split with the tuned regressor and display
# them. This rebinds y_pred, replacing the baseline model's predictions.
y_pred = best_model.predict(X=X_test)
y_pred

array([-3.83318211e+01, -9.79563372e+01,  1.65361936e+02,  1.96376730e+01,
        7.58532010e+01,  1.64615421e+01, -1.62125976e+01,  1.44109835e+02,
       -1.63318581e+02,  1.12424008e+02,  6.15028405e+01,  2.21079240e+01,
       -8.26394630e+01,  1.04823248e+02,  2.22851342e+01, -1.64457192e+02,
        1.56246159e+02,  4.14132953e+01, -2.47217515e+01, -1.26550492e+01,
       -1.70237087e+02,  1.50846964e+01,  1.58950490e+01,  4.84015307e+01,
        3.86064006e+01,  1.97513623e+01, -7.58605048e+01, -8.48134916e+01,
        1.37358245e+02,  2.21749280e+02,  2.11427856e+02,  2.74024021e+01,
        1.01541456e+02,  3.00737277e+01,  6.31925416e+01,  2.79611071e+01,
       -7.59298541e+01,  1.07531400e+02, -1.91300086e+02, -1.79520024e+01,
        1.46139319e+02,  1.35078786e+02, -4.55984163e+01, -1.98121075e+02,
       -3.19562910e+01,  1.31274386e+01, -4.15057969e+01,  8.63399548e+01,
       -6.86742035e+01, -4.95866173e+01,  4.84420723e+01,  4.03604284e+01,
        3.37427571e+00, -

In [37]:
# Report R^2, mean absolute error and mean squared error for the tuned
# regressor's predictions, each on its own line.
print(
    r2_score(y_test, y_pred),
    mean_absolute_error(y_test, y_pred),
    mean_squared_error(y_test, y_pred),
    sep="\n",
)

0.9822113126455758
9.29163074734129
147.031976138391
