#Gradient Boosting for Classification

In [1]:
from numpy import mean
from numpy import std
from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.ensemble import GradientBoostingClassifier


In [2]:
# define dataset
X, y = make_classification(n_samples=1000, n_features=20, n_informative=15, n_redundant=5, random_state=7)

In [4]:
X.shape

(1000, 20)

In [8]:
X[:5]

array([[ 2.92994904e-01, -4.21223056e+00, -1.28833200e+00,
        -2.17849815e+00, -6.45276650e-01,  2.58097719e+00,
         2.84223878e-01, -7.18279280e+00, -1.91211104e+00,
         2.73729512e+00,  8.13956945e-01,  3.96973717e+00,
        -2.66939799e+00,  3.34692332e+00,  4.19791821e+00,
         9.99909984e-01, -3.02018750e-01, -4.43170633e+00,
        -2.82646737e+00,  4.49168082e-01],
       [-6.83990068e-02,  5.51884147e+00,  1.12389773e+01,
        -5.03969992e+00, -2.08678427e+00,  2.14968460e+00,
         5.59734187e-01,  1.51137767e+01, -3.07183375e+00,
        -2.57458353e+00,  3.32457612e+00,  2.06754191e+00,
        -5.24925807e+00, -2.15449971e+00,  4.93109130e+00,
         1.29673535e+00, -3.18613337e+00, -3.08994781e+00,
         1.19029898e+00,  1.62025622e+00],
       [ 7.31616218e-01, -6.84686328e-01, -9.81741943e-01,
        -2.55246528e+00, -5.27030762e+00, -1.56149846e+00,
        -1.16926915e+00, -2.10408711e+00, -1.13113880e+00,
         4.65477500e+00, -2.7

In [9]:
y[:5]

array([1, 1, 1, 0, 0])

In [10]:
# define the model
model = GradientBoostingClassifier()
# define the evaluation method
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
# evaluate the model on the dataset
n_scores = cross_val_score(model, X, y, scoring='accuracy', cv=cv, n_jobs=-1)
# report performance
print('Mean Accuracy: %.3f (%.3f)' % (mean(n_scores), std(n_scores)))

Mean Accuracy: 0.899 (0.030)


# make predictions using gradient boosting for classification


In [11]:
from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier
# define dataset
X, y = make_classification(n_samples=1000, n_features=20, n_informative=15, n_redundant=5, random_state=7)
# define the model
model = GradientBoostingClassifier()
# fit the model on the whole dataset
model.fit(X, y)
# make a single prediction
row = [0.2929949, -4.21223056, -1.288332, -2.17849815, -0.64527665, 2.58097719, 0.28422388, -7.1827928, -1.91211104, 2.73729512, 0.81395695, 3.96973717, -2.66939799, 3.34692332, 4.19791821, 0.99990998, -0.30201875, -4.43170633, -2.82646737, 0.44916808]
yhat = model.predict([row])
# summarize prediction
print('Predicted Class: %d' % yhat[0])

Predicted Class: 1


#Gradient Boosting for Regression

In [12]:
#PythonGeeks code for Gradient Boosting for Regression
from numpy import mean
from numpy import std
from sklearn.datasets import make_regression
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedKFold
from sklearn.ensemble import GradientBoostingRegressor
# define dataset
X, y = make_regression(n_samples=1000, n_features=20, n_informative=15, noise=0.1, random_state=7)
# define the model
model = GradientBoostingRegressor()
# define the evaluation procedure
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)
# evaluate the model
n_scores = cross_val_score(model, X, y, scoring='neg_mean_absolute_error', cv=cv, n_jobs=-1)
# report performance
print('MAE: %.3f (%.3f)' % (mean(n_scores), std(n_scores)))

MAE: -62.456 (3.244)


#Make a prediction

In [13]:
# gradient boosting ensemble for making predictions for regression
from sklearn.datasets import make_regression
from sklearn.ensemble import GradientBoostingRegressor
# define dataset
X, y = make_regression(n_samples=1000, n_features=20, n_informative=15, noise=0.1, random_state=7)
# define the model
model = GradientBoostingRegressor()
# fit the model on the whole dataset
model.fit(X, y)
# make a single prediction
row = [0.20543991, -0.97049844, -0.81403429, -0.23842689, -0.60704084, -0.48541492, 0.53113006, 2.01834338, -0.90745243, -1.85859731, -1.02334791, -0.6877744, 0.60984819, -0.70630121, -1.29161497, 1.32385441, 1.42150747, 1.26567231, 2.56569098, -0.11154792]
yhat = model.predict([row])
# summarize prediction
print('Prediction: %d' % yhat[0])

Prediction: 37
