# XGBoost Regressor

In [1]:
from xgboost import XGBRegressor
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

In [2]:
# Synthetic regression problem: 1000 samples, 2 features, seeded for repeatability.
X, y = make_regression(
    n_samples=1000,
    n_features=2,
    noise=10,
    random_state=1,
)

In [3]:
# Hold out 33% of the rows for testing; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=1,
    test_size=0.33,
)

In [4]:
# Sanity check on the split: (train feature shape, test feature shape).
(X_train.shape, X_test.shape)

((670, 2), (330, 2))

In [5]:
# Baseline regressor with library-default hyperparameters, trained on the
# training split only.
regressor = XGBRegressor()
regressor.fit(X=X_train, y=y_train)

In [6]:
# Predict targets for the held-out test features. The bare name on the last
# line makes the notebook display the prediction array.
y_pred = regressor.predict(X_test)
y_pred

array([-3.60156670e+01, -9.62838058e+01,  1.62519806e+02,  2.19694004e+01,
        7.39040375e+01,  6.85059118e+00, -1.37642279e+01,  1.45513046e+02,
       -1.65535385e+02,  1.08140694e+02,  6.21138000e+01,  2.46483135e+01,
       -9.09498901e+01,  1.04465912e+02,  2.74600086e+01, -1.71492279e+02,
        1.60038528e+02,  4.38138504e+01, -1.82620773e+01, -8.77775955e+00,
       -1.67459595e+02,  1.27157812e+01,  1.78047600e+01,  4.61400452e+01,
        2.75595360e+01,  1.80333958e+01, -8.30158310e+01, -8.16803284e+01,
        1.38046539e+02,  2.19817551e+02,  1.89733627e+02,  2.84617100e+01,
        9.36491852e+01,  3.00287533e+01,  6.46254654e+01,  3.00820084e+01,
       -8.02395096e+01,  1.04194626e+02, -1.90209793e+02, -9.17096233e+00,
        1.44125259e+02,  1.40122391e+02, -4.86224556e+01, -2.04977020e+02,
       -3.86700172e+01,  1.55876713e+01, -5.08480835e+01,  8.57863388e+01,
       -6.90055923e+01, -4.97215157e+01,  4.61035652e+01,  4.44011192e+01,
        3.50145912e+00, -

In [7]:
# Evaluate the baseline regressor on the held-out test split.
print("Current model performance")
baseline_metrics = (
    ("R2 score", r2_score),
    ("Mean Absolute Error", mean_absolute_error),
    ("Mean Squared Error", mean_squared_error),
)
# Each metric is called as metric(y_true, y_pred) and printed as "<label>: <value>".
for metric_label, metric_fn in baseline_metrics:
    print(f"{metric_label}: {metric_fn(y_test, y_pred)}")

Current model performance
R2 score: 0.9775959731278183
Mean Absolute Error: 10.527663222328606
Mean Squared Error: 187.01416712634463


In [8]:
from sklearn.model_selection import GridSearchCV

# Search space for tuning: 3 ensemble sizes x 4 learning rates = 12 candidates.
param_grid = dict(
    n_estimators=[100, 200, 300],
    learning_rate=[0.01, 0.05, 0.1, 0.2],
)

In [9]:
# Exhaustive search over `param_grid` for the regressor, scored with 5-fold
# cross-validation; n_jobs=-1 lets the fits run in parallel.
grid_search = GridSearchCV(
    regressor,
    param_grid,
    cv=5,
    verbose=2,
    n_jobs=-1,
)

In [10]:
# Display the configured search object (it is fitted in a later cell).
grid_search

In [11]:
# Run the cross-validated search on the training split only, so the test
# split stays unseen until the final evaluation.
grid_search.fit(X=X_train, y=y_train)

Fitting 5 folds for each of 12 candidates, totalling 60 fits


In [12]:
# Hyperparameter combination selected by the grid search.
grid_search.best_params_

{'learning_rate': 0.05, 'n_estimators': 100}

In [13]:
# Take the best estimator found by the search and predict on the held-out
# test features. The bare name on the last line displays the predictions.
best_model = grid_search.best_estimator_
y_pred_tuned = best_model.predict(X_test)
y_pred_tuned

array([-3.81126022e+01, -9.89765015e+01,  1.64464584e+02,  2.46330299e+01,
        7.86538086e+01,  1.36501665e+01, -1.57157211e+01,  1.42531509e+02,
       -1.64790573e+02,  1.11464653e+02,  6.53094330e+01,  2.34102020e+01,
       -8.60616074e+01,  1.06394341e+02,  2.14571743e+01, -1.73392807e+02,
        1.57570908e+02,  4.07061768e+01, -1.99035053e+01, -1.33966856e+01,
       -1.63329163e+02,  9.19030666e+00,  2.13840122e+01,  3.98775902e+01,
        2.39972382e+01,  1.87524738e+01, -7.79743576e+01, -8.26990509e+01,
        1.35476364e+02,  2.13927414e+02,  1.97759155e+02,  2.67137852e+01,
        9.74459457e+01,  3.17023487e+01,  6.45870743e+01,  2.65373230e+01,
       -7.79743576e+01,  1.03130646e+02, -1.86799149e+02, -1.52702322e+01,
        1.42278656e+02,  1.38348480e+02, -4.83246689e+01, -2.00461945e+02,
       -3.68630791e+01,  1.69061203e+01, -4.26657906e+01,  8.54929199e+01,
       -6.83709488e+01, -4.91922646e+01,  4.67005615e+01,  3.79474182e+01,
        7.01026344e+00, -

In [14]:
# Evaluate the tuned regressor on the same held-out test split as the baseline.
print("Tuned model performance:")
tuned_metrics = (
    ("R2 Score", r2_score),
    ("Mean Absolute Error", mean_absolute_error),
    ("Mean Squared Error", mean_squared_error),
)
# Each metric is called as metric(y_true, y_pred) and printed as "<label>: <value>".
for metric_label, metric_fn in tuned_metrics:
    print(f"{metric_label}: {metric_fn(y_test, y_pred_tuned)}")

Tuned model performance:
R2 Score: 0.9794677038026033
Mean Absolute Error: 9.760364679170404
Mean Squared Error: 171.39018331188285


# XGBoost Classifier

In [15]:
from xgboost import XGBClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [16]:
# Synthetic binary classification problem: 1000 samples, 20 features, seeded.
# NOTE: this rebinds X and y, replacing the regression data created earlier.
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_classes=2,
    random_state=1,
)

In [17]:
# Hold out 33% of the rows for testing; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=1,
    test_size=0.33,
)

In [18]:
# Baseline classifier with library-default hyperparameters, trained on the
# training split only.
classifier = XGBClassifier()
classifier.fit(X=X_train, y=y_train)

In [19]:
# Predict class labels for the held-out test features. The bare name on the
# last line makes the notebook display the prediction array.
y_pred = classifier.predict(X_test)
y_pred

array([0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1,
       0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1,
       0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1,
       0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1,
       1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
       1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0,
       0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0,
       1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0,
       1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0,
       0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1,
       1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1,

In [20]:
# Evaluate the baseline classifier on the held-out test split.
print("Current model performance:")
print(f"Accuracy: {accuracy_score(y_test, y_pred)}")
# Each section is printed as a heading line followed by its body.
baseline_sections = (
    ("Classification Report:", classification_report(y_test, y_pred)),
    ("Confusion Matrix:", confusion_matrix(y_test, y_pred)),
)
for section_heading, section_body in baseline_sections:
    print(section_heading)
    print(section_body)

Current model performance:
Accuracy: 0.8484848484848485
Classification Report:
              precision    recall  f1-score   support

           0       0.82      0.86      0.84       154
           1       0.87      0.84      0.86       176

    accuracy                           0.85       330
   macro avg       0.85      0.85      0.85       330
weighted avg       0.85      0.85      0.85       330

Confusion Matrix:
[[132  22]
 [ 28 148]]


In [21]:
#Hyperparameter Tuning
from sklearn.model_selection import GridSearchCV

# Search space for tuning: 3 ensemble sizes x 4 learning rates = 12 candidates.
param_grid = dict(
    n_estimators=[100, 200, 300],
    learning_rate=[0.01, 0.05, 0.1, 0.2],
)

In [22]:
# Exhaustive search over `param_grid` for the classifier, scored with 5-fold
# cross-validation; n_jobs=-1 lets the fits run in parallel.
grid_search = GridSearchCV(
    classifier,
    param_grid,
    cv=5,
    verbose=3,
    n_jobs=-1,
)

In [23]:
# Run the cross-validated search on the training split only, so the test
# split stays unseen until the final evaluation.
grid_search.fit(X=X_train, y=y_train)

Fitting 5 folds for each of 12 candidates, totalling 60 fits


In [24]:
# Hyperparameter combination selected by the grid search.
grid_search.best_params_

{'learning_rate': 0.2, 'n_estimators': 200}

In [25]:
# Take the best estimator found by the search and predict class labels for the
# held-out test features. The bare name on the last line displays them.
best_model = grid_search.best_estimator_
y_pred_tuned = best_model.predict(X_test)
y_pred_tuned

array([0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1,
       0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1,
       0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1,
       0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1,
       1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1,
       1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0,
       0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0,
       1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1,
       1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0,
       0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1,
       1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1,

In [26]:
# Evaluate the tuned classifier on the same held-out test split as the baseline.
print("Tuned model performance:")
print(f"Accuracy: {accuracy_score(y_test, y_pred_tuned)}")
# Each section is printed as a heading line followed by its body.
tuned_sections = (
    ("Classification Report:", classification_report(y_test, y_pred_tuned)),
    ("Confusion Matrix:", confusion_matrix(y_test, y_pred_tuned)),
)
for section_heading, section_body in tuned_sections:
    print(section_heading)
    print(section_body)

Tuned model performance:
Accuracy: 0.8424242424242424
Classification Report:
              precision    recall  f1-score   support

           0       0.81      0.86      0.84       154
           1       0.87      0.83      0.85       176

    accuracy                           0.84       330
   macro avg       0.84      0.84      0.84       330
weighted avg       0.84      0.84      0.84       330

Confusion Matrix:
[[132  22]
 [ 30 146]]
