## **XGBoost**

In [2]:
# importing necessary libraries
import xgboost as xgb
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

In [3]:
# Build a synthetic two-class dataset: 10,000 samples with 20 features,
# 10 of them informative and 5 redundant. The fixed random_state makes the
# generated data identical on every run.
dataset_spec = dict(
    n_samples=10_000,
    n_features=20,
    n_informative=10,
    n_redundant=5,
    n_classes=2,
    random_state=42,
)
X, y = make_classification(**dataset_spec)

In [4]:
# Hold out 20% of the samples as the test set; the fixed seed keeps the
# partition repeatable across runs.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [5]:
# Wrap each split in a DMatrix, the container that xgb.train and
# Booster.predict are given below. Labels travel with the features.
dtrain = xgb.DMatrix(data=x_train, label=y_train)
dtest = xgb.DMatrix(data=x_test, label=y_test)

In [6]:
# Booster hyper-parameters passed to xgb.train
params = dict(
    objective='binary:logistic',  # binary classification, probability output
    eval_metric='logloss',        # metric reported for every evaluation set
    eta=0.1,                      # learning rate
    max_depth=4,                  # maximum depth of each tree
    subsample=0.8,                # row sampling
    colsample_bytree=0.8,         # column sampling
    seed=42,                      # fixed seed for reproducibility
)

In [7]:
# Upper bound on the number of boosting rounds (trees)
num_rounds = 100

# Datasets scored after every round; each name becomes the prefix in the
# training log (e.g. "train-logloss", "test-logloss").
# NOTE(review): the last entry is the one monitored for early stopping, and
# here it is the test set. The stopping point is therefore chosen on the same
# data that is later used to report accuracy -- consider a separate
# validation split for this role.
watchlist = [(dtrain, 'train'), (dtest, 'test')]

# Fit the booster
bst = xgb.train(
    params=params,
    dtrain=dtrain,
    num_boost_round=num_rounds,
    evals=watchlist,
    early_stopping_rounds=10,  # stop once the monitored metric has not improved for 10 rounds
    verbose_eval=10,  # print evaluation metrics every 10 rounds
)

[0]	train-logloss:0.65294	test-logloss:0.65490
[10]	train-logloss:0.43133	test-logloss:0.44258
[20]	train-logloss:0.33769	test-logloss:0.35420
[30]	train-logloss:0.27982	test-logloss:0.30089
[40]	train-logloss:0.24861	test-logloss:0.27352
[50]	train-logloss:0.22559	test-logloss:0.25339
[60]	train-logloss:0.20887	test-logloss:0.23982
[70]	train-logloss:0.19488	test-logloss:0.23001
[80]	train-logloss:0.18553	test-logloss:0.22347
[90]	train-logloss:0.17469	test-logloss:0.21729
[99]	train-logloss:0.16356	test-logloss:0.20978


In [8]:
# test data prediction
# The native Booster.predict uses every tree in the model by default, even if
# early stopping found its best score at an earlier round. Restrict prediction
# to the best iteration explicitly (best_iteration is available because
# training was run with early_stopping_rounds).
best_round_count = bst.best_iteration + 1
y_pred_prob = bst.predict(dtest, iteration_range=(0, best_round_count))

# binary:logistic outputs a probability per row; label a row as class 1 when
# that probability exceeds 0.5, otherwise class 0.
y_pred = (y_pred_prob > 0.5).astype(int)

In [9]:
# Score the hard class predictions against the held-out labels
test_accuracy = accuracy_score(y_test, y_pred)
test_report = classification_report(y_test, y_pred)

# Overall accuracy first, then the per-class precision / recall / f1 table
print(f"Accuracy score: {test_accuracy}")
print(f"Classification report:\n{test_report}")

Accuracy score: 0.9185
Classification report:
              precision    recall  f1-score   support

           0       0.92      0.92      0.92      1030
           1       0.91      0.92      0.92       970

    accuracy                           0.92      2000
   macro avg       0.92      0.92      0.92      2000
weighted avg       0.92      0.92      0.92      2000

