In [36]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns

from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score


df = pd.read_csv('heart.csv')
X = df.drop(columns=['target'])
y = df['target']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((242, 13), (61, 13), (242,), (61,))

In [18]:
xgbc = XGBClassifier()
xgbc.fit(X_train, y_train)
accuracy_score(y_test, xgbc.predict(X_test))

0.8032786885245902

In [23]:
parameter = {
    "n_estimators": [10, 30, 50],
    "max_depth": [20, 40, 60],
    "max_features": [0.6, 0.8, 1],
    "min_samples_leaf": [1, 5, 10]
}
xgbc_grid = GridSearchCV(xgbc, parameter, n_jobs=-1, cv=10, verbose=1)
xgbc_grid.fit(X_train, y_train)
xgbc_grid.best_params_

Fitting 10 folds for each of 81 candidates, totalling 810 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done 232 tasks      | elapsed:    1.6s
[Parallel(n_jobs=-1)]: Done 810 out of 810 | elapsed:    5.0s finished


{'max_depth': 20,
 'max_features': 0.6,
 'min_samples_leaf': 1,
 'n_estimators': 30}

In [30]:
y_pred = xgbc_grid.predict(X_test)
accuracy_score(y_test, y_pred)

0.819672131147541

In [31]:
confusion_matrix(y_test, y_pred)

array([[19,  9],
       [ 2, 31]], dtype=int64)

In [32]:
y_test.values

array([0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0,
       1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1,
       0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1], dtype=int64)

In [33]:
y_pred

array([0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0,
       1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
       0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1], dtype=int64)

In [37]:
acc = cross_val_score(xgbc_grid, X_train, y_train, cv=10)
acc.mean(), acc.std()

Fitting 10 folds for each of 81 candidates, totalling 810 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    2.1s
[Parallel(n_jobs=-1)]: Done 810 out of 810 | elapsed:    6.9s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Fitting 10 folds for each of 81 candidates, totalling 810 fits


[Parallel(n_jobs=-1)]: Done 304 tasks      | elapsed:    2.0s
[Parallel(n_jobs=-1)]: Done 810 out of 810 | elapsed:    4.8s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Fitting 10 folds for each of 81 candidates, totalling 810 fits


[Parallel(n_jobs=-1)]: Done 268 tasks      | elapsed:    1.8s
[Parallel(n_jobs=-1)]: Done 810 out of 810 | elapsed:    4.9s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Fitting 10 folds for each of 81 candidates, totalling 810 fits


[Parallel(n_jobs=-1)]: Done 268 tasks      | elapsed:    1.8s
[Parallel(n_jobs=-1)]: Done 810 out of 810 | elapsed:    4.9s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Fitting 10 folds for each of 81 candidates, totalling 810 fits


[Parallel(n_jobs=-1)]: Done 304 tasks      | elapsed:    2.1s
[Parallel(n_jobs=-1)]: Done 810 out of 810 | elapsed:    4.9s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Fitting 10 folds for each of 81 candidates, totalling 810 fits


[Parallel(n_jobs=-1)]: Done 340 tasks      | elapsed:    2.3s
[Parallel(n_jobs=-1)]: Done 810 out of 810 | elapsed:    4.9s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Fitting 10 folds for each of 81 candidates, totalling 810 fits


[Parallel(n_jobs=-1)]: Done 304 tasks      | elapsed:    2.0s
[Parallel(n_jobs=-1)]: Done 810 out of 810 | elapsed:    4.8s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Fitting 10 folds for each of 81 candidates, totalling 810 fits


[Parallel(n_jobs=-1)]: Done 340 tasks      | elapsed:    2.4s
[Parallel(n_jobs=-1)]: Done 810 out of 810 | elapsed:    4.9s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Fitting 10 folds for each of 81 candidates, totalling 810 fits


[Parallel(n_jobs=-1)]: Done 304 tasks      | elapsed:    2.1s
[Parallel(n_jobs=-1)]: Done 810 out of 810 | elapsed:    4.8s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Fitting 10 folds for each of 81 candidates, totalling 810 fits


[Parallel(n_jobs=-1)]: Done 376 tasks      | elapsed:    2.6s
[Parallel(n_jobs=-1)]: Done 810 out of 810 | elapsed:    4.8s finished


(0.7935000000000001, 0.036319951664187125)

In [38]:
acc = cross_val_score(xgbc, X_train, y_train, cv=10)
acc.mean(), acc.std()

(0.79, 0.08856886837057618)