In [None]:
# %pip install xgboost  # uncomment and run once if xgboost is missing from the kernel's environment

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_breast_cancer
from sklearn import tree

In [2]:
# Load scikit-learn's bundled breast-cancer dataset as a (features, labels) pair.
X, y = load_breast_cancer(return_X_y=True)

In [3]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# XGB

In [4]:
from xgboost import XGBClassifier

In [5]:
# Baseline: XGBoost classifier with the library's default hyperparameters,
# trained on the training split. The fit call is left as the last expression
# so the fitted estimator is displayed.
xgb = XGBClassifier()
xgb.fit(X=X_train, y=y_train)

In [6]:
# Predict labels for both splits with the baseline model: the training
# predictions show how well it fits, the test predictions how well it generalizes.
y_training_pred, y_test_pred = xgb.predict(X_train), xgb.predict(X_test)

In [7]:
from sklearn.metrics import accuracy_score, confusion_matrix

# Accuracy of the baseline XGBoost model on the data it was trained on.
accuracy_score(y_true=y_train, y_pred=y_training_pred)

1.0

In [8]:
# Accuracy of the baseline XGBoost model on the held-out test split.
accuracy_score(y_true=y_test, y_pred=y_test_pred)

0.956140350877193

In [9]:
# Confusion matrix on the test split (rows: true class, columns: predicted class).
confusion_matrix(y_true=y_test, y_pred=y_test_pred)

array([[40,  3],
       [ 2, 69]], dtype=int64)

### Random Forest Classifier

In [10]:
from sklearn.ensemble import RandomForestClassifier

# Comparison model: a random forest with a fixed seed so its trees are
# reproducible, trained on the same training split as the XGBoost baseline.
rfc = RandomForestClassifier(random_state=0)
rfc.fit(X=X_train, y=y_train)

In [11]:
# Random-forest predictions for the held-out test split.
rfc_pred = rfc.predict(X=X_test)

In [12]:
# Confusion matrix of the random forest on the test split
# (rows: true class, columns: predicted class).
confusion_matrix(y_true=y_test, y_pred=rfc_pred)

array([[40,  3],
       [ 1, 70]], dtype=int64)

In [14]:
# Test-split accuracy of the random forest, for comparison with XGBoost.
accuracy_score(y_true=y_test, y_pred=rfc_pred)

0.9649122807017544

### Hyperparameter Tuning in XGB

In [15]:
# Candidate values for each XGBoost hyperparameter explored by the search below.
params = dict(
    learning_rate=[0.05, 0.1, 0.15, 0.2, 0.25, 0.3],
    max_depth=[3, 4, 5, 6],
    min_child_weight=[1, 3, 5, 7],
    gamma=[0, 0.1, 0.2, 0.3, 0.4],
)

In [16]:
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV

In [17]:
# Randomized hyperparameter search over `params`: 5 sampled candidate settings,
# each scored with 2-fold cross-validation.
rs_xgb = XGBClassifier()
# Seed the sampler so the same 5 candidates are drawn on every run; without a
# seed, best_params_ changes between executions and the notebook's results are
# not reproducible.
random_search = RandomizedSearchCV(
    rs_xgb,
    param_distributions=params,
    n_iter=5,
    cv=2,
    random_state=42,
)

In [18]:
# Run the randomized search using only the training split; the test split stays held out.
random_search.fit(X_train, y_train)

In [19]:
# Display the estimator built from the best-scoring candidate found by the search.
random_search.best_estimator_

In [20]:
# Display the hyperparameter setting that gave the best cross-validation score.
random_search.best_params_

{'min_child_weight': 1, 'max_depth': 5, 'learning_rate': 0.2, 'gamma': 0.3}

In [21]:
# Build the tuned model directly from the search result instead of retyping the
# printed values by hand, so this cell stays in sync whenever the search is re-run.
xgb_rs = XGBClassifier(**random_search.best_params_)

In [22]:
# Train the tuned XGBoost model on the training split.
xgb_rs.fit(X_train, y_train)

In [23]:
# Predict both splits with the tuned model.
# Note: these names replace the baseline XGBoost predictions computed earlier.
y_training_pred, y_test_pred = xgb_rs.predict(X_train), xgb_rs.predict(X_test)

In [24]:
# Confusion matrix of the tuned XGBoost model on the test split
# (rows: true class, columns: predicted class).
confusion_matrix(y_true=y_test, y_pred=y_test_pred)

array([[40,  3],
       [ 1, 70]], dtype=int64)

In [25]:
# Test-split accuracy of the tuned XGBoost model.
accuracy_score(y_true=y_test, y_pred=y_test_pred)

0.9649122807017544

In [27]:
from sklearn.model_selection import cross_val_score

# 5-fold cross-validation of the tuned model on the training split;
# the result holds one score per fold.
score = cross_val_score(estimator=xgb_rs, X=X_train, y=y_train, cv=5)
score

array([0.96703297, 0.96703297, 0.98901099, 0.96703297, 0.92307692])

In [28]:
# Average the fold scores from the array itself rather than retyping the
# printed (rounded) values, so the mean always matches the current run.
score.mean()

0.9626373640000001