In [1]:
import warnings
# Blanket filter: suppresses every warning category for the rest of the session.
# NOTE(review): this also hides deprecation and numerical warnings raised by the
# libraries imported in this cell; consider narrowing it with category=/module=.
warnings.filterwarnings("ignore")

import matplotlib.pyplot as plt
import matplotlib
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

In [2]:
# Notebook-wide plotting defaults: large canvas and large text.
matplotlib.rcParams.update({
    'figure.figsize': [20, 10],
    'font.size': 20,
})

In [3]:
def metrics(y, predicted, scores):
    """Summarise binary-classification quality as a dict of scalar scores.

    Args:
        y: Ground-truth labels.
        predicted: Hard class predictions, compared against ``y`` for
            accuracy, precision, recall and F1.
        scores: Continuous scores for the positive class, used only for
            the ROC AUC.

    Returns:
        dict with keys ``'accuracy'``, ``'precision'``, ``'recall'``,
        ``'f1-score'`` and ``'auc'`` (in that order).
    """
    # Scorers that consume hard predictions; insertion order fixes the
    # key order of the returned dict.
    label_scorers = {
        'accuracy': accuracy_score,
        'precision': precision_score,
        'recall': recall_score,
        'f1-score': f1_score,
    }
    report = {name: scorer(y, predicted) for name, scorer in label_scorers.items()}
    # AUC is the only metric computed from continuous scores.
    report['auc'] = roc_auc_score(y, scores)
    return report

In [4]:
from sklearn.datasets import load_breast_cancer

# scikit-learn's bundled breast-cancer dataset.
data = load_breast_cancer()
# Class names in the reverse of data.target_names order.
target_classes = list(data.target_names[::-1])

# Positive class (True) is target == 0, i.e. we detect a malignant tumor.
X, y = data.data, data.target == 0

# 70/30 train/test split with a fixed seed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=666,
)

In [7]:
from xgboost import XGBClassifier

# Classifier built with no arguments, i.e. the library's default hyperparameters.
clf = XGBClassifier()
# Train on the training split; kept as the cell's final bare expression so the
# notebook echoes the fitted estimator and its parameters.
clf.fit(X_train, y_train)

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
       max_depth=3, min_child_weight=1, missing=None, n_estimators=100,
       n_jobs=1, nthread=None, objective='binary:logistic', random_state=0,
       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
       silent=True, subsample=1)

In [17]:
# Hard class predictions for the held-out test split.
y_test_hat = clf.predict(X_test)
# Column 1 of predict_proba — presumably the probability of the positive
# (True / malignant) class, given scikit-learn's sorted class order; confirm via clf.classes_.
y_test_score = clf.predict_proba(X_test)[:,1]

In [18]:
# Score the test-split predictions; bare expression so the notebook displays the resulting dict.
metrics(y_test, y_test_hat, y_test_score)

{'accuracy': 0.9649122807017544,
 'precision': 0.9384615384615385,
 'recall': 0.9682539682539683,
 'f1-score': 0.953125,
 'auc': 0.9923574368018812}

In [19]:
# Importance scores of the fitted model, shown as a raw array (30 values here).
# NOTE(review): presumably one value per column of X in data.feature_names order — confirm before labelling.
clf.feature_importances_

array([0.        , 0.11971831, 0.        , 0.0399061 , 0.03051643,
       0.00234742, 0.0258216 , 0.07042254, 0.00234742, 0.00469484,
       0.00938967, 0.01877934, 0.        , 0.05633803, 0.01173709,
       0.05399061, 0.00938967, 0.00234742, 0.00938967, 0.01173709,
       0.07511737, 0.07511737, 0.03051643, 0.10328639, 0.05868544,
       0.02347418, 0.07981221, 0.05868544, 0.01173709, 0.00469484],
      dtype=float32)