In [None]:
import numpy as np

from scipy.stats import uniform, randint

from sklearn.datasets import load_breast_cancer, load_diabetes, load_wine
from sklearn.metrics import auc, accuracy_score, confusion_matrix, mean_squared_error
from sklearn.model_selection import cross_val_score, GridSearchCV, KFold, RandomizedSearchCV, train_test_split

import xgboost as xgb

Reference notebook: https://www.kaggle.com/code/nauman101/using-xgboost-with-scikit-learn/edit

In [None]:
def display_scores(scores):
    """Print a collection of scores with their mean and standard deviation.

    Args:
        scores: Numeric scores accepted by ``np.mean`` / ``np.std``
            (for example a list or a NumPy array). They are printed as-is,
            followed by the mean and the standard deviation rounded to
            three decimals.
    """
    mean_score = np.mean(scores)
    score_spread = np.std(scores)
    print(f"Scores: {scores}\nMean: {mean_score:.3f}\nStd: {score_spread:.3f}")

In [None]:
def report_best_scores(results, n_top=3):
    """Print the best-ranked candidates of a hyper-parameter search.

    Args:
        results: Mapping with index-aligned ``'rank_test_score'``,
            ``'mean_test_score'``, ``'std_test_score'`` and ``'params'``
            entries (presumably the ``cv_results_`` of a scikit-learn
            search -- confirm against the caller). The entries may be NumPy
            arrays or plain Python sequences.
        n_top: Highest rank to report. Every candidate tied on a rank is
            printed, so more than ``n_top`` models may be listed.
    """
    # Coerce once: comparing a plain list with an int yields a single False
    # instead of an element-wise mask, which would silently print nothing.
    ranks = np.asarray(results['rank_test_score'])
    for rank in range(1, n_top + 1):
        for candidate in np.flatnonzero(ranks == rank):
            print("Model with rank: {0}".format(rank))
            print("Mean validation score: {0:.3f} (std: {1:.3f})".format(
                  results['mean_test_score'][candidate],
                  results['std_test_score'][candidate]))
            print("Parameters: {0}".format(results['params'][candidate]))
            print("")

In [None]:
import pandas as pd

In [None]:
# Location of the breast-cancer CSV; kept in one named constant so the
# source of the data is easy to spot and change.
DATA_URL = (
    'https://raw.githubusercontent.com/mhmmd-nauman/DataSets/master/'
    'black-box-models/CP-Nets/dsbc/original%20ds/'
    'breast-cancer%20-%20complete.csv'
)
data = pd.read_csv(DATA_URL)

In [None]:
# `df` is a second name for the same DataFrame object as `data` (no copy is
# made), so any in-place change made through `df` is visible through `data`.
df = data

# Keep the preview as the last expression so the notebook actually renders it.
df.head()

In [None]:
from sklearn.preprocessing import LabelEncoder

# Replace every listed column (the 'Class' label included) with integer codes.
# A separate encoder is fitted per column and kept, so each fitted mapping
# remains available afterwards (e.g. via `classes_` / `inverse_transform`).
# Because `df` is the same object as `data`, `data` is encoded as well.
label_encoders = {
    column: LabelEncoder()
    for column in (
        'Age', 'menopause', 'tumor-size', 'inv-nodes', 'node-caps',
        'deg-malig', 'breast', 'breast-quad', 'irradiat', 'Class',
    )
}
for column, encoder in label_encoders.items():
    df[column] = encoder.fit_transform(df[column])

# The original per-column encoder names stay bound so that any code
# referring to them keeps working.
labelencoder_X_age = label_encoders['Age']
labelencoder_X_menopause = label_encoders['menopause']
labelencoder_X_tumor_size = label_encoders['tumor-size']
labelencoder_X_inv_nodes = label_encoders['inv-nodes']
labelencoder_X_node_caps = label_encoders['node-caps']
labelencoder_X_deg_malig = label_encoders['deg-malig']
labelencoder_X_breast = label_encoders['breast']
labelencoder_X_breast_quad = label_encoders['breast-quad']
labelencoder_X_irradiat = label_encoders['irradiat']
labelencoder_X_class = label_encoders['Class']

In [None]:
def clean_dataset(df):
    """Return a float64 copy of ``df`` without rows holding NaN or +/-inf.

    Args:
        df: The frame to clean. It is not modified; rows are dropped on a
            new frame.

    Returns:
        A new ``pd.DataFrame`` containing only the rows of ``df`` that have
        no NaN, ``inf`` or ``-inf`` value, with every column cast to
        ``np.float64``. The surviving rows keep their original index labels.

    Raises:
        AssertionError: If ``df`` is not a ``pd.DataFrame``.
    """
    assert isinstance(df, pd.DataFrame), "df needs to be a pd.DataFrame"
    # Non-mutating dropna: the caller's frame must not lose rows as a side effect.
    without_nan = df.dropna()
    # `axis` has to be passed by keyword; pandas 2.x rejects the positional form.
    has_bad_value = without_nan.isin([np.nan, np.inf, -np.inf]).any(axis=1)
    return without_nan[~has_bad_value].astype(np.float64)

In [None]:
# Rebind `df` to the cleaned frame returned by clean_dataset: rows containing
# NaN or +/-inf are removed and every column is cast to float64.
# NOTE(review): the cell that builds X and y reads from `data`, not from this
# `df` -- confirm that is intended.
df = clean_dataset(df)

In [None]:
# Feature matrix: the first nine columns of `data`, as a NumPy array.
# NOTE(review): the slice is positional -- it assumes 'Class' is not among the
# first nine columns; confirm against the CSV column order.
X = data.iloc[:, :9].values

# Target vector: the 'Class' column, kept as a pandas Series.
# NOTE(review): both X and y are taken from `data` rather than from the
# cleaned `df` -- confirm that is intended.
y = data.loc[:, 'Class']

In [None]:
# Show the feature matrix built from the first nine columns of `data`.
X

In [None]:
# Show the target values taken from the 'Class' column.
y

In [None]:
# Hold out 30% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

# Early stopping is configured on the estimator: passing
# `early_stopping_rounds` to fit() is deprecated since XGBoost 1.6 and is
# rejected by newer releases.
# With several evaluation metrics, XGBoost uses the last one ("error@0.6")
# to decide when to stop.
xgb_model = xgb.XGBClassifier(
    objective="binary:logistic",
    n_estimators=50,
    random_state=0,
    eval_metric=["auc", "error", "error@0.6"],
    early_stopping_rounds=5,
)

# NOTE(review): the held-out split doubles as the early-stopping evaluation
# set, so scores computed on X_test afterwards are optimistic -- consider a
# separate validation split.
xgb_model.fit(X_train, y_train, eval_set=[(X_test, y_test)])

y_pred = xgb_model.predict(X_test)

In [None]:
# Accuracy of the predictions on the held-out split (y_test vs. y_pred).
accuracy_score(y_test, y_pred)

In [None]:
# `confusion_matrix` is already imported in the notebook's first cell.
# Compute the matrix once and reuse it for the per-cell counts; for a binary
# problem scikit-learn lays it out as [[tn, fp], [fn, tp]].
cm = confusion_matrix(y_test, y_pred)
tn, fp, fn, tp = cm.ravel()

# Specificity: share of actual negatives that were predicted negative.
Sp = tn / (tn + fp)
# Sensitivity (recall): share of actual positives that were predicted positive.
Sn = tp / (tp + fn)

In [None]:
# Print the confusion matrix, then specificity (Sp) and sensitivity (Sn),
# one value per line.
print(cm, Sp, Sn, sep="\n")

In [None]:
# Imported up front so a missing graphviz package fails here rather than
# inside the plotting call at the end of the cell.
import graphviz

# Same 70/30 split and seed as the previous training cell.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

# Early stopping is configured on the estimator: passing
# `early_stopping_rounds` to fit() is deprecated since XGBoost 1.6 and is
# rejected by newer releases.
xgb_model = xgb.XGBClassifier(
    objective="binary:logistic",
    random_state=0,
    eval_metric="auc",
    early_stopping_rounds=10,
)

# NOTE(review): the held-out split is also the early-stopping evaluation set.
xgb_model.fit(X_train, y_train, eval_set=[(X_test, y_test)], verbose=False)

xgb.plot_importance(xgb_model)

# Alternative: plot the tree via matplotlib, specifying the ordinal number of
# the target tree:
# xgb.plot_tree(xgb_model, num_trees=xgb_model.best_iteration)

# Convert the tree at the best early-stopping iteration to a graphviz object;
# as the last expression of the cell it is rendered by the notebook.
xgb.to_graphviz(xgb_model, num_trees=xgb_model.best_iteration)