# Breast Cancer identification with Gradient Boosting classifier

In [1]:
import numpy as np
import pandas as pd
import sklearn
import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.ensemble import GradientBoostingClassifier

## Loading the Data


In [2]:
cancer_data = load_breast_cancer()

In [3]:
X_train, X_test, y_train, y_test = train_test_split(
    cancer_data.data, cancer_data.target,stratify = cancer_data.target,
    shuffle = True,random_state=144)


## Creating and training the model

In [4]:
gb = GradientBoostingClassifier()
gb

GradientBoostingClassifier(criterion='friedman_mse', init=None,
              learning_rate=0.1, loss='deviance', max_depth=3,
              max_features=None, max_leaf_nodes=None,
              min_impurity_decrease=0.0, min_impurity_split=None,
              min_samples_leaf=1, min_samples_split=2,
              min_weight_fraction_leaf=0.0, n_estimators=100,
              n_iter_no_change=None, presort='auto', random_state=None,
              subsample=1.0, tol=0.0001, validation_fraction=0.1,
              verbose=0, warm_start=False)

## Analyze model performance and metrics

In [5]:
gb = gb.fit(X_train, y_train)

In [6]:
print("Training set score: {:.3f}".format(gb.score(X_train, y_train)))
print("Test set score: {:.3f}".format(gb.score(X_test, y_test)))


Training set score: 1.000
Test set score: 0.937


In [7]:
pred_gb = gb.predict(X_test)

In [8]:
confusion = confusion_matrix(y_test, pred_gb)
print("Confusion matrix:\n{}".format(confusion))

Confusion matrix:
[[49  4]
 [ 5 85]]


In [9]:
from sklearn.metrics import f1_score
f1_score(y_test, pred_gb)

0.9497206703910613

In [10]:
print(classification_report(y_test, pred_gb,
target_names=["malignant", "benign"]))

              precision    recall  f1-score   support

   malignant       0.91      0.92      0.92        53
      benign       0.96      0.94      0.95        90

   micro avg       0.94      0.94      0.94       143
   macro avg       0.93      0.93      0.93       143
weighted avg       0.94      0.94      0.94       143

