# Breast Cancer identification with Logistic Regression classifier

In [1]:
import numpy as np
import pandas as pd
import sklearn
import matplotlib.pyplot as plt

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier

## Loading the Data


In [2]:
cancer_data = load_breast_cancer()

In [3]:
X_train, X_test, y_train, y_test = train_test_split(
    cancer_data.data, cancer_data.target,stratify = cancer_data.target,
    shuffle = True,random_state=144)


## Creating and training the model

In [4]:
rf = RandomForestClassifier(oob_score=True)
rf

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators='warn', n_jobs=None,
            oob_score=True, random_state=None, verbose=0, warm_start=False)

In [5]:
rf = rf.fit(X_train, y_train)

  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])


In [6]:
print("Training set score: {:.3f}".format(rf.score(X_train, y_train)))
print("Test set score: {:.3f}".format(rf.score(X_test, y_test)))
print("OOB score: {:.3f}".format(rf.oob_score_) )


Training set score: 1.000
Test set score: 0.930
OOB score: 0.937


In [7]:
pred_rf = rf.predict(X_test)

In [8]:
confusion = confusion_matrix(y_test, pred_rf)
print("Confusion matrix:\n{}".format(confusion))

Confusion matrix:
[[50  3]
 [ 7 83]]


In [9]:
f1_score(y_test, pred_rf)

0.9431818181818181

In [10]:
print(classification_report(y_test, pred_rf,
target_names=["malignant", "benign"]))

              precision    recall  f1-score   support

   malignant       0.88      0.94      0.91        53
      benign       0.97      0.92      0.94        90

   micro avg       0.93      0.93      0.93       143
   macro avg       0.92      0.93      0.93       143
weighted avg       0.93      0.93      0.93       143

