In [None]:
%matplotlib inline
import numpy as np
import pandas as pd
import seaborn as sn
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer

In [None]:
# Breast cancer dataset for classification.
# Load it once and take the feature matrix / label vector from the returned
# Bunch instead of reading the dataset a second time with return_X_y=True.
cancer = load_breast_cancer()
X_cancer, y_cancer = cancer.data, cancer.target

In [None]:
# Bring in the logistic regression estimator and hold out a test split.
# random_state is pinned so the same split is produced on every run.

from sklearn.linear_model import LogisticRegression

X_train, X_test, y_train, y_test = train_test_split(
    X_cancer,
    y_cancer,
    random_state=0,
)

In [None]:
# Build the logistic regression estimator, then fit it on the training split.
# No feature scaling is applied; max_iter is set to 2000, presumably to give
# the solver room to converge on the unscaled features.
LR = LogisticRegression(max_iter=2000)
LR.fit(X_train, y_train)

# Report mean accuracy on the data the model was trained on.
print('Breast cancer dataset')
print(
    'Accuracy of Logistic regression classifier on training set :{:.2f}'.format(
        LR.score(X_train, y_train)
    )
)

In [None]:
# Display the fitted estimator (and its non-default settings) rather than
# constructing a throwaway, unfitted LogisticRegression() that nothing uses.
LR

In [None]:
# Report mean accuracy of the fitted model on the held-out test split.
print(
    'Accuracy of Logistic regression classifier on test set: {:.2f}'.format(
        LR.score(X_test, y_test)
    )
)

In [None]:
# Predict test-set labels with the fitted model and tabulate them against
# the true labels (rows: true class, columns: predicted class).
from sklearn.metrics import confusion_matrix

LR_predict = LR.predict(X_test)
CM = confusion_matrix(y_true=y_test, y_pred=LR_predict)

print('Logistic Regression classifier\n', CM)

In [None]:
# Evaluation metrics for binary classification

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Accuracy  = (TP + TN) / (TP + TN + FP + FN)
# Precision = TP / (TP + FP)
# Recall (sensitivity, true positive rate) = TP / (TP + FN)
# F1        = 2 * Precision * Recall / (Precision + Recall)

# Each report line pairs its output template with the metric that fills it;
# every metric compares the true test labels with the model's predictions.
for template, metric in (
    ('Accuracy : {:.2f}', accuracy_score),
    ('Precision : {:.2f}', precision_score),
    ('Recall: {:.2f}', recall_score),
    ('F1: {:.2f}', f1_score),
):
    print(template.format(metric(y_test, LR_predict)))





In [None]:
# Binarise the labels: class 1 stays 1, every other label becomes 0.
# np.where builds a new array, so y_cancer itself is left untouched.
y_0 = np.where(y_cancer != 1, 0, y_cancer)

# Show the labels at indices 1..9 before and after the mapping.
print('Original labels:\t', y_cancer[1:10])
print('New binary labels:\t', y_0[1:10])

In [None]:
from sklearn.metrics import roc_curve, auc

# Re-split using the binarised labels (same seed as the earlier split).
X_train, X_test, y_train, y_test = train_test_split(
    X_cancer, y_0, random_state=0
)

# Refit the existing estimator, then score the test split with its
# decision function (continuous scores rather than hard class labels).
LR.fit(X_train, y_train)
y_score_lr = LR.decision_function(X_test)

# ROC curve points (thresholds discarded) and the area under the curve.
FPR_LR, TPR_LR, _ = roc_curve(y_test, y_score_lr)
roc_auc_lr = auc(FPR_LR, TPR_LR)
roc_auc_lr

In [None]:
# Plot the ROC curve of the logistic regression model against the chance
# diagonal. An explicit Axes is used throughout: calling plt.axes() with no
# arguments adds a new, empty Axes on top of the figure in current Matplotlib,
# so the aspect ratio must be set on the Axes that actually holds the curve.
fig, ax = plt.subplots()
ax.set_xlim([-0.01, 1.00])
ax.set_ylim([-0.01, 1.01])
ax.plot(FPR_LR, TPR_LR, lw=3,
        label='LogRegr ROC curve (area = {:0.2f})'.format(roc_auc_lr))
ax.set_xlabel('False Positive Rate', fontsize=16)
ax.set_ylabel('True Positive Rate', fontsize=16)
# The data is the breast cancer dataset, not a digits task.
ax.set_title('ROC curve (breast cancer classifier)', fontsize=16)
ax.legend(loc='lower right', fontsize=13)
# Unlabelled reference line: the ROC of a random classifier.
ax.plot([0, 1], [0, 1], color='navy', lw=3, linestyle='--')
ax.set_aspect('equal')
plt.show()