In [1]:
import gradient_descent
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np

In [2]:
dataset = load_breast_cancer()
print(dataset['DESCR'])

.. _breast_cancer_dataset:

Breast cancer wisconsin (diagnostic) dataset
--------------------------------------------

**Data Set Characteristics:**

    :Number of Instances: 569

    :Number of Attributes: 30 numeric, predictive attributes and the class

    :Attribute Information:
        - radius (mean of distances from center to points on the perimeter)
        - texture (standard deviation of gray-scale values)
        - perimeter
        - area
        - smoothness (local variation in radius lengths)
        - compactness (perimeter^2 / area - 1.0)
        - concavity (severity of concave portions of the contour)
        - concave points (number of concave portions of the contour)
        - symmetry
        - fractal dimension ("coastline approximation" - 1)

        The mean, standard error, and "worst" or largest (mean of the three
        worst/largest values) of these features were computed for each image,
        resulting in 30 features.  For instance, field 0 is Mean Radi

In [3]:
X = dataset['data']
Y = dataset['target']

sc = StandardScaler()
X_Scaled = sc.fit_transform(X)

In [4]:
x_train, x_test, y_train, y_test = train_test_split(X_Scaled, Y)

In [5]:
algo = gradient_descent.LogisticRegression()
algo.fit(x_train, y_train)

In [6]:
algo.score(x_train, y_train)

0.9929577464788732

In [7]:
algo.score(x_test, y_test)

0.951048951048951

In [8]:
y_train_pred = algo.predict(x_train)
y_test_pred = algo.predict(x_test)

In [9]:
print(y_train_pred)

[0 1 1 1 0 1 1 0 1 1 1 0 1 1 1 1 1 1 0 1 0 1 0 1 1 1 0 1 0 1 0 1 1 0 1 0 0
 0 1 1 1 1 1 1 1 1 0 0 1 1 0 0 1 1 1 0 1 1 1 1 1 0 1 0 0 0 1 0 0 0 1 1 1 1
 1 1 0 0 1 0 0 1 1 0 1 0 1 1 0 1 1 1 0 1 1 1 1 1 1 0 1 1 1 0 0 1 0 1 1 1 1
 1 0 1 1 0 0 1 0 0 0 0 1 0 1 1 0 1 1 0 0 0 0 1 0 1 1 1 0 1 0 1 1 1 0 0 0 0
 1 1 0 1 0 0 1 1 1 1 1 1 1 1 0 1 0 1 0 1 1 1 1 1 0 0 1 1 0 1 1 1 1 1 1 0 1
 1 0 1 1 0 1 1 0 1 0 1 0 0 0 0 1 1 1 0 1 1 1 1 0 1 1 1 0 1 1 0 1 1 0 0 0 0
 1 1 1 0 1 1 1 1 1 1 0 0 1 1 1 0 1 0 1 1 1 1 1 1 1 0 0 0 1 1 1 1 1 0 1 1 1
 1 0 1 1 0 1 0 1 1 1 1 0 1 1 0 0 0 1 0 0 0 1 1 0 1 0 1 1 0 0 0 1 0 1 1 1 1
 0 0 0 1 1 1 1 0 0 1 1 0 1 0 1 1 1 0 1 0 1 1 0 1 1 1 1 0 1 0 1 1 0 0 0 1 1
 1 1 1 0 0 1 1 1 1 1 0 1 1 0 1 0 1 1 1 0 1 1 1 1 1 1 1 1 1 0 0 1 1 1 0 0 1
 0 1 0 0 1 1 0 0 0 1 0 1 1 0 1 1 0 0 1 1 0 1 0 1 1 1 0 0 0 1 0 1 1 0 0 1 1
 1 1 1 0 0 1 1 1 1 1 0 1 0 0 1 0 0 1 0]


In [10]:
y_test_pred

array([1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1,
       0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
       0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0,
       1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0])

In [11]:
from sklearn.metrics import confusion_matrix, classification_report
print(confusion_matrix(y_train, y_train_pred))

[[160   3]
 [  0 263]]


In [12]:
print(confusion_matrix(y_test, y_test_pred))

[[43  6]
 [ 1 93]]


In [13]:
print(classification_report(y_train, y_train_pred))

              precision    recall  f1-score   support

           0       1.00      0.98      0.99       163
           1       0.99      1.00      0.99       263

    accuracy                           0.99       426
   macro avg       0.99      0.99      0.99       426
weighted avg       0.99      0.99      0.99       426



In [14]:
print(classification_report(y_test, y_test_pred))

              precision    recall  f1-score   support

           0       0.98      0.88      0.92        49
           1       0.94      0.99      0.96        94

    accuracy                           0.95       143
   macro avg       0.96      0.93      0.94       143
weighted avg       0.95      0.95      0.95       143



In [15]:
y_train_pred = algo.predict(x_train)

In [16]:
print(algo.predict_proba(x_train)[y_train - y_train_pred != 0])

[[0.08649382 0.91350618]
 [0.00910124 0.99089876]
 [0.14828883 0.85171117]]


In [17]:
y_test_pred = algo.predict(x_test)

In [18]:
print(algo.predict_proba(x_test)[y_test - y_test_pred != 0])

[[0.25744079 0.74255921]
 [0.02838552 0.97161448]
 [0.04395642 0.95604358]
 [0.59095    0.40905   ]
 [0.04044422 0.95955578]
 [0.32458382 0.67541618]
 [0.17772472 0.82227528]]
