In [1]:
import gradient_descent
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import math

In [2]:
dataset = load_breast_cancer()
print(dataset['DESCR'])

.. _breast_cancer_dataset:

Breast cancer wisconsin (diagnostic) dataset
--------------------------------------------

**Data Set Characteristics:**

    :Number of Instances: 569

    :Number of Attributes: 30 numeric, predictive attributes and the class

    :Attribute Information:
        - radius (mean of distances from center to points on the perimeter)
        - texture (standard deviation of gray-scale values)
        - perimeter
        - area
        - smoothness (local variation in radius lengths)
        - compactness (perimeter^2 / area - 1.0)
        - concavity (severity of concave portions of the contour)
        - concave points (number of concave portions of the contour)
        - symmetry
        - fractal dimension ("coastline approximation" - 1)

        The mean, standard error, and "worst" or largest (mean of the three
        worst/largest values) of these features were computed for each image,
        resulting in 30 features.  For instance, field 0 is Mean Radi

In [3]:
X = dataset['data']
Y = dataset['target']

sc = StandardScaler()
X_Scaled = sc.fit_transform(X)

In [4]:
x_train, x_test, y_train, y_test = train_test_split(X_Scaled, Y)

In [5]:
algo = gradient_descent.LogisticRegression()
algo.fit(x_train, y_train)

In [6]:
print(1/(1+math.exp(-516.0386940074326)))

1.0


In [7]:
math.log(1e-10)

-23.025850929940457

In [8]:
algo.score(x_train, y_train)

0.9812206572769953

In [9]:
algo.score(x_test, y_test)

0.993006993006993

In [10]:
y_train_pred = algo.predict(x_train)
y_test_pred = algo.predict(x_test)

In [11]:
print(y_train_pred)

[0 1 0 0 0 1 0 1 0 1 0 1 1 1 1 0 1 0 1 0 1 1 1 1 1 1 1 1 1 1 0 1 1 0 1 1 1
 0 1 1 0 0 1 1 0 1 1 1 0 0 0 0 1 0 1 0 0 1 0 1 1 0 1 0 1 0 1 1 1 0 1 1 1 0
 0 1 0 0 1 0 1 1 1 1 1 0 1 1 0 1 1 0 1 1 0 1 1 1 1 0 1 0 1 1 1 1 1 0 0 1 1
 1 1 1 1 0 0 0 0 1 1 1 1 1 0 1 1 0 1 1 1 1 1 0 0 0 1 1 1 0 1 1 0 0 0 0 1 0
 1 0 1 1 0 1 1 1 0 0 1 0 1 0 1 1 0 0 0 0 0 1 0 1 1 1 1 1 1 1 1 1 1 0 1 1 0
 1 1 0 0 0 1 0 1 1 0 1 1 0 1 1 1 1 0 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 0 1 0
 1 0 0 0 0 1 1 0 0 0 1 0 0 0 1 1 1 0 1 1 1 1 1 0 1 0 1 1 1 1 1 1 1 1 0 0 1
 1 0 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 0 1 0 1 1 1 1 0 1
 1 0 0 1 1 0 1 1 1 1 0 1 1 1 0 0 0 1 0 1 1 0 0 1 0 0 0 1 1 1 1 1 0 1 0 0 0
 1 1 1 1 0 1 0 1 1 0 1 1 0 0 1 0 0 1 1 0 1 0 0 1 0 1 1 1 1 0 1 0 1 1 0 1 0
 0 1 1 1 1 1 0 0 0 1 0 1 1 1 0 0 0 0 0 1 1 0 1 0 1 1 0 1 1 1 1 1 0 1 0 1 1
 0 1 1 1 1 0 0 0 1 0 1 0 0 0 1 0 1 1 1]


In [12]:
y_test_pred

array([1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0,
       1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1,
       1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0,
       0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1,
       0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1,
       0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1,
       1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0])

In [13]:
from sklearn.metrics import confusion_matrix, classification_report
print(confusion_matrix(y_train, y_train_pred))

[[155   6]
 [  2 263]]


In [14]:
print(confusion_matrix(y_test, y_test_pred))

[[50  1]
 [ 0 92]]


In [15]:
print(classification_report(y_train, y_train_pred))

              precision    recall  f1-score   support

           0       0.99      0.96      0.97       161
           1       0.98      0.99      0.99       265

    accuracy                           0.98       426
   macro avg       0.98      0.98      0.98       426
weighted avg       0.98      0.98      0.98       426



In [16]:
print(classification_report(y_test, y_test_pred))

              precision    recall  f1-score   support

           0       1.00      0.98      0.99        51
           1       0.99      1.00      0.99        92

    accuracy                           0.99       143
   macro avg       0.99      0.99      0.99       143
weighted avg       0.99      0.99      0.99       143

