In [13]:
from sklearn.datasets import load_breast_cancer
import numpy as np
import pandas as pd
raw = load_breast_cancer()
x = raw.data
y = raw.target
# Show feature names
print('Feature Names:',list(raw.feature_names))

Feature Names: ['mean radius', 'mean texture', 'mean perimeter', 'mean area', 'mean smoothness', 'mean compactness', 'mean concavity', 'mean concave points', 'mean symmetry', 'mean fractal dimension', 'radius error', 'texture error', 'perimeter error', 'area error', 'smoothness error', 'compactness error', 'concavity error', 'concave points error', 'symmetry error', 'fractal dimension error', 'worst radius', 'worst texture', 'worst perimeter', 'worst area', 'worst smoothness', 'worst compactness', 'worst concavity', 'worst concave points', 'worst symmetry', 'worst fractal dimension']


In [14]:
# Show dataset description
raw.DESCR




In [15]:
# Show target names
print('Target Names:',list(raw.target_names))

Target Names: ['malignant', 'benign']


In [16]:
# Show dimension of X
print('Dimension of X:',x.shape)

Dimension of X: (569, 30)


In [17]:
# Show dimension of y
print('Dimension of Y:',y.shape)

Dimension of Y: (569,)


In [18]:
# Split X, y into X_train, X_test, y_train, y_test with 7:3 ratio
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.30, random_state=0)
x_train, x_test, y_train, y_test

(array([[1.149e+01, 1.459e+01, 7.399e+01, ..., 7.431e-02, 2.941e-01,
         9.180e-02],
        [1.049e+01, 1.861e+01, 6.686e+01, ..., 6.528e-02, 2.213e-01,
         7.842e-02],
        [1.225e+01, 1.794e+01, 7.827e+01, ..., 8.211e-02, 3.113e-01,
         8.132e-02],
        ...,
        [9.436e+00, 1.832e+01, 5.982e+01, ..., 5.052e-02, 2.454e-01,
         8.136e-02],
        [9.720e+00, 1.822e+01, 6.073e+01, ..., 0.000e+00, 1.909e-01,
         6.559e-02],
        [1.151e+01, 2.393e+01, 7.452e+01, ..., 9.653e-02, 2.112e-01,
         8.732e-02]]),
 array([[1.340e+01, 2.052e+01, 8.864e+01, ..., 2.051e-01, 3.585e-01,
         1.109e-01],
        [1.321e+01, 2.525e+01, 8.410e+01, ..., 6.005e-02, 2.444e-01,
         6.788e-02],
        [1.402e+01, 1.566e+01, 8.959e+01, ..., 8.216e-02, 2.136e-01,
         6.710e-02],
        ...,
        [1.371e+01, 1.868e+01, 8.873e+01, ..., 1.284e-01, 2.849e-01,
         9.031e-02],
        [1.330e+01, 2.157e+01, 8.524e+01, ..., 5.614e-02, 2.637e-01,
   

In [19]:
# Build a logistic regression model of solver='liblinear' with X_train, y_train
from sklearn.linear_model import LogisticRegression
logReg = LogisticRegression(solver='liblinear')
logReg.fit(x_train, y_train)

LogisticRegression(solver='liblinear')

In [20]:
# pridict y_pred from X_test
y_pred = logReg.predict(x_test)
y_pred

array([0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0,
       1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1,
       0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1,
       0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1,
       0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0,
       1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1,
       1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1])

In [21]:
# Show confustion matrix
from sklearn.metrics import confusion_matrix
matrix=confusion_matrix(y_test, y_pred)
matrix, matrix.dtype

(array([[ 62,   1],
        [  5, 103]]),
 dtype('int64'))

In [22]:
# Show accuracy
score = logReg.score(x_test, y_test)
print('Accuracy:',score)

Accuracy: 0.9649122807017544


In [23]:
# Show precision
from sklearn.metrics import precision_score
precision=precision_score(y_test, y_pred)
print('Precision:', precision)

Precision: 0.9903846153846154


In [24]:
# Show recall
from sklearn.metrics import recall_score
recall=recall_score(y_test, y_pred)
print('Recall:',recall)

Recall: 0.9537037037037037
