In [12]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, precision_score, f1_score, accuracy_score, recall_score, roc_auc_score

In [2]:
cancer = datasets.load_breast_cancer()

In [3]:
print(cancer.keys())
print(cancer.DESCR)
print(cancer.data.shape)
print(cancer.feature_names)

dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names', 'filename'])
.. _breast_cancer_dataset:

Breast cancer wisconsin (diagnostic) dataset
--------------------------------------------

**Data Set Characteristics:**

    :Number of Instances: 569

    :Number of Attributes: 30 numeric, predictive attributes and the class

    :Attribute Information:
        - radius (mean of distances from center to points on the perimeter)
        - texture (standard deviation of gray-scale values)
        - perimeter
        - area
        - smoothness (local variation in radius lengths)
        - compactness (perimeter^2 / area - 1.0)
        - concavity (severity of concave portions of the contour)
        - concave points (number of concave portions of the contour)
        - symmetry 
        - fractal dimension ("coastline approximation" - 1)

        The mean, standard error, and "worst" or largest (mean of the three
        largest values) of these features were computed for ea

In [4]:
x = cancer.data
print(x.shape)

(569, 30)


In [5]:
y = cancer.target
print(y.shape)

(569,)


In [6]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

(455, 30)
(455,)
(114, 30)
(114,)


In [9]:
alg = KNeighborsClassifier(n_neighbors = 5, metric = 'minkowski', p = 2)
alg.fit(x_train, y_train)
y_pred = alg.predict(x_test)
print(y_pred.shape)

(114,)


In [10]:
matrix = confusion_matrix(y_test, y_pred)
print(matrix)

[[33  4]
 [ 3 74]]


In [11]:
precision = precision_score(y_test, y_pred)
print(precision * 100, '% \t Precision')
accuracy = accuracy_score(y_test, y_pred)
print(accuracy * 100, '% \t Accuracy')
recall = recall_score(y_test, y_pred)
print(recall * 100, '% \t Recall')
f1 = f1_score(y_test, y_pred)
print(f1 * 100, '% \t F1')
roc_auc = roc_auc_score(y_test, y_pred)
print(roc_auc * 100, '% \t ROC-AUC')

94.87179487179486 % 	 Precision
93.85964912280701 % 	 Accuracy
96.1038961038961 % 	 Recall
95.48387096774195 % 	 F1
92.64654264654266 % 	 ROC-AUC
