In [1]:
import pandas as pd
import numpy as np

In [2]:
from sklearn.datasets import load_breast_cancer
cancer = load_breast_cancer()

In [3]:
cancer.keys()

dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names'])

In [4]:
print(cancer['DESCR'])

Breast Cancer Wisconsin (Diagnostic) Database

Notes
-----
Data Set Characteristics:
    :Number of Instances: 569

    :Number of Attributes: 30 numeric, predictive attributes and the class

    :Attribute Information:
        - radius (mean of distances from center to points on the perimeter)
        - texture (standard deviation of gray-scale values)
        - perimeter
        - area
        - smoothness (local variation in radius lengths)
        - compactness (perimeter^2 / area - 1.0)
        - concavity (severity of concave portions of the contour)
        - concave points (number of concave portions of the contour)
        - symmetry 
        - fractal dimension ("coastline approximation" - 1)

        The mean, standard error, and "worst" or largest (mean of the three
        largest values) of these features were computed for each image,
        resulting in 30 features.  For instance, field 3 is Mean Radius, field
        13 is Radius SE, field 23 is Worst Radius.

        

In [5]:
cancer['data'].shape

(569, 30)

In [6]:
X = cancer['data']
y = cancer['target']

In [7]:
from sklearn.model_selection import train_test_split

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [9]:
from sklearn.preprocessing import StandardScaler
# Transforms data such that distribution has mean of 0 and standard dev. of 1, makes it easier for the algorithm to run

In [10]:
scaler = StandardScaler()

In [11]:
scaler.fit(X_train)

StandardScaler(copy=True, with_mean=True, with_std=True)

In [13]:
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [14]:
#Training the model

In [15]:
from sklearn.neural_network import MLPClassifier
#This imports Multi-layer perceptron from scikit-learn's library

In [16]:
mlp = MLPClassifier(hidden_layer_sizes=(30,30,30))
#creates instance of the model and specifies hidden layer sizes, default parmeters are listed below

In [17]:
mlp.fit(X_train, y_train)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(30, 30, 30), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [18]:
predictions = mlp.predict(X_test)

In [19]:
from sklearn.metrics import confusion_matrix,classification_report

In [21]:
print(confusion_matrix(y_test, predictions))
#presents data in a [{true negative, false positive}, {false negative, true positive}] format

[[45  1]
 [37 60]]


In [22]:
print(classification_report(y_test,predictions))

             precision    recall  f1-score   support

          0       0.55      0.98      0.70        46
          1       0.98      0.62      0.76        97

avg / total       0.84      0.73      0.74       143



In [23]:
#precision: tp / (tp + fp), ability not to label a negative sample positive
#recall: tp / (tp + fn), ability to find all positive samples
#f1 score - harmonic mean of precision and recall
#support: number of samples of the true response that lie in that class