In [1]:
from sklearn.datasets import load_breast_cancer

In [2]:
cancer = load_breast_cancer()

In [3]:
cancer.keys()

dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names'])

In [7]:
print(len(cancer['data']), len(cancer['data'][0]))

569 30


In [8]:
print(cancer['feature_names'])

['mean radius' 'mean texture' 'mean perimeter' 'mean area'
 'mean smoothness' 'mean compactness' 'mean concavity'
 'mean concave points' 'mean symmetry' 'mean fractal dimension'
 'radius error' 'texture error' 'perimeter error' 'area error'
 'smoothness error' 'compactness error' 'concavity error'
 'concave points error' 'symmetry error' 'fractal dimension error'
 'worst radius' 'worst texture' 'worst perimeter' 'worst area'
 'worst smoothness' 'worst compactness' 'worst concavity'
 'worst concave points' 'worst symmetry' 'worst fractal dimension']


In [9]:
print(len(cancer['feature_names']))

30


So we conclude here that there are 569 examples, each having 30 features.

In [10]:
print(cancer['target'])

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 1 0 0 0 0 0 0 0 0 1 0 1 1 1 1 1 0 0 1 0 0 1 1 1 1 0 1 0 0 1 1 1 1 0 1 0 0
 1 0 1 0 0 1 1 1 0 0 1 0 0 0 1 1 1 0 1 1 0 0 1 1 1 0 0 1 1 1 1 0 1 1 0 1 1
 1 1 1 1 1 1 0 0 0 1 0 0 1 1 1 0 0 1 0 1 0 0 1 0 0 1 1 0 1 1 0 1 1 1 1 0 1
 1 1 1 1 1 1 1 1 0 1 1 1 1 0 0 1 0 1 1 0 0 1 1 0 0 1 1 1 1 0 1 1 0 0 0 1 0
 1 0 1 1 1 0 1 1 0 0 1 0 0 0 0 1 0 0 0 1 0 1 0 1 1 0 1 0 0 0 0 1 1 0 0 1 1
 1 0 1 1 1 1 1 0 0 1 1 0 1 1 0 0 1 0 1 1 1 1 0 1 1 1 1 1 0 1 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 1 1 1 1 1 1 0 1 0 1 1 0 1 1 0 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1
 1 0 1 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 0 1 0 1 1 1 1 0 0 0 1 1
 1 1 0 1 0 1 0 1 1 1 0 1 1 1 1 1 1 1 0 0 0 1 1 1 1 1 1 1 1 1 1 1 0 0 1 0 0
 0 1 0 0 1 1 1 1 1 0 1 1 1 1 1 0 1 1 1 0 1 1 0 0 1 1 1 1 1 1 0 1 1 1 1 1 1
 1 0 1 1 1 1 1 0 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0 1 0 1 1 1 1 1 0 1 1
 0 1 0 1 1 0 1 0 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 0 1
 1 1 1 1 1 1 0 1 0 1 1 0 

In [11]:
print(len(cancer['target']))

569


In [13]:
print(cancer['target_names'])

['malignant' 'benign']


In [14]:
print(cancer['DESCR'])

Breast Cancer Wisconsin (Diagnostic) Database

Notes
-----
Data Set Characteristics:
    :Number of Instances: 569

    :Number of Attributes: 30 numeric, predictive attributes and the class

    :Attribute Information:
        - radius (mean of distances from center to points on the perimeter)
        - texture (standard deviation of gray-scale values)
        - perimeter
        - area
        - smoothness (local variation in radius lengths)
        - compactness (perimeter^2 / area - 1.0)
        - concavity (severity of concave portions of the contour)
        - concave points (number of concave portions of the contour)
        - symmetry 
        - fractal dimension ("coastline approximation" - 1)

        The mean, standard error, and "worst" or largest (mean of the three
        largest values) of these features were computed for each image,
        resulting in 30 features.  For instance, field 3 is Mean Radius, field
        13 is Radius SE, field 23 is Worst Radius.

        

One can also use the shape property to retrieve the size of a 2D matrix which serves well here to find the size of the matrix of the feature vectors.

In [15]:
cancer['data'].shape

(569, 30)

Let's now split our data of 569 examples into training set and test set. We'll use our training set to train our machine learning model and test set to test the accuracy of our model.

In [16]:
from sklearn.model_selection import train_test_split

In [17]:
X = cancer['data']
y = cancer['target']

In [18]:
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [19]:
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(426, 30) (143, 30) (426,) (143,)


We observe that the ratio in which the dataset is divided is 3:1.

Before applying any machine learning model, we need to do something called 'Data Preprocessing'.

In [20]:
from sklearn.preprocessing import StandardScaler

In [21]:
scaler = StandardScaler()
scaler.fit(X_train)

StandardScaler(copy=True, with_mean=True, with_std=True)

In [22]:
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

The preprocessing is done with the training set and is applied to both training and test set.

Now is the time to train our model. Let's select a neural network model to train.

In [23]:
from sklearn.neural_network import MLPClassifier

MLP stands for Multi-Layer Perceptron which indeed means a neural network with more than one layers.

Let's now create an instance of the MLPC classifier. Remember that we can pass many arguments to the cnstructor but one of the most important is the hidden_layer_sizes. A tuple is passed to hidden_layer_sizes which specifies the number of nodes at each hidden layer.

In [24]:
mlp = MLPClassifier(hidden_layer_sizes = (30, 30, 30))

In [26]:
mlp.fit(X_train, y_train)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(30, 30, 30), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

We observe that ReLU is used as an activation function. Now that we have trained our model, we can predict the output for the samples in our test set.

In [30]:
pred1 = mlp.predict(X_train)
pred2 = mlp.predict(X_test)

Now let's look for the accuracy score, classification report and confusion matrix.

In [28]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

Let's first check the accuracy on the training set and test set.

In [32]:
print(accuracy_score(y_train, pred1)*100, accuracy_score(y_test, pred2)*100)

100.0 95.8041958042


Now let's check the confusion matrix of the test set.<br/>
But first let's see what the confusion matrix is.
![alt text](confusion_matrix.png "Confusion Matrix")

In [33]:
print(confusion_matrix(y_test, pred2))

[[45  3]
 [ 3 92]]


We can also look at the classification report to find the values of precision, recall and F1-score.<br/>
But first let's find out what actually precision, recall and F1-score are.
![alt text](classification_report.jpg "Classification Report")


In [34]:
print(classification_report(y_test, pred2))

             precision    recall  f1-score   support

          0       0.94      0.94      0.94        48
          1       0.97      0.97      0.97        95

avg / total       0.96      0.96      0.96       143



As you may guess, support tells the number of test examples that actually lie in a particular class. For example in the above report, 48 training examples are actually labelled 0 and 95 are actually labelled 1.

One can also extract the MLP <font color = "red"><b>weights and biases</b></font> in each layer after training the model.

In [35]:
print(mlp.coefs_)

[array([[ -6.58254569e-03,   9.03369890e-02,   2.51267833e-01,
          2.51367635e-01,   1.29824374e-01,   6.01760762e-02,
          2.52800240e-01,   1.75317676e-01,   1.99603296e-01,
          8.43400583e-02,   3.93563896e-01,  -6.57005889e-02,
          2.31228850e-01,  -6.14590128e-02,   3.09276653e-01,
         -1.59529666e-01,   2.28924744e-01,   9.17302796e-03,
         -9.52902531e-02,   1.58182750e-01,  -2.18350716e-01,
          6.23746705e-02,   1.23913472e-01,   3.11325302e-01,
         -1.75043088e-01,  -2.66849181e-01,   1.24360935e-01,
         -5.49822469e-02,   2.46902524e-01,  -3.19116274e-01],
       [  9.56685903e-02,  -3.47361302e-01,   1.45927042e-02,
         -1.54412407e-01,   1.26948821e-01,  -2.64834003e-01,
          2.73378421e-03,  -1.45353570e-01,   1.37409465e-01,
         -1.68246156e-01,   1.97219144e-01,   3.10334746e-01,
         -1.43410198e-01,   3.78379912e-02,  -1.46946384e-01,
         -9.47139814e-02,   2.54701834e-02,  -2.76062622e-01,
      

The last matrix contains all the weights in each layer in a separate list. From this matrix we can also predict the number of layers in our model and the number of hidden units in each hidden layer.

In [42]:
print(len(mlp.coefs_), " ( ", len(mlp.coefs_[0]), len(mlp.coefs_[1]), len(mlp.coefs_[2]), len(mlp.coefs_[3]), " )")

4  (  30 30 30 30  )


Hence we conclude that we have got 4 layers in our model, three of which are, obviously, hidden layers. And we have got 30 weights  

In [39]:
print(mlp.intercepts_)

[array([ 0.02047145,  0.26293088,  0.19671821,  0.03413485,  0.24612118,
       -0.13506287,  0.20009518, -0.16376145, -0.00878485,  0.04690711,
        0.04195848, -0.16542978, -0.16526493,  0.19476513,  0.08820789,
       -0.21859601,  0.17308464,  0.14144526,  0.3285034 ,  0.05620485,
       -0.20947093, -0.26934872,  0.22528931,  0.34552412,  0.16081693,
        0.26569331,  0.34766716,  0.1472701 ,  0.08203576,  0.08168188]), array([ 0.26884086,  0.27677775,  0.18922119, -0.12859842,  0.22039755,
       -0.11603987, -0.0214034 ,  0.05424476,  0.32902146,  0.10021394,
       -0.12347479,  0.07949354,  0.02835494,  0.30174971,  0.12522953,
        0.35837346, -0.22017406,  0.20552109, -0.15472781,  0.14661678,
        0.28625203,  0.09481724,  0.17597229,  0.2262648 ,  0.0408351 ,
       -0.08258176,  0.04511661,  0.34722962,  0.080089  ,  0.15687496]), array([-0.15189179,  0.11865389,  0.17615393,  0.38249776, -0.13315283,
        0.35434826, -0.11385072, -0.16847963, -0.04956787, 

This is a matrix containing all the biases in each layer in a separate list.

In [41]:
print(len(mlp.intercepts_), " ( ", len(mlp.intercepts_[0]), len(mlp.intercepts_[1]), len(mlp.intercepts_[2]), len(mlp.intercepts_[3]), " ) ")

4  (  30 30 30 1  ) 
