In [1]:
# Seed every random number generator the notebook relies on so the results are reproducible.
# Seeding NumPy alone is not enough: Keras layer weights are initialised through
# TensorFlow's own generator, which has to be seeded separately.
from numpy.random import seed
import tensorflow

SEED = 1
seed(SEED)
tensorflow.random.set_seed(SEED)

In [2]:
# Dependencies
import numpy as np
import pandas as pd

In [3]:
import tensorflow
# Display the version of the Keras API bundled with this TensorFlow install.
tensorflow.keras.__version__

'2.2.4-tf'

In [5]:
# Load the cleaned breast-cancer dataset into a DataFrame.
df = pd.read_csv("CLEANED_breast_cancer_prediction.csv")
# Preview the data without the leftover "Unnamed: 0" column.
# NOTE(review): the result of drop() is only displayed, never assigned, so `df` itself
# still contains "Unnamed: 0" and every later cell that reads `df` still sees it.
# If that column should not be used as a feature, assign the result here and update
# the model's input width accordingly.
df.drop(columns=["Unnamed: 0"])

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,842302,M,17.99,10.38,122.80,1001.0,0.11840,0.27760,0.30010,0.14710,...,25.380,17.33,184.60,2019.0,0.16220,0.66560,0.7119,0.2654,0.4601,0.11890
1,842517,M,20.57,17.77,132.90,1326.0,0.08474,0.07864,0.08690,0.07017,...,24.990,23.41,158.80,1956.0,0.12380,0.18660,0.2416,0.1860,0.2750,0.08902
2,84300903,M,19.69,21.25,130.00,1203.0,0.10960,0.15990,0.19740,0.12790,...,23.570,25.53,152.50,1709.0,0.14440,0.42450,0.4504,0.2430,0.3613,0.08758
3,84348301,M,11.42,20.38,77.58,386.1,0.14250,0.28390,0.24140,0.10520,...,14.910,26.50,98.87,567.7,0.20980,0.86630,0.6869,0.2575,0.6638,0.17300
4,84358402,M,20.29,14.34,135.10,1297.0,0.10030,0.13280,0.19800,0.10430,...,22.540,16.67,152.20,1575.0,0.13740,0.20500,0.4000,0.1625,0.2364,0.07678
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
564,926424,M,21.56,22.39,142.00,1479.0,0.11100,0.11590,0.24390,0.13890,...,25.450,26.40,166.10,2027.0,0.14100,0.21130,0.4107,0.2216,0.2060,0.07115
565,926682,M,20.13,28.25,131.20,1261.0,0.09780,0.10340,0.14400,0.09791,...,23.690,38.25,155.00,1731.0,0.11660,0.19220,0.3215,0.1628,0.2572,0.06637
566,926954,M,16.60,28.08,108.30,858.1,0.08455,0.10230,0.09251,0.05302,...,18.980,34.12,126.70,1124.0,0.11390,0.30940,0.3403,0.1418,0.2218,0.07820
567,927241,M,20.60,29.33,140.10,1265.0,0.11780,0.27700,0.35140,0.15200,...,25.740,39.42,184.60,1821.0,0.16500,0.86810,0.9387,0.2650,0.4087,0.12400


In [6]:
# Separate the target column from the remaining (feature) columns.
y = df["diagnosis"]
X = df.drop(columns=["diagnosis"])
print(X.shape, y.shape)

(569, 32) (569,)


In [7]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.utils import to_categorical

In [8]:
# Hold out a test split. test_size is not passed, so scikit-learn's default ratio applies.
# random_state is fixed so the same rows land in each split on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [9]:
# Learn the per-feature min/max from the training split only, then apply that same
# scaling to both splits so no test-set statistics influence the transformation.
X_scaler = MinMaxScaler()
X_scaler.fit(X_train)
X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

In [10]:
# Step 1: Label-encode data set
# Fit the encoder on the training labels, then map both splits to integer class ids.
label_encoder = LabelEncoder().fit(y_train)
encoded_y_train, encoded_y_test = (
    label_encoder.transform(labels) for labels in (y_train, y_test)
)

In [11]:
# Step 2: Convert encoded labels to one-hot-encoding
# Pin the one-hot width to the number of classes the encoder learned. Without
# num_classes, to_categorical infers the width from the largest label present in each
# array, so a split that happens to lack the highest class would get a narrower matrix.
n_classes = len(label_encoder.classes_)
y_train_categorical = to_categorical(encoded_y_train, num_classes=n_classes)
y_test_categorical = to_categorical(encoded_y_test, num_classes=n_classes)

## **Create Deep Learning Model**

In [12]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [17]:
# Create model and add layers
# Take the input width from the scaled training matrix instead of hard-coding it, so the
# network stays in sync with whatever feature columns X contains.
n_features = X_train_scaled.shape[1]

model = Sequential()
model.add(Dense(units=100, activation='relu', input_dim=n_features))
model.add(Dense(units=100, activation='relu'))
# Two softmax units: one probability per one-hot encoded diagnosis class.
model.add(Dense(units=2, activation='softmax'))

In [18]:
# Compile the model (this cell does not train it; fitting happens in a separate model.fit call).
# categorical_crossentropy matches the one-hot encoded targets and the softmax output layer.
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [19]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 100)               3300      
_________________________________________________________________
dense_4 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_5 (Dense)              (None, 2)                 202       
Total params: 13,602
Trainable params: 13,602
Non-trainable params: 0
_________________________________________________________________


In [20]:
# Train on the scaled training features against the one-hot labels for 60 epochs,
# shuffling the samples and logging one line per epoch (verbose=2).
# The History object returned by fit() is the cell's displayed value.
model.fit(x=X_train_scaled, y=y_train_categorical, epochs=60, shuffle=True, verbose=2)

Train on 426 samples
Epoch 1/60
426/426 - 1s - loss: 0.5926 - accuracy: 0.8451
Epoch 2/60
426/426 - 0s - loss: 0.4166 - accuracy: 0.9225
Epoch 3/60
426/426 - 0s - loss: 0.2762 - accuracy: 0.9413
Epoch 4/60
426/426 - 0s - loss: 0.2039 - accuracy: 0.9343
Epoch 5/60
426/426 - 0s - loss: 0.1713 - accuracy: 0.9460
Epoch 6/60
426/426 - 0s - loss: 0.1475 - accuracy: 0.9437
Epoch 7/60
426/426 - 0s - loss: 0.1357 - accuracy: 0.9507
Epoch 8/60
426/426 - 0s - loss: 0.1272 - accuracy: 0.9507
Epoch 9/60
426/426 - 0s - loss: 0.1138 - accuracy: 0.9601
Epoch 10/60
426/426 - 0s - loss: 0.1090 - accuracy: 0.9577
Epoch 11/60
426/426 - 0s - loss: 0.1044 - accuracy: 0.9577
Epoch 12/60
426/426 - 0s - loss: 0.0996 - accuracy: 0.9648
Epoch 13/60
426/426 - 0s - loss: 0.1078 - accuracy: 0.9507
Epoch 14/60
426/426 - 0s - loss: 0.0863 - accuracy: 0.9648
Epoch 15/60
426/426 - 0s - loss: 0.0810 - accuracy: 0.9742
Epoch 16/60
426/426 - 0s - loss: 0.0776 - accuracy: 0.9671
Epoch 17/60
426/426 - 0s - loss: 0.0731 - ac

<tensorflow.python.keras.callbacks.History at 0x7fa7bd11d438>

## **Quantify the Trained Model**

In [21]:
# Score the trained network on the held-out, scaled test split and report both metrics.
model_loss, model_accuracy = model.evaluate(x=X_test_scaled, y=y_test_categorical, verbose=2)
print(f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

143/1 - 0s - loss: 0.0948 - accuracy: 0.9790
Normal Neural Network - Loss: 0.07804083902310659, Accuracy: 0.9790209531784058


## **Make Predictions**

In [22]:
# Predict class indices for the first 20 test rows.
# Sequential.predict_classes is deprecated and absent from later TensorFlow releases;
# for a softmax output, the argmax over the predicted probabilities gives the same indices.
class_probabilities = model.predict(X_test_scaled[:20])
encoded_predictions = np.argmax(class_probabilities, axis=-1)
# Map the integer class indices back to the original diagnosis labels.
prediction_labels = label_encoder.inverse_transform(encoded_predictions)

In [28]:
# Print the predicted labels next to the true labels for the same first 20 test rows
# so the two sequences can be compared by eye.
print(f"Predicted Diagnosis: {prediction_labels}")
print(f"Actual Diagnosis: {list(y_test[:20])}")

Predicted Diagnosis: ['B' 'M' 'B' 'M' 'M' 'M' 'M' 'M' 'B' 'B' 'B' 'M' 'M' 'B' 'B' 'B' 'B' 'B'
 'B' 'M']
Actual Diagnosis: ['B', 'M', 'B', 'M', 'M', 'M', 'M', 'M', 'B', 'B', 'B', 'M', 'M', 'B', 'B', 'B', 'B', 'B', 'B', 'M']


In [30]:
# Tabulate predicted vs. actual diagnosis for the first 20 test rows.
pd.DataFrame({
    "Predicted Diagnosis": prediction_labels[:20],
    "Actual Diagnosis": y_test.iloc[:20].to_numpy(),
})

Unnamed: 0,Predicted Diagnosis,Actual Diagnosis
0,B,B
1,M,M
2,B,B
3,M,M
4,M,M
5,M,M
6,M,M
7,M,M
8,B,B
9,B,B
