# Breast Cancer Classification (K FOLD CROSS VALIDATION)

Dataset: https://archive.ics.uci.edu/dataset/17/breast+cancer+wisconsin+diagnostic

In [1]:
import pandas as pd 
import tensorflow as tf
import sklearn 
import scikeras 

In [2]:
pd.__version__, tf.__version__, sklearn.__version__, scikeras.__version__

('2.2.2', '2.17.0', '1.5.1', '0.13.0')

In [3]:
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import cross_val_score
from tensorflow.keras.models import Sequential
from tensorflow.keras import backend as k

In [4]:
X = pd.read_csv('../data/breast+cancer+wisconsin+diagnostic/x_bcwd.csv')
y = pd.read_csv('../data/breast+cancer+wisconsin+diagnostic/y_bcwd.csv')

In [9]:
def create_net():
    k.clear_session()
    neural_network = Sequential([
        tf.keras.layers.InputLayer(shape = (30,)),
        tf.keras.layers.Dense(units=16, activation = 'relu', kernel_initializer='random_uniform'),
        tf.keras.layers.Dropout(rate=0.2), # to avoid overfitting
        tf.keras.layers.Dense(units=16, activation = 'relu', kernel_initializer='random_uniform'),
        tf.keras.layers.Dropout(rate=0.2),
        tf.keras.layers.Dense(units=1, activation='sigmoid')
    ])
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001, clipvalue=0.5)
    neural_network.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['binary_accuracy'])

    return neural_network

neural_network = KerasClassifier(model=create_net, epochs=100, batch_size=10)

results = cross_val_score(estimator=neural_network, X=X, y=y, cv=10, scoring='accuracy')

Epoch 1/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - binary_accuracy: 0.5693 - loss: 2.9676  
Epoch 2/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 903us/step - binary_accuracy: 0.6198 - loss: 0.6691
Epoch 3/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 854us/step - binary_accuracy: 0.6169 - loss: 0.6152
Epoch 4/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 825us/step - binary_accuracy: 0.6526 - loss: 0.5660
Epoch 5/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 815us/step - binary_accuracy: 0.6652 - loss: 0.6040
Epoch 6/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 826us/step - binary_accuracy: 0.6489 - loss: 0.5859
Epoch 7/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 815us/step - binary_accuracy: 0.6387 - loss: 0.6449
Epoch 8/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 796us/step - binary_accuracy

In [6]:
# without dropout
results

array([0.84210526, 0.8245614 , 0.8245614 , 0.85964912, 0.77192982,
       0.71929825, 0.80701754, 0.85964912, 0.61403509, 0.83928571])

In [7]:
# without dropout
results.mean()

0.7962092731829574

In [8]:
#standard deviation
results.std()

0.07309445098909494

In [10]:
# with dropout
results.mean(), results.std()

(0.8594611528822055, 0.035774747660146704)