## Improving results in breast cancer database
The goal is to improve the accuracy results using cross-validation and other configurations in the breast cancer database
 
Results: By simply normalizing the data, I achieved a mean accuracy of 0.94. You can compare the current results to those in /notebooks/02_binary_classification_cross_validation

In [56]:
# Importing the dependencies 

import pandas as pd 
import tensorflow as tf
import sklearn 
import scikeras 

In [57]:
pd.__version__, tf.__version__, sklearn.__version__, scikeras.__version__

('2.2.2', '2.17.0', '1.5.1', '0.13.0')

In [58]:
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras import backend as k


In [59]:
X = pd.read_csv('../data/breast+cancer+wisconsin+diagnostic/x_bcwd.csv')
y = pd.read_csv('../data/breast+cancer+wisconsin+diagnostic/y_bcwd.csv')

In [64]:
# Initializing the MinMaxScaler in a range of 0 to 1
scaler = MinMaxScaler(feature_range=(0, 1))

# Apply the normalization
X = scaler.fit_transform(X)

X

array([[1.03372594e-03, 2.26580994e-02, 5.45988529e-01, ...,
        9.12027491e-04, 5.58248834e-04, 3.69250425e-04],
       [1.29443102e-03, 2.72573554e-01, 6.15783291e-01, ...,
        6.39175258e-01, 5.05372410e-01, 1.96478695e-04],
       [1.20550835e-03, 3.90260399e-01, 5.95743211e-01, ...,
        8.35051546e-01, 3.76578924e-04, 1.88152346e-04],
       ...,
       [8.93268555e-04, 6.21237741e-01, 4.45788128e-01, ...,
        4.87285223e-04, 1.20071307e-04, 1.33915438e-04],
       [1.29746247e-03, 6.63510315e-01, 6.65537972e-01, ...,
        9.10652921e-01, 4.63736351e-04, 7.16672865e-01],
       [0.00000000e+00, 5.01521813e-01, 2.85398383e-02, ...,
        0.00000000e+00, 2.40142615e-04, 8.87565616e-05]])

In [61]:
def create_net():
    k.clear_session()
    neural_network = Sequential([
        tf.keras.layers.InputLayer(shape = (30,)),
        tf.keras.layers.Dense(units=16, activation = 'relu', kernel_initializer='random_uniform'),
        tf.keras.layers.Dropout(rate=0.2), # to avoid overfitting
        tf.keras.layers.Dense(units=16, activation = 'relu', kernel_initializer='random_uniform'),
        tf.keras.layers.Dropout(rate=0.2),
        tf.keras.layers.Dense(units=1, activation='sigmoid')
    ])
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001, clipvalue=0.5)
    neural_network.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['binary_accuracy'])

    return neural_network

neural_network = KerasClassifier(model=create_net, epochs=100, batch_size=10)

results = cross_val_score(estimator=neural_network, X=X, y=y, cv=10, scoring='accuracy')

Epoch 1/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - binary_accuracy: 0.6342 - loss: 0.6891   
Epoch 2/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 923us/step - binary_accuracy: 0.6361 - loss: 0.6642
Epoch 3/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 904us/step - binary_accuracy: 0.7087 - loss: 0.6072
Epoch 4/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 796us/step - binary_accuracy: 0.8028 - loss: 0.4972
Epoch 5/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 766us/step - binary_accuracy: 0.9009 - loss: 0.4231
Epoch 6/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 776us/step - binary_accuracy: 0.9149 - loss: 0.3348
Epoch 7/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 786us/step - binary_accuracy: 0.8924 - loss: 0.2961
Epoch 8/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 915us/step - binary_accurac

In [62]:
results.mean(), results.std()

(0.9472431077694236, 0.020867201633650998)