# This notebook shows how to build a simple MLP using TensorFlow+Keras (Breast cancer dataset)

In [29]:
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical

In [2]:
np.random.seed(42)

## Load Breast cancer dataset and prepare for classification

In [25]:
breast = datasets.load_breast_cancer()
X = breast.data
Y = breast.target
class_names = breast.target_names
X.shape, Y.shape, class_names

((569, 30), (569,), array(['malignant', 'benign'], dtype='<U9'))

In [26]:
scaler = StandardScaler()
X_rescaled = scaler.fit_transform(X)

In [27]:
X_train, X_test, Y_train, Y_test = train_test_split(X_rescaled, Y, test_size=0.2, random_state=0)
X_train.shape, Y_train.shape, X_test.shape, Y_test.shape

((455, 30), (455,), (114, 30), (114,))

In [30]:
# for each output create a vector where the position of the class label for the observation is 1, other positions are 0's
Y_train = to_categorical(Y_train, len(class_names))
Y_test = to_categorical(Y_test, len(class_names))

## Build a model

Check available activation functions for your ANN setup: https://keras.io/api/layers/activations/ 

Check available loss functions for your ANN setup: https://keras.io/api/losses/ 

Check available weight initializers: https://keras.io/api/layers/initializers/

Check available optimizers: https://keras.io/api/optimizers/

In [42]:
# Figure out data dimensions
num_features = X_train.shape[1]
num_classes = len(class_names)

# Create the model
model = Sequential()
model.add(Dense(50, input_shape=(num_features,), activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(16, activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(num_classes, activation='sigmoid'))

# Configure the model and start training
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

## Train the model with training data

In [43]:
model.fit(X_train, Y_train, epochs=10, batch_size=250, verbose=1, validation_split=0.2)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7ff2130b8550>

## Evaluate the model with test data

In [44]:
# Test the model after training
test_restul = model.evaluate(X_test, Y_test, verbose=1)
print(f'Test results - Loss: {test_restul[0]} - Accuracy: {test_restul[1]}%')

Test results - Loss: 0.4665887951850891 - Accuracy: 0.7982456088066101%


## Hmmmm, can we play with other MLP architectures to improve the performance?

In [53]:
# Figure out data dimensions
num_features = X_train.shape[1]
num_classes = len(class_names)

# Create the model
model = Sequential()
model.add(Dense(30, input_shape=(num_features,), activation='relu'))
model.add(Dense(50, activation='relu'))
model.add(Dense(30, activation='relu'))
model.add(Dense(num_classes, activation='sigmoid'))

# Configure the model and start training
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

model.fit(X_train, Y_train, epochs=10, batch_size=250, verbose=0, validation_split=0.2)

# Test the model after training
train_restul = model.evaluate(X_train, Y_train, verbose=0)
print(f'Train results - Loss: {train_restul[0]} - Accuracy: {train_restul[1]}%')

# Test the model after training
test_restul = model.evaluate(X_test, Y_test, verbose=0)
print(f'Test results - Loss: {test_restul[0]} - Accuracy: {test_restul[1]}%')

Train results - Loss: 0.5134559273719788 - Accuracy: 0.9210526347160339%
Test results - Loss: 0.3195848762989044 - Accuracy: 0.9298245906829834%
