In [1]:
# 5 Steps to building any Neural Network
# 0. Not a step... but you have to preprocess the data... no empty... all numerics... and scaled!
# 1. Define the model --- this will come from the Sequential or the Functional API (most likely the sequential & dense)
  # model = ...
# 2. Compile the model --- means you have first select a loss function and an optimizer
# compile the model
#  opt = SGD(learning_rate=0.01, momentum=0.9)   --- usually start with SGD OR adam
#  model.compile(optimizer=opt, loss='binary_crossentropy')
# For opitmizers:  https://www.tensorflow.org/api_docs/python/tf/keras/optimizers
# The three most common loss functions are:

# ‘binary_crossentropy‘ for binary classification
# ‘sparse_categorical_crossentropy‘ for multi-class classification
# ‘mse‘ (mean squared error) for regression
# For loss functions:  https://www.tensorflow.org/api_docs/python/tf/keras/losses
# you can also add metrics:
# model.compile(optimizer='sgd', loss='binary_crossentropy', metrics=['accuracy'])
# metrics:  https://www.tensorflow.org/api_docs/python/tf/keras/metrics

# 3. Fit / Train the model
# fit the model
# model.fit(X, y, epochs=100, batch_size=32, verbose=0)

# 4. Evaluate the model
# evaluate the model
# loss = model.evaluate(X, y, verbose=0)  --- use the test data

# 5. Make predictions
# make a prediction
# yhat = model.predict(X)

In [2]:
import pandas as pd
path = 'https://raw.githubusercontent.com/fenago/datasets/main/iris.csv'
df = pd.read_csv(path)
df.head()

Unnamed: 0,Sepal Length,Sepal Width,Petal Length,Petal Width,Flower
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [3]:
# Number of unique values of the target
unique_values = df['Flower'].nunique()
print(unique_values)

3


In [4]:

# mlp for multiclass classification
from numpy import argmax
from pandas import read_csv
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
# load the dataset
path = 'https://raw.githubusercontent.com/fenago/datasets/main/iris.csv'
df = read_csv(path)
# split into input and output columns
X, y = df.values[:, :-1], df.values[:, -1]
# ensure all data are floating point values
X = X.astype('float32')
# encode strings to integer
y = LabelEncoder().fit_transform(y)
# split into train and test datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)
# determine the number of input features
n_features = X_train.shape[1]
# 1. define model
model = Sequential()
model.add(Dense(10, activation='relu', kernel_initializer='he_normal', input_shape=(n_features,)))
model.add(Dense(8, activation='relu', kernel_initializer='he_normal'))
model.add(Dense(3, activation='softmax'))
# 2. compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# 3. fit the model
model.fit(X_train, y_train, epochs=150, batch_size=32, verbose=1)
# 4. evaluate the model
loss, acc = model.evaluate(X_test, y_test, verbose=0)
print('Test Accuracy: %.3f' % acc)
# 5. make a prediction
row = [5.1,3.5,1.4,0.2]
yhat = model.predict([row])
print('Predicted: %s (class=%d)' % (yhat, argmax(yhat)))

(100, 4) (50, 4) (100,) (50,)
Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoc

# Find the Optimial Neural Network Architecture

In [5]:
# https://raw.githubusercontent.com/fenago/datasets/main/winequalityN.csv
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import itertools
import warnings
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

tf.random.set_seed(42)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
warnings.filterwarnings('ignore')


df = pd.read_csv('https://raw.githubusercontent.com/fenago/datasets/main/winequalityN.csv')
df.sample(5)

Unnamed: 0,type,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
5292,red,12.7,0.6,0.65,2.3,0.063,6.0,25.0,0.9997,3.03,0.57,9.9,5
4606,white,6.9,0.29,0.32,5.8,0.04,16.0,112.0,0.993,3.04,0.58,11.2,5
347,white,6.7,0.19,0.41,15.6,0.056,75.0,155.0,0.9995,3.2,0.44,8.8,6
690,white,6.9,0.2,0.34,1.9,0.043,25.0,136.0,0.9935,3.31,0.6,10.1,4
2924,white,6.6,0.22,0.29,14.4,0.046,39.0,118.0,0.99834,3.05,0.5,9.1,6


In [6]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Prepare the data
df = df.dropna()
df['is_white_wine'] = [1 if typ == 'white' else 0 for typ in df['type']]
df['is_good_wine'] = [1 if quality >= 6 else 0 for quality in df['quality']]
df.drop(['type', 'quality'], axis=1, inplace=True)

# Train/test split
X = df.drop('is_good_wine', axis=1)
y = df['is_good_wine']
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2, random_state=42
)

# Scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [7]:
num_layers = 3
min_nodes_per_layer = 64
max_nodes_per_layer = 256
node_step_size = 64

In [8]:
node_options = list(range(
    min_nodes_per_layer,
    max_nodes_per_layer + 1,
    node_step_size
))
node_options

[64, 128, 192, 256]

In [9]:
two_layer_possibilities = [node_options, node_options]
two_layer_possibilities

[[64, 128, 192, 256], [64, 128, 192, 256]]

In [10]:
list(itertools.product(*two_layer_possibilities))

[(64, 64),
 (64, 128),
 (64, 192),
 (64, 256),
 (128, 64),
 (128, 128),
 (128, 192),
 (128, 256),
 (192, 64),
 (192, 128),
 (192, 192),
 (192, 256),
 (256, 64),
 (256, 128),
 (256, 192),
 (256, 256)]

In [11]:
layer_possibilities = [node_options] * num_layers
layer_node_permutations = list(itertools.product(*layer_possibilities))
layer_possibilities
layer_node_permutations

[(64, 64, 64),
 (64, 64, 128),
 (64, 64, 192),
 (64, 64, 256),
 (64, 128, 64),
 (64, 128, 128),
 (64, 128, 192),
 (64, 128, 256),
 (64, 192, 64),
 (64, 192, 128),
 (64, 192, 192),
 (64, 192, 256),
 (64, 256, 64),
 (64, 256, 128),
 (64, 256, 192),
 (64, 256, 256),
 (128, 64, 64),
 (128, 64, 128),
 (128, 64, 192),
 (128, 64, 256),
 (128, 128, 64),
 (128, 128, 128),
 (128, 128, 192),
 (128, 128, 256),
 (128, 192, 64),
 (128, 192, 128),
 (128, 192, 192),
 (128, 192, 256),
 (128, 256, 64),
 (128, 256, 128),
 (128, 256, 192),
 (128, 256, 256),
 (192, 64, 64),
 (192, 64, 128),
 (192, 64, 192),
 (192, 64, 256),
 (192, 128, 64),
 (192, 128, 128),
 (192, 128, 192),
 (192, 128, 256),
 (192, 192, 64),
 (192, 192, 128),
 (192, 192, 192),
 (192, 192, 256),
 (192, 256, 64),
 (192, 256, 128),
 (192, 256, 192),
 (192, 256, 256),
 (256, 64, 64),
 (256, 64, 128),
 (256, 64, 192),
 (256, 64, 256),
 (256, 128, 64),
 (256, 128, 128),
 (256, 128, 192),
 (256, 128, 256),
 (256, 192, 64),
 (256, 192, 128),
 (2

In [12]:
for permutation in layer_node_permutations[:2]:
    for nodes_at_layer in permutation:
        print(nodes_at_layer)
    print()

64
64
64

64
64
128



In [13]:
models = []

for permutation in layer_node_permutations:
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.InputLayer(input_shape=(12,)))
    model_name = ''

    for nodes_at_layer in permutation:
        model.add(tf.keras.layers.Dense(nodes_at_layer, activation='relu'))
        model_name += f'dense{nodes_at_layer}_'

    model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
    model._name = model_name[:-1]

    models.append(model)

In [14]:
models[1].summary()

Model: "dense64_dense64_dense128"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_7 (Dense)             (None, 64)                832       
                                                                 
 dense_8 (Dense)             (None, 64)                4160      
                                                                 
 dense_9 (Dense)             (None, 128)               8320      
                                                                 
 dense_10 (Dense)            (None, 1)                 129       
                                                                 
Total params: 13441 (52.50 KB)
Trainable params: 13441 (52.50 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [15]:
def get_models(num_layers: int,
               min_nodes_per_layer: int,
               max_nodes_per_layer: int,
               node_step_size: int,
               input_shape: tuple,
               hidden_layer_activation: str = 'relu',
               num_nodes_at_output: int = 1,
               output_layer_activation: str = 'sigmoid') -> list:

    node_options = list(range(min_nodes_per_layer, max_nodes_per_layer + 1, node_step_size))
    layer_possibilities = [node_options] * num_layers
    layer_node_permutations = list(itertools.product(*layer_possibilities))

    models = []
    for permutation in layer_node_permutations:
        model = tf.keras.Sequential()
        model.add(tf.keras.layers.InputLayer(input_shape=input_shape))
        model_name = ''

        for nodes_at_layer in permutation:
            model.add(tf.keras.layers.Dense(nodes_at_layer, activation=hidden_layer_activation))
            model_name += f'dense{nodes_at_layer}_'

        model.add(tf.keras.layers.Dense(num_nodes_at_output, activation=output_layer_activation))
        model._name = model_name[:-1]
        models.append(model)

    return models

In [16]:
all_models = get_models(
    num_layers=3,
    min_nodes_per_layer=64,
    max_nodes_per_layer=256,
    node_step_size=64,
    input_shape=(12,)
)

In [17]:
def optimize(models: list,
             X_train: np.array,
             y_train: np.array,
             X_test: np.array,
             y_test: np.array,
             epochs: int = 50,
             verbose: int = 0) -> pd.DataFrame:

    # We'll store the results here
    results = []

    def train(model: tf.keras.Sequential) -> dict:
        # Change this however you want
        # We're not optimizing this part today
        model.compile(
            loss=tf.keras.losses.binary_crossentropy,
            optimizer=tf.keras.optimizers.Adam(),
            metrics=[
                tf.keras.metrics.BinaryAccuracy(name='accuracy')
            ]
        )

        # Train the model
        model.fit(
            X_train,
            y_train,
            epochs=epochs,
            verbose=verbose
        )

        # Make predictions on the test set
        preds = model.predict(X_test)
        prediction_classes = [1 if prob > 0.5 else 0 for prob in np.ravel(preds)]

        # Return evaluation metrics on the test set
        return {
            'model_name': model.name,
            'test_accuracy': accuracy_score(y_test, prediction_classes),
            'test_precision': precision_score(y_test, prediction_classes),
            'test_recall': recall_score(y_test, prediction_classes),
            'test_f1': f1_score(y_test, prediction_classes)
        }

    # Train every model and save results
    for model in models:
        try:
            print(model.name, end=' ... ')
            res = train(model=model)
            results.append(res)
        except Exception as e:
            print(f'{model.name} --> {str(e)}')

    return pd.DataFrame(results)

In [18]:
optimization_results = optimize(
    models=all_models,
    X_train=X_train_scaled,
    y_train=y_train,
    X_test=X_test_scaled,
    y_test=y_test
)



In [19]:
optimization_results.sort_values(by='test_accuracy', ascending=False)

Unnamed: 0,model_name,test_accuracy,test_precision,test_recall,test_f1
56,dense256_dense192_dense64,0.807425,0.846059,0.847102,0.846580
31,dense128_dense256_dense256,0.806651,0.851064,0.838471,0.844720
47,dense192_dense256_dense256,0.804331,0.834532,0.858200,0.846201
48,dense256_dense64_dense64,0.803558,0.844252,0.842170,0.843210
24,dense128_dense192_dense64,0.802784,0.823256,0.872996,0.847397
...,...,...,...,...,...
49,dense256_dense64_dense128,0.777262,0.814681,0.834772,0.824604
16,dense128_dense64_dense64,0.777262,0.827284,0.815043,0.821118
3,dense64_dense64_dense256,0.775715,0.824408,0.816276,0.820322
0,dense64_dense64_dense64,0.774169,0.819975,0.819975,0.819975


# CallBacks

In [20]:
# https://raw.githubusercontent.com/fenago/datasets/main/winequalityN.csv
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import itertools
import warnings
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

tf.random.set_seed(42)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
warnings.filterwarnings('ignore')


df = pd.read_csv('https://raw.githubusercontent.com/fenago/datasets/main/winequalityN.csv')
df.sample(5)

Unnamed: 0,type,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
5039,red,8.3,0.715,0.15,1.8,0.089,10.0,52.0,0.9968,3.23,0.77,9.5,5
1857,white,8.9,0.34,0.34,1.6,0.056,13.0,176.0,0.9946,3.14,0.47,9.7,5
2128,white,5.9,0.5,0.05,2.6,0.054,36.0,146.0,0.9948,3.43,0.5,9.2,6
774,white,9.1,0.27,0.45,10.6,0.035,28.0,124.0,0.997,3.2,0.46,10.4,9
3679,white,7.1,0.2,0.27,9.6,0.037,19.0,105.0,0.99444,3.04,0.37,10.5,7


In [21]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Prepare the data
df = df.dropna()
df['is_white_wine'] = [1 if typ == 'white' else 0 for typ in df['type']]
df['is_good_wine'] = [1 if quality >= 6 else 0 for quality in df['quality']]
df.drop(['type', 'quality'], axis=1, inplace=True)

# Train/test split
X = df.drop('is_good_wine', axis=1)
y = df['is_good_wine']
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2, random_state=42
)

# Scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [22]:
# Model Checkpoint
cb_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath='checkpoints/model-{epoch:02d}-{val_accuracy:.2f}.hdf5',
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1
)

In [23]:
# ReduceLROnPlateau
cb_reducelr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    mode='min',
    factor=0.1,
    patience=10,
    verbose=1,
    min_lr=0.00001
)


In [24]:
# Early Stopping
cb_earlystop = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy',
    mode='max',
    min_delta=0.001,
    patience=10,
    verbose=1
)

In [25]:
# CSVLogger
cb_csvlogger = tf.keras.callbacks.CSVLogger(
    filename='training_log.csv',
    separator=',',
    append=False
)

In [26]:
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.compile(
    loss=tf.keras.losses.binary_crossentropy,
    optimizer=tf.keras.optimizers.Adam(),
    metrics=[tf.keras.metrics.BinaryAccuracy(name='accuracy')]
)

model.fit(
    X_train_scaled,
    y_train,
    epochs=1000,
    validation_data=(X_test_scaled, y_test),
    callbacks=[cb_checkpoint, cb_reducelr, cb_earlystop, cb_csvlogger]
)

Epoch 1/1000
Epoch 1: val_accuracy improved from -inf to 0.75561, saving model to checkpoints/model-01-0.76.hdf5
Epoch 2/1000
Epoch 2: val_accuracy did not improve from 0.75561
Epoch 3/1000
Epoch 3: val_accuracy improved from 0.75561 to 0.76179, saving model to checkpoints/model-03-0.76.hdf5
Epoch 4/1000
Epoch 4: val_accuracy did not improve from 0.76179
Epoch 5/1000
Epoch 5: val_accuracy improved from 0.76179 to 0.76721, saving model to checkpoints/model-05-0.77.hdf5
Epoch 6/1000
Epoch 6: val_accuracy did not improve from 0.76721
Epoch 7/1000
Epoch 7: val_accuracy improved from 0.76721 to 0.77262, saving model to checkpoints/model-07-0.77.hdf5
Epoch 8/1000
Epoch 8: val_accuracy did not improve from 0.77262
Epoch 9/1000
Epoch 9: val_accuracy did not improve from 0.77262
Epoch 10/1000
Epoch 10: val_accuracy improved from 0.77262 to 0.77340, saving model to checkpoints/model-10-0.77.hdf5
Epoch 11/1000
Epoch 11: val_accuracy did not improve from 0.77340
Epoch 12/1000
Epoch 12: val_accurac

<keras.src.callbacks.History at 0x7d3be3290850>