# Data preparation
- https://www.kaggle.com/shelvigarg/wine-quality-dataset
- Refer to https://github.com/better-data-science/TensorFlow/blob/main/003_TensorFlow_Classification.ipynb for detailed preparation instructions

In [1]:
# Mount Google Drive inside the Colab VM so files stored there are reachable
from google.colab import drive
drive.mount('/content/gdrive')
# Symlink "My Drive" to /app so later cells can use the shorter /app/... paths
!ln -fs /content/gdrive/My\ Drive /app

Mounted at /content/gdrive


In [2]:
# Unpack the cuDNN archive stored on Drive into /usr/local/ and make the header readable by all users
!tar -xzvf /app/cuDNN/cudnn-10.0-linux-x64-v7.5.0.56.tgz -C /usr/local/
!chmod a+r /usr/local/cuda/include/cudnn.h

# Check that the installation succeeded by printing the version macros from the header
!cat /usr/local/cuda/include/cudnn.h | grep CUDNN_MAJOR -A 2

cuda/include/cudnn.h
cuda/NVIDIA_SLA_cuDNN_Support.txt
cuda/lib64/libcudnn.so
cuda/lib64/libcudnn.so.7
cuda/lib64/libcudnn.so.7.5.0
cuda/lib64/libcudnn_static.a
#define CUDNN_MAJOR 7
#define CUDNN_MINOR 5
#define CUDNN_PATCHLEVEL 0
--
#define CUDNN_VERSION (CUDNN_MAJOR * 1000 + CUDNN_MINOR * 100 + CUDNN_PATCHLEVEL)

#include "driver_types.h"


In [3]:
# Make /app the working directory for the rest of the notebook
%cd /app

/content/gdrive/My Drive


In [4]:
import os
import numpy as np
import pandas as pd
import itertools
import warnings
# Raise TensorFlow's C++ log threshold and silence Python warnings to keep cell output short.
# NOTE(review): the blanket filterwarnings('ignore') also hides pandas/sklearn warnings — confirm that is intended.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' 
warnings.filterwarnings('ignore')

# Load the wine-quality CSV and preview five random rows
df = pd.read_csv('/app/data/winequalityN.csv')
df.sample(5)

Unnamed: 0,type,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
1403,white,7.1,0.26,0.31,2.2,0.044,29.0,128.0,0.9937,3.34,0.64,10.9,8
1886,white,7.0,0.53,0.02,1.0,0.036,39.0,107.0,0.993,3.2,0.32,9.0,5
5333,red,12.3,0.39,0.63,2.3,0.091,6.0,18.0,1.0004,3.16,0.49,9.5,5
2441,white,7.2,0.2,0.28,1.6,0.028,13.0,168.0,0.99203,3.17,1.06,11.5,6
1415,white,5.9,0.415,0.02,0.8,0.038,22.0,63.0,0.9932,3.36,0.36,9.3,5


In [5]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


# Prepare the data: drop incomplete rows, derive the two binary columns with
# vectorized comparisons, then remove the raw columns they were derived from.
# Building the frame in a single chain (instead of assigning columns onto the
# result of dropna() and dropping in place) avoids pandas' SettingWithCopyWarning,
# which a global warnings filter would otherwise hide.
# NOTE: `df` is still rebound here, so this cell expects the freshly loaded
# frame; re-running it on its own output fails because 'type'/'quality' are gone.
df = (
    df.dropna()
    .assign(
        is_white_wine=lambda frame: (frame['type'] == 'white').astype(int),
        is_good_wine=lambda frame: (frame['quality'] >= 6).astype(int),
    )
    .drop(columns=['type', 'quality'])
)

# Train/test split: the features are every column except the target
X = df.drop(columns='is_good_wine')
y = df['is_good_wine']
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2, random_state=42
)

# Scaling: fit on the training features only, then apply the same transform
# to the test features so no test-set statistics leak into training
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

<br>

# How will we approach optimization

In [6]:
import tensorflow as tf
# Fix TensorFlow's global random seed so repeated runs are more comparable
tf.random.set_seed(42)

- Let's declare some constants
    - We want to optimize a network with 3 hidden layers
    - Each hidden layer can have from 64 to 256 nodes
    - The step size between nodes is 64
        - So the possibilities are: 64, 128, 192, 256

In [7]:
# Search-space constants: network depth and the node range for each hidden layer
num_layers = 3
min_nodes_per_layer = 64
max_nodes_per_layer = 256
node_step_size = 64

- Possibilities:

In [8]:
# Every allowed node count for one hidden layer; the + 1 makes the upper bound inclusive
node_options = [*range(min_nodes_per_layer, max_nodes_per_layer + 1, node_step_size)]
node_options

[64, 128, 192, 256]

- Taking them to two layers:

In [9]:
# The same node options, once for each of the two layers
two_layer_possibilities = [node_options] * 2
two_layer_possibilities

[[64, 128, 192, 256], [64, 128, 192, 256]]

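- And now it's just a matter of computing the Cartesian product of these two lists, i.e. every combination of one node count per layer (we'll loosely call these "permutations" from here on):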

In [10]:
# Cartesian product: every (layer 1, layer 2) pairing of the node options
list(itertools.product(*two_layer_possibilities))

[(64, 64),
 (64, 128),
 (64, 192),
 (64, 256),
 (128, 64),
 (128, 128),
 (128, 192),
 (128, 256),
 (192, 64),
 (192, 128),
 (192, 192),
 (192, 256),
 (256, 64),
 (256, 128),
 (256, 192),
 (256, 256)]

- We want to optimize a 3-layer-deep neural network, so we'll have a few more possibilities:

In [11]:
# Repeat the node options once per hidden layer (each entry references the same list object)
layer_possibilities = [node_options] * num_layers
layer_possibilities

[[64, 128, 192, 256], [64, 128, 192, 256], [64, 128, 192, 256]]

- Here are the permutations:

In [12]:
# One tuple of node counts per candidate architecture: len(node_options) ** num_layers tuples in total
layer_node_permutations = list(itertools.product(*layer_possibilities))
layer_node_permutations

[(64, 64, 64),
 (64, 64, 128),
 (64, 64, 192),
 (64, 64, 256),
 (64, 128, 64),
 (64, 128, 128),
 (64, 128, 192),
 (64, 128, 256),
 (64, 192, 64),
 (64, 192, 128),
 (64, 192, 192),
 (64, 192, 256),
 (64, 256, 64),
 (64, 256, 128),
 (64, 256, 192),
 (64, 256, 256),
 (128, 64, 64),
 (128, 64, 128),
 (128, 64, 192),
 (128, 64, 256),
 (128, 128, 64),
 (128, 128, 128),
 (128, 128, 192),
 (128, 128, 256),
 (128, 192, 64),
 (128, 192, 128),
 (128, 192, 192),
 (128, 192, 256),
 (128, 256, 64),
 (128, 256, 128),
 (128, 256, 192),
 (128, 256, 256),
 (192, 64, 64),
 (192, 64, 128),
 (192, 64, 192),
 (192, 64, 256),
 (192, 128, 64),
 (192, 128, 128),
 (192, 128, 192),
 (192, 128, 256),
 (192, 192, 64),
 (192, 192, 128),
 (192, 192, 192),
 (192, 192, 256),
 (192, 256, 64),
 (192, 256, 128),
 (192, 256, 192),
 (192, 256, 256),
 (256, 64, 64),
 (256, 64, 128),
 (256, 64, 192),
 (256, 64, 256),
 (256, 128, 64),
 (256, 128, 128),
 (256, 128, 192),
 (256, 128, 256),
 (256, 192, 64),
 (256, 192, 128),
 (2

We'll iterate over the permutations, and then iterate again over the values of each individual permutation to get the node count for each hidden layer:

In [13]:
# Show the per-layer node counts of the first two architectures, one value per line
for permutation in layer_node_permutations[:2]:
    print(*permutation, sep='\n')
    print()

64
64
64

64
64
128



- We'll create a new `Sequential` model at each iteration
    - And add an `InputLayer` to it with a shape of `(12,)` (the number of feature columns in our dataset)
- Then, we'll iterate over the items in a single permutation and add a `Dense` layer to the model with the current number of nodes
- Finally, we'll add a `Dense` output layer
- We'll also set a name for each model so it's easier to compare them later:

In [14]:
# Build one Sequential model per node-count combination
models = []

for permutation in layer_node_permutations:
    # Name each model after its hidden-layer sizes, e.g. "dense64_dense128_dense256".
    # The name is passed through the constructor's public `name` argument rather
    # than written to the private `_name` attribute after construction.
    model_name = '_'.join(f'dense{nodes_at_layer}' for nodes_at_layer in permutation)

    model = tf.keras.Sequential(name=model_name)
    model.add(tf.keras.layers.InputLayer(input_shape=(12,)))

    # One hidden Dense layer per entry in the combination
    for nodes_at_layer in permutation:
        model.add(tf.keras.layers.Dense(nodes_at_layer, activation='relu'))

    # Single sigmoid unit for the binary target
    model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

    models.append(model)

- Here's what a single model looks like:

In [15]:
# Inspect the layers and parameter counts of the first architecture
models[0].summary()

Model: "dense64_dense64_dense64"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 64)                832       
_________________________________________________________________
dense_1 (Dense)              (None, 64)                4160      
_________________________________________________________________
dense_2 (Dense)              (None, 64)                4160      
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 65        
Total params: 9,217
Trainable params: 9,217
Non-trainable params: 0
_________________________________________________________________


- Not too bad, right?
- Let's wrap all this logic into a single function next.

<br><br>

# Get architecture possibilities from a function
- This one will have a lot of parameters
- But it doesn't do anything we haven't discussed so far:

In [16]:
def get_models(num_layers: int,
               min_nodes_per_layer: int,
               max_nodes_per_layer: int,
               node_step_size: int,
               input_shape: tuple,
               hidden_layer_activation: str = 'relu',
               num_nodes_at_output: int = 1,
               output_layer_activation: str = 'sigmoid') -> list:
    """Build one uncompiled Sequential model per hidden-layer size combination.

    The node options for a single layer are
    ``range(min_nodes_per_layer, max_nodes_per_layer + 1, node_step_size)``
    (upper bound inclusive). Every combination of one option per hidden layer
    becomes a model, so ``len(options) ** num_layers`` models are returned.

    Args:
        num_layers: Number of hidden Dense layers in every model.
        min_nodes_per_layer: Smallest node count tried for a hidden layer.
        max_nodes_per_layer: Largest node count tried (inclusive).
        node_step_size: Step between consecutive node counts.
        input_shape: Shape passed to the model's InputLayer, e.g. ``(12,)``.
        hidden_layer_activation: Activation used by every hidden layer.
        num_nodes_at_output: Number of units in the output layer.
        output_layer_activation: Activation of the output layer.

    Returns:
        A list of models, ordered like ``itertools.product`` orders the
        combinations. Each model is named after its hidden-layer sizes,
        e.g. ``"dense64_dense128_dense256"``.

    Raises:
        ValueError: If ``node_step_size`` is 0 (raised by ``range``) or
            ``num_layers`` is negative (raised by ``itertools.product``).
    """
    node_options = range(min_nodes_per_layer, max_nodes_per_layer + 1, node_step_size)

    models = []
    for permutation in itertools.product(node_options, repeat=num_layers):
        # Set the name through the public constructor argument instead of
        # assigning to the private `_name` attribute afterwards.
        model_name = '_'.join(f'dense{nodes_at_layer}' for nodes_at_layer in permutation)
        model = tf.keras.Sequential(name=model_name)
        model.add(tf.keras.layers.InputLayer(input_shape=input_shape))

        for nodes_at_layer in permutation:
            model.add(tf.keras.layers.Dense(nodes_at_layer, activation=hidden_layer_activation))

        model.add(tf.keras.layers.Dense(num_nodes_at_output, activation=output_layer_activation))
        models.append(model)

    return models

- Let's test it:

In [17]:
# Same search space as the manual walkthrough: 3 hidden layers, 64 to 256 nodes in steps of 64
all_models = get_models(
    num_layers=3, 
    min_nodes_per_layer=64, 
    max_nodes_per_layer=256, 
    node_step_size=64, 
    input_shape=(12,)
)

- Let's print the names and the count:

In [18]:
# Report how many candidate models were built, leave a blank line, then list their names
print(f'#Models = {len(all_models)}', end='\n\n')

for model in all_models:
    print(model.name)

#Models = 64

dense64_dense64_dense64
dense64_dense64_dense128
dense64_dense64_dense192
dense64_dense64_dense256
dense64_dense128_dense64
dense64_dense128_dense128
dense64_dense128_dense192
dense64_dense128_dense256
dense64_dense192_dense64
dense64_dense192_dense128
dense64_dense192_dense192
dense64_dense192_dense256
dense64_dense256_dense64
dense64_dense256_dense128
dense64_dense256_dense192
dense64_dense256_dense256
dense128_dense64_dense64
dense128_dense64_dense128
dense128_dense64_dense192
dense128_dense64_dense256
dense128_dense128_dense64
dense128_dense128_dense128
dense128_dense128_dense192
dense128_dense128_dense256
dense128_dense192_dense64
dense128_dense192_dense128
dense128_dense192_dense192
dense128_dense192_dense256
dense128_dense256_dense64
dense128_dense256_dense128
dense128_dense256_dense192
dense128_dense256_dense256
dense192_dense64_dense64
dense192_dense64_dense128
dense192_dense64_dense192
dense192_dense64_dense256
dense192_dense128_dense64
dense192_dense128_dense12

- So we have 64 models in total
- It will take some time to optimize
- Let's declare another function for that

<br><br>

# Model optimization function
- This one will accept the list of models, training and testing sets (both features and the target), and optionally a number of epochs and verbosity
    - It's advised to set verbosity to 0 so you don't get overwhelmed with the console output

In [19]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [20]:
def optimize(models: list,
             X_train: np.ndarray,
             y_train: np.ndarray,
             X_test: np.ndarray,
             y_test: np.ndarray,
             epochs: int = 50,
             verbose: int = 0) -> pd.DataFrame:
    """Compile, train and score every model; return one result row per model.

    Each model is compiled with binary cross-entropy and Adam, fitted on the
    training data and scored on the test data with a 0.5 decision threshold.
    The models are compiled and fitted in place, so the objects in ``models``
    are trained once this function returns.

    Args:
        models: Models to train (e.g. the list returned by ``get_models``).
        X_train: Training features.
        y_train: Training labels (binary).
        X_test: Test features.
        y_test: Test labels (binary).
        epochs: Number of training epochs per model.
        verbose: Verbosity forwarded to both ``fit`` and ``predict``.

    Returns:
        DataFrame with columns ``model_name``, ``test_accuracy``,
        ``test_precision``, ``test_recall`` and ``test_f1``. A model whose
        training or scoring raises is reported on stdout and left out, so the
        frame is empty when ``models`` is empty or every model fails.

    NOTE(review): the scores come from the test set; if they are also used to
    pick the architecture, that set is effectively acting as a validation set.
    """
    # We'll store the results here
    results = []

    def train(model: 'tf.keras.Sequential') -> dict:
        """Fit a single model and return its test-set metrics."""
        # Change this however you want
        model.compile(
            loss=tf.keras.losses.binary_crossentropy,
            optimizer=tf.keras.optimizers.Adam(),
            metrics=[
                tf.keras.metrics.BinaryAccuracy(name='accuracy')
            ]
        )

        # Train the model
        model.fit(
            X_train,
            y_train,
            epochs=epochs,
            verbose=verbose
        )

        # Predict on the test set. `verbose` is forwarded so that verbose=0
        # also silences predict's own progress output.
        probabilities = np.ravel(model.predict(X_test, verbose=verbose))
        # Vectorized 0.5 threshold: probabilities above 0.5 become class 1
        prediction_classes = (probabilities > 0.5).astype(int)

        # Return evaluation metrics on the test set
        return {
            'model_name': model.name,
            'test_accuracy': accuracy_score(y_test, prediction_classes),
            'test_precision': precision_score(y_test, prediction_classes),
            'test_recall': recall_score(y_test, prediction_classes),
            'test_f1': f1_score(y_test, prediction_classes)
        }

    # Train every model and save results; one failing model must not stop the
    # whole search, so its error is printed and the loop moves on
    for model in models:
        try:
            print(model.name, end=' ... ')
            results.append(train(model=model))
        except Exception as e:
            print(f'{model.name} --> {str(e)}')

    return pd.DataFrame(results)

- Let's optimize the architecture!
- It will take some time

In [21]:
# Train and score the architectures produced by get_models() (`all_models`),
# not the scratch `models` list built by hand in the walkthrough cell.
optimization_results = optimize(
    models=all_models,
    X_train=X_train_scaled,
    y_train=y_train,
    X_test=X_test_scaled,
    y_test=y_test
)

dense64_dense64_dense64 ... dense64_dense64_dense128 ... dense64_dense64_dense192 ... dense64_dense64_dense256 ... dense64_dense128_dense64 ... dense64_dense128_dense128 ... dense64_dense128_dense192 ... dense64_dense128_dense256 ... dense64_dense192_dense64 ... dense64_dense192_dense128 ... dense64_dense192_dense192 ... dense64_dense192_dense256 ... dense64_dense256_dense64 ... dense64_dense256_dense128 ... dense64_dense256_dense192 ... dense64_dense256_dense256 ... dense128_dense64_dense64 ... dense128_dense64_dense128 ... dense128_dense64_dense192 ... dense128_dense64_dense256 ... dense128_dense128_dense64 ... dense128_dense128_dense128 ... dense128_dense128_dense192 ... dense128_dense128_dense256 ... dense128_dense192_dense64 ... dense128_dense192_dense128 ... dense128_dense192_dense192 ... dense128_dense192_dense256 ... dense128_dense256_dense64 ... dense128_dense256_dense128 ... dense128_dense256_dense192 ... dense128_dense256_dense256 ... dense192_dense64_dense64 ... dense192_de

In [22]:
# Rank the architectures from highest to lowest test accuracy
optimization_results.sort_values(by='test_accuracy', ascending=False)

Unnamed: 0,model_name,test_accuracy,test_precision,test_recall,test_f1
60,dense256_dense256_dense64,0.810518,0.851990,0.844636,0.848297
58,dense256_dense192_dense192,0.807425,0.837740,0.859433,0.848448
28,dense128_dense256_dense64,0.806651,0.835125,0.861899,0.848301
26,dense128_dense192_dense192,0.802784,0.850126,0.832306,0.841121
61,dense256_dense256_dense128,0.800464,0.835152,0.849568,0.842298
...,...,...,...,...,...
7,dense64_dense128_dense256,0.777262,0.818514,0.828607,0.823529
45,dense192_dense256_dense128,0.777262,0.807286,0.847102,0.826715
29,dense128_dense256_dense128,0.776489,0.826250,0.815043,0.820608
51,dense256_dense64_dense256,0.774942,0.820197,0.821208,0.820702


- And there you have it!