In [2]:
# Standard library
import os

# Dataset
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, OneHotEncoder

# Model
from tensorflow.keras.layers import Input, Dense, Activation, Dropout, BatchNormalization
from tensorflow.keras.models import Model
import tensorflow as tf
from tensorflow.keras.models import load_model

# Tuner
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

# Training
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Visualization
import matplotlib.pyplot as plt

## Dataset
### General
You can find the Dataset online via: https://www.kaggle.com/uciml/red-wine-quality-cortez-et-al-2009.
The dataset consists of 1143 samples, divided into training data and test data. The training data consists of 800 samples and the test data of 343 samples; this split was chosen by the author of this code.
### Output data
The output data is the "${\bf quality}$" column of the dataframe, which takes 6 different values. Therefore the output data is encoded with a one-hot encoder.
### Input data
The input data contains 11 features (the "Id" column is not used, because it reveals no relevant information about the wine). Some features have a very high mean and are therefore scaled with a standard scaler. The scaled features are: "${\bf fixed acidity}$" (mean=8.3), "${\bf residual sugar}$" (mean=2.5), "${\bf free sulfur dioxide}$" (mean=15.6), "${\bf total sulfur dioxide}$" (mean=45.9), "${\bf pH}$" (mean=3.3), "${\bf alcohol}$" (mean=10.4). The features that are not scaled are: "${\bf volatile acidity}$", "${\bf citric acid}$", "${\bf chlorides}$", "${\bf density}$", "${\bf sulphates}$".



In [3]:
# Number of leading rows used for training; every remaining row becomes test data.
TRAININGS_DATA_LENGTH = 800
wine_csv_path = "WineQT.csv"

data_frame = pd.read_csv(wine_csv_path)

# Standardize the selected input columns one at a time (the scaler is re-fitted per column).
# NOTE(review): the scaler is fitted on all rows before the train/test split below,
# so statistics of the test rows influence the scaled training inputs.
features_to_scale = [
    "fixed acidity",
    "residual sugar",
    "free sulfur dioxide",
    "total sulfur dioxide",
    "pH",
    "alcohol",
]
scaler = StandardScaler()
for feature in features_to_scale:
    data_frame[feature] = scaler.fit_transform(data_frame[[feature]])

# Inputs: every column except the row identifier and the target.
data_frame = data_frame.drop(columns="Id")
X = np.asarray(data_frame.drop(columns="quality"))

# Targets: the "quality" column, one-hot encoded into a dense array.
encoder = OneHotEncoder()
Y = encoder.fit_transform(np.asarray(data_frame[["quality"]])).toarray()

# Ordered split: the first TRAININGS_DATA_LENGTH rows train, the rest test.
x_train, x_test = X[:TRAININGS_DATA_LENGTH], X[TRAININGS_DATA_LENGTH:]
y_train, y_test = Y[:TRAININGS_DATA_LENGTH], Y[TRAININGS_DATA_LENGTH:]


## Model Builder

In [4]:
class Model_builder:
    """Builds, tunes, trains and persists a dense classifier for the wine data.

    The class reads the notebook-level arrays ``x_train``, ``y_train``,
    ``x_test`` and ``y_test``.

    Class attributes:
        input_dim: number of input columns (``x_train.shape[1]``).
        output_dim: number of one-hot output columns (``y_train.shape[1]``).
        learning_rate, batch_size: placeholders; the effective values are set
            per instance in ``__init__``.
        saved_model_path: location used to load and save the model. ``None``
            disables both loading and saving.
    """

    input_dim = x_train.shape[1]
    output_dim = y_train.shape[1]
    learning_rate = 0
    batch_size = 0
    # NOTE(review): machine-specific absolute path; pass ``saved_model_path`` to
    # the constructor (or override this attribute) when running elsewhere.
    saved_model_path = "/Users/tombohlmann/Desktop/ml/wineTesting/safedModel"

    # Sentinel that distinguishes "argument not given" from an explicit ``None``.
    _UNSET = object()

    def __init__(self, learning_rate=0.01, batch_size=50, saved_model_path=_UNSET):
        """Create a builder.

        Input: learning_rate: learning rate handed to the Adam optimizer
               batch_size: batch size used for training and hyperparameter search
               saved_model_path: optional override of the class-level path;
                                 ``None`` disables loading and saving
        """
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        # Only shadow the class attribute when a path was passed explicitly.
        if saved_model_path is not self._UNSET:
            self.saved_model_path = saved_model_path

    def _has_saved_model(self):
        """Return True when ``saved_model_path`` is set and exists on disk."""
        return self.saved_model_path is not None and os.path.exists(self.saved_model_path)

    def build_model(self, layer_1_units=6, layer_2_units=16, layer_3_units=14, act_fct='sigmoid', loss='mse', dr=0.5, load_saved=True):
        """
        Returns a neural network with 3 hidden layers.
        Input: layer_1_units: amount of nodes in the first hidden layer
               layer_2_units: amount of nodes in the second hidden layer
               layer_3_units: amount of nodes in the third hidden layer
               act_fct: activation function used in all 3 hidden layers
               loss: loss function used for the model
               dr: dropout rate in every hidden layer
               load_saved: if True (default) and a model exists at
                           self.saved_model_path, that model is loaded and returned
                           and all other arguments are ignored. Pass False to
                           always construct a new, untrained network.
        Output: Dense neural network (DNN) with 3 hidden layers, compiled with the
                Adam optimizer (self.learning_rate) and the accuracy metric.
                The input dimension is self.input_dim and the output dimension
                is self.output_dim.
        """
        # Loading is skipped when no model has been saved yet, so a first run
        # on a fresh machine builds a network instead of raising.
        if load_saved and self._has_saved_model():
            print("---model loaded---")
            return load_model(self.saved_model_path)

        # A shape tuple works on every tf.keras version, whereas a bare int is
        # not accepted by all of them.
        input_vec = Input(shape=(self.input_dim,), name='input')

        layer = input_vec
        hidden_units = (layer_1_units, layer_2_units, layer_3_units)
        for idx, units in enumerate(hidden_units, start=1):
            # Batch normalization sits in front of the 2nd and 3rd hidden layer only.
            if idx > 1:
                layer = BatchNormalization() (layer)
            layer = Dense(units, name="dense_{}".format(idx)) (layer)
            layer = Dropout(dr, name="dr_{}".format(idx)) (layer)
            layer = Activation(act_fct, name="activation_{}".format(idx)) (layer)

        output = Dense(self.output_dim, activation='sigmoid', name='output') (layer)

        opt = tf.keras.optimizers.Adam(learning_rate=self.learning_rate)

        model = Model(input_vec, output, name='classifier')
        model.compile(optimizer=opt, loss=loss, metrics=['accuracy'])

        return model

    def find_hyperparameters(self):
        """
        Grid-searches the hidden layer sizes and the activation function with a
        KerasClassifier wrapped in GridSearchCV (2-fold cross-validation, 20 epochs).
        Output: Best hyperparameters (HP) for the DNN.
        """
        # load_saved=False is forwarded to build_model so that every grid point
        # trains a fresh network with the requested hyperparameters instead of
        # re-using the persisted model.
        kc = KerasClassifier(build_fn=self.build_model, load_saved=False, epochs=20, batch_size=self.batch_size, verbose=0)
        grid_space = dict(layer_1_units=[6, 8, 10, 12, 14, 16],
                          layer_2_units=[6, 8, 10, 12, 14, 16],
                          layer_3_units=[6, 8, 10, 12, 14, 16],
                          act_fct=['sigmoid', 'relu'])

        gscv = GridSearchCV(estimator=kc, param_grid=grid_space, n_jobs=1, cv=2, verbose=2)
        # NOTE(review): the test split doubles as validation data here.
        gscv_res = gscv.fit(x_train, y_train, validation_data=(x_test, y_test))
        return gscv_res.best_params_

    def train_model(self, verbose=1, load_saved=True):
        """
        Train the DNN for up to 200 epochs on the training data, validating on the
        test data. Batches are shuffled before every epoch. If the validation loss
        stops improving, the learning rate is lowered (after 10 epochs) or training
        is stopped early (after 40 epochs) and the best weights are restored.
        The trained model is saved to self.saved_model_path unless that is None.
        Input: verbose: 0 means nothing is displayed during training.
                        1 means progress information is displayed.
               load_saved: forwarded to build_model. True (default) resumes from a
                           saved model when one exists; False trains a new network.
        Output: - trained model
                - history of the training process
        """
        epochs = 200
        model = self.build_model(load_saved=load_saved)

        reduce_lr = ReduceLROnPlateau(monitor='val_loss', patience=10, factor=0.1, mode='min', verbose=verbose)
        early_stop = EarlyStopping(monitor='val_loss', patience=40, restore_best_weights=True, verbose=verbose)
        hst = model.fit(x_train, y_train, epochs=epochs, batch_size=self.batch_size, shuffle=True, callbacks=[reduce_lr, early_stop], validation_data=(x_test, y_test), verbose=verbose)

        # Saving is optional so that a builder with saved_model_path=None still trains.
        if self.saved_model_path is not None:
            model.save(self.saved_model_path)
        return model, hst

    def beam_search(self, beam_size=10):
        """
        Trains beam_size independent models from scratch and ranks them.
        Input: beam_size: amount of different trained models.
        Output: List of (model, training history, test accuracy) tuples, sorted by
                accuracy from best to worst. The best model is written to
                self.saved_model_path (unless that is None).
        """
        models = []

        for i in range(beam_size):
            print("Model_" + str(i) + " start Trainig.")

            # Each candidate must start from new random weights; resuming from
            # the saved model would make every candidate a continuation of the
            # previous one.
            model, hst = self.train_model(verbose=0, load_saved=False)
            # evaluate() returns [loss, accuracy] for the compiled metrics.
            accuracy = model.evaluate(x_test, y_test, verbose=0)[1]
            models.append((model, hst, accuracy))

            print("Model_" + str(i) + " Training done. Accuracy=" + str(accuracy))
            print("------ Next ------")

        models.sort(key=lambda entry: entry[2], reverse=True)

        # train_model overwrites the saved model on every iteration, so persist
        # the winner last to leave the best candidate on disk.
        if models and self.saved_model_path is not None:
            models[0][0].save(self.saved_model_path)

        print("!!!Models sorted by accuracy (decreasingly)!!!")
        return models

In [5]:
# Create the builder with its default learning rate and batch size.
builder = Model_builder()

# In this run build_model() loads the previously saved model from
# builder.saved_model_path rather than constructing a new network,
# as the "---model loaded---" output below shows.
model = builder.build_model()

---model loaded---


2022-03-05 11:19:51.556992: I tensorflow/core/platform/cpu_feature_guard.cc:145] This TensorFlow binary is optimized with Intel(R) MKL-DNN to use the following CPU instructions in performance critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX512F FMA
To enable them in non-MKL-DNN operations, rebuild TensorFlow with the appropriate compiler flags.
2022-03-05 11:19:51.557459: I tensorflow/core/common_runtime/process_util.cc:115] Creating new thread pool with default inter op setting: 4. Tune using inter_op_parallelism_threads for best performance.
