In [71]:
import pandas
import time
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split, GridSearchCV

from pickle import dump,load
import keras_tuner
from keras_tuner.tuners import RandomSearch
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
from keras.callbacks import EarlyStopping
from sklearn.metrics import accuracy_score
import numpy as np
from tensorflow.keras.models import save_model

### Red Wines Dataset

In [72]:
#import data
label_quality = LabelEncoder()
data_red = pandas.read_csv('winequality-red.csv', delimiter=';')
data_red['quality'] = label_quality.fit_transform(data_red['quality'])
y = data_red['quality']
X = data_red[data_red.columns[:-1]]

onehot_encoder = OneHotEncoder(sparse=False)
y = np.array(list(y)).reshape(len(y), 1)
y = onehot_encoder.fit_transform(y).astype(int)

# 80-20 train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 1)

In [73]:
# Normalize
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.fit_transform(X_test)

N_test_red = len(X_test)

In [74]:
def build_model(hp):
    clf = Sequential()
    # First Dense layer with Dropout regularisation
    clf.add(Dense(hp.Int('input_unit', min_value=50,max_value=400,step=50), kernel_initializer='he_uniform', input_dim=X_train.shape[1]))
    clf.add(Dropout(0.2))

    # Consequent Dense layers
    for i in range(hp.Int('n_layers', 1, 12)):
        clf.add(Dense(hp.Int(f'lstm_{i}_units',min_value=50,max_value=400,step=50)))
        clf.add(Dropout(hp.Float(f'Dropout_rate',min_value=0,max_value=0.5,step=0.1)))

    # Another lSTM layer 
    clf.add(Dense(hp.Int('layer_2_neurons',min_value=50,max_value=400,step=50)))
    clf.add(Dropout(hp.Float('Dropout_rate',min_value=0,max_value=0.5,step=0.1)))

    # The output layer
    clf.add(Dense(units=6, activation='softmax'))

    # Compiling the NN
    clf.compile(optimizer='adam',loss='mse', metrics=['mse'])

    return clf

In [75]:
#Neural Network to generate predictions
def predNN_2(X_train, y_train, X_test, y_test, wine_color = 'red'):
    tuner = RandomSearch(
            build_model,
            objective='mse',
            max_trials=60, # different trials - different hyperparams
            executions_per_trial=1, # executions w/in same trial - SAME hyperparams! 
            project_name='multi-hidden-layer-fcnn_red',
#             overwrite=True # add overwrite=True to start over 
        ) 
    
    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10)
    
    print("Fitting")
    t0 = time.time()
    tuner.search(
        x=X_train,
        y=y_train,
        epochs=100, 
        batch_size=32,
        validation_data=(X_test,y_test),
        callbacks=[es]
    )
    t1 = time.time()
        
    best_model = tuner.get_best_models(num_models=1)[0]
    print('Random search CV time for', wine_color, 'wines took', t1 - t0, 'seconds')
    print("Optimized parameters:", best_model)
    
    return best_model

In [76]:
#Calling Neural Network for Predictions
classifier = predNN_2(X_train, y_train, X_test, y_test, 'red')
preds = classifier.predict(X_test)
preds = preds.round()

Trial 60 Complete [00h 00m 05s]
mse: 0.14413630962371826

Best mse So Far: 0.09031747281551361
Total elapsed time: 00h 06m 16s
INFO:tensorflow:Oracle triggered exit
Random search CV time for red wines took 376.85603404045105 seconds
Optimized parameters: <keras.engine.sequential.Sequential object at 0x1521f61c0>


  updates=self.state_updates,


In [77]:
train_accNN = accuracy_score(y_train, classifier.predict(X_train).round()) 
train_accNN

0.5215011727912432

In [78]:
#test accuracy
test_accNN_red = accuracy_score(y_test, preds) 
test_accNN_red

0.525

In [79]:
# save model
save_model(classifier,'multilayerFCNN_red_randomCV.h5')
# dump(self.sc, open('data_scaler.pkl', 'wb'))

### White Wines Dataset

In [80]:
#import data
label_quality = LabelEncoder()
data_white = pandas.read_csv('winequality-white.csv', delimiter=';')
data_white['quality'] = label_quality.fit_transform(data_white['quality'])
y = data_white['quality']
X = data_white[data_white.columns[:-1]]

onehot_encoder = OneHotEncoder(sparse=False)
y = np.array(list(y)).reshape(len(y), 1)
y = onehot_encoder.fit_transform(y).astype(int)

# 80-20 train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 1)

In [81]:
# Normalize
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.fit_transform(X_test)

N_test_white = len(X_test)

In [82]:
def build_model_2(hp):
    clf = Sequential()
    # First Dense layer with Dropout regularisation
    clf.add(Dense(hp.Int('input_unit', min_value=50,max_value=400,step=50), kernel_initializer='he_uniform', input_dim=X_train.shape[1]))
    clf.add(Dropout(0.2))

    # Consequent Dense layers
    for i in range(hp.Int('n_layers', 1, 12)):
        clf.add(Dense(hp.Int(f'lstm_{i}_units',min_value=50,max_value=400,step=50)))
        clf.add(Dropout(hp.Float(f'Dropout_rate',min_value=0,max_value=0.5,step=0.1)))

    # Another lSTM layer 
    clf.add(Dense(hp.Int('layer_2_neurons',min_value=50,max_value=400,step=50)))
    clf.add(Dropout(hp.Float('Dropout_rate',min_value=0,max_value=0.5,step=0.1)))

    # The output layer
    clf.add(Dense(units=7, activation='softmax'))

    # Compiling the NN
    clf.compile(optimizer='adam',loss='mse', metrics=['mse'])

    return clf

In [83]:
#Neural Network to generate predictions
def predNN_3(X_train, y_train, X_test, y_test, wine_color = 'red'):
    tuner = RandomSearch(
            build_model_2,
            objective='mse',
            max_trials=60, # different trials - different hyperparams
            executions_per_trial=1, # executions w/in same trial - SAME hyperparams! 
            project_name='multi-hidden-layer-fcnn_white',
#             overwrite=True # add overwrite=True to start over
        ) 
    
    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10)
    
    print("Fitting")
    t0 = time.time()
    tuner.search(
        x=X_train,
        y=y_train,
        epochs=100, 
        batch_size=32,
        validation_data=(X_test,y_test),
        callbacks=[es]
    )
    t1 = time.time()
        
    best_model = tuner.get_best_models(num_models=1)[0]
    print('Random search CV time for', wine_color, 'wines took', t1 - t0, 'seconds')
    print("Optimized parameters:", best_model)
    
    return best_model

In [84]:
#Calling Neural Network for Predictions
classifier = predNN_3(X_train, y_train, X_test, y_test, 'white')
preds = classifier.predict(X_test)
preds = preds.round()

Trial 60 Complete [00h 00m 15s]
mse: 0.14825348556041718

Best mse So Far: 0.08640273660421371
Total elapsed time: 00h 13m 24s
INFO:tensorflow:Oracle triggered exit
Random search CV time for white wines took 804.2011730670929 seconds
Optimized parameters: <keras.engine.sequential.Sequential object at 0x151108a00>


  updates=self.state_updates,


In [85]:
train_accNN = accuracy_score(y_train, classifier.predict(X_train).round()) 
train_accNN

0.2664624808575804

In [86]:
#test accuracy
test_accNN_white = accuracy_score(y_test, preds) 
test_accNN_white

0.25204081632653064

In [87]:
# save model
save_model(classifier,'multilayerFCNN_white_randomCV.h5')
# dump(self.sc, open('data_scaler.pkl', 'wb'))

### Weighted Accuracy

In [88]:
(N_test_red * test_accNN_red + N_test_white * test_accNN_white) / (N_test_white + N_test_red)

0.3192307692307692