# Multi-Layered Perceptron via Randomized Search CV # 

Instructions:

Play around with these parameters here
https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPClassifier.html 

Dataset:
Use the UCI Wine Quality Dataset 
https://archive.ics.uci.edu/ml/datasets/wine+quality 

In [2]:
#import libraries
import numpy as np
import pandas
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
from sklearn.model_selection import train_test_split, RandomizedSearchCV, GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.neural_network import MLPClassifier
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import classification_report
import time
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing import StandardScaler, LabelEncoder


import keras_tuner
from keras_tuner.tuners import RandomSearch
from keras_tuner.engine.hyperparameters import HyperParameters
from tensorflow.keras.models import save_model
from tensorflow.keras.models import model_from_json
import warnings
warnings.simplefilter("ignore", UserWarning)
from keras.callbacks import EarlyStopping

### Red Wines Dataset ###

In [3]:
#import data
label_quality = LabelEncoder()
data_red = pandas.read_csv('winequality-red.csv', delimiter=';')
data_red['quality'] = label_quality.fit_transform(data_red['quality'])
y = data_red['quality']
X = data_red[data_red.columns[:-1]]

# 80-20 train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 1)

In [4]:
# Normalize
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.fit_transform(X_test)

In [5]:
N_test_red = len(X_test)

In [6]:
#Neural Network to generate predictions
def predNN_2(X_train, y_train, wine_color = 'red'):
    clf = RandomizedSearchCV(MLPClassifier(max_iter=50000, random_state=42),
                      {'early_stopping' : [True, False],
                      'hidden_layer_sizes': [100, 200, 300, 400, 500],
                      'activation': ['relu', 'tanh', 'logistic'],
                      'learning_rate': ['constant', 'invscaling', 'adaptive'],
                      'learning_rate_init': [.0001, .001, .01, .1]},
                      cv=5,n_jobs=-1,verbose=6,random_state=1,n_iter=20)
    
    # Note: GridSearchCV by default performs 5-fold CV. 
    
    # Pick 20 sample configurations to perform CV on. 
    
    print("Fitting")
    t0 = time.time()
    clf.fit(X_train, y_train)
    t1 = time.time()
    print('Grid search CV time for', wine_color, 'wines took', t1 - t0, 'seconds')
    print("Optimized parameters:", clf.best_params_)
    print("Weighted validation score:", clf.best_score_)
    return clf

In [7]:
#Calling Neural Network for Predictions
classifier = predNN_2(X_train, y_train, 'red')
preds = classifier.predict(X_test)

Fitting
Fitting 5 folds for each of 20 candidates, totalling 100 fits
Grid search CV time for red wines took 151.85131907463074 seconds
Optimized parameters: {'learning_rate_init': 0.001, 'learning_rate': 'adaptive', 'hidden_layer_sizes': 300, 'early_stopping': False, 'activation': 'relu'}
Weighted validation score: 0.6216115196078431


In [8]:
# write cv results (such as mean fit times for each hyperparam configuration) to file
with open('fcnn_randomcv_red_adam.txt', 'w') as file:
    file.write(str(classifier.cv_results_))

In [9]:
activation = classifier.best_params_['activation']
hidden_layer_sizes = classifier.best_params_['hidden_layer_sizes']
learning_rate = classifier.best_params_['learning_rate']
learning_rate_init = classifier.best_params_['learning_rate_init']
early_stopping = classifier.best_params_['early_stopping']

classifier = MLPClassifier(early_stopping=early_stopping, activation=activation, hidden_layer_sizes=hidden_layer_sizes, max_iter=50000, learning_rate=learning_rate, learning_rate_init=learning_rate_init)
t0 = time.time()
classifier.fit(X_train, y_train)
t1 = time.time()
print('Training time time for red wines via Adam took', t1 - t0, 'seconds')

Training time time for red wines via Adam took 15.565123796463013 seconds


In [10]:
#training accuracy
train_accNN = accuracy_score(y_train, classifier.predict(X_train)) 
train_accNN

0.9953088350273651

In [11]:
#test accuracy
test_accNN_red = accuracy_score(y_test, classifier.predict(X_test)) 
test_accNN_red

0.6375

### White Wines Dataset ###

In [12]:
#import data
label_quality = LabelEncoder()
data_white = pandas.read_csv('winequality-white.csv', delimiter=';')
data_white['quality'] = label_quality.fit_transform(data_white['quality'])
y = data_white['quality']
X = data_white[data_white.columns[:-1]]

# 80-20 train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 1)

In [13]:
# Normalize
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.fit_transform(X_test)

In [14]:
N_test_white = len(X_test)

In [15]:
#Calling Neural Network for Predictions
classifier = predNN_2(X_train, y_train, 'white')
preds = classifier.predict(X_test)

Fitting
Fitting 5 folds for each of 20 candidates, totalling 100 fits
Grid search CV time for white wines took 688.1731026172638 seconds
Optimized parameters: {'learning_rate_init': 0.001, 'learning_rate': 'adaptive', 'hidden_layer_sizes': 500, 'early_stopping': False, 'activation': 'relu'}
Weighted validation score: 0.6135774233064873


In [16]:
# write cv results (such as mean fit times for each hyperparam configuration) to file
with open('fcnn_randomcv_white_adam.txt', 'w') as file:
    file.write(str(classifier.cv_results_))

In [17]:
activation = classifier.best_params_['activation']
hidden_layer_sizes = classifier.best_params_['hidden_layer_sizes']
learning_rate = classifier.best_params_['learning_rate']
learning_rate_init = classifier.best_params_['learning_rate_init']
early_stopping = classifier.best_params_['early_stopping']

classifier = MLPClassifier(early_stopping=early_stopping, activation=activation, hidden_layer_sizes=hidden_layer_sizes, max_iter=50000, learning_rate=learning_rate, learning_rate_init=learning_rate_init)
t0 = time.time()
classifier.fit(X_train, y_train)
t1 = time.time()
print('Training time time for white wines via Adam took', t1 - t0, 'seconds')

Training time time for white wines via Adam took 46.393786907196045 seconds


In [18]:
#training accuracy
train_accNN = accuracy_score(y_train, classifier.predict(X_train)) 
train_accNN

0.9461459928534967

In [19]:
#test accuracy
test_accNN_white = accuracy_score(y_test, classifier.predict(X_test)) 
test_accNN_white

0.6183673469387755

### Weighted Accuracy ###

In [20]:
(N_test_red * test_accNN_red + N_test_white * test_accNN_white) / (N_test_white + N_test_red)

0.6230769230769231