In [1]:
# Import libraries

# Pandas for data handling
import pandas # https://pandas.pydata.org/
# from pandas.plotting import scatter_matrix

# pretty tables
from IPython.display import display

# NumPy for numerical computing
import numpy # https://numpy.org/

# MatPlotLib+Seaborn for visualization
import matplotlib.pyplot as pl  # https://matplotlib.org/
import seaborn as sns

# assessment
from sklearn import model_selection # for model comparisons
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import f1_score

from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import cohen_kappa_score

# algorithms
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

# data preprocessing / feature selection
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA

# combining
from sklearn.pipeline import make_pipeline

#########

In [2]:
# Load the data
print('Loading data from file ...')  
dataset = pandas.read_csv('winequality-white.csv')
print('Removing rows with missing data ...')  
dataset = dataset.dropna()
print('Reading list of problem variables X and y...')
X_name = [ 'fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar', 'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density', 'pH', 'sulphates', 'alcohol' ] 
y_name = 'quality'
X = dataset[X_name]   
y = dataset[y_name]   

Loading data from file ...
Removing rows with missing data ...
Reading list of problem variables X and y...


In [3]:
# setting the seed allows for repeatability
seed = 5 

print('Partitioning data into parts: formative (for development) and summative (for testing) ...')
test_size = 0.20   # means 20 percent
X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=test_size, random_state=seed)

print('done \n')

Partitioning data into parts: formative (for development) and summative (for testing) ...
done 



In [4]:
import warnings

import matplotlib.pyplot as plt

from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report

In [5]:
# Chose the Algorithms

seed = 42 # setting the seed allows for repeatability

mlp1 = MLPClassifier(hidden_layer_sizes=(50,), # one hidden layer with 50 neurons
                    activation = 'relu',  # ReLU is the default option
                    # solver='sgd',  # default is Adam
                    alpha=1e-4,  # regulariztion parameter, set to default=0.0001 (increase up to 1.0 for stronger regularization)
                    learning_rate_init=.1 ,  # initial step-size for updating the weights, default is 0.001
                    max_iter=10,  # number of epochs, default=200
                    random_state=42,
                    verbose=10, 
                    )
mlp2 = MLPClassifier(hidden_layer_sizes=(50), # two hidden layer with 50 and 60 neurons
                    activation = 'logistic',  # ReLU is the default option
                    # activation = {'identity','logistic','tanh','relu'}  # ReLU is the default option
                    # solver='sgd',  # default is Adam
                    # solver={'lbfgs','sgd','adam'}  # default is Adam
                    alpha=1e-4,  # regulariztion parameter, set to default=0.0001 (increase up to 1.0 for stronger regularization)
                    learning_rate_init=.1 ,  # initial step-size for updating the weights, default is 0.001
                    max_iter=10,  # number of epochs, default=200
                    random_state=42,
                    verbose=10, 
                    )
mlp3 = MLPClassifier(hidden_layer_sizes=(50,60), # two hidden layer with 50 and 60 neurons
                    activation = 'relu',  # ReLU is the default option
                    # solver='sgd',  # default is Adam
                    alpha=1e-4,  # regulariztion parameter, set to default=0.0001 (increase up to 1.0 for stronger regularization)
                    learning_rate_init=.1 ,  # initial step-size for updating the weights, default is 0.001
                    max_iter=10,  # number of epochs, default=200
                    random_state=42,
                    verbose=10, 
                    )

print('Reading list of algorithms to train ...')
models = []
models.append(( 'relu_MLP_hidden-layer(50)', mlp1 ))
models.append(( 'logistic_MLP_hidden-layer(50)', mlp2 ))
models.append(( 'relu_MLP_hidden-layer(50,60)', mlp3 ))
print('done \n')

Reading list of algorithms to train ...
done 



In [6]:
# Train the classifier
# NOTE: this example won't converge because our max_iter choice is too few epochs 
# (otherwise it will take too long for a live demo), 
# so we catch the warning and ignore it here
from sklearn.exceptions import ConvergenceWarning
with warnings.catch_warnings():
    warnings.filterwarnings("ignore", category=ConvergenceWarning, module="sklearn")
    
for name, model in models:   # Select each model in turn
    print(" ++ NOW WORKING ON ALGORITHM %s ++" % name)  
    selected_model = model
    selected_model.fit(X_train, y_train)
    print("Training set score: %f" % selected_model.score(X_train, y_train))
print('Done')

 ++ NOW WORKING ON ALGORITHM relu_MLP_hidden-layer(50) ++
Iteration 1, loss = 14.07085299
Iteration 2, loss = 1.55507896
Iteration 3, loss = 1.34487144
Iteration 4, loss = 1.31187600
Iteration 5, loss = 1.30618500
Iteration 6, loss = 1.30358940
Iteration 7, loss = 1.30210391
Iteration 8, loss = 1.30134136
Iteration 9, loss = 1.30084004
Iteration 10, loss = 1.30069790
Training set score: 0.443594
 ++ NOW WORKING ON ALGORITHM logistic_MLP_hidden-layer(50) ++
Iteration 1, loss = 1.52649482
Iteration 2, loss = 1.34100473
Iteration 3, loss = 1.31572096
Iteration 4, loss = 1.32434303
Iteration 5, loss = 1.35156698
Iteration 6, loss = 1.33249435
Iteration 7, loss = 1.32191958
Iteration 8, loss = 1.33927079
Iteration 9, loss = 1.34767041
Iteration 10, loss = 1.38084202
Training set score: 0.298111
 ++ NOW WORKING ON ALGORITHM relu_MLP_hidden-layer(50,60) ++
Iteration 1, loss = 8.89549420
Iteration 2, loss = 1.37955823
Iteration 3, loss = 1.29772836
Iteration 4, loss = 1.28415097
Iteration 5, l



Iteration 6, loss = 1.25998813
Iteration 7, loss = 1.25757507
Iteration 8, loss = 1.24956329
Iteration 9, loss = 1.29216297
Iteration 10, loss = 1.26315874
Training set score: 0.454569
Done




In [7]:
# Classification report
for name, model in models:   # Select each model in turn
    print(" clasification report on %s " % name) 
    selected_model = model
    selected_model.fit(X_train, y_train)
    y_predicted = selected_model.predict(X_test)
    print(classification_report(y_test, y_predicted))  # compare predictions with ground truth
print('Done')

# y_predicted = mlp.predict(X_test)   # use the trained classifier to predict on the test set

 clasification report on relu_MLP_hidden-layer(50) 
Iteration 1, loss = 14.07085299
Iteration 2, loss = 1.55507896
Iteration 3, loss = 1.34487144
Iteration 4, loss = 1.31187600
Iteration 5, loss = 1.30618500
Iteration 6, loss = 1.30358940
Iteration 7, loss = 1.30210391
Iteration 8, loss = 1.30134136
Iteration 9, loss = 1.30084004
Iteration 10, loss = 1.30069790
              precision    recall  f1-score   support

           3       0.00      0.00      0.00         4
           4       0.00      0.00      0.00        34
           5       0.00      0.00      0.00       289
           6       0.47      1.00      0.64       460
           7       0.00      0.00      0.00       168
           8       0.00      0.00      0.00        25

    accuracy                           0.47       980
   macro avg       0.08      0.17      0.11       980
weighted avg       0.22      0.47      0.30       980

 clasification report on logistic_MLP_hidden-layer(50) 
Iteration 1, loss = 1.52649482
Iterat

  _warn_prf(average, modifier, msg_start, len(result))


Iteration 9, loss = 1.34767041
Iteration 10, loss = 1.38084202
              precision    recall  f1-score   support

           3       0.00      0.00      0.00         4
           4       0.00      0.00      0.00        34
           5       0.29      1.00      0.46       289
           6       0.00      0.00      0.00       460
           7       0.00      0.00      0.00       168
           8       0.00      0.00      0.00        25

    accuracy                           0.29       980
   macro avg       0.05      0.17      0.08       980
weighted avg       0.09      0.29      0.13       980

 clasification report on relu_MLP_hidden-layer(50,60) 
Iteration 1, loss = 8.89549420
Iteration 2, loss = 1.37955823
Iteration 3, loss = 1.29772836
Iteration 4, loss = 1.28415097
Iteration 5, loss = 1.26633062


  _warn_prf(average, modifier, msg_start, len(result))


Iteration 6, loss = 1.25998813
Iteration 7, loss = 1.25757507
Iteration 8, loss = 1.24956329
Iteration 9, loss = 1.29216297




Iteration 10, loss = 1.26315874
              precision    recall  f1-score   support

           3       0.00      0.00      0.00         4
           4       0.00      0.00      0.00        34
           5       0.43      0.21      0.28       289
           6       0.47      0.87      0.61       460
           7       0.00      0.00      0.00       168
           8       0.00      0.00      0.00        25

    accuracy                           0.47       980
   macro avg       0.15      0.18      0.15       980
weighted avg       0.35      0.47      0.37       980

Done


  _warn_prf(average, modifier, msg_start, len(result))


In [8]:
# Chose the Algorithms

seed = 42 # setting the seed allows for repeatability

mlp1 = MLPClassifier(hidden_layer_sizes=(50,), # one hidden layer with 50 neurons
                    activation = 'relu',  # ReLU is the default option
                    solver='sgd',  # default is Adam
                    alpha=1e-4,  # regulariztion parameter, set to default=0.0001 (increase up to 1.0 for stronger regularization)
                    learning_rate_init=.1 ,  # initial step-size for updating the weights, default is 0.001
                    max_iter=10,  # number of epochs, default=200
                    random_state=42,
                    verbose=10, 
                    )
mlp2 = MLPClassifier(hidden_layer_sizes=(50), # two hidden layer with 50 and 60 neurons
                    activation = 'relu',  # ReLU is the default option
                    # activation = {'identity','logistic','tanh','relu'}  # ReLU is the default option
                    # solver='sgd',  # default is Adam
                    # solver={'lbfgs','sgd','adam'}  # default is Adam
                    alpha=1e-4,  # regulariztion parameter, set to default=0.0001 (increase up to 1.0 for stronger regularization)
                    learning_rate_init=.1 ,  # initial step-size for updating the weights, default is 0.001
                    max_iter=50,  # number of epochs, default=200
                    random_state=42,
                    verbose=10, 
                    )
mlp3 = MLPClassifier(hidden_layer_sizes=(50), # two hidden layer with 50 and 60 neurons
                    activation = 'relu',  # ReLU is the default option
                    # solver='sgd',  # default is Adam
                    alpha=1e-4,  # regulariztion parameter, set to default=0.0001 (increase up to 1.0 for stronger regularization)
                    learning_rate_init=.01 ,  # initial step-size for updating the weights, default is 0.001
                    max_iter=10,  # number of epochs, default=200
                    random_state=42,
                    verbose=10, 
                    )

print('Reading list of algorithms to train ...')
models = []
models.append(( 'sgd-solver_relu_MLP_hidden-layer(50)', mlp1 ))
models.append(( '50-iter-epoch_relu_MLP_hidden-layer(50)', mlp2 ))
models.append(( '0.01-learning-rate_relu_MLP_hidden-layer(50)', mlp3 ))
print('done \n')

Reading list of algorithms to train ...
done 



In [9]:
# Train the classifier
# NOTE: this example won't converge because our max_iter choice is too few epochs 
# (otherwise it will take too long for a live demo), 
# so we catch the warning and ignore it here
from sklearn.exceptions import ConvergenceWarning
with warnings.catch_warnings():
    warnings.filterwarnings("ignore", category=ConvergenceWarning, module="sklearn")
    
for name, model in models:   # Select each model in turn
    print(" %s " % name)  
    selected_model = model
    selected_model.fit(X_train, y_train)
    print("Training set score: %f" % selected_model.score(X_train, y_train))
print('Done')

 sgd-solver_relu_MLP_hidden-layer(50) 
Iteration 1, loss = 5.06014007
Iteration 2, loss = 2.10351281
Iteration 3, loss = 2.16555570
Iteration 4, loss = 2.16858144
Iteration 5, loss = 2.16705411
Iteration 6, loss = 2.16544330
Iteration 7, loss = 2.16461398
Iteration 8, loss = 2.16386446
Iteration 9, loss = 2.16329669
Iteration 10, loss = 2.16326857
Training set score: 0.443594
 50-iter-epoch_relu_MLP_hidden-layer(50) 
Iteration 1, loss = 14.07085299
Iteration 2, loss = 1.55507896
Iteration 3, loss = 1.34487144
Iteration 4, loss = 1.31187600
Iteration 5, loss = 1.30618500




Iteration 6, loss = 1.30358940
Iteration 7, loss = 1.30210391
Iteration 8, loss = 1.30134136
Iteration 9, loss = 1.30084004
Iteration 10, loss = 1.30069790
Iteration 11, loss = 1.30068136
Iteration 12, loss = 1.30117097
Iteration 13, loss = 1.30056102
Iteration 14, loss = 1.30023016
Iteration 15, loss = 1.30017349
Iteration 16, loss = 1.30109968
Iteration 17, loss = 1.30082135
Iteration 18, loss = 1.30111085
Iteration 19, loss = 1.30070001
Iteration 20, loss = 1.30001552
Iteration 21, loss = 1.30100383
Iteration 22, loss = 1.30062363
Iteration 23, loss = 1.30204229
Iteration 24, loss = 1.30058687
Iteration 25, loss = 1.30085163
Iteration 26, loss = 1.30134970
Iteration 27, loss = 1.30204623
Iteration 28, loss = 1.30051690
Iteration 29, loss = 1.30014349
Iteration 30, loss = 1.30043647
Iteration 31, loss = 1.30055471
Training loss did not improve more than tol=0.000100 for 10 consecutive epochs. Stopping.
Training set score: 0.443594
 0.01-learning-rate_relu_MLP_hidden-layer(50) 
Iterat



In [10]:
# Chose the Algorithms

mlp1 = MLPClassifier(hidden_layer_sizes=(50,), # one hidden layer with 50 neurons
                    activation = 'relu',  # ReLU is the default option
                    # solver='sgd',  # default is Adam
                    alpha=1e-4,  # regulariztion parameter, set to default=0.0001 (increase up to 1.0 for stronger regularization)
                    learning_rate_init=.1 ,  # initial step-size for updating the weights, default is 0.001
                    max_iter=10,  # number of epochs, default=200
                    random_state=42,
                    verbose=10, 
                    )
mlp2 = MLPClassifier(hidden_layer_sizes=(50), # two hidden layer with 50 and 60 neurons
                    activation = 'logistic',  # ReLU is the default option
                    # activation = {'identity','logistic','tanh','relu'}  # ReLU is the default option
                    # solver='sgd',  # default is Adam
                    # solver={'lbfgs','sgd','adam'}  # default is Adam
                    alpha=1e-4,  # regulariztion parameter, set to default=0.0001 (increase up to 1.0 for stronger regularization)
                    learning_rate_init=.1 ,  # initial step-size for updating the weights, default is 0.001
                    max_iter=10,  # number of epochs, default=200
                    random_state=42,
                    verbose=10, 
                    )
mlp3 = MLPClassifier(hidden_layer_sizes=(50,60), # two hidden layer with 50 and 60 neurons
                    activation = 'relu',  # ReLU is the default option
                    # solver='sgd',  # default is Adam
                    alpha=1e-4,  # regulariztion parameter, set to default=0.0001 (increase up to 1.0 for stronger regularization)
                    learning_rate_init=.1 ,  # initial step-size for updating the weights, default is 0.001
                    max_iter=10,  # number of epochs, default=200
                    random_state=42,
                    verbose=10, 
                    )
mlp4 = MLPClassifier(hidden_layer_sizes=(50,), # one hidden layer with 50 neurons
                    activation = 'relu',  # ReLU is the default option
                    solver='sgd',  # default is Adam
                    alpha=1e-4,  # regulariztion parameter, set to default=0.0001 (increase up to 1.0 for stronger regularization)
                    learning_rate_init=.1 ,  # initial step-size for updating the weights, default is 0.001
                    max_iter=10,  # number of epochs, default=200
                    random_state=42,
                    verbose=10, 
                    )
mlp5 = MLPClassifier(hidden_layer_sizes=(50), # two hidden layer with 50 and 60 neurons
                    activation = 'relu',  # ReLU is the default option
                    # activation = {'identity','logistic','tanh','relu'}  # ReLU is the default option
                    # solver='sgd',  # default is Adam
                    # solver={'lbfgs','sgd','adam'}  # default is Adam
                    alpha=1e-4,  # regulariztion parameter, set to default=0.0001 (increase up to 1.0 for stronger regularization)
                    learning_rate_init=.1 ,  # initial step-size for updating the weights, default is 0.001
                    max_iter=50,  # number of epochs, default=200
                    random_state=42,
                    verbose=10, 
                    )
mlp6 = MLPClassifier(hidden_layer_sizes=(50), # two hidden layer with 50 and 60 neurons
                    activation = 'relu',  # ReLU is the default option
                    # solver='sgd',  # default is Adam
                    alpha=1e-4,  # regulariztion parameter, set to default=0.0001 (increase up to 1.0 for stronger regularization)
                    learning_rate_init=.01 ,  # initial step-size for updating the weights, default is 0.001
                    max_iter=10,  # number of epochs, default=200
                    random_state=42,
                    verbose=10, 
                    )

print('Reading list of algorithms to train ...')
models = []
models.append(( 'relu_MLP_hidden-layer(50)', mlp1 ))
models.append(( 'logistic_MLP_hidden-layer(50)', mlp2 ))
models.append(( 'relu_MLP_hidden-layer(50,60)', mlp3 ))
models.append(( 'sgd-solver_relu_MLP_hidden-layer(50)', mlp4 ))
models.append(( '50-iter-epoch_relu_MLP_hidden-layer(50)', mlp2 ))
models.append(( '0.01-learning-rate_relu_MLP_hidden-layer(50)', mlp3 ))
print('done \n')

Reading list of algorithms to train ...
done 



In [11]:
# Train the classifier
# NOTE: this example won't converge because our max_iter choice is too few epochs 
# (otherwise it will take too long for a live demo), 
# so we catch the warning and ignore it here
from sklearn.exceptions import ConvergenceWarning
with warnings.catch_warnings():
    warnings.filterwarnings("ignore", category=ConvergenceWarning, module="sklearn")
    
for name, model in models:   # Select each model in turn
    print(" %s " % name)  
    selected_model = model
    selected_model.fit(X_train, y_train)
    print("Training set score: %f" % selected_model.score(X_train, y_train))
print('Done')

 relu_MLP_hidden-layer(50) 
Iteration 1, loss = 14.07085299
Iteration 2, loss = 1.55507896
Iteration 3, loss = 1.34487144
Iteration 4, loss = 1.31187600
Iteration 5, loss = 1.30618500
Iteration 6, loss = 1.30358940
Iteration 7, loss = 1.30210391
Iteration 8, loss = 1.30134136
Iteration 9, loss = 1.30084004
Iteration 10, loss = 1.30069790
Training set score: 0.443594
 logistic_MLP_hidden-layer(50) 
Iteration 1, loss = 1.52649482
Iteration 2, loss = 1.34100473
Iteration 3, loss = 1.31572096




Iteration 4, loss = 1.32434303
Iteration 5, loss = 1.35156698
Iteration 6, loss = 1.33249435
Iteration 7, loss = 1.32191958
Iteration 8, loss = 1.33927079
Iteration 9, loss = 1.34767041
Iteration 10, loss = 1.38084202
Training set score: 0.298111
 relu_MLP_hidden-layer(50,60) 
Iteration 1, loss = 8.89549420
Iteration 2, loss = 1.37955823
Iteration 3, loss = 1.29772836
Iteration 4, loss = 1.28415097




Iteration 5, loss = 1.26633062
Iteration 6, loss = 1.25998813
Iteration 7, loss = 1.25757507
Iteration 8, loss = 1.24956329
Iteration 9, loss = 1.29216297




Iteration 10, loss = 1.26315874
Training set score: 0.454569
 sgd-solver_relu_MLP_hidden-layer(50) 
Iteration 1, loss = 5.06014007
Iteration 2, loss = 2.10351281
Iteration 3, loss = 2.16555570
Iteration 4, loss = 2.16858144
Iteration 5, loss = 2.16705411
Iteration 6, loss = 2.16544330
Iteration 7, loss = 2.16461398
Iteration 8, loss = 2.16386446
Iteration 9, loss = 2.16329669
Iteration 10, loss = 2.16326857
Training set score: 0.443594
 50-iter-epoch_relu_MLP_hidden-layer(50) 
Iteration 1, loss = 1.52649482
Iteration 2, loss = 1.34100473
Iteration 3, loss = 1.31572096
Iteration 4, loss = 1.32434303
Iteration 5, loss = 1.35156698
Iteration 6, loss = 1.33249435
Iteration 7, loss = 1.32191958
Iteration 8, loss = 1.33927079




Iteration 9, loss = 1.34767041
Iteration 10, loss = 1.38084202




Training set score: 0.298111
 0.01-learning-rate_relu_MLP_hidden-layer(50) 
Iteration 1, loss = 8.89549420
Iteration 2, loss = 1.37955823
Iteration 3, loss = 1.29772836
Iteration 4, loss = 1.28415097
Iteration 5, loss = 1.26633062
Iteration 6, loss = 1.25998813
Iteration 7, loss = 1.25757507
Iteration 8, loss = 1.24956329
Iteration 9, loss = 1.29216297
Iteration 10, loss = 1.26315874
Training set score: 0.454569




Done


In [12]:
# Make predictions on test dataset
selected_model = KNeighborsClassifier()
selected_model.fit(X_train, y_train)
predictions = selected_model.predict(X_test)

print("Algorithm: %s " % selected_model)
print('\n clasification report:\n', classification_report(y_test, predictions))
print("Training set score: %f" % selected_model.score(X_train, y_train))      
print('done \n')


Algorithm: KNeighborsClassifier() 

 clasification report:
               precision    recall  f1-score   support

           3       0.00      0.00      0.00         4
           4       0.16      0.09      0.11        34
           5       0.47      0.53      0.50       289
           6       0.53      0.57      0.55       460
           7       0.41      0.35      0.38       168
           8       0.29      0.08      0.12        25

    accuracy                           0.49       980
   macro avg       0.31      0.27      0.28       980
weighted avg       0.47      0.49      0.48       980

Training set score: 0.651608
done 



  _warn_prf(average, modifier, msg_start, len(result))
