In [5]:
# Import libraries

import pandas # https://pandas.pydata.org/
import numpy as np
from pandas.plotting import scatter_matrix

# pretty tables
from IPython.display import display

# NumPy for numerical computing
import numpy # https://numpy.org/

# MatPlotLib + Seaborn for visualization
import matplotlib.pyplot as pl  # https://matplotlib.org/
import seaborn as sns   # https://seaborn.pydata.org/

# assessment
from sklearn import model_selection # for model comparisons
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import f1_score


# data preprocessing / feature selection
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

import warnings
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler

from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report
from sklearn.exceptions import ConvergenceWarning

from sklearn.cluster import AgglomerativeClustering
from scipy.cluster.hierarchy import dendrogram, linkage  

import matplotlib.pyplot as plt

* **Load Data**

* **Separate the data into training and testing datasets**


In [6]:
print('Loading data from file ...')  # Now let's load the data
dataset = pandas.read_csv('winequality-white.csv') # default is header=infer, change if column names are not in first row
print('done \n')

print('Removing rows with missing data ...')  # Make things simple
dataset = dataset.dropna()  # default is to drop any row that contains at least one missing value
print('done \n')


X_name = [ 'fixed acidity', 'volatile acidity', 'citric acid' , 'residual sugar', 'chlorides', 
          'free sulfur dioxide', 'total sulfur dioxide', 'density', 'pH', 'sulphates', 'alcohol' ] 
y_name = 'quality'
X = dataset[X_name]   
y = dataset[y_name] 


# setting the seed allows for repeatability
seed = 42 

print('Partitioning data into parts: formative (for development) and summative (for testing) ...')
test_size = 0.20   # means 20 percent
X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=test_size, random_state=seed)

print('done \n')


# standardization 
norm = StandardScaler()

X_train = norm.fit_transform(X_train)
X_test= norm.transform( X_test)


Loading data from file ...
done 

Removing rows with missing data ...
done 

Partitioning data into parts: formative (for development) and summative (for testing) ...
done 



# <font color='red'>Part 1</font> Train and tune the MLPClassifier



* **First Combination**


In [7]:
# Design the classifier neural network
mlp_1 = MLPClassifier(hidden_layer_sizes=(50,), # one hidden layer with 50 neurons
                    activation = 'relu',  # ReLU is the default option
                    # solver='adam',  # default is Adam
                    alpha=1e-4,  # regulariztion parameter, set to default=0.0001 (increase up to 1.0 for stronger regularization)
                    learning_rate_init=.1 ,  # initial step-size for updating the weights, default is 0.001
                    max_iter=10,  # number of epochs, default=200
                    random_state=42,
                    verbose=10, 
                    )

# Train the classifier

with warnings.catch_warnings():
    warnings.filterwarnings("ignore", category=ConvergenceWarning, module="sklearn")
    mlp_1.fit(X_train, y_train)

print("Training set score: %f" % mlp_1.score(X_train, y_train))
print('Done')




Iteration 1, loss = 1.30755799
Iteration 2, loss = 1.12818167
Iteration 3, loss = 1.08311290
Iteration 4, loss = 1.08268734
Iteration 5, loss = 1.05676321
Iteration 6, loss = 1.06623657
Iteration 7, loss = 1.04764248
Iteration 8, loss = 1.04253996
Iteration 9, loss = 1.02248645
Iteration 10, loss = 1.02081959
Training set score: 0.547984
Done


* **Second Combination**


In [8]:
# Design the classifier neural network
mlp_2 = MLPClassifier(hidden_layer_sizes=(70,10), # one hidden layer with 50 neurons
                    activation = 'tanh',  # ReLU is the default option
                    # solver='adam',  # default is Adam
                    alpha=1e-4,  # regulariztion parameter, set to default=0.0001 (increase up to 1.0 for stronger regularization)
                    learning_rate_init=.1 ,  # initial step-size for updating the weights, default is 0.001
                    max_iter=10,  # number of epochs, default=200
                    random_state=42,
                    verbose=10, 
                    )

# Train the classifier

with warnings.catch_warnings():
    warnings.filterwarnings("ignore", category=ConvergenceWarning, module="sklearn")
    mlp_2.fit(X_train, y_train)

print("Training set score: %f" % mlp_2.score(X_train, y_train))
print('Done')



Iteration 1, loss = 1.25717287
Iteration 2, loss = 1.13190938
Iteration 3, loss = 1.10248209
Iteration 4, loss = 1.08688077
Iteration 5, loss = 1.07586170
Iteration 6, loss = 1.06316864
Iteration 7, loss = 1.05434857
Iteration 8, loss = 1.06500985
Iteration 9, loss = 1.05289737
Iteration 10, loss = 1.07007771
Training set score: 0.564829
Done


* **Third Combination**
two hidden layers, 100 neurons, activation =relu, learning rate=1e-4

In [9]:
# Design the classifier neural network
mlp_3 = MLPClassifier(hidden_layer_sizes=(60,3), # one hidden layer with 50 neurons
                    activation = 'identity',  # ReLU is the default option
                    # solver='adam',  # default is Adam
                    alpha=1e-4,  # regulariztion parameter, set to default=0.0001 (increase up to 1.0 for stronger regularization)
                    learning_rate_init=.1 ,  # initial step-size for updating the weights, default is 0.001
                    max_iter=10,  # number of epochs, default=200
                    random_state=42,
                    verbose=10, 
                    )

# Train the classifier

with warnings.catch_warnings():
    warnings.filterwarnings("ignore", category=ConvergenceWarning, module="sklearn")
    mlp_3.fit(X_train, y_train)

print("Training set score: %f" % mlp_3.score(X_train, y_train))
print('Done')



Iteration 1, loss = 1.35094347
Iteration 2, loss = 1.17126506
Iteration 3, loss = 1.13445636
Iteration 4, loss = 1.11696921
Iteration 5, loss = 1.12174861
Iteration 6, loss = 1.12211773
Iteration 7, loss = 1.11820783
Iteration 8, loss = 1.11775899
Iteration 9, loss = 1.11723379
Iteration 10, loss = 1.12026474
Training set score: 0.521184
Done


#  <font color='red'>Part 2</font>

* **Study and describe the performance impact of varying at least three different combinations of optimizer parameter values**

Here we choose the second combination

In [10]:
# Design the classifier neural network
mlp = MLPClassifier(hidden_layer_sizes=(70,10), # one hidden layer with 50 neurons
                    activation = 'tanh',  # ReLU is the default option
                    # solver='sgd',  # default is Adam
                    alpha=1e-4,  # regulariztion parameter, set to default=0.0001 (increase up to 1.0 for stronger regularization)
                    learning_rate_init=.01 ,  # initial step-size for updating the weights, default is 0.001
                    max_iter=50,  # number of epochs, default=200
                    random_state=42,
                    verbose=10, 
                    )

# Train the classifier

with warnings.catch_warnings():
    warnings.filterwarnings("ignore", category=ConvergenceWarning, module="sklearn")
    mlp.fit(X_train, y_train)

print("Training set score: %f" % mlp.score(X_train, y_train))
print('Done')



Iteration 1, loss = 1.29980690
Iteration 2, loss = 1.09971481
Iteration 3, loss = 1.07319292
Iteration 4, loss = 1.05663549
Iteration 5, loss = 1.04281709
Iteration 6, loss = 1.03171139
Iteration 7, loss = 1.02828198
Iteration 8, loss = 1.02073831
Iteration 9, loss = 1.01678975
Iteration 10, loss = 1.00534610
Iteration 11, loss = 0.99550122
Iteration 12, loss = 0.99183760
Iteration 13, loss = 0.98713338
Iteration 14, loss = 0.98308534
Iteration 15, loss = 0.97637727
Iteration 16, loss = 0.97130720
Iteration 17, loss = 0.97004320
Iteration 18, loss = 0.96036115
Iteration 19, loss = 0.95053406
Iteration 20, loss = 0.93995578
Iteration 21, loss = 0.93865588
Iteration 22, loss = 0.93129942
Iteration 23, loss = 0.92587846
Iteration 24, loss = 0.92078958
Iteration 25, loss = 0.91836592
Iteration 26, loss = 0.91942653
Iteration 27, loss = 0.90897057
Iteration 28, loss = 0.90020841
Iteration 29, loss = 0.89667108
Iteration 30, loss = 0.89626852
Iteration 31, loss = 0.89488263
Iteration 32, los

Here we set smaller learning which requires more training epochs. As we can see, loss values is decreasing smoothly and effectively.

#  <font color='red'>Part 3</font>

* **Test the performance of the best MLPClassifier**



In [18]:

y_predicted = mlp.predict(X_test)   # use the trained classifier to predict on the test set

print('\n clasification report:\n', classification_report(y_test, y_predicted))  # compare predictions with ground truth

print('\n')        
print('done \n')


 clasification report:
               precision    recall  f1-score   support

           3       0.00      0.00      0.00         5
           4       0.43      0.24      0.31        25
           5       0.58      0.65      0.61       291
           6       0.57      0.59      0.58       432
           7       0.52      0.48      0.50       192
           8       0.50      0.17      0.26        35
           9       0.00      0.00      0.00         0

    accuracy                           0.56       980
   macro avg       0.37      0.31      0.32       980
weighted avg       0.55      0.56      0.55       980



done 



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


#  <font color='red'>Part 4</font>

* **Train and tune a different classifier that is not a neural network**

We are using Random Forest Classifier.

In [19]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

scoring = 'f1_macro'


# Random Forest
print('Tuning model...')
selected_model = RandomForestClassifier()
hyperparameters = {'max_depth':[None, 3, 4, 5], 'criterion':['gini', 'entropy'], 'n_estimators':[10, 50] }
clf = GridSearchCV(selected_model, hyperparameters, cv=5, scoring=scoring)
clf.fit(X_train, y_train)
print("Best hyperparameters found on development set for Random Forest:")
print(clf.best_params_)
tuned_model_RF = clf.best_estimator_

y_pred = tuned_model_RF.predict(X_test)
print( 'f1_score is')
print( f1_score(y_test, y_pred, average='macro') )




Tuning model...
Best hyperparameters found on development set for Random Forest:
{'criterion': 'entropy', 'max_depth': None, 'n_estimators': 50}
f1_score is
0.506057317549482


In [20]:
from sklearn.pipeline import make_pipeline

seed = 42, 

# Make predictions on test dataset

selected_model = make_pipeline( RandomForestClassifier(criterion= 'entropy', max_depth=None, n_estimators=50) ) 
selected_model.fit(X_train, y_train)
predictions = selected_model.predict(X_test)
print(" ++++ Detailed classification report for the selected model ++++ " )
print("Algorithm: %s " % selected_model)
print("This model was trained and tuned on the development set using CV.")
print("The following results are computed on the separate test set:")
#
predictions = selected_model.predict(X_test)

#
print('\n clasification report:\n', classification_report(y_test, predictions))
print('Cohen Kappa Score:', cohen_kappa_score(y_test, predictions))
print('\n')        
print('done \n')


 ++++ Detailed classification report for the selected model ++++ 
Algorithm: Pipeline(steps=[('randomforestclassifier',
                 RandomForestClassifier(criterion='entropy', n_estimators=50))]) 
This model was trained and tuned on the development set using CV.
The following results are computed on the separate test set:

 clasification report:
               precision    recall  f1-score   support

           3       0.00      0.00      0.00         5
           4       0.57      0.32      0.41        25
           5       0.70      0.67      0.68       291
           6       0.64      0.77      0.70       432
           7       0.74      0.57      0.64       192
           8       0.94      0.43      0.59        35

    accuracy                           0.67       980
   macro avg       0.60      0.46      0.50       980
weighted avg       0.68      0.67      0.67       980

Cohen Kappa Score: 0.49582417793762


done 



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Based on accuracy, Random Forest is a better choice than neural networks. 

Random Forest is the ensemble of decision trees. Each decision tree processes the data and predicts the label. Decision trees in the ensemble are independent, so each can predict the final response. But neural network is made of connected neurons. The neurons cannot operate without other neurons. It process data in each layer and pass forward to the next layers. The last layer of neurons is making decisions.