In [13]:
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, mean_squared_error
import matplotlib.pyplot as plt
from neupy import algorithms, environment
%matplotlib inline

In [14]:
environment.reproducible()

In [15]:
df = pd.read_csv("winequality-white.csv")

In [16]:
df.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8,6
1,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,9.5,6
2,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,10.1,6
3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6
4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6


In [17]:
df['quality'].value_counts()

6    2198
5    1457
7     880
8     175
4     163
3      20
9       5
Name: quality, dtype: int64

In [18]:
data = df.drop('quality',axis=1)
target = df['quality']

In [19]:
data_scaler = preprocessing.MinMaxScaler((-20,20))

In [20]:
data = data_scaler.fit_transform(data)

In [22]:
pd.DataFrame(data).describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
count,4898.0,4898.0,4898.0,4898.0,4898.0,4898.0,4898.0,4898.0,4898.0,4898.0,4898.0
mean,-8.250817,-12.225838,-11.947193,-16.446985,-15.635329,-15.357758,-7.994371,-14.665605,-2.972122,-7.448982,-3.778922
std,3.245647,3.952727,2.91614,3.111692,2.593231,2.370333,3.944136,2.306464,5.490931,5.308178,7.939488
min,-20.0,-20.0,-20.0,-20.0,-20.0,-20.0,-20.0,-20.0,-20.0,-20.0,-20.0
25%,-10.384615,-14.901961,-13.493976,-19.325153,-16.795252,-17.073171,-10.812065,-16.443031,-6.545455,-11.162791,-10.322581
50%,-8.461538,-12.941176,-12.289157,-17.177914,-15.964392,-15.54007,-8.399072,-14.887218,-3.272727,-8.372093,-4.516129
75%,-6.538462,-10.588235,-10.60241,-14.294479,-15.133531,-13.867596,-5.336427,-13.067284,0.363636,-4.651163,1.935484
max,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0


In [23]:
x_train, x_test, y_train, y_test = train_test_split( data, target, train_size=0.75, random_state = 123)

In [96]:
grnnet = algorithms.GRNN(std=1, verbose=True, step = 0.001)


Main information

[ALGORITHM] GRNN

[OPTION] verbose = True
[OPTION] epoch_end_signal = None
[OPTION] show_epoch = 1
[OPTION] shuffle_data = False
[OPTION] step = 0.001
[OPTION] train_end_signal = None
[OPTION] std = 1



In [97]:
grnnet.train(x_train, y_train)

In [98]:
y_predicted = grnnet.predict(x_test).astype(int)

In [99]:
y_predicted

array([[6],
       [6],
       [6],
       ..., 
       [5],
       [7],
       [4]])

In [100]:
print(confusion_matrix(y_test,y_predicted))

[[  0   0   3   1   0   0   0]
 [  0  12  20   4   2   0   0]
 [  0  25 290  46   1   0   0]
 [  0   9 266 281  15   3   0]
 [  0   3  31 122  40   2   0]
 [  0   0   1  19  26   0   0]
 [  0   0   1   1   1   0   0]]


In [101]:
print(classification_report(y_test,y_predicted))

             precision    recall  f1-score   support

          3       0.00      0.00      0.00         4
          4       0.24      0.32      0.28        38
          5       0.47      0.80      0.60       362
          6       0.59      0.49      0.54       574
          7       0.47      0.20      0.28       198
          8       0.00      0.00      0.00        46
          9       0.00      0.00      0.00         3

avg / total       0.50      0.51      0.48      1225



In [102]:
score = accuracy_score(y_test, y_predicted)
print("Validation accuracy: {:.2%}".format(score))

Validation accuracy: 50.86%


In [103]:
mse = mean_squared_error(y_test, pd.DataFrame(y_predicted))
print("MSE: {:.2%}".format(mse))

MSE: 72.98%


In [104]:
y_GRNN2 = pd.DataFrame(y_predicted)
y_GRNN2.to_excel('y_GRNN2.xlsx')