In [104]:
import ast 
import numpy as np
import pandas as pd

from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import BernoulliNB
from sklearn.multiclass import OneVsOneClassifier
from sklearn.neural_network import MLPClassifier

from sklearn.metrics import recall_score, accuracy_score, precision_score, f1_score, confusion_matrix

In [46]:
%ls data/

[0m[01;32mred_test.csv[0m*  [01;32mred_train.csv[0m*  [01;32mwhite_test.csv[0m*  [01;32mwhite_train.csv[0m*  [34;42mwine[0m/


In [105]:
rtrain = pd.read_csv('data/red_train.csv')
wtrain = pd.read_csv('data/white_train.csv')

rtest = pd.read_csv('data/red_test.csv')
wtest = pd.read_csv('data/white_test.csv')

wx_test = wtest.iloc[:,:-2].to_numpy()
wy_test = wtest.iloc[:,-1].to_numpy()
rx_test = rtest.iloc[:,:-2].to_numpy()
ry_test = rtest.iloc[:,-1].to_numpy()

wx = wtrain.iloc[:,:-2].to_numpy()
wy = wtrain.iloc[:,-1].to_numpy()
rx = rtrain.iloc[:,:-2].to_numpy()
ry = rtrain.iloc[:,-1].to_numpy()

# Multi-layer Perceptron Classifier

### Red wine quality

In [106]:
clf = MLPClassifier()
clf.fit(rx, ry)
ry_pred = clf.predict(rx_test)
print('accuracy_score:   \t', accuracy_score(ry_pred, ry_test),
      '\nprecision_score:\t', precision_score(ry_pred, ry_test, average='weighted'),
      '\nrecall_score:   \t', recall_score(ry_pred, ry_test, average='weighted'),
      '\nf1_score:       \t', f1_score(ry_pred, ry_test, average='weighted'))
print('confusion_matrix:\n',confusion_matrix(ry_pred, ry_test))

accuracy_score:   	 0.5662878787878788 
precision_score:	 0.6049774685290771 
recall_score:   	 0.5662878787878788 
f1_score:       	 0.5824525900088132
confusion_matrix:
 [[  0   1   0   0   0   0]
 [  1   0   4   0   0   0]
 [  2  19 160  49   7   0]
 [  0   5  78 122  33   1]
 [  0   0   2  23  17   4]
 [  0   0   0   0   0   0]]


  _warn_prf(average, modifier, msg_start, len(result))


### White wine quality

In [107]:
clf = MLPClassifier()
clf.fit(wx, wy)
wy_pred = clf.predict(wx_test)
print('accuracy_score:   \t', accuracy_score(wy_pred, wy_test),
      '\nprecision_score:\t', precision_score(wy_pred, wy_test, average='weighted'),
      '\nrecall_score:   \t', recall_score(wy_pred, wy_test, average='weighted'),
      '\nf1_score:       \t', f1_score(wy_pred, wy_test, average='weighted'))
print('confusion_matrix:\n',confusion_matrix(wy_pred, wy_test))

accuracy_score:   	 0.5528756957328386 
precision_score:	 0.6015721379070603 
recall_score:   	 0.5528756957328386 
f1_score:       	 0.5732238175676083
confusion_matrix:
 [[  0   0   0   0   0   0   0]
 [  0   4   0   1   0   0   0]
 [  2  30 280 156   6   0   0]
 [  3  16 178 473 163  33   1]
 [  3   2   8  90 137  28   0]
 [  0   0   2   1   0   0   0]
 [  0   0   0   0   0   0   0]]


  _warn_prf(average, modifier, msg_start, len(result))


# Support Vector Machine

### Red wine quality

In [90]:
clf = SVC(kernel='rbf')
clf.fit(rx, ry)
ry_pred = clf.predict(rx_test)
print('accuracy_score:   \t', accuracy_score(ry_pred, ry_test),
      '\nprecision_score:\t', precision_score(ry_pred, ry_test, average='weighted'),
      '\nrecall_score:   \t', recall_score(ry_pred, ry_test, average='weighted'),
      '\nf1_score:       \t', f1_score(ry_pred, ry_test, average='weighted'))
print('confusion_matrix:\n',confusion_matrix(ry_pred, ry_test))

accuracy_score:   	 0.6363636363636364 
precision_score:	 0.7106798580298775 
recall_score:   	 0.6363636363636364 
f1_score:       	 0.6672560477747453
confusion_matrix:
 [[  0   0   0   0   0   0]
 [  0   0   0   0   0   0]
 [  3  19 186  52   9   0]
 [  0   6  56 136  34   2]
 [  0   0   2   6  14   3]
 [  0   0   0   0   0   0]]


  _warn_prf(average, modifier, msg_start, len(result))


### White wine quality

In [92]:
clf = SVC(kernel='rbf')
clf.fit(wx, wy)
wy_pred = clf.predict(wx_test)
print('accuracy_score:   \t', accuracy_score(wy_pred, wy_test),
      '\nprecision_score:\t', precision_score(wy_pred, wy_test, average='weighted'),
      '\nrecall_score:   \t', recall_score(wy_pred, wy_test, average='weighted'),
      '\nf1_score:       \t', f1_score(wy_pred, wy_test, average='weighted'))
print('confusion_matrix:\n',confusion_matrix(wy_pred, wy_test))

accuracy_score:   	 0.5609152752009895 
precision_score:	 0.7011262094712949 
recall_score:   	 0.5609152752009895 
f1_score:       	 0.6052349070645138
confusion_matrix:
 [[  0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0]
 [  1  32 270 128   3   0   0]
 [  7  19 196 572 238  55   1]
 [  0   1   2  21  65   6   0]
 [  0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0]]


  _warn_prf(average, modifier, msg_start, len(result))


# Gaussian Naive Bayes

### Red wine quality

In [66]:
clf = GaussianNB()
clf.fit(rx, ry)
ry_pred = clf.predict(rx_test)
print('accuracy_score:   \t', accuracy_score(ry_pred, ry_test),
      '\nprecision_score:\t', precision_score(ry_pred, ry_test, average='weighted'),
      '\nrecall_score:   \t', recall_score(ry_pred, ry_test, average='weighted'),
      '\nf1_score:       \t', f1_score(ry_pred, ry_test, average='weighted'))
print('confusion_matrix:\n',confusion_matrix(ry_pred, ry_test))

accuracy_score:   	 0.5132575757575758 
precision_score:	 0.49856987357092986 
recall_score:   	 0.5132575757575758 
f1_score:       	 0.5033352245167579
confusion_matrix:
 [[  0   0  11   6   0   0]
 [  1   5  12   2   3   0]
 [  2  14 139  48   7   0]
 [  0   6  71 102  21   1]
 [  0   0  11  36  25   4]
 [  0   0   0   0   1   0]]


### White wine quality

In [67]:
clf = GaussianNB()
clf.fit(wx, wy)
wy_pred = clf.predict(wx_test)
print('accuracy_score:   \t', accuracy_score(wy_pred, wy_test),
      '\nprecision_score:\t', precision_score(wy_pred, wy_test, average='weighted'),
      '\nrecall_score:   \t', recall_score(wy_pred, wy_test, average='weighted'),
      '\nf1_score:       \t', f1_score(wy_pred, wy_test, average='weighted'))
print('confusion_matrix:\n',confusion_matrix(wy_pred, wy_test))

accuracy_score:   	 0.46629560915275203 
precision_score:	 0.4824817382539501 
recall_score:   	 0.46629560915275203 
f1_score:       	 0.47116172611000834
confusion_matrix:
 [[  0   0   0   0   0   0   0]
 [  1  11  29  33   1   1   0]
 [  4  16 238 156  27  12   0]
 [  1  19 164 346 113  21   1]
 [  2   6  37 182 159  27   0]
 [  0   0   0   3   5   0   0]
 [  0   0   0   1   1   0   0]]


  _warn_prf(average, modifier, msg_start, len(result))


# Bernoulli Naive Bayes

### Red wine quality

In [72]:
clf = BernoulliNB()
clf.fit(rx, ry)
ry_pred = clf.predict(rx_test)
print('accuracy_score:   \t', accuracy_score(ry_pred, ry_test),
      '\nprecision_score:\t', precision_score(ry_pred, ry_test, average='weighted'),
      '\nrecall_score:   \t', recall_score(ry_pred, ry_test, average='weighted'),
      '\nf1_score:       \t', f1_score(ry_pred, ry_test, average='weighted'))
print('confusion_matrix:\n',confusion_matrix(ry_pred, ry_test))

accuracy_score:   	 0.5132575757575758 
precision_score:	 0.5520670974733143 
recall_score:   	 0.5132575757575758 
f1_score:       	 0.5276999683664018
confusion_matrix:
 [[  0   0   0   0   0   0]
 [  0   0   0   0   0   0]
 [  3  21 157  69  11   0]
 [  0   4  68  85  17   0]
 [  0   0  19  40  29   5]
 [  0   0   0   0   0   0]]


  _warn_prf(average, modifier, msg_start, len(result))


### White wine quality

In [100]:
clf = BernoulliNB()
clf.fit(wx, wy)
wy_pred = clf.predict(wx_test)
print('accuracy_score:   \t', accuracy_score(wy_pred, wy_test),
      '\nprecision_score:\t', precision_score(wy_pred, wy_test, average='weighted'),
      '\nrecall_score:   \t', recall_score(wy_pred, wy_test, average='weighted'),
      '\nf1_score:       \t', f1_score(wy_pred, wy_test, average='weighted'))
print('confusion_matrix:\n',confusion_matrix(wy_pred, wy_test))

accuracy_score:   	 0.4489795918367347 
precision_score:	 0.4827024027072978 
recall_score:   	 0.4489795918367347 
f1_score:       	 0.4640206801995971
confusion_matrix:
 [[  0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0]
 [  2  22 239 186  45  12   0]
 [  3  20 173 357 131  28   1]
 [  3  10  56 178 130  21   0]
 [  0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0]]


  _warn_prf(average, modifier, msg_start, len(result))
