# Wine Quality classifier


In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm  import SVC
from sklearn import svm
from sklearn.neural_network import MLPClassifier
# from sklearn.linear_model import SGDClassifier
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
%matplotlib inline 

In [None]:
# Load the red-wine quality table from the Kaggle input directory and
# preview its first ten rows.
csv_path = '../input/red-wine-quality-cortez-et-al-2009/winequality-red.csv'
wine = pd.read_csv(csv_path)
wine.head(n=10)


In [None]:
# Print column dtypes and non-null counts, then show the number of
# missing values in each column.
wine.info()
wine.isna().sum()

In [None]:
# Preprocessing: turn the numeric quality score into a two-class label.
# With pd.cut's default right-closed intervals these edges give
# (2, 6.5] -> 'bad' and (6.5, 8] -> 'good'.
group_names = ['bad', 'good']
bins = (2, 6.5, 8)
quality_classes = pd.cut(wine['quality'], bins=bins, labels=group_names)
wine['quality'] = quality_classes
wine['quality'].unique()

In [None]:
# Encoder used in the next cell to turn the 'bad'/'good' quality labels into integer codes.
label_quality = LabelEncoder()

In [None]:
# Overwrite the categorical quality labels with the encoder's integer codes.
# NOTE(review): presumably 'bad' -> 0 and 'good' -> 1 (LabelEncoder sorts its
# classes) -- confirm with label_quality.classes_.
wine['quality'] = label_quality.fit_transform(wine['quality'])

In [None]:
# Preview the first ten rows to check that 'quality' now holds the encoded values.
wine.head(10)

In [None]:
# Plot how many wines fall into each encoded quality class.
# The column is passed by keyword: seaborn 0.11 deprecated positional vectors
# and later releases accept only `data` positionally, so a bare positional
# Series is no longer interpreted as `x`.
sns.countplot(x='quality', data=wine)

In [None]:
# Separate the table into the response variable (y) and the feature
# matrix (X, every column except 'quality').
y = wine['quality']
X = wine.drop(columns='quality')

In [None]:
# Hold out 20% of the rows for testing. The split is stratified on the target
# so that the train and test partitions keep the same class proportions (the
# binned quality label is imbalanced); random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [None]:
# Standardise the features. The scaler learns its mean/variance from the
# training rows only and the same transformation is then applied to both
# partitions, so no test-set statistics leak into training.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [None]:
# Inspect the first eleven rows of the scaled training features.
X_train[:11]

# 1. RANDOM FOREST CLASSIFIER

In [None]:
# Fit a 200-tree random forest on the scaled training features and predict
# the held-out set. random_state pins the forest's internal randomness so the
# reported metrics are reproducible across runs, matching the seeded split.
rfc = RandomForestClassifier(n_estimators=200, random_state=42)
rfc.fit(X_train, y_train)
pred_rfc = rfc.predict(X_test)

In [None]:
# How well does the random forest perform on the held-out set?
# Per-class precision/recall/F1 first, then the confusion matrix.
rfc_report = classification_report(y_test, pred_rfc)
rfc_confusion = confusion_matrix(y_test, pred_rfc)
print(rfc_report)
print(rfc_confusion)

# 2. SVM CLASSIFIER

In [None]:
# Train a support vector classifier with default settings on the scaled
# training features, then predict the held-out set.
clf = SVC().fit(X_train, y_train)
pred_clf = clf.predict(X_test)


In [None]:
# How well does the SVM perform on the held-out set?
# Per-class precision/recall/F1 first, then the confusion matrix.
svm_report = classification_report(y_test, pred_clf)
svm_confusion = confusion_matrix(y_test, pred_clf)
print(svm_report)
print(svm_confusion)

# 3. NEURAL NETWORKS

In [None]:
# Fit a multi-layer perceptron with three hidden layers of 11 units each
# (at most 500 training iterations) and predict the held-out set.
# random_state pins weight initialisation and batch shuffling so the reported
# metrics are reproducible across runs, matching the seeded split.
mlpc = MLPClassifier(hidden_layer_sizes=(11, 11, 11), max_iter=500, random_state=42)
mlpc.fit(X_train, y_train)
pred_mlpc = mlpc.predict(X_test)

In [None]:
# How well does the neural network perform on the held-out set?
# Per-class precision/recall/F1 first, then the confusion matrix.
mlp_report = classification_report(y_test, pred_mlpc)
mlp_confusion = confusion_matrix(y_test, pred_mlpc)
print(mlp_report)
print(mlp_confusion)

In [None]:
# Overall accuracy of the random forest on the held-out set.
# NOTE: despite its name, `cm` holds an accuracy score, not a confusion matrix.
from sklearn.metrics import accuracy_score

cm = accuracy_score(y_true=y_test, y_pred=pred_rfc)
cm

In [None]:
# Try the fitted scaler + random forest on a single unseen wine.
# The scaler was fitted on a DataFrame, so the sample is wrapped in a DataFrame
# carrying the same feature columns; a bare list has no column names and makes
# scikit-learn warn that the input lacks valid feature names.
Xnew = pd.DataFrame(
    [[7.3, 0.58, 0.00, 2.0, 0.065, 15.0, 21.0, 0.9946, 3.36, 0.47, 10.0]],
    columns=X.columns,
)
Xnew = sc.transform(Xnew)
# Encoded class predicted by the random forest; use
# label_quality.inverse_transform(ynew) if the text label is needed.
ynew = rfc.predict(Xnew)
ynew