In [1]:
# Import Lib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib as mpl
sns.set_style("whitegrid")

In [2]:
wine = pd.read_csv('winequality-red.csv')

In [3]:
wine.shape

(1599, 12)

In [4]:
wine.head(10)

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
5,7.4,0.66,0.0,1.8,0.075,13.0,40.0,0.9978,3.51,0.56,9.4,5
6,7.9,0.6,0.06,1.6,0.069,15.0,59.0,0.9964,3.3,0.46,9.4,5
7,7.3,0.65,0.0,1.2,0.065,15.0,21.0,0.9946,3.39,0.47,10.0,7
8,7.8,0.58,0.02,2.0,0.073,9.0,18.0,0.9968,3.36,0.57,9.5,7
9,7.5,0.5,0.36,6.1,0.071,17.0,102.0,0.9978,3.35,0.8,10.5,5


In [5]:
wine['quality'] = np.where(wine['quality']>5,"Good","Bad")


In [6]:
wine['quality'].head()

0     Bad
1     Bad
2     Bad
3    Good
4     Bad
Name: quality, dtype: object

In [7]:
wine.loc[(wine['quality']=='Bad'),'quality'] = 0.0
wine.loc[(wine['quality']=='Good'),'quality'] = 1.0
wine['quality'].head()

0    0
1    0
2    0
3    1
4    0
Name: quality, dtype: int64

In [8]:
wine.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
fixed acidity,1599.0,8.319637,1.741096,4.6,7.1,7.9,9.2,15.9
volatile acidity,1599.0,0.527821,0.17906,0.12,0.39,0.52,0.64,1.58
citric acid,1599.0,0.270976,0.194801,0.0,0.09,0.26,0.42,1.0
residual sugar,1599.0,2.538806,1.409928,0.9,1.9,2.2,2.6,15.5
chlorides,1599.0,0.087467,0.047065,0.012,0.07,0.079,0.09,0.611
free sulfur dioxide,1599.0,15.874922,10.460157,1.0,7.0,14.0,21.0,72.0
total sulfur dioxide,1599.0,46.467792,32.895324,6.0,22.0,38.0,62.0,289.0
density,1599.0,0.996747,0.001887,0.99007,0.9956,0.99675,0.997835,1.00369
pH,1599.0,3.311113,0.154386,2.74,3.21,3.31,3.4,4.01
sulphates,1599.0,0.658149,0.169507,0.33,0.55,0.62,0.73,2.0


In [9]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report,confusion_matrix,mean_squared_error,r2_score,accuracy_score
import sklearn.metrics as metric
from sklearn.linear_model import LogisticRegression

In [10]:
df_pre = wine.copy()

In [11]:
X = df_pre.drop('quality',axis=1)
y = df_pre['quality']

In [12]:
X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.25,random_state=42)

In [13]:
X_train.shape,X_test.shape

((1199, 11), (400, 11))

# Standardization

In [14]:
scaler = StandardScaler()
scaler.fit(X_train)
StandardScaler(copy=True, with_mean=True, with_std=True)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# MLP

In [26]:
mlp = MLPClassifier(hidden_layer_sizes=(11,16,10),max_iter=1000,activation='logistic')
mlp.fit(X_train,y_train)

MLPClassifier(activation='logistic', alpha=0.0001, batch_size='auto',
       beta_1=0.9, beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(11, 16, 10), learning_rate='constant',
       learning_rate_init=0.001, max_iter=1000, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=None, shuffle=True, solver='adam', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False)

In [27]:
predictions = mlp.predict(X_test)

In [28]:
accuracy = accuracy_score(y_test,predictions)
err = mean_squared_error(y_test,predictions)
print("Accuracy: %.4f%%" % (accuracy * 100.0))
print("Mean Square Error: %.4f%%" % (err * 100.0))

Accuracy: 73.7500%
Mean Square Error: 26.2500%


# DecisionTreeClassifier

In [18]:
from sklearn.tree import DecisionTreeClassifier

In [19]:
dtree=DecisionTreeClassifier()
dtree.fit(X_train,y_train)
predictions = dtree.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
err = mean_squared_error(y_test,predictions)
print("Accuracy: %.4f%%" % (accuracy * 100.0))
print("Mean Square Error: %.4f%%" % (err * 100.0))

Accuracy: 72.2500%
Mean Square Error: 27.7500%


# XGBClassifier

In [20]:
from numpy import loadtxt
from xgboost import XGBClassifier
xgb = XGBClassifier()
xgb.fit(X_train, y_train)
predictions = xgb.predict(X_test)

In [21]:
accuracy = accuracy_score(y_test, predictions)
err = mean_squared_error(y_test,predictions)
print("Accuracy: %.4f%%" % (accuracy * 100.0))
print("Mean Square Error: %.4f%%" % (err * 100.0))

Accuracy: 77.2500%
Mean Square Error: 22.7500%
