<h1>Red wine quality classifier using a deep feedforward network</h1>

Data provided by the UCI:
     https://archive.ics.uci.edu/ml/datasets/wine+quality 

<h3>Imports</h3>

Libraries import

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sb
from sklearn.preprocessing import StandardScaler, LabelEncoder

Data import

In [None]:
# Read the red-wine CSV into a DataFrame and preview its first five rows.
wine_csv_path = '../input/red-wine-quality-cortez-et-al-2009/winequality-red.csv'
wine_data = pd.read_csv(wine_csv_path)
display(wine_data.head(5))

In [None]:
# Summary statistics for every column, followed by the number of wines per
# quality score (the cell's displayed value).
summary_stats = wine_data.describe()
display(summary_stats)
wine_data['quality'].value_counts()

<h3>Handling missing data</h3>

In [None]:
# Count the missing entries in each column.
wine_data.isna().sum()

<h2>Exploratory Data Analysis</h2>

<h4>Correlation matrix</h4>

In [None]:
# Annotated heatmap of the pairwise correlations between all columns.
correlations = wine_data.corr()
fig, ax = plt.subplots(figsize=(10, 7))
sb.heatmap(correlations, ax=ax, annot=True)
plt.show()

<h4>Scatter plot: Fixed acidity - Density</h4>

In [None]:
# Fixed acidity against density, with each point coloured by its quality score.
plt.figure(figsize=(10, 7))
sb.scatterplot(
    data=wine_data,
    x="fixed acidity",
    y="density",
    hue="quality",
    palette="Set1",
    alpha=0.7,
)

<h2>Data preprocessing</h2>

<h3>Feature reduction</h3>

<h4>Using a random forest to analyse feature importance</h4>

In [None]:
from sklearn.ensemble import RandomForestRegressor

# Feature matrix: every column of wine_data except the target.
x = wine_data.drop(columns=['quality'])
# Dummy-encoded copy of the full frame (target column included).
wd = pd.get_dummies(wine_data)

# Fit a depth-limited random forest on the quality score and show the
# per-feature importances it learned.
model = RandomForestRegressor(max_depth=12, random_state=1)
model.fit(x, wine_data['quality'])
display(model.feature_importances_)

In [None]:
# Horizontal bar chart of the forest's feature importances, least important
# first.
importances = model.feature_importances_
# Take the labels from the columns the forest was fitted on (wine_data without
# the target). A frame that still contains 'quality' has one label more than
# there are importances and only lines up while the target is the last column.
features = wine_data.drop(columns=['quality']).columns
assert len(features) == len(importances), "feature labels and importances differ in length"
indices = np.argsort(importances)
plt.title('Feature Importances')
plt.barh(range(len(indices)), importances[indices], color='b', align='center')
plt.yticks(range(len(indices)), [features[i] for i in indices])
plt.xlabel('Relative Importance')
plt.show()

<h4>Removing the least important features according to the random forest results</h4>

In [None]:
# Rebuild the feature matrix from wine_data instead of deleting columns from x
# in place: the result is the same on a fresh run, but the cell can now be
# re-run without raising KeyError on columns that are already gone.
x = wine_data.drop(
    columns=['quality', 'fixed acidity', 'free sulfur dioxide', 'citric acid']
)

<h4>Encoding the quality</h4>

In [None]:
# Encode the last column of wine_data (the quality score) as integer class ids
# and store them as a single-column DataFrame.
le = LabelEncoder()
encoded_quality = le.fit_transform(wine_data.iloc[:, -1])
y = pd.DataFrame(encoded_quality.reshape(-1, 1))

### Data oversampling using SMOTE

In [None]:
from imblearn.over_sampling import SMOTE

# Oversample every encoded quality class (0-5) up to 1700 samples.
strategy = {label: 1700 for label in range(6)}
# A fixed random_state makes the synthetic samples reproducible between runs.
oversample = SMOTE(sampling_strategy=strategy, random_state=0)
# NOTE(review): resampling happens before the train/test split, so synthetic
# points interpolated from rows that later land in the test set can end up in
# the training set. Consider resampling the training portion only.
x, y = oversample.fit_resample(x, y)

In [None]:
# Show (rows, columns) of the feature matrix after resampling.
x.shape

<h4>Splitting data</h4>

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples as a test set; the fixed seed keeps the split
# reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    random_state=0,
    test_size=0.2,
)

<h4>Transforming quality to categorical data</h4>

In [None]:
# One-hot encode the integer class ids of both splits into 6-column arrays.
y_train_cat, y_test_cat = (
    tf.keras.utils.to_categorical(labels, 6) for labels in (y_train, y_test)
)

<h4>Scaling features</h4>

In [None]:
# Standardise the features. The scaler is fitted on the training split only and
# its mean/variance are reused for the test split; refitting on the test data
# would scale the two splits differently and leak test statistics into the
# evaluation.
sc = StandardScaler()

x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)

<h2>Machine learning modeling</h2>

<h3>Artificial neural network</h3>

In [None]:
# Start from an empty sequential model; layers are appended in the cells below.
ann = tf.keras.models.Sequential()

In [None]:
# Input layer: one value per feature column, so the size must match the number
# of columns in x_train. The shape is written as the tuple (8,); `shape = 8,`
# passes the bare int 8 followed by a trailing argument comma, which only works
# on Keras versions that coerce ints to shapes.
ann.add(tf.keras.layers.Input(shape=(8,)))

In [None]:
# First hidden layer: 16 ReLU units.
ann.add(tf.keras.layers.Dense(16, activation="relu"))

In [None]:
# Second hidden layer: 8 ReLU units.
ann.add(tf.keras.layers.Dense(8, activation="relu"))

In [None]:
# Output layer: one unit per quality class. Each wine belongs to exactly one
# class and the targets are one-hot vectors, so softmax is used to produce a
# probability distribution over the 6 classes. Sigmoid would score each class
# independently, which does not fit the categorical cross-entropy loss.
ann.add(tf.keras.layers.Dense(units=6, activation="softmax"))

In [None]:
# Print the model's layer-by-layer summary.
ann.summary()

In [None]:
# Configure training: Adam optimizer, categorical cross-entropy loss on the
# one-hot targets, and accuracy tracked as a metric.
ann.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Train for 150 epochs in mini-batches of 32, evaluating on the held-out split
# after every epoch; the per-epoch metrics are kept in `history`.
history = ann.fit(
    x_train,
    y_train_cat,
    epochs=150,
    batch_size=32,
    validation_data=(x_test, y_test_cat),
)

In [None]:
# Training vs. validation loss per epoch. The plotted series are the model's
# loss values ('loss' / 'val_loss'), so the legend, title and axis say "loss"
# rather than "MAE", which is not what these curves contain.
plt.plot(history.history['loss'], label='Loss training data')
plt.plot(history.history['val_loss'], label='Loss validation data')
plt.legend()
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('epoch')
plt.show()

In [None]:
# Training vs. validation accuracy per epoch, drawn through an explicit Axes.
acc_fig, acc_ax = plt.subplots()
acc_ax.plot(history.history['accuracy'], label='Accuracy training data')
acc_ax.plot(history.history['val_accuracy'], label='Accuracy validation data')
acc_ax.legend()
acc_ax.set_title('Model accuracy')
acc_ax.set_ylabel('Accuracy')
acc_ax.set_xlabel('epoch')
plt.show()