# Task for Today  

***

## White Wine Quality Prediction  

Given *data about various white wines*, let's try to predict the **quality** of a particular wine, as rated by experts.  
  
We will use a TensorFlow ANN to make our predictions.

# Getting Started

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split

import tensorflow as tf

In [None]:
# The wine-quality CSV is semicolon-separated rather than comma-separated.
data = pd.read_csv(
    '../input/white-wine-quality/winequality-white.csv',
    sep=';',
)

In [None]:
# Show the loaded DataFrame as the cell output for a quick visual check.
data

# Visualization

In [None]:
# Pairwise correlations between every column, target included.
corr = data.corr()

# Pin the colour scale to [-1, 1] so it spans the full range a correlation
# coefficient can take.
_, heatmap_ax = plt.subplots(figsize=(12, 10))
sns.heatmap(corr, annot=True, vmin=-1.0, vmax=1.0, ax=heatmap_ax)
plt.show()

# Preprocessing

In [None]:
# Print the column names, non-null counts and dtypes of the dataset.
data.info()

In [None]:
# Sum the per-column missing-value counts into one figure for the whole table.
nulls_per_column = data.isna().sum()
print("Total null values:", nulls_per_column.sum())

In [None]:
# Distinct quality scores present in the target column, in order of appearance.
pd.unique(data['quality'])

In [None]:
# Map the raw quality scores onto consecutive integer class ids (0, 1, 2, ...),
# which is the label format the sparse cross-entropy loss used later expects.
encoder = LabelEncoder().fit(data['quality'])
data['quality'] = encoder.transform(data['quality'])

# Show the mapping: class id -> original quality score.
dict(enumerate(encoder.classes_))

In [None]:
# Separate the features (every column except the target) from the target.
X = data.drop(columns='quality')
y = data['quality']

In [None]:
# Standardize every feature column, keeping the original column names.
scaler = StandardScaler().fit(X)
X = pd.DataFrame(scaler.transform(X), columns=X.columns)

In [None]:
# Show the standardized feature table as the cell output.
X

In [None]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, random_state=34)

# X was standardized earlier using statistics from the *full* dataset, which
# leaks test-set information into training. Standardization is unaffected by a
# prior affine rescaling, so re-fitting on the training rows alone gives (up to
# floating-point rounding) the features a train-only scaler would have produced
# from the raw data.
train_scaler = StandardScaler().fit(X_train)
X_train = pd.DataFrame(
    train_scaler.transform(X_train),
    index=X_train.index,
    columns=X_train.columns,
)
X_test = pd.DataFrame(
    train_scaler.transform(X_test),
    index=X_test.index,
    columns=X_test.columns,
)

# Modeling and Training

In [None]:
# Width of the feature table; this sets the size of the network's input layer.
num_features = len(X.columns)
print(num_features)

In [None]:
# Number of distinct target classes; this sets the size of the softmax output layer.
num_classes = y.nunique()
print(num_classes)

In [None]:
# Feed-forward classifier: two 64-unit ReLU hidden layers followed by a softmax
# output with one unit per quality class.
inputs = tf.keras.Input(shape=(num_features,))
x = inputs
for hidden_units in (64, 64):
    x = tf.keras.layers.Dense(hidden_units, activation='relu')(x)
outputs = tf.keras.layers.Dense(num_classes, activation='softmax')(x)

model = tf.keras.Model(inputs=inputs, outputs=outputs)

# The sparse variant of categorical cross-entropy takes integer class ids
# directly, so the labels do not need to be one-hot encoded.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

batch_size, epochs = 32, 100

# Part of the training data is set aside for validation (validation_split);
# ReduceLROnPlateau runs with its default settings.
fit_options = dict(
    validation_split=0.2,
    batch_size=batch_size,
    epochs=epochs,
    callbacks=[tf.keras.callbacks.ReduceLROnPlateau()],
)
history = model.fit(X_train, y_train, **fit_options)

# Results

In [None]:
# Plot training vs. validation loss per epoch.
# history.history is wide-form input (one column per metric), so Plotly Express
# names the x axis 'index' and the y axis 'value'. The labels mapping has to use
# those keys -- 'x'/'y' match nothing and would leave the default axis titles.
fig = px.line(
    history.history,
    y=['loss', 'val_loss'],
    labels={'index': "Epoch", 'value': "Loss"},
    title="Loss Over Time"
)

fig.show()

In [None]:
# Score the trained model on the held-out test split; the model was compiled
# with metrics=['accuracy'], so accuracy is reported alongside the loss.
model.evaluate(X_test, y_test)

# Re-structure the task:

Let's try quantile-splitting the target into two categories: high-quality wines and low-quality wines.

In [None]:
# Class frequencies of the (label-encoded) quality column.
quality_counts = data['quality'].value_counts()
quality_counts

In [None]:
# Preview the two-way quantile split (0 = lower bin, 1 = upper bin) and how
# many wines land in each bin.
quality_bins = pd.qcut(data['quality'], q=2, labels=[0, 1])
quality_bins.value_counts()

In [None]:
# Rebuild the features from the unscaled table and replace the target with a
# binary label from the two-way quantile split of quality (0 = lower, 1 = upper).
X = data.drop(columns='quality')
y = pd.qcut(data['quality'], q=2, labels=[0, 1])

In [None]:
# Standardize every feature column, keeping the original column names.
scaler = StandardScaler().fit(X)
X = pd.DataFrame(scaler.transform(X), columns=X.columns)

In [None]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, random_state=34)

# X was standardized earlier using statistics from the *full* dataset, which
# leaks test-set information into training. Standardization is unaffected by a
# prior affine rescaling, so re-fitting on the training rows alone gives (up to
# floating-point rounding) the features a train-only scaler would have produced
# from the raw data.
train_scaler = StandardScaler().fit(X_train)
X_train = pd.DataFrame(
    train_scaler.transform(X_train),
    index=X_train.index,
    columns=X_train.columns,
)
X_test = pd.DataFrame(
    train_scaler.transform(X_test),
    index=X_test.index,
    columns=X_test.columns,
)

In [None]:
# Binary classifier: two 64-unit ReLU hidden layers followed by a single
# sigmoid unit for the two-class (0/1) target.
inputs = tf.keras.Input(shape=(num_features,))
x = inputs
for hidden_units in (64, 64):
    x = tf.keras.layers.Dense(hidden_units, activation='relu')(x)
outputs = tf.keras.layers.Dense(1, activation='sigmoid')(x)

model = tf.keras.Model(inputs=inputs, outputs=outputs)

# Binary cross-entropy pairs with the single sigmoid output above.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

batch_size, epochs = 32, 100

# Part of the training data is set aside for validation (validation_split);
# ReduceLROnPlateau runs with its default settings.
fit_options = dict(
    validation_split=0.2,
    batch_size=batch_size,
    epochs=epochs,
    callbacks=[tf.keras.callbacks.ReduceLROnPlateau()],
)
history = model.fit(X_train, y_train, **fit_options)

In [None]:
# Plot training vs. validation loss per epoch.
# history.history is wide-form input (one column per metric), so Plotly Express
# names the x axis 'index' and the y axis 'value'. The labels mapping has to use
# those keys -- 'x'/'y' match nothing and would leave the default axis titles.
fig = px.line(
    history.history,
    y=['loss', 'val_loss'],
    labels={'index': "Epoch", 'value': "Loss"},
    title="Loss Over Time"
)

fig.show()

In [None]:
# Score the trained model on the held-out test split; the model was compiled
# with metrics=['accuracy'], so accuracy is reported alongside the loss.
model.evaluate(X_test, y_test)

# Data Every Day  

This notebook is featured on Data Every Day, a YouTube series where I train models on a new dataset each day.  

***

Check it out!  
https://youtu.be/gybAf1Jv2M8