<a href="https://colab.research.google.com/github/shoboske/wine-quality-deep-learning/blob/main/main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip3 install ucimlrepo scikit-learn pandas numpy matplotlib tensorflow

In [4]:
from ucimlrepo import fetch_ucirepo
import numpy as np
from sklearn.model_selection import train_test_split

# fetch dataset
wine_quality = fetch_ucirepo(id=186)

# data (as pandas dataframes)
X = wine_quality.data.features
y = wine_quality.data.targets

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.preprocessing import StandardScaler
import tensorflow as tf

scaler = StandardScaler()

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42)

# Feature Extraction using SelectKBest and ANOVA F-value
selector = SelectKBest(f_classif, k=11) # Select top 5 features
X_train_selected = selector.fit_transform(X_train, y_train.values.ravel())
X_test_selected = selector.transform(X_test)

scaler.fit(X_train_selected)
X_train_selected = scaler.transform(X_train_selected)
X_test_selected = scaler.transform(X_test_selected)

# Create a simple neural network classifier
mlp = MLPClassifier(hidden_layer_sizes=(100,200), max_iter=1000, alpha=1e-4, activation='relu',
                    solver='adam', batch_size=50, early_stopping=True, validation_fraction=0.2,
                    random_state=None,learning_rate_init=.1)
mlp_tanh = MLPClassifier(hidden_layer_sizes=(500,500,500,500,500), max_iter=1000, alpha=1e-4, activation='relu',
                    solver='adam', batch_size=50, early_stopping=True, validation_fraction=0.2,
                    random_state=None,learning_rate_init=.1)

model_keras = tf.keras.models.Sequential([
  tf.keras.layers.Flatten(input_shape=X_train_selected.shape[1:]),
  tf.keras.layers.Dense(100, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(200, activation='relu'),
  tf.keras.layers.Dropout(0.3),
  tf.keras.layers.Dense(200, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(11),
  tf.keras.layers.Softmax()
])

loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model_keras.compile(optimizer='adam', loss=loss_fn, metrics=['accuracy'])

# Train the model
mlp.fit(X_train_selected, y_train.values.ravel())
mlp_tanh.fit(X_train_selected, y_train.values.ravel())
model_keras.fit(X_train_selected, y_train.values.ravel(), epochs=50)

In [28]:
# Evaluate the model (you can use various metrics like accuracy, precision, recall, etc.)
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score
import pandas as pd

# Make predictions on the test set
y_pred = mlp.predict(X_test_selected)

accuracy = accuracy_score(y_test, y_pred)
recall = recall_score(y_test, y_pred, average='weighted')
precision = precision_score(y_test, y_pred, average='weighted', zero_division=0.0)
f1 = f1_score(y_test, y_pred, average='weighted')

y_pred_tanh = mlp_tanh.predict(X_test_selected)

accuracy_tanh = accuracy_score(y_test, y_pred_tanh)
recall_tanh = recall_score(y_test, y_pred_tanh, average='weighted')
precision_tanh = precision_score(y_test, y_pred_tanh, average='weighted', zero_division=0.0)
f1_tanh = f1_score(y_test, y_pred_tanh, average='weighted')

y_pred_keras = model_keras.predict(X_test_selected)
# Convert predictions to discrete class labels by selecting the index of the highest probability
y_pred_keras_classes = np.argmax(y_pred_keras, axis=1)  # Get class labels

accuracy_keras = accuracy_score(y_test, y_pred_keras_classes)
recall_keras = recall_score(y_test, y_pred_keras_classes, average='weighted')
precision_keras = precision_score(y_test, y_pred_keras_classes, average='weighted', zero_division=0.0)
f1_keras = f1_score(y_test, y_pred_keras_classes, average='weighted')

data = [
    ["MLP (100,200)", accuracy, recall, precision, f1],
    ["MLP (500,500,500,500,500)", accuracy_tanh, recall_tanh, precision_tanh, f1_tanh],
    ["Keras (100,200,200)", accuracy_keras, recall_keras, precision_keras, f1_keras]
]


headers = ["Model", "Accuracy", "Recall", "Precision", "F1 Score"]

print(pd.DataFrame(data, None, headers))

[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
                       Model  Accuracy    Recall  Precision  F1 Score
0              MLP (100,200)  0.525700  0.525700   0.402124  0.455455
1  MLP (500,500,500,500,500)  0.448138  0.448138   0.200828  0.277360
2        Keras (100,200,200)  0.567251  0.567251   0.552487  0.554665
