In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.utils import to_categorical



# Read the red-wine quality CSV (expected in the working directory) into a DataFrame.
df_data = pd.read_csv(filepath_or_buffer="winequality-red.csv")


In [None]:
# Preview the last five rows to sanity-check that the file loaded as expected.
df_data.tail()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
1594,6.2,0.6,0.08,2.0,0.09,32.0,44.0,0.9949,3.45,0.58,10.5,5
1595,5.9,0.55,0.1,2.2,0.062,39.0,51.0,0.99512,3.52,0.76,11.2,6
1596,6.3,0.51,0.13,2.3,0.076,29.0,40.0,0.99574,3.42,0.75,11.0,6
1597,5.9,0.645,0.12,2.0,0.075,32.0,44.0,0.99547,3.57,0.71,10.2,5
1598,6.0,0.31,0.47,3.6,0.067,18.0,42.0,0.99549,3.39,0.66,11.0,6


In [None]:
# (rows, columns) of the loaded frame.
df_data.shape

(1599, 12)

In [None]:
# Column dtypes and non-null counts, to check for missing values.
df_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1599 entries, 0 to 1598
Data columns (total 12 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   fixed acidity         1599 non-null   float64
 1   volatile acidity      1599 non-null   float64
 2   citric acid           1599 non-null   float64
 3   residual sugar        1599 non-null   float64
 4   chlorides             1599 non-null   float64
 5   free sulfur dioxide   1599 non-null   float64
 6   total sulfur dioxide  1599 non-null   float64
 7   density               1599 non-null   float64
 8   pH                    1599 non-null   float64
 9   sulphates             1599 non-null   float64
 10  alcohol               1599 non-null   float64
 11  quality               1599 non-null   int64  
dtypes: float64(11), int64(1)
memory usage: 150.0 KB


In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for each numeric column.
df_data.describe()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
count,1599.0,1599.0,1599.0,1599.0,1599.0,1599.0,1599.0,1599.0,1599.0,1599.0,1599.0,1599.0
mean,8.319637,0.527821,0.270976,2.538806,0.087467,15.874922,46.467792,0.996747,3.311113,0.658149,10.422983,5.636023
std,1.741096,0.17906,0.194801,1.409928,0.047065,10.460157,32.895324,0.001887,0.154386,0.169507,1.065668,0.807569
min,4.6,0.12,0.0,0.9,0.012,1.0,6.0,0.99007,2.74,0.33,8.4,3.0
25%,7.1,0.39,0.09,1.9,0.07,7.0,22.0,0.9956,3.21,0.55,9.5,5.0
50%,7.9,0.52,0.26,2.2,0.079,14.0,38.0,0.99675,3.31,0.62,10.2,6.0
75%,9.2,0.64,0.42,2.6,0.09,21.0,62.0,0.997835,3.4,0.73,11.1,6.0
max,15.9,1.58,1.0,15.5,0.611,72.0,289.0,1.00369,4.01,2.0,14.9,8.0


In [None]:
# Split the frame into a feature matrix and a label vector, both as NumPy arrays.
df_dataX = df_data.drop("quality", axis=1).to_numpy()  # every column except the label
df_dataY = df_data.loc[:, "quality"].to_numpy()  # the "quality" label column



In [None]:
# Min-max scale each feature column; the scaler is fitted on the full feature matrix.
scaler = MinMaxScaler()
scaler.fit(df_dataX)
X_sc = scaler.transform(df_dataX)


In [None]:
# Task switch: True -> classification, False -> regression.
classification = True

# Classification: shift the labels so the smallest class maps to index 0, then one-hot encode.
# Regression: keep the raw labels as a column vector of shape (samples, 1).
y1 = (
    to_categorical(df_dataY - df_dataY.min())
    if classification
    else df_dataY.reshape(-1, 1)
)



In [None]:
# Split the *raw* features first, then fit the scaler on the training rows only.
# Splitting an array that was already scaled with statistics from every row lets
# the test set's min/max leak into preprocessing and makes test scores optimistic.
# The split indices depend only on the row count and random_state, so the same
# rows land in train/test as when splitting the pre-scaled array `X_sc`.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    df_dataX, y1, test_size=0.2, random_state=42
)

# Rebind `scaler` so any later use matches the preprocessing the model was trained with.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train_raw)  # learn per-feature min/max from training data
X_test = scaler.transform(X_test_raw)        # reuse training statistics; values may fall outside [0, 1]

# Reshape input for the recurrent layer: (samples, features) -> (samples, 1, features),
# i.e. each sample becomes a sequence with a single timestep.
X_train = np.expand_dims(X_train, axis=1)
X_test = np.expand_dims(X_test, axis=1)


In [None]:
# Seed the Python, NumPy and TensorFlow RNGs so weight initialisation, dropout masks
# and batch shuffling repeat across Restart & Run All.
# NOTE(review): bit-exact repeatability on GPU may additionally require
# tf.config.experimental.enable_op_determinism() - confirm if that matters here.
tf.keras.utils.set_random_seed(42)

# The output head and the loss depend on the task type selected by `classification`.
output_units = y_train.shape[1] if classification else 1  # one unit per one-hot class, or a single value
activation = 'softmax' if classification else 'linear'
loss = 'categorical_crossentropy' if classification else 'mse'

model = Sequential([
    tf.keras.layers.Input(shape=(X_train.shape[1], X_train.shape[2])),  # Input shape: (timesteps, features)
    LSTM(64, return_sequences=False),  # Emit only the output of the last timestep
    Dropout(0.2),
    Dense(16, activation='relu'),
    Dense(output_units, activation=activation)  # Output layer matches target shape
])

In [None]:
# Configure training: Adam optimizer, the task-specific loss, and a monitoring
# metric that matches the task (accuracy for classification, MAE for regression).
model.compile(
    loss=loss,
    optimizer='adam',
    metrics=['accuracy' if classification else 'mae'],
)

# Train the model, holding out 20% of the training data for validation.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=50,
    validation_split=0.2,
)


Epoch 1/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 14ms/step - accuracy: 0.2921 - loss: 1.7617 - val_accuracy: 0.4023 - val_loss: 1.6093
Epoch 2/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4540 - loss: 1.5502 - val_accuracy: 0.4023 - val_loss: 1.3200
Epoch 3/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4254 - loss: 1.3241 - val_accuracy: 0.4023 - val_loss: 1.2029
Epoch 4/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4154 - loss: 1.2792 - val_accuracy: 0.4023 - val_loss: 1.1691
Epoch 5/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4333 - loss: 1.2100 - val_accuracy: 0.4023 - val_loss: 1.1516
Epoch 6/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4566 - loss: 1.1570 - val_accuracy: 0.4062 - val_loss: 1.1377
Epoch 7/50
[1m32/32[0m [32m━━━━━━━━━

In [None]:
# Score the trained model on the held-out test split (loss followed by the compiled metric).
results = model.evaluate(x=X_test, y=y_test)

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5721 - loss: 0.9508 


In [None]:
# Evaluate on the test split and report the scores.
# Unpack into dedicated names: assigning the result to `loss` would overwrite the
# loss-function name that the compile cell passes to `model.compile`, so re-running
# that cell afterwards would receive a float instead of a loss identifier.
test_loss, test_metric = model.evaluate(X_test, y_test)
print(f"Test Loss: {test_loss}, Test Metric: {test_metric}")

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5721 - loss: 0.9508 
Test Loss: 0.9594404101371765, Test Metric: 0.559374988079071
