In [279]:
import tensorflow as tf

from keras.models import Sequential
from keras.layers import Dense, Dropout, Normalization
from keras.callbacks import EarlyStopping, TensorBoard

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import normalize

import pandas as pd
import numpy as np
import os


In [280]:
winequality_red = pd.read_csv(
      "data/winequality-red.csv", 
      names=[
            "Fixed acidity",
            "Volatile acidity",
            "Citric acid",
            "Residual sugar",
            "Chlorides",
            "Free sulfur dioxide",
            "Total sulfur dioxide",
            "Density",
            "pH",
            "Sulphates",
            "Alcohol",
            "Quality",
      ]
)

winequality_red.head()

Unnamed: 0,Fixed acidity,Volatile acidity,Citric acid,Residual sugar,Chlorides,Free sulfur dioxide,Total sulfur dioxide,Density,pH,Sulphates,Alcohol,Quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [281]:
winequality_red.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1599 entries, 0 to 1598
Data columns (total 12 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Fixed acidity         1599 non-null   float64
 1   Volatile acidity      1599 non-null   float64
 2   Citric acid           1599 non-null   float64
 3   Residual sugar        1599 non-null   float64
 4   Chlorides             1599 non-null   float64
 5   Free sulfur dioxide   1599 non-null   float64
 6   Total sulfur dioxide  1599 non-null   float64
 7   Density               1599 non-null   float64
 8   pH                    1599 non-null   float64
 9   Sulphates             1599 non-null   float64
 10  Alcohol               1599 non-null   float64
 11  Quality               1599 non-null   int64  
dtypes: float64(11), int64(1)
memory usage: 150.0 KB


In [282]:
winequality_red["Quality"].value_counts()

Quality
5    681
6    638
7    199
4     53
8     18
3     10
Name: count, dtype: int64

In [284]:
winequality_red_quality_conditions = [
    (winequality_red['Quality'] < 6),
    (winequality_red['Quality'] > 6),
    (winequality_red['Quality'] == 6),
]

winequality_red_quality_values = [0, 1, 2] # ['Bad', 'Great', 'Good']

winequality_red["Quality"] = np.select(winequality_red_quality_conditions, winequality_red_quality_values)


In [285]:
winequality_red_array_features = np.array(winequality_red.drop('Quality', axis=1))

winequality_red_array_features, winequality_red_array_features.shape

(array([[ 7.4  ,  0.7  ,  0.   , ...,  3.51 ,  0.56 ,  9.4  ],
        [ 7.8  ,  0.88 ,  0.   , ...,  3.2  ,  0.68 ,  9.8  ],
        [ 7.8  ,  0.76 ,  0.04 , ...,  3.26 ,  0.65 ,  9.8  ],
        ...,
        [ 6.3  ,  0.51 ,  0.13 , ...,  3.42 ,  0.75 , 11.   ],
        [ 5.9  ,  0.645,  0.12 , ...,  3.57 ,  0.71 , 10.2  ],
        [ 6.   ,  0.31 ,  0.47 , ...,  3.39 ,  0.66 , 11.   ]]),
 (1599, 11))

In [286]:
winequality_red_array_target = np.array(winequality_red['Quality'])

winequality_red_array_target, winequality_red_array_target.shape

(array([0, 0, 0, ..., 2, 0, 2]), (1599,))

In [287]:
winequality_red_array_target_train, winequality_red_array_target_test, winequality_red_array_features_train, winequality_red_array_features_test = train_test_split(winequality_red_array_features, winequality_red_array_target, test_size=0.2)

winequality_red_array_target_train.shape,winequality_red_array_target_test.shape, winequality_red_array_features_train.shape, winequality_red_array_features_test.shape

((1279, 11), (320, 11), (1279,), (320,))

In [288]:
winequality_red_array_features_test

array([2, 0, 0, 0, 2, 0, 0, 0, 0, 2, 2, 2, 0, 0, 0, 0, 2, 1, 0, 0, 0, 2,
       0, 2, 0, 2, 2, 0, 2, 2, 2, 2, 0, 2, 2, 1, 2, 0, 2, 2, 2, 0, 0, 0,
       0, 0, 0, 0, 2, 2, 2, 0, 0, 0, 1, 0, 2, 0, 0, 2, 2, 2, 0, 2, 0, 2,
       2, 0, 2, 2, 2, 2, 2, 0, 1, 0, 0, 2, 0, 1, 2, 2, 0, 2, 0, 2, 2, 0,
       1, 2, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 1, 2, 0, 2, 2, 2, 0, 0, 0, 0,
       2, 2, 0, 0, 0, 1, 2, 2, 2, 0, 0, 2, 2, 2, 1, 1, 0, 0, 2, 2, 2, 2,
       1, 2, 2, 2, 1, 1, 1, 0, 0, 2, 2, 2, 0, 2, 1, 2, 0, 2, 0, 0, 0, 0,
       1, 0, 0, 2, 2, 2, 0, 2, 0, 0, 1, 0, 2, 0, 1, 2, 2, 2, 0, 0, 0, 0,
       0, 0, 1, 0, 2, 2, 2, 2, 0, 0, 2, 1, 0, 0, 1, 0, 1, 2, 2, 2, 0, 2,
       0, 0, 0, 2, 2, 0, 0, 0, 0, 1, 1, 0, 0, 1, 2, 1, 2, 0, 2, 0, 0, 0,
       2, 0, 0, 1, 2, 0, 2, 2, 0, 2, 0, 0, 1, 2, 1, 0, 2, 0, 2, 0, 0, 0,
       0, 0, 2, 1, 0, 0, 2, 2, 2, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 2, 2, 2,
       0, 1, 0, 2, 2, 0, 1, 2, 0, 2, 1, 2, 0, 0, 2, 0, 0, 0, 0, 0, 0, 1,
       2, 2, 0, 2, 2, 0, 0, 2, 2, 2, 0, 0, 1, 2, 2,