In [None]:
import tensorflow as tf

from keras.models import Sequential
from keras.layers import Dense, Dropout, Normalization
from keras.callbacks import EarlyStopping, TensorBoard

from sklearn.model_selection import train_test_split

import pandas as pd
import numpy as np
import os


In [6]:
# Column names in file order. "Quality" (last) is the label column that the
# later cells split off from the eleven physico-chemical features.
WINEQUALITY_RED_COLUMNS = [
    "Fixed acidity",
    "Volatile acidity",
    "Citric acid",
    "Residual sugar",
    "Chlorides",
    "Free sulfur dioxide",
    "Total sulfur dioxide",
    "Density",
    "pH",
    "Sulphates",
    "Alcohol",
    "Quality",
]

# The header is supplied through `names=` rather than read from the file.
winequality_red = pd.read_csv("data/winequality-red.csv", names=WINEQUALITY_RED_COLUMNS)

winequality_red

Unnamed: 0,Fixed acidity,Volatile acidity,Citric acid,Residual sugar,Chlorides,Free sulfur dioxide,Total sulfur dioxide,Density,pH,Sulphates,Alcohol,Quality
0,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,5
1,7.8,0.880,0.00,2.6,0.098,25.0,67.0,0.99680,3.20,0.68,9.8,5
2,7.8,0.760,0.04,2.3,0.092,15.0,54.0,0.99700,3.26,0.65,9.8,5
3,11.2,0.280,0.56,1.9,0.075,17.0,60.0,0.99800,3.16,0.58,9.8,6
4,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,5
...,...,...,...,...,...,...,...,...,...,...,...,...
1594,6.2,0.600,0.08,2.0,0.090,32.0,44.0,0.99490,3.45,0.58,10.5,5
1595,5.9,0.550,0.10,2.2,0.062,39.0,51.0,0.99512,3.52,0.76,11.2,6
1596,6.3,0.510,0.13,2.3,0.076,29.0,40.0,0.99574,3.42,0.75,11.0,6
1597,5.9,0.645,0.12,2.0,0.075,32.0,44.0,0.99547,3.57,0.71,10.2,5


In [7]:
# Quick schema check: column dtypes and non-null counts of the loaded frame.
winequality_red.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1599 entries, 0 to 1598
Data columns (total 12 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Fixed acidity         1599 non-null   float64
 1   Volatile acidity      1599 non-null   float64
 2   Citric acid           1599 non-null   float64
 3   Residual sugar        1599 non-null   float64
 4   Chlorides             1599 non-null   float64
 5   Free sulfur dioxide   1599 non-null   float64
 6   Total sulfur dioxide  1599 non-null   float64
 7   Density               1599 non-null   float64
 8   pH                    1599 non-null   float64
 9   Sulphates             1599 non-null   float64
 10  Alcohol               1599 non-null   float64
 11  Quality               1599 non-null   int64  
dtypes: float64(11), int64(1)
memory usage: 150.0 KB


In [8]:
# Distribution of the "Quality" column, to see how the values are balanced.
winequality_red["Quality"].value_counts()

Quality
5    681
6    638
7    199
4     53
8     18
3     10
Name: count, dtype: int64

In [9]:
# Collapse the raw quality scores into three ordered class ids:
#   0 -> score < 6, 1 -> score == 6, 2 -> score > 6
# NOTE(review): the previous comment named these ['Bad', 'Great', 'Good'], which
# ranks 'Great' below 'Good' -- confirm the intended label names.
winequality_red_quality_values = [0, 1, 2]

winequality_red_quality_conditions = [
    (winequality_red["Quality"] < 6),
    (winequality_red["Quality"] == 6),
    (winequality_red["Quality"] > 6),
]

# This cell overwrites "Quality" in place. Without a guard, running it a second
# time would see only 0/1/2 (all < 6) and silently turn every label into 0.
# The flag lives on the frame itself, so reloading the CSV (a fresh frame)
# re-enables the remap.
if not winequality_red.attrs.get("quality_binned", False):
    winequality_red["Quality"] = np.select(
        winequality_red_quality_conditions,
        winequality_red_quality_values,
    )
    winequality_red.attrs["quality_binned"] = True


In [13]:
# Distribution of the "Quality" column again, to check the class balance.
winequality_red["Quality"].value_counts()

Quality
0    744
1    638
2    217
Name: count, dtype: int64

In [10]:
# Feature matrix: every column except the "Quality" label, copied out of the
# frame into a standalone NumPy array.
winequality_red_array_features = (
    winequality_red
    .drop(columns="Quality")
    .to_numpy(copy=True)
)

winequality_red_array_features, winequality_red_array_features.shape

(array([[ 7.4  ,  0.7  ,  0.   , ...,  3.51 ,  0.56 ,  9.4  ],
        [ 7.8  ,  0.88 ,  0.   , ...,  3.2  ,  0.68 ,  9.8  ],
        [ 7.8  ,  0.76 ,  0.04 , ...,  3.26 ,  0.65 ,  9.8  ],
        ...,
        [ 6.3  ,  0.51 ,  0.13 , ...,  3.42 ,  0.75 , 11.   ],
        [ 5.9  ,  0.645,  0.12 , ...,  3.57 ,  0.71 , 10.2  ],
        [ 6.   ,  0.31 ,  0.47 , ...,  3.39 ,  0.66 , 11.   ]]),
 (1599, 11))

In [16]:
# Target vector: the "Quality" column, copied into its own 1-D NumPy array.
winequality_red_array_target = winequality_red["Quality"].to_numpy(copy=True)

winequality_red_array_target, winequality_red_array_target.shape

(array([0, 0, 0, ..., 1, 0, 1]), (1599,))

In [18]:
# Fixed seed so that Restart & Run All reproduces the same 80/20 split, and
# therefore the same normalization statistics and training data downstream.
WINEQUALITY_RED_SPLIT_SEED = 42

(
    winequality_red_array_features_train,
    winequality_red_array_features_test,
    winequality_red_array_target_train,
    winequality_red_array_target_test,
) = train_test_split(
    winequality_red_array_features,
    winequality_red_array_target,
    test_size=0.2,
    random_state=WINEQUALITY_RED_SPLIT_SEED,
    # The three classes are imbalanced (744 / 638 / 217 rows), so keep their
    # proportions the same in the train and test portions.
    stratify=winequality_red_array_target,
)

(
    winequality_red_array_features_train.shape,
    winequality_red_array_features_test.shape,
    winequality_red_array_target_train.shape,
    winequality_red_array_target_test.shape,
)

((1279, 11), (320, 11), (1279,), (320,))

In [23]:
# Keras Normalization layer; adapt() is given the training features only, so
# the test split does not influence the layer's state.
winequality_red_train_normalize_layer = Normalization()

winequality_red_train_normalize_layer.adapt(winequality_red_array_features_train)

# Sanity check: run the adapted layer on the first 10 training rows.
winequality_red_train_normalize_layer(winequality_red_array_features_train[:10])

<tf.Tensor: shape=(10, 11), dtype=float32, numpy=
array([[-0.77404505, -0.545863  ,  0.44607934, -0.6491378 ,  0.03100955,
        -0.18068899, -0.29179052, -0.33352524,  0.19939546, -0.56976354,
        -1.1413474 ],
       [ 0.42818028, -0.71314   ,  1.5358776 ,  1.3828769 , -0.15865558,
        -0.9483077 , -0.7989369 , -0.13494575, -0.1858418 , -0.51159567,
         1.987844  ],
       [ 2.3746397 , -0.88041705,  1.4320874 ,  0.02820036, -0.09543393,
         0.87478673,  0.63300586,  1.4645677 , -1.0205225 ,  0.9426019 ,
        -0.0034599 ],
       [-1.2892845 , -1.1592121 , -0.12476748, -0.17500108, -0.34832057,
         0.6828821 ,  0.33468446, -0.23690689,  0.71304667,  1.8151207 ,
         0.18618879],
       [ 2.0311465 , -0.8246581 ,  1.1207165 ,  0.09593428,  0.22067453,
        -0.37259367, -0.1426298 ,  1.9476274 , -0.9563163 , -0.27892393,
        -0.09828334],
       [ 1.2296635 , -0.76889914,  0.9650309 , -0.24273485, -0.26402497,
        -0.18068899, -0.5901119 , -0.