In [134]:
import tensorflow as tf

from keras.models import Sequential
from keras.layers import Dense, Dropout, Normalization
from keras.callbacks import EarlyStopping, TensorBoard

from sklearn.model_selection import train_test_split

import pandas as pd
import numpy as np
import os


# Підготовка даних

In [135]:
winequality_red = pd.read_csv(
      "data/winequality-red.csv", 
      names=[
            "Fixed acidity",
            "Volatile acidity",
            "Citric acid",
            "Residual sugar",
            "Chlorides",
            "Free sulfur dioxide",
            "Total sulfur dioxide",
            "Density",
            "pH",
            "Sulphates",
            "Alcohol",
            "Quality",
      ]
)

winequality_red

Unnamed: 0,Fixed acidity,Volatile acidity,Citric acid,Residual sugar,Chlorides,Free sulfur dioxide,Total sulfur dioxide,Density,pH,Sulphates,Alcohol,Quality
0,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,5
1,7.8,0.880,0.00,2.6,0.098,25.0,67.0,0.99680,3.20,0.68,9.8,5
2,7.8,0.760,0.04,2.3,0.092,15.0,54.0,0.99700,3.26,0.65,9.8,5
3,11.2,0.280,0.56,1.9,0.075,17.0,60.0,0.99800,3.16,0.58,9.8,6
4,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,5
...,...,...,...,...,...,...,...,...,...,...,...,...
1594,6.2,0.600,0.08,2.0,0.090,32.0,44.0,0.99490,3.45,0.58,10.5,5
1595,5.9,0.550,0.10,2.2,0.062,39.0,51.0,0.99512,3.52,0.76,11.2,6
1596,6.3,0.510,0.13,2.3,0.076,29.0,40.0,0.99574,3.42,0.75,11.0,6
1597,5.9,0.645,0.12,2.0,0.075,32.0,44.0,0.99547,3.57,0.71,10.2,5


In [136]:
winequality_red.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1599 entries, 0 to 1598
Data columns (total 12 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Fixed acidity         1599 non-null   float64
 1   Volatile acidity      1599 non-null   float64
 2   Citric acid           1599 non-null   float64
 3   Residual sugar        1599 non-null   float64
 4   Chlorides             1599 non-null   float64
 5   Free sulfur dioxide   1599 non-null   float64
 6   Total sulfur dioxide  1599 non-null   float64
 7   Density               1599 non-null   float64
 8   pH                    1599 non-null   float64
 9   Sulphates             1599 non-null   float64
 10  Alcohol               1599 non-null   float64
 11  Quality               1599 non-null   int64  
dtypes: float64(11), int64(1)
memory usage: 150.0 KB


In [137]:
winequality_red["Quality"].value_counts()

Quality
5    681
6    638
7    199
4     53
8     18
3     10
Name: count, dtype: int64

In [138]:
winequality_red_quality_conditions = [
    (winequality_red['Quality'] < 6),
    (winequality_red['Quality'] == 6),
    (winequality_red['Quality'] > 6),
]

winequality_red_quality_values = [0, 1, 2] # ['Bad', 'Great', 'Good']

winequality_red["Quality"] = np.select(winequality_red_quality_conditions, winequality_red_quality_values)


In [139]:
winequality_red["Quality"].value_counts()

Quality
0    744
1    638
2    217
Name: count, dtype: int64

In [140]:
winequality_red_array_features = np.array(winequality_red.drop('Quality', axis=1))

winequality_red_array_features, winequality_red_array_features.shape

(array([[ 7.4  ,  0.7  ,  0.   , ...,  3.51 ,  0.56 ,  9.4  ],
        [ 7.8  ,  0.88 ,  0.   , ...,  3.2  ,  0.68 ,  9.8  ],
        [ 7.8  ,  0.76 ,  0.04 , ...,  3.26 ,  0.65 ,  9.8  ],
        ...,
        [ 6.3  ,  0.51 ,  0.13 , ...,  3.42 ,  0.75 , 11.   ],
        [ 5.9  ,  0.645,  0.12 , ...,  3.57 ,  0.71 , 10.2  ],
        [ 6.   ,  0.31 ,  0.47 , ...,  3.39 ,  0.66 , 11.   ]]),
 (1599, 11))

In [141]:
winequality_red_array_target = np.array(winequality_red['Quality'])

winequality_red_array_target, winequality_red_array_target.shape

(array([0, 0, 0, ..., 1, 0, 1]), (1599,))

In [142]:
winequality_red_array_features_train, winequality_red_array_features_test, winequality_red_array_target_train, winequality_red_array_target_test = train_test_split(winequality_red_array_features, winequality_red_array_target, test_size=0.2)

winequality_red_array_features_train.shape, winequality_red_array_features_test.shape, winequality_red_array_target_train.shape, winequality_red_array_target_test.shape

((1279, 11), (320, 11), (1279,), (320,))

In [143]:
winequality_red_train_normalize_layer = Normalization()

winequality_red_train_normalize_layer.adapt(winequality_red_array_features_train)

In [144]:
winequality_red_train_normalize_layer(winequality_red_array_features_train[:5])

<tf.Tensor: shape=(5, 11), dtype=float32, numpy=
array([[-0.30553326,  1.9420916 , -0.05512348,  0.8927547 ,  0.4286379 ,
         1.4531912 ,  0.6088108 , -0.03651224, -0.31152722, -1.1563274 ,
        -1.1284736 ],
       [-0.19012372, -0.5906696 , -0.516262  , -0.37952232, -0.303999  ,
        -0.9585488 , -0.86318165,  0.23643786, -0.11751232, -0.28059137,
        -1.1284736 ],
       [-0.65176237,  0.73199445, -1.2335886 , -0.5208865 , -0.19625823,
        -0.8620792 , -1.0434256 , -0.46170616,  0.20584586, -1.5650042 ,
        -0.8493797 ],
       [-0.13241869,  1.1259798 , -0.2600739 , -0.23815818, -0.3255472 ,
        -0.4762008 , -0.17224643, -0.026031  ,  0.33518913, -0.6308857 ,
        -1.2215043 ],
       [ 2.5797076 ,  1.7732407 ,  1.8406684 , -0.09679403,  0.01922316,
        -0.2832616 , -0.3524904 ,  1.5487196 , -1.346275  , -0.3389737 ,
        -0.01209895]], dtype=float32)>

# Створення моделей

In [167]:
def make_model(*layers):
      model = Sequential([
            winequality_red_train_normalize_layer, 
            *layers
      ])
      model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

      return model

# Модель з прошарками DropOut

In [168]:
model_with_dropouts = make_model(
      Dense(128, activation='relu'),
      Dropout(0.5),
      Dense(64, activation='relu'),
      Dropout(0.5),
      Dense(1, activation='softmax')
)

# Модель без DropOut

In [169]:
model_simple = make_model(
      Dense(128, activation='relu'),
      Dense(64, activation='relu'),
      Dense(1, activation='softmax')
)

# Модель з іншою кількістю прошарків

In [170]:
model_more_layers = make_model(
      Dense(256, activation='relu'),
      Dense(128, activation='relu'),
      Dense(64, activation='relu'),
      Dense(32, activation='relu'),
      Dense(16, activation='relu'),
      Dense(8, activation='relu'),
      Dense(4, activation='relu'),
      Dense(2, activation='relu'),
      Dense(1, activation='softmax')
)

# Директорія для зберігання логів

In [171]:
log_dir = os.path.join("logs", "fit", "model")

os.makedirs(log_dir, exist_ok=True)

# Налаштування колбеків

In [172]:
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

tensorboard = TensorBoard(log_dir=log_dir, histogram_freq=1)

# Навчання моделі з використанням колбеків

In [173]:
EPOCHS = 100

In [174]:
def fit_model(model, *callbacks):
      history = model.fit(
            winequality_red_array_features_train,
            winequality_red_array_target_train,
            epochs=EPOCHS,
            validation_data=(
                  winequality_red_array_features_test,
                  winequality_red_array_target_test
            ),
            callbacks=callbacks,
      )

      return history

In [195]:
models = [
      (model_with_dropouts, tensorboard, early_stopping),
      (model_with_dropouts, tensorboard),
      (model_simple, tensorboard, early_stopping),
      (model_simple, tensorboard),
      (model_more_layers, tensorboard, early_stopping),
      (model_more_layers, tensorboard),
]

In [197]:
histories = [fit_model(model_data[0], *model_data[1:]) for model_data in models]

Epoch 1/100
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.4049 - loss: 0.0000e+00 - val_accuracy: 0.4031 - val_loss: 0.0000e+00
Epoch 2/100
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 894us/step - accuracy: 0.4050 - loss: 0.0000e+00 - val_accuracy: 0.4031 - val_loss: 0.0000e+00
Epoch 3/100
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 870us/step - accuracy: 0.3943 - loss: 0.0000e+00 - val_accuracy: 0.4031 - val_loss: 0.0000e+00
Epoch 4/100
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 841us/step - accuracy: 0.4320 - loss: 0.0000e+00 - val_accuracy: 0.4031 - val_loss: 0.0000e+00
Epoch 5/100
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 873us/step - accuracy: 0.3879 - loss: 0.0000e+00 - val_accuracy: 0.4031 - val_loss: 0.0000e+00
Epoch 1/100
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.4060 - loss: 0.0000e+00 - val_accuracy: 0.4031 -