In [23]:
# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5)

# Scikit-Learn ≥0.20 is required
import sklearn
assert sklearn.__version__ >= "0.20"

try:
    # %tensorflow_version only exists in Colab.
    %tensorflow_version 2.x
except Exception:
    pass

# TensorFlow ≥2.0 is required
import tensorflow as tf
from tensorflow import keras
assert tf.__version__ >= "2.0"

# Common imports
import numpy as np
import os

# to make this notebook's output stable across runs
np.random.seed(32)
tf.random.set_seed(32)

%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt

In [24]:
import pandas as pd

wine = pd.read_csv('../../DL/data/winequality-red.csv')
wine.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [25]:
from sklearn.utils import shuffle

wine = shuffle(wine)

In [26]:
wine.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1599 entries, 348 to 727
Data columns (total 12 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   fixed acidity         1599 non-null   float64
 1   volatile acidity      1599 non-null   float64
 2   citric acid           1599 non-null   float64
 3   residual sugar        1599 non-null   float64
 4   chlorides             1599 non-null   float64
 5   free sulfur dioxide   1599 non-null   float64
 6   total sulfur dioxide  1599 non-null   float64
 7   density               1599 non-null   float64
 8   pH                    1599 non-null   float64
 9   sulphates             1599 non-null   float64
 10  alcohol               1599 non-null   float64
 11  quality               1599 non-null   int64  
dtypes: float64(11), int64(1)
memory usage: 162.4 KB


In [27]:
wine.describe()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
count,1599.0,1599.0,1599.0,1599.0,1599.0,1599.0,1599.0,1599.0,1599.0,1599.0,1599.0,1599.0
mean,8.319637,0.527821,0.270976,2.538806,0.087467,15.874922,46.467792,0.996747,3.311113,0.658149,10.422983,5.636023
std,1.741096,0.17906,0.194801,1.409928,0.047065,10.460157,32.895324,0.001887,0.154386,0.169507,1.065668,0.807569
min,4.6,0.12,0.0,0.9,0.012,1.0,6.0,0.99007,2.74,0.33,8.4,3.0
25%,7.1,0.39,0.09,1.9,0.07,7.0,22.0,0.9956,3.21,0.55,9.5,5.0
50%,7.9,0.52,0.26,2.2,0.079,14.0,38.0,0.99675,3.31,0.62,10.2,6.0
75%,9.2,0.64,0.42,2.6,0.09,21.0,62.0,0.997835,3.4,0.73,11.1,6.0
max,15.9,1.58,1.0,15.5,0.611,72.0,289.0,1.00369,4.01,2.0,14.9,8.0


In [28]:
X = wine.drop(['quality'], axis=1).copy()
X.shape

(1599, 11)

In [29]:
X

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol
348,9.6,0.560,0.31,2.8,0.089,15.0,46.0,0.99790,3.11,0.92,10.0
423,10.5,0.240,0.47,2.1,0.066,6.0,24.0,0.99780,3.15,0.90,11.0
851,9.3,0.430,0.44,1.9,0.085,9.0,22.0,0.99708,3.28,0.55,9.5
754,7.8,0.480,0.68,1.7,0.415,14.0,32.0,0.99656,3.09,1.06,9.1
1260,8.6,0.635,0.68,1.8,0.403,19.0,56.0,0.99632,3.02,1.15,9.3
...,...,...,...,...,...,...,...,...,...,...,...
88,9.3,0.390,0.44,2.1,0.107,34.0,125.0,0.99780,3.14,1.22,9.5
892,10.1,0.450,0.23,1.9,0.082,10.0,18.0,0.99774,3.22,0.65,9.3
1334,7.2,0.835,0.00,2.0,0.166,4.0,11.0,0.99608,3.39,0.52,10.0
1579,6.2,0.560,0.09,1.7,0.053,24.0,32.0,0.99402,3.54,0.60,11.3


In [30]:
y = wine['quality'].copy()
y

348     6
423     7
851     5
754     6
1260    5
       ..
88      5
892     6
1334    5
1579    5
727     5
Name: quality, Length: 1599, dtype: int64

In [57]:
wine.describe()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
count,1599.0,1599.0,1599.0,1599.0,1599.0,1599.0,1599.0,1599.0,1599.0,1599.0,1599.0,1599.0
mean,8.319637,0.527821,0.270976,2.538806,0.087467,15.874922,46.467792,0.996747,3.311113,0.658149,10.422983,5.636023
std,1.741096,0.17906,0.194801,1.409928,0.047065,10.460157,32.895324,0.001887,0.154386,0.169507,1.065668,0.807569
min,4.6,0.12,0.0,0.9,0.012,1.0,6.0,0.99007,2.74,0.33,8.4,3.0
25%,7.1,0.39,0.09,1.9,0.07,7.0,22.0,0.9956,3.21,0.55,9.5,5.0
50%,7.9,0.52,0.26,2.2,0.079,14.0,38.0,0.99675,3.31,0.62,10.2,6.0
75%,9.2,0.64,0.42,2.6,0.09,21.0,62.0,0.997835,3.4,0.73,11.1,6.0
max,15.9,1.58,1.0,15.5,0.611,72.0,289.0,1.00369,4.01,2.0,14.9,8.0


In [32]:
mean = X.mean(axis=0)
X -= mean
std = X.std(axis=0)
X /= std

In [82]:
from tensorflow.keras import models
from tensorflow.keras import layers

def build_model():
    model = models.Sequential()
    model.add(layers.Flatten(input_shape=(11,)))
#     model.add(layers.BatchNormalization())
    model.add(layers.Dense(300, activation='selu'))
#     model.add(layers.BatchNormalization())
    model.add(layers.Dense(200,activation='selu'))
    model.add(layers.BatchNormalization())
    model.add(layers.Dense(100,activation='selu'))
#     model.add(layers.BatchNormalization()) 
#     model.add(layers.Dense(50,activation='selu'))
#     model.add(layers.BatchNormalization()) 
    model.add(layers.Dense(25,activation='selu'))
#     model.add(layers.BatchNormalization()) 
#     model.add(layers.Dense(10,activation='selu'))
    model.add(layers.BatchNormalization()) 
    
    model.add(layers.Dense(1))
    model.compile(optimizer='adam', loss='mse', metrics=['mse'])
    return model

In [83]:
import numpy as np
np.random.seed(30)
tf.random.set_seed(30)
k = 10
num_val_samples = len(X) // k
num_epochs = 100
all_scores = []
for i in range(k):
    print('processing fold #', i)

    val_data = X[i * num_val_samples: (i + 1) * num_val_samples]
    val_targets = y[i * num_val_samples: (i + 1) * num_val_samples]

    partial_train_data = np.concatenate(
        [X[:i * num_val_samples],
         X[(i + 1) * num_val_samples:]],
        axis=0)
    partial_train_targets = np.concatenate(
        [y[:i * num_val_samples],
         y[(i + 1) * num_val_samples:]],
        axis=0)

    model = build_model()
    model.fit(partial_train_data, partial_train_targets,
              epochs=num_epochs, verbose=0)  
#     history = model.fit(partial_train_data, partial_train_targets,
#               epochs=num_epochs, verbose=0)
    val_mse, val_mae = model.evaluate(val_data, val_targets, verbose=0)
    all_scores.append(val_mse)

processing fold # 0
processing fold # 1
processing fold # 2
processing fold # 3
processing fold # 4
processing fold # 5
processing fold # 6
processing fold # 7
processing fold # 8
processing fold # 9


In [84]:
all_scores

[0.43549591302871704,
 0.4886320233345032,
 0.4025425910949707,
 0.6617569923400879,
 0.34193775057792664,
 0.489292174577713,
 0.47512516379356384,
 0.3906455338001251,
 0.3407684564590454,
 0.36866626143455505]

In [85]:
np.mean(all_scores)

0.43948628604412077

In [45]:
all_scores

[0.4057203531265259,
 0.44418415427207947,
 0.41822949051856995,
 0.565175473690033,
 0.35641518235206604,
 0.39904069900512695,
 0.449663370847702,
 0.34040775895118713,
 0.33619481325149536,
 0.3624160587787628]

In [46]:
np.mean(all_scores)

0.40774473547935486

In [15]:
np.mean(all_scores)

0.3978750467300415

In [21]:
all_scores

[0.36825162172317505,
 0.3499695658683777,
 0.5142965316772461,
 0.4187380373477936,
 0.4360085725784302,
 0.46195197105407715,
 0.40259143710136414,
 0.48936980962753296,
 0.43150272965431213,
 0.39034610986709595]

In [22]:
np.mean(all_scores)

0.4263026386499405

In [42]:
all_scores

[0.3802739381790161,
 0.3433116376399994,
 0.4376659393310547,
 0.409509539604187,
 0.390725314617157,
 0.4434363842010498,
 0.4223284423351288,
 0.467769056558609,
 0.35952916741371155,
 0.3521803319454193]

In [43]:
np.mean(all_scores)

0.40067297518253325

In [21]:
np.mean(all_scores)

0.4026421278715134

In [89]:
np.mean(all_scores)

0.3965066224336624