In [None]:
import tensorflow as tf
import pandas as pd
import numpy as np

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.compose import make_column_transformer

In [None]:
# Directory holding the dataset, and the full path of the CSV file inside it.
BASE_DIR = '../input/red-wine-quality-cortez-et-al-2009/'
csv_path = BASE_DIR + "winequality-red.csv"

# Read the CSV into a DataFrame.
red_wine = pd.read_csv(csv_path)

In [None]:
# Show the column labels of the loaded dataset.
red_wine.columns

In [None]:
# Names of the columns that are handed to the scaler in the preprocessor.
features_num = [
    'fixed acidity',
    'volatile acidity',
    'citric acid',
    'residual sugar',
    'chlorides',
    'free sulfur dioxide',
    'total sulfur dioxide',
    'density',
    'pH',
    'sulphates',
    'alcohol',
]
features_num

In [None]:
# Pair a StandardScaler with the columns listed in features_num, then wrap
# that single step in a column transformer.
numeric_step = (StandardScaler(), features_num)
preprocessor = make_column_transformer(numeric_step)

In [None]:
# Preview the first rows of the dataset.
red_wine.head()

In [None]:
from tensorflow import keras
from tensorflow.keras import layers, callbacks


# Training split: a 70% random sample of the rows, drawn with a fixed seed.
df_train = red_wine.sample(random_state=0, frac=0.7)

# Validation split: every row whose index label was not drawn for training.
df_valid = red_wine.drop(index=df_train.index)

In [None]:
# Targets: the 'quality' column of each split.
y_train = df_train['quality']
y_valid = df_valid['quality']

# Features: everything except 'quality'. The preprocessor is fitted on the
# training features only and then applied unchanged to the validation features.
X_train = preprocessor.fit_transform(df_train.drop(columns='quality'))
X_valid = preprocessor.transform(df_valid.drop(columns='quality'))

In [None]:
# Size of the last axis of the transformed training matrix (its column
# count, since X_train is indexed as a 2-D array); used as the model input width.
input_shape = X_train.shape[-1]
print(input_shape)

In [None]:
# Mean and standard deviation of the first column of the transformed
# training matrix.
first_column = X_train[:, 0]
first_column.mean(), first_column.std()

In [None]:
# Early-stopping settings; the monitored quantity is left at the callback's default.
stopping_options = dict(
    min_delta=0.001,            # minimum amount of change to count as an improvement
    patience=20,                # how many epochs to wait before stopping
    restore_best_weights=True,  # ask the callback to restore the best weights
)
early_stopping = callbacks.EarlyStopping(**stopping_options)

# Seed TensorFlow's global random generator before any layer is created so
# that weight initialisation and dropout start from the same state on every
# run. The train/validation split is already seeded with random_state=0;
# without this line the training results still differ between runs.
# NOTE(review): some GPU kernels may remain non-deterministic even when seeded.
tf.random.set_seed(0)

# Regression network: three identical hidden stages
# (Dense 32 ReLU -> Dropout 0.3 -> BatchNormalization) followed by a
# single-unit output layer with no activation.
model = keras.Sequential([
    # the hidden ReLU layers
    layers.Dense(units=32, activation='relu', input_shape=[input_shape]),
    layers.Dropout(0.3),
    layers.BatchNormalization(),
    layers.Dense(units=32, activation='relu'),
    layers.Dropout(0.3),
    layers.BatchNormalization(),
    layers.Dense(units=32, activation='relu'),
    layers.Dropout(0.3),
    layers.BatchNormalization(),
    # the linear output layer
    layers.Dense(units=1)
])

# Configure training: Adam optimizer, 'mae' as the loss.
model.compile(loss='mae', optimizer='adam')

# Options for the training run, gathered in one place.
fit_options = dict(
    validation_data=(X_valid, y_valid),
    batch_size=256,
    epochs=500,
    callbacks=[early_stopping],  # callbacks are passed as a list
    verbose=0,                   # turn off training log
)

# Train on the training split; the returned object's .history is used for
# the loss curves afterwards.
history = model.fit(X_train, y_train, **fit_options)

# Tabulate the recorded per-epoch values, one column per recorded quantity.
history_df = pd.DataFrame(history.history)

# Plot training loss against validation loss.
loss_curves = history_df[['loss', 'val_loss']]
loss_curves.plot();

# Report the lowest validation loss that was recorded.
best_val_loss = history_df['val_loss'].min()
print("Minimum validation loss: {}".format(best_val_loss))


In [None]:
# Take the row at position 67, remove its 'quality' entry, and turn the
# resulting Series back into a one-row DataFrame.
sample_row = red_wine.iloc[67].drop(labels=["quality"])
sample_df = sample_row.to_frame().T

In [None]:
# Display the one-row sample frame.
sample_df

In [None]:
# Run the sample row through the preprocessor.
# NOTE(review): this rebinds sample_df from the DataFrame to the transformer's
# output, so re-running this cell passes already-transformed data back into
# transform. Consider a separate name (the later cells that read sample_df
# would need the same rename).
sample_df = preprocessor.transform(sample_df)

In [None]:
# Display the transformed sample.
sample_df

In [None]:
# Run the model on the transformed sample and display its prediction.
model.predict(sample_df)

In [None]:
# Show the complete row at position 67 (the row the sample was taken from),
# including its 'quality' value.
red_wine.iloc[67,:]

In [None]:
# Write the model to the file "redwine_model.h5".
model.save("redwine_model.h5")

In [None]:
import pickle

# Serialise the fitted preprocessor to "preprocessor.p". The with-block
# closes the file handle as soon as the dump finishes, so the data is
# flushed to disk and the handle is not left open in the kernel.
with open("preprocessor.p", "wb") as preprocessor_file:
    pickle.dump(preprocessor, preprocessor_file)