In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
import pandas as pd
import tensorflow as tf

In [2]:
# Loading ipython-sql
%load_ext sql

In [3]:
# Importing create_engine function from sqlalchemy
from sqlalchemy import create_engine

In [4]:
# Connecting ipython-sql to PostgreSQL Database
%sql postgresql://postgres:pencil@localhost/wine_db

In [5]:
# Connecting sqlalchemy to PostgreSQL Database
engine = create_engine('postgresql://postgres:pencil@localhost/wine_db')

In [6]:
# Checking that we can pull data from PostgreSQL Database in cell below

In [7]:
%%sql

SELECT
    *
FROM white_wine_quality 
LIMIT 3

 * postgresql://postgres:***@localhost/wine_db
3 rows affected.


type,fixed_acidity,volatile_acidity,citric_acid,residual_sugar,chlorides,free_sulfur_dioxide,total_sulfur_dioxide,density,ph,sulfates,alcohol,quality
white,7.0,0.27,0.36,20.7,0.045,45,170,1.001,3.0,0.45,8.8,6
white,6.3,0.3,0.34,1.6,0.049,14,132,0.994,3.3,0.49,9.5,6
white,8.1,0.28,0.4,6.9,0.05,30,97,0.9951,3.26,0.44,10.1,6


In [8]:
# Formulating DataFrame from Database
white_wine_df = pd.read_sql('SELECT * FROM white_wine_quality', engine)
white_wine_df

Unnamed: 0,type,fixed_acidity,volatile_acidity,citric_acid,residual_sugar,chlorides,free_sulfur_dioxide,total_sulfur_dioxide,density,ph,sulfates,alcohol,quality
0,white,7,0.27,0.36,20.7,0.045,45,170,1.001,3,0.45,8.8,6
1,white,6.3,0.3,0.34,1.6,0.049,14,132,0.994,3.3,0.49,9.5,6
2,white,8.1,0.28,0.4,6.9,0.05,30,97,0.9951,3.26,0.44,10.1,6
3,white,7.2,0.23,0.32,8.5,0.058,47,186,0.9956,3.19,0.4,9.9,6
4,white,7.2,0.23,0.32,8.5,0.058,47,186,0.9956,3.19,0.4,9.9,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4865,white,5.7,0.21,0.32,0.9,0.038,38,121,0.99074,3.24,0.46,10.6,6
4866,white,6.2,0.21,0.29,1.6,0.039,24,92,0.99114,3.27,0.5,11.2,6
4867,white,6.6,0.32,0.36,8,0.047,57,168,0.9949,3.15,0.46,9.6,5
4868,white,5.5,0.29,0.3,1.1,0.022,20,110,0.98869,3.34,0.38,12.8,7


In [9]:
# Checking for any empty spaces in our data
white_wine_df.isnull().sum()

type                    0
fixed_acidity           0
volatile_acidity        0
citric_acid             0
residual_sugar          0
chlorides               0
free_sulfur_dioxide     0
total_sulfur_dioxide    0
density                 0
ph                      0
sulfates                0
alcohol                 0
quality                 0
dtype: int64

In [10]:
white_wine_df.dtypes

type                    object
fixed_acidity           object
volatile_acidity        object
citric_acid             object
residual_sugar          object
chlorides               object
free_sulfur_dioxide     object
total_sulfur_dioxide    object
density                 object
ph                      object
sulfates                object
alcohol                 object
quality                 object
dtype: object

In [11]:
white_wine_df = white_wine_df.astype({"fixed_acidity": float, "volatile_acidity": float, "citric_acid": float, "residual_sugar": float, "chlorides": float, "free_sulfur_dioxide": float, "total_sulfur_dioxide": float, "density": float, "ph": float, "sulfates": float, "alcohol": float, "quality": int})
white_wine_df.dtypes

type                     object
fixed_acidity           float64
volatile_acidity        float64
citric_acid             float64
residual_sugar          float64
chlorides               float64
free_sulfur_dioxide     float64
total_sulfur_dioxide    float64
density                 float64
ph                      float64
sulfates                float64
alcohol                 float64
quality                   int32
dtype: object

In [12]:
# Drop the non-beneficial ID columns, 'residual_sugar', 'free_sulfur_dioxide', and 'ph'.
white_wine_df.drop(['residual_sugar', 'free_sulfur_dioxide', 'ph'], axis=1, inplace=True)

In [13]:
# Create Classification version of target variable
# Idea for classification inspired by Terence Shin on 
# towardsdatascience.com/predicting-wine-quality-with-several-classification-techniques-179038ea6434
white_wine_df['goodquality'] = [1 if x >= 7 else 0 for x in white_wine_df['quality']]

In [14]:
white_wine_df.head()

Unnamed: 0,type,fixed_acidity,volatile_acidity,citric_acid,chlorides,total_sulfur_dioxide,density,sulfates,alcohol,quality,goodquality
0,white,7.0,0.27,0.36,0.045,170.0,1.001,0.45,8.8,6,0
1,white,6.3,0.3,0.34,0.049,132.0,0.994,0.49,9.5,6,0
2,white,8.1,0.28,0.4,0.05,97.0,0.9951,0.44,10.1,6,0
3,white,7.2,0.23,0.32,0.058,186.0,0.9956,0.4,9.9,6,0
4,white,7.2,0.23,0.32,0.058,186.0,0.9956,0.4,9.9,6,0


In [15]:
# Split our preprocessed data into our features and target arrays
y= white_wine_df["goodquality"].values
X = white_wine_df.drop(columns=["goodquality", 'type'], axis=1).values

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [16]:
# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [17]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
number_input_features = len(X_train[0])
number_hidden_nodes_layer1 =  50
number_hidden_nodes_layer2 = 25

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(tf.keras.layers.Dense(units=number_hidden_nodes_layer1, activation="relu", input_dim=number_input_features))

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=number_hidden_nodes_layer2, activation="relu", input_dim=number_input_features))

# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 50)                500       
                                                                 
 dense_1 (Dense)             (None, 25)                1275      
                                                                 
 dense_2 (Dense)             (None, 1)                 26        
                                                                 
Total params: 1,801
Trainable params: 1,801
Non-trainable params: 0
_________________________________________________________________


In [18]:
# Import checkpoint dependencies
import os
from tensorflow.keras.callbacks import ModelCheckpoint

# Define the checkpoint path and filenames
os.makedirs("checkpoints/",exist_ok=True)
checkpoint_path = "checkpoints/weights.{epoch:02d}.hdf5"

In [19]:
# Compile the model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [20]:
# Create a callback that saves the model's weights every 5 epochs
cp_callback = ModelCheckpoint(
    filepath=checkpoint_path,
    verbose=1,
    save_weights_only=True,
    period=5)

# Train the model
fit_model = nn.fit(X_train_scaled,y_train,epochs=20,callbacks=[cp_callback])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 00005: saving model to checkpoints\weights.05.hdf5
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 00010: saving model to checkpoints\weights.10.hdf5
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 00015: saving model to checkpoints\weights.15.hdf5
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 00020: saving model to checkpoints\weights.20.hdf5
