In [145]:
import tensorflow as tf
import tensorflow_datasets as tfds
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


In [146]:
#load dataset
wine_data = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv",sep =";")
wine_data.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [147]:
#Data exploration
keys = wine_data.keys()

#input keys
print("Interesting for input:" ,keys[:-1])

# target
print("Target value:", keys[-1])

Interesting for input: Index(['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
       'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',
       'pH', 'sulphates', 'alcohol'],
      dtype='object')
Target value: quality


In [148]:
# Split dataset
full_size = len(wine_data)

train_size = int(0.7 * full_size)
valid_size = int(0.15 * full_size)

train_ds, validate_ds, test_ds = \
              np.split(wine_data.sample(frac=1, random_state=42), 
                       [train_size,train_size + valid_size])
              

In [149]:
# seperate lables from input
train_label = train_ds["quality"]
train_input = train_ds.drop("quality", axis = 1)

test_label = test_ds["quality"]
test_input = test_ds.drop("quality", axis = 1)

validate_label = validate_ds["quality"]
validate_input = validate_ds.drop("quality", axis = 1)

In [150]:
# default threshold everything above 5 is good 
def make_binary(target, threshold = 5):
  return int(target > threshold)


In [151]:
print(train_input.shape)

(1119, 11)


In [152]:
# Build tensorflow dataset
training_data =tf.data.Dataset.from_tensor_slices((train_input, train_label))
testing_data = tf.data.Dataset.from_tensor_slices((test_input, test_label))
validation_data = tf.data.Dataset.from_tensor_slices((validate_input, validate_label))


In [153]:
def prepare_data(dataset):
  dataset = dataset.map(lambda inputs , target: (inputs, make_binary(target)))
  dataset = dataset.batch(20).prefetch(20)
  return dataset

In [154]:
train_data = prepare_data(training_data)
test_data = prepare_data(testing_data)
validation_data = prepare_data(validation_data)

In [155]:
for input, target in train_data.take(1):
  print(input)
  print(target)

tf.Tensor(
[[7.7000e+00 5.6000e-01 8.0000e-02 2.5000e+00 1.1400e-01 1.4000e+01
  4.6000e+01 9.9710e-01 3.2400e+00 6.6000e-01 9.6000e+00]
 [7.8000e+00 5.0000e-01 1.7000e-01 1.6000e+00 8.2000e-02 2.1000e+01
  1.0200e+02 9.9600e-01 3.3900e+00 4.8000e-01 9.5000e+00]
 [1.0700e+01 6.7000e-01 2.2000e-01 2.7000e+00 1.0700e-01 1.7000e+01
  3.4000e+01 1.0004e+00 3.2800e+00 9.8000e-01 9.9000e+00]
 [8.5000e+00 4.6000e-01 3.1000e-01 2.2500e+00 7.8000e-02 3.2000e+01
  5.8000e+01 9.9800e-01 3.3300e+00 5.4000e-01 9.8000e+00]
 [6.7000e+00 4.6000e-01 2.4000e-01 1.7000e+00 7.7000e-02 1.8000e+01
  3.4000e+01 9.9480e-01 3.3900e+00 6.0000e-01 1.0600e+01]
 [7.2000e+00 4.1000e-01 3.0000e-01 2.1000e+00 8.3000e-02 3.5000e+01
  7.2000e+01 9.9700e-01 3.4400e+00 5.2000e-01 9.4000e+00]
 [7.7000e+00 5.4000e-01 2.6000e-01 1.9000e+00 8.9000e-02 2.3000e+01
  1.4700e+02 9.9636e-01 3.2600e+00 5.9000e-01 9.7000e+00]
 [7.0000e+00 7.8000e-01 8.0000e-02 2.0000e+00 9.3000e-02 1.0000e+01
  1.9000e+01 9.9560e-01 3.4000e+00 4.70

In [156]:
# Model
#Build model 
class MyModel(tf.keras.Model):
    
    def __init__(self):
        super(MyModel, self).__init__()
        self.dense1 = tf.keras.layers.Dense(64, activation=tf.nn.sigmoid)
        self.dense2 = tf.keras.layers.Dense(1000, activation=tf.nn.sigmoid)
        self.out = tf.keras.layers.Dense(1,activation=tf.nn.sigmoid)

    @tf.function
    def call(self, inputs):
        x = self.dense1(inputs)
        x = self.dense2(x)
        x = self.out(x)
        return x

In [157]:
@tf.function
def train_step(model, input, target, loss_function, optimizer):
  # loss_object and optimizer_object are instances of respective tensorflow classes
  with tf.GradientTape() as tape:
    prediction = model(input)
    loss = loss_function(target, prediction)
    gradients = tape.gradient(loss, model.trainable_variables)
  optimizer.apply_gradients(zip(gradients, model.trainable_variables))
  return loss


def test(model, test_data, loss_function):
  # test over complete test data

  test_accuracy_aggregator = []
  test_loss_aggregator = []

  for (input, target) in test_data:
    prediction = model(input)
    sample_test_loss = loss_function(target, prediction)
    sample_test_accuracy = np.round(target,0) == np.round(prediction,0)
    sample_test_accuracy = np.mean(sample_test_accuracy)
    test_loss_aggregator.append(sample_test_loss.numpy())
    test_accuracy_aggregator.append(np.mean(sample_test_accuracy))

  test_loss = tf.reduce_mean(test_loss_aggregator)
  test_accuracy = tf.reduce_mean(test_accuracy_aggregator)

  return test_loss, test_accuracy

In [158]:
tf.keras.backend.clear_session()

train_dataset = train_data
test_dataset = test_data

### Hyperparameters
num_epochs = 10
learning_rate = 0.1

# Initialize the model.
model = MyModel()
# Initialize the loss: categorical cross entropy. Check out 'tf.keras.losses'.
cross_entropy_loss = tf.keras.losses.BinaryCrossentropy()
# Initialize the optimizer: SGD with default parameters. Check out 'tf.keras.optimizers'
optimizer = tf.keras.optimizers.SGD(learning_rate)

# Initialize lists for later visualization.
train_losses = []

test_losses = []
test_accuracies = []

#testing once before we begin
test_loss, test_accuracy = test(model, test_dataset, cross_entropy_loss)
test_losses.append(test_loss)
test_accuracies.append(test_accuracy)

#check how model performs on train data once before we begin
train_loss, _ = test(model, train_dataset,cross_entropy_loss)
train_losses.append(train_loss)

# We train for num_epochs epochs.
for epoch in range(num_epochs):
    print(f'Epoch: {str(epoch)} starting with accuracy {test_accuracies[-1]}')

    #training (and checking in with training)
    epoch_loss_agg = []
    for i, (input,target) in enumerate(train_dataset):
        train_loss = train_step(model, input, target, cross_entropy_loss, optimizer)
        epoch_loss_agg.append(train_loss)
          
    #track training loss
    train_losses.append(tf.reduce_mean(epoch_loss_agg))

    #testing, so we can track accuracy and test loss
    test_loss, test_accuracy = test(model, test_dataset, cross_entropy_loss)
    test_losses.append(test_loss)
    test_accuracies.append(test_accuracy)

2021-11-18 13:40:36.675428: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2021-11-18 13:40:36.738977: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2021-11-18 13:40:36.954799: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2021-11-18 13:40:37.053438: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


Epoch: 0 starting with accuracy 0.5884615384615385


2021-11-18 13:40:37.266260: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


Epoch: 1 starting with accuracy 0.5884615384615385
Epoch: 2 starting with accuracy 0.5269230769230769
Epoch: 3 starting with accuracy 0.4115384615384615
Epoch: 4 starting with accuracy 0.4115384615384615
Epoch: 5 starting with accuracy 0.49615384615384617
Epoch: 6 starting with accuracy 0.5046153846153847
Epoch: 7 starting with accuracy 0.5169230769230769
Epoch: 8 starting with accuracy 0.5373076923076923
Epoch: 9 starting with accuracy 0.5465384615384615


In [159]:
# Fine tuning

In [160]:
# Visualization