# Imports

In [162]:
import tensorflow as tf
import numpy as np
import pandas as pd
from tensorflow.keras.layers import Dense
from tensorflow.keras import regularizers
from tensorflow.keras.layers import Dropout
import multiprocessing

# Code

### Set Optimisation Settings

In [163]:
BATCH_SIZE = 20
PREFETCH_SIZE = 20

cpu_count = multiprocessing.cpu_count()

### Make Binary Function
Returns True if the target quality is higher than the entire dataset quality mean


In [164]:
# default threshold everything above 5 is good 
def make_binary(target, threshold = 5):
  return int(target > threshold)


### What keys are there and what should be input and target for our NN
#### Inputs
fixed acidity    
volatile acidity       
citric acid       
residual sugar       
chlorides       
free sulfur dioxide       
total sulfur dioxide       
density       
pH       
sulphates       
alcohol       

#### Target
quality     

### Prepare Dataset

In [165]:
# Download data
csv_file = tf.keras.utils.get_file('winequality-red.csv', 'https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv')

In [166]:
# Convert CSV to Tensorflow with pandas
df = pd.read_csv(csv_file, sep = ';')
print(df.dtypes)


df_target = df['quality']


fixed acidity           float64
volatile acidity        float64
citric acid             float64
residual sugar          float64
chlorides               float64
free sulfur dioxide     float64
total sulfur dioxide    float64
density                 float64
pH                      float64
sulphates               float64
alcohol                 float64
quality                   int64
dtype: object


### Split Dataset and pipe?

In [167]:
# Split dataset
full_size = len(df)

train_size = int(0.7 * full_size)
valid_size = int(0.15 * full_size)

train_ds, valid_ds, test_ds = \
              np.split(df.sample(frac=1, random_state=42), 
                       [train_size,train_size + valid_size])
              

In [168]:
# seperate lables from input
train_label = train_ds["quality"]
train_input = train_ds.drop("quality", axis = 1)

test_label = test_ds["quality"]
test_input = test_ds.drop("quality", axis = 1)

validate_label = valid_ds["quality"]
validate_input = valid_ds.drop("quality", axis = 1)

In [169]:
# Build tensorflow dataset
train_ds =tf.data.Dataset.from_tensor_slices((train_input, train_label))
test_ds = tf.data.Dataset.from_tensor_slices((test_input, test_label))
valid_ds = tf.data.Dataset.from_tensor_slices((validate_input, validate_label))

In [170]:
# Data pipeline
def prepare_data(dataset):
  # Create binary target values
  dataset = dataset.map(lambda inputs , target: (inputs, make_binary(target)))
  # Create batches for input and prefetch for better performance
  dataset = dataset.batch(BATCH_SIZE).prefetch(PREFETCH_SIZE)
  
  return dataset

In [171]:
# Apply data pipeline
train_ds = prepare_data(train_ds)
test_ds = prepare_data(test_ds)
valid_ds = prepare_data(valid_ds)

### Build the model

In [172]:
#custom model with custom layer
class CustomModel(tf.keras.Model):
    
    # Initialize model layers
    def __init__(self):
        #Inheret __init__ tf.keras.model
        super(CustomModel, self).__init__()

        # Input layer with sigmoid activation function and a L_1 + L_2 regularization (regularization factors recommended by Keras)
        self.layer1 = Dense(64, activation=tf.nn.sigmoid, kernel_regularizer=regularizers.l1_l2(l1=1e-5, l2=1e-4), bias_regularizer=regularizers.l2(1e-4), activity_regularizer=regularizers.l2(1e-5))
        # Add a dropout layer expecting a tensor of any rank, turning random inputs from layer1 to zero (Dropout rate = 0.5)
        self.layer2 = Dropout(0.5)
        # binary activation function for output
        self.out = tf.keras.layers.Dense(1, activation=tf.nn.sigmoid)
    
    
    # cast the call-function as tf.function to increase efficiency
    @tf.function
    # pass the input through the layers of the network and return the output
    def call(self, inputs):
        x = self.layer1(inputs)
        # Activate training mode -> set some input to zero
        x = self.layer2(x, True)
        x = self.out(x)
        return x

In [173]:
# Delete old graph and start new training
tf.keras.backend.clear_session()

### Train Step

In [174]:
# compute the loss of an input for the model and optimize/tweak according the parameters
def train_step(model, input, target, loss_function, optimizer, print_w = False):

  # use tf.gradientTape to compute loss, then gradients and apply these to the model to modify the parameters
  with tf.GradientTape() as tape:
      prediction = model(input)
      loss = loss_function(target, prediction)
      gradients = tape.gradient(loss, model.trainable_variables)
  # Gradients get applied to individual weights    
  optimizer.apply_gradients(zip(gradients, model.trainable_variables))

  # Print all weights of respective layer (here first one)
  if print_w == True:
    print(model.layers[0].weights)

  return loss


# compute the differences between or model prediction and the label, -> Supervision
def test(model, test_data, loss_function):
  # test over complete test data
  test_accuracy_aggregator = []
  test_loss_aggregator = []

  for (input, target) in test_data:
    prediction = model(input)
    sample_test_loss = loss_function(target, prediction)
    sample_test_accuracy =  np.round(target, 0) == np.round(prediction, 0)
    sample_test_accuracy = np.mean(sample_test_accuracy)
    test_loss_aggregator.append(sample_test_loss.numpy())
    test_accuracy_aggregator.append(np.mean(sample_test_accuracy))
    
  # for all input and computed losses get the mean of accuracy and loss and return them
  test_loss = tf.reduce_mean(test_loss_aggregator)
  test_accuracy = tf.reduce_mean(test_accuracy_aggregator)

  return test_loss, test_accuracy

### Training

In [175]:
# Predefine learning-rate and epochs
num_epochs = 15
alpha = 0.01

# Create a model
model = CustomModel()

# Define loss-function and optimizer
cross_entropy_loss = tf.keras.losses.BinaryCrossentropy()

# Collection of relevant optimizers from week 4
optimizer = tf.keras.optimizers.Adam(alpha)
#optimizer = tf.keras.optimizers.SGD(alpha)
#optimizer = tf.keras.optimizers.RMSprop(alpha)

# create empty arrays to store test/accuracy values, to track the network progress
train_losses = []
test_losses = []
test_accuracies = []

# get initial accuracy- and loss valus before training starts
test_loss, test_accuracy = test(model, test_ds, cross_entropy_loss)
test_losses.append(test_loss)
test_accuracies.append(test_accuracy)

train_loss, _ = test(model, train_ds, cross_entropy_loss)
train_losses.append(train_loss)


# training loop
for epoch in range(num_epochs):
    # print accuracy of each epoch
    print(f'Epoch: {str(epoch)} starting with accuracy {str(test_accuracies[-1])}')
    
    loss_epoch = []
    # for all input, do a forwardstep and obtain loss
    for input, target in train_ds:
        train_loss = train_step(model, input, target, cross_entropy_loss, optimizer)
        loss_epoch.append(train_loss)
    # get the mean loss of this epoch by using reduce_sum of TF over all input-losses and appending to the array  
    train_losses.append(tf.reduce_mean(loss_epoch))
    
    # get the losses and accuracy of this epoch and store them
    test_loss, test_accuracy = test(model, test_ds, cross_entropy_loss)
    test_losses.append(test_loss)
    test_accuracies.append(test_accuracy)
    
# print accuracy after 10 epochs
print(test_accuracies[-1])
          
    

Epoch: 0 starting with accuracy tf.Tensor(0.4465384615384615, shape=(), dtype=float64)
Epoch: 1 starting with accuracy tf.Tensor(0.545, shape=(), dtype=float64)
Epoch: 2 starting with accuracy tf.Tensor(0.5738461538461539, shape=(), dtype=float64)
Epoch: 3 starting with accuracy tf.Tensor(0.5734615384615385, shape=(), dtype=float64)
Epoch: 4 starting with accuracy tf.Tensor(0.5676923076923077, shape=(), dtype=float64)
Epoch: 5 starting with accuracy tf.Tensor(0.5715384615384616, shape=(), dtype=float64)
Epoch: 6 starting with accuracy tf.Tensor(0.5546153846153846, shape=(), dtype=float64)
Epoch: 7 starting with accuracy tf.Tensor(0.5684615384615385, shape=(), dtype=float64)
Epoch: 8 starting with accuracy tf.Tensor(0.5703846153846154, shape=(), dtype=float64)
Epoch: 9 starting with accuracy tf.Tensor(0.5700000000000001, shape=(), dtype=float64)
Epoch: 10 starting with accuracy tf.Tensor(0.5611538461538461, shape=(), dtype=float64)
Epoch: 11 starting with accuracy tf.Tensor(0.5676923076

### Gradient Optimization

# Validation