# Imports

In [61]:
import tensorflow as tf
import numpy as np
import pandas as pd
from tensorflow.keras.layers import Dense
import multiprocessing

# Code

### Set Optimisation Settings

In [62]:
# Tuning constants for the tf.data input pipeline.
# NOTE(review): SHUFFLE_BUFFER is not referenced in the visible cells --
# presumably meant for Dataset.shuffle; confirm or remove.
SHUFFLE_BUFFER = 500
# Batch size passed to Dataset.batch in prepare_data.
BATCH_SIZE = 20
# Buffer size passed to Dataset.prefetch in prepare_data (applied after batching).
PREFETCH_SIZE = 20

# CPU count reported by the OS.
# NOTE(review): not referenced in the visible cells -- confirm it is still needed.
cpu_count = multiprocessing.cpu_count()

### Make Binary Function
Returns 1 if the target quality is above a threshold and 0 otherwise. The threshold defaults to 5, a fixed stand-in for the dataset's mean quality.


In [63]:
# Binarise a quality score: by default anything above 5 counts as "good".
def make_binary(target, threshold = 5):
  """Return 1 when ``target`` is strictly greater than ``threshold``, else 0.

  Args:
    target: quality score to classify.
    threshold: cut-off value; scores equal to it map to 0. Defaults to 5.

  Returns:
    The comparison result converted with ``int``.
  """
  is_above_threshold = target > threshold
  return int(is_above_threshold)


### Which columns are there, and which should be the inputs and the target for our NN?
#### Inputs
fixed acidity    
volatile acidity       
citric acid       
residual sugar       
chlorides       
free sulfur dioxide       
total sulfur dioxide       
density       
pH       
sulphates       
alcohol       

#### Target
quality     

### Prepare Dataset

In [64]:
# Fetch the red-wine quality CSV and keep the path of the local copy.
WINE_CSV_URL = 'https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv'
csv_file = tf.keras.utils.get_file(fname='winequality-red.csv', origin=WINE_CSV_URL)

In [65]:
# Load the semicolon-separated CSV into a pandas DataFrame.
df = pd.read_csv(csv_file, sep = ';')
# Print the dtype of every column (the output below shows eleven float64
# feature columns plus the int64 'quality' column).
print(df.dtypes)

# Keep the label column on its own.
# NOTE(review): df_target is not referenced again in the visible cells --
# confirm it is still needed.

df_target = df['quality']


fixed acidity           float64
volatile acidity        float64
citric acid             float64
residual sugar          float64
chlorides               float64
free sulfur dioxide     float64
total sulfur dioxide    float64
density                 float64
pH                      float64
sulphates               float64
alcohol                 float64
quality                   int64
dtype: object


### Split the dataset and build the input pipeline

In [66]:
# Split the dataset into 70% train / 15% validation / remainder test.
full_size = len(df)

train_size = int(0.7 * full_size)
valid_size = int(0.15 * full_size)

# Shuffle the rows once with a fixed seed so the split is reproducible.
shuffled_df = df.sample(frac=1, random_state=42)

# Slice by position instead of calling np.split on the DataFrame: np.split
# goes through DataFrame.swapaxes, which pandas has deprecated. The slices
# below produce the same three partitions.
valid_end = train_size + valid_size
train_ds = shuffled_df.iloc[:train_size]
valid_ds = shuffled_df.iloc[train_size:valid_end]
test_ds = shuffled_df.iloc[valid_end:]

# Every row must land in exactly one partition.
assert len(train_ds) + len(valid_ds) + len(test_ds) == full_size
              

In [67]:
# Separate the label column ("quality") from the feature columns of each split.
train_input, train_label = train_ds.drop(columns="quality"), train_ds["quality"]

test_input, test_label = test_ds.drop(columns="quality"), test_ds["quality"]

validate_input, validate_label = valid_ds.drop(columns="quality"), valid_ds["quality"]

In [68]:
# Build one tf.data.Dataset per split; each element is an (inputs, label) pair
# sliced row-wise from the corresponding feature frame and label series.
# Note: train_ds / test_ds / valid_ds previously held the pandas splits and are
# rebound to tf.data.Dataset objects here.
train_ds =tf.data.Dataset.from_tensor_slices((train_input, train_label))
test_ds = tf.data.Dataset.from_tensor_slices((test_input, test_label))
valid_ds = tf.data.Dataset.from_tensor_slices((validate_input, validate_label))


In [69]:
def prepare_data(dataset):
  """Binarise the labels of ``dataset``, then batch and prefetch it.

  Args:
    dataset: a dataset whose elements are ``(inputs, target)`` pairs.

  Returns:
    The dataset with every target passed through ``make_binary``, grouped into
    batches of ``BATCH_SIZE`` and prefetched with a buffer of ``PREFETCH_SIZE``.
  """
  def _binarize_target(inputs, target):
    # Inputs pass through untouched; only the label is converted.
    return inputs, make_binary(target)

  labelled = dataset.map(_binarize_target)
  batched = labelled.batch(BATCH_SIZE)
  return batched.prefetch(PREFETCH_SIZE)

In [70]:
# Run every split through the same map/batch/prefetch pipeline.
train_ds, test_ds, valid_ds = map(prepare_data, (train_ds, test_ds, valid_ds))

### Custom Layer

In [71]:
#custom layer

class CustomLayer(tf.keras.layers.Layer):
    """Fully connected layer computing ``activation(inputs @ w + b)``.

    Args:
        units: number of output units. Defaults to 8.
        activation: callable applied to the affine output. Defaults to
            ``tf.nn.sigmoid``; ``None`` leaves the output linear.
        **kwargs: standard Keras layer options (e.g. ``name``, ``dtype``,
            ``trainable``), forwarded to ``tf.keras.layers.Layer``.
    """

    def __init__(self, units=8, activation=tf.nn.sigmoid, **kwargs):
        # Forward base-layer options so the layer can be named/configured
        # like any built-in Keras layer.
        super().__init__(**kwargs)
        self.units = units
        self.activation = activation

    def build(self, input_shape):
        """Create the weights once the size of the last input axis is known."""
        # Weight matrix of shape (input features, units).
        self.w = self.add_weight(shape=(input_shape[-1], self.units),
                                 initializer='random_normal',
                                 trainable=True)
        # One bias value per unit.
        self.b = self.add_weight(shape=(self.units,),
                                 initializer='random_normal',
                                 trainable=True)

    def call(self, inputs):
        """Return the activated affine transform of ``inputs``."""
        drive = tf.matmul(inputs, self.w) + self.b
        if self.activation is None:
            # No activation requested: return the linear output.
            return drive
        return self.activation(drive)
    

### Build the model

In [72]:
# Feed-forward network assembled from three CustomLayer blocks.
class CustomModel(tf.keras.Model):
    """Model that chains two hidden CustomLayers and a single-unit output layer."""

    def __init__(self):
        super().__init__()
        # Hidden layers rely on CustomLayer's default activation (sigmoid).
        self.layer1 = CustomLayer(64)
        # 256 units: per the original author, the first size at which the
        # accuracy got above 0.5.
        self.layer2 = CustomLayer(256)
        # Output layer with one unit.
        self.out = CustomLayer(1)

    # Wrapped in tf.function, as in the original, for efficiency.
    @tf.function
    def call(self, inputs):
        """Pass ``inputs`` through layer1, layer2 and out, in that order."""
        hidden = self.layer1(inputs)
        hidden = self.layer2(hidden)
        return self.out(hidden)

In [73]:
# Reset Keras' global state before the model is created in the training cell.
tf.keras.backend.clear_session()

### Train Step

In [74]:
# compute the loss of one batch and update the model parameters accordingly
def train_step(model, input, target, loss_function, optimizer):
    """Run one optimisation step on a single batch.

    Args:
        model: callable model exposing ``trainable_variables``.
        input: batch of inputs passed to ``model``.
        target: labels for the batch, passed to ``loss_function`` first.
        loss_function: callable ``(target, prediction) -> loss``.
        optimizer: object exposing ``apply_gradients``.

    Returns:
        The loss computed for this batch (before the parameter update).
    """
    variables = model.trainable_variables
    # Only the forward pass and the loss need to be recorded by the tape.
    with tf.GradientTape() as tape:
        prediction = model(input)
        loss = loss_function(target, prediction)
    # Compute gradients after leaving the tape context so the backward pass
    # itself is not recorded (that is only needed for higher-order gradients).
    gradients = tape.gradient(loss, variables)
    optimizer.apply_gradients(zip(gradients, variables))
    return loss


# compute the differences between or model prediction and the label, -> Supervision
def test(model, test_data, loss_function):
  # test over complete test data
  test_accuracy_aggregator = []
  test_loss_aggregator = []

  for (input, target) in test_data:
    prediction = model(input)
    sample_test_loss = loss_function(target, prediction)
    sample_test_accuracy =  np.round(target, 0) == np.round(prediction, 0)
    sample_test_accuracy = np.mean(sample_test_accuracy)
    test_loss_aggregator.append(sample_test_loss.numpy())
    test_accuracy_aggregator.append(np.mean(sample_test_accuracy))
    
# for all input and computed losses get the mean of accuracy and loss and return them
  test_loss = tf.reduce_mean(test_loss_aggregator)
  test_accuracy = tf.reduce_mean(test_accuracy_aggregator)

  return test_loss, test_accuracy

### Training

In [75]:
# predefine the number of epochs and the learning rate
num_epochs = 10
alpha = 0.1

# create a model
model = CustomModel()

# define loss function and optimizer (plain SGD with learning rate alpha)
cross_entropy_loss = tf.keras.losses.BinaryCrossentropy()
optimizer = tf.keras.optimizers.SGD(alpha)

# lists that collect loss/accuracy values to track the network's progress
train_losses = []
test_losses = []
test_accuracies = []

# get initial accuracy and loss values on the test split before training starts
test_loss, test_accuracy = test(model, test_ds, cross_entropy_loss)
test_losses.append(test_loss)
test_accuracies.append(test_accuracy)

# initial loss on the training split (test() is reused; its accuracy is discarded)
train_loss, _ = test(model, train_ds, cross_entropy_loss)
train_losses.append(train_loss)


# training loop
# NOTE(review): valid_ds is not used anywhere in the visible cells; every
# per-epoch evaluation below runs on test_ds -- confirm this is intended.
for epoch in range(num_epochs):
    # print the most recent test accuracy at the start of each epoch
    # (the stored value is a tensor, so its full tensor repr is printed)
    print(f'Epoch: {str(epoch)} starting with accuracy {str(test_accuracies[-1])}')
    
    loss_epoch = []
    # one optimisation step per training batch; collect the batch losses
    for input, target in train_ds:
        train_loss = train_step(model, input, target, cross_entropy_loss, optimizer)
        loss_epoch.append(train_loss)
    # store the mean (tf.reduce_mean) of this epoch's batch losses
    train_losses.append(tf.reduce_mean(loss_epoch))
    
    # evaluate on the test split after the epoch and store loss and accuracy
    test_loss, test_accuracy = test(model, test_ds, cross_entropy_loss)
    test_losses.append(test_loss)
    test_accuracies.append(test_accuracy)
    
# print the test accuracy after the final epoch
print(test_accuracies[-1])
          
    

2021-11-18 15:26:07.778822: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2021-11-18 15:26:07.838943: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2021-11-18 15:26:08.145447: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2021-11-18 15:26:08.178238: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2021-11-18 15:26:08.196359: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


Epoch: 0 starting with accuracy tf.Tensor(0.5884615384615385, shape=(), dtype=float64)


2021-11-18 15:26:08.617325: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2021-11-18 15:26:08.632485: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


Epoch: 1 starting with accuracy tf.Tensor(0.4115384615384615, shape=(), dtype=float64)
Epoch: 2 starting with accuracy tf.Tensor(0.4115384615384615, shape=(), dtype=float64)
Epoch: 3 starting with accuracy tf.Tensor(0.4115384615384615, shape=(), dtype=float64)
Epoch: 4 starting with accuracy tf.Tensor(0.4115384615384615, shape=(), dtype=float64)
Epoch: 5 starting with accuracy tf.Tensor(0.4115384615384615, shape=(), dtype=float64)
Epoch: 6 starting with accuracy tf.Tensor(0.41884615384615387, shape=(), dtype=float64)
Epoch: 7 starting with accuracy tf.Tensor(0.5096153846153846, shape=(), dtype=float64)
Epoch: 8 starting with accuracy tf.Tensor(0.5138461538461538, shape=(), dtype=float64)
Epoch: 9 starting with accuracy tf.Tensor(0.515, shape=(), dtype=float64)
tf.Tensor(0.5146153846153847, shape=(), dtype=float64)


### Gradient Optimization

# Validation