#### Group Information

Group No: 

- Member 1: Lai Yicheng
- Member 2: Lee Ying Shen
- Member 3: Lim Ting Juin
- Member 4: Koay Chun Keat

#### Import Libraries

In [51]:
%config Completer.use_jedi=False

import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Set a random seed for reproducible results 
tf.random.set_seed(42)

#### Load the dataset

In [52]:
dataset = pd.read_csv('classification_dataset.csv')
dataset.describe()

Unnamed: 0,f1,f2,f3,f4,f5,label
count,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0
mean,0.422609,0.608152,5.572301,0.66537,5.495197,0.5
std,5.105038,6.983217,5.840074,6.439401,4.18677,0.50025
min,-16.877003,-18.725112,-16.255804,-13.320196,-5.700803,0.0
25%,-3.07309,-4.289903,1.759988,-4.489635,2.668686,0.0
50%,0.447864,0.967124,5.760596,-0.465704,5.558958,0.5
75%,3.743536,5.657798,9.55616,5.528407,8.388997,1.0
max,15.376673,17.90449,24.342184,20.199927,20.973491,1.0


#### Define the loss function

In [53]:
def loss_fn(y_true, y_pred):
    """
    This function calculates the loss function
    """

    # Setting a minimum and maximum value to prevent log(0) and log(1)
    # epsilon = 1e-7
    # y_pred = tf.clip_by_value(y_pred, epsilon, 1. - epsilon)
    y_true = tf.cast(y_true, tf.float32)

    # Binary Negative Log-Likelihood 
    epsilon = 1e-7
    return -tf.reduce_mean(y_true * tf.math.log(y_pred + epsilon) + (1 - y_true) * tf.math.log(1 - y_pred + epsilon))

#### Define function to perform prediction

In [54]:
def sigmoid(output):
    """ 
    This function calculates the sigmoid function.
    """
    return 1 / (1 + tf.exp(-output))

def relu(inputs):
    """ 
    This function calculates the ReLU function.
    """
    return tf.maximum(0, inputs)

def forward(inputs, weights, biases):
    """ 
    This function calculates the forward pass (predicts the label).
    """
    activation = 0
    num_layers = len(weights) - 1
    for i in range(num_layers):
        weighted_sum = tf.matmul(inputs, weights[i], False, False) + biases[i]
        activation = relu(weighted_sum)

    weighted_sum_output = tf.matmul(activation, weights[-1], False, False) + biases[-1]
    prediction = sigmoid(weighted_sum_output)

    return prediction

#### Define function for model training
Display the training and validation loss values for each epoch of the training loop. The displayed value must be in 6 decimal places.<br>
Hint: <br>
Use `tf.GradientTape` to compute the gradients.

In [55]:
def train(inputs, targets, weights, biases, learning_rate):
    """ 
    This function performs the forward pass, computes the gradient and update the weights and biases.
    """
    with tf.GradientTape(persistent=True) as tape:
        outputs = forward(inputs, weights, biases)
        loss = loss_fn(targets, outputs)

    for i in range(len(weights)):
        weights_grad = tape.gradient(loss, weights[i])
        biases_grad = tape.gradient(loss, biases[i])
        # Update weights and biases
        weights[i].assign_sub(learning_rate * weights_grad)
        biases[i].assign_sub(learning_rate * biases_grad)

    # Delete the tape after using it
    del tape

    return weights, biases, loss

def fit(train_dataset, valid_dataset, weights, biases, learning_rate, epochs):
    """ 
    This function implements the training loop.
    """

    train_losses = []
    valid_losses = []

    for epoch in range(epochs):
        # Training loop
        for inputs_batch, targets_batch in train_dataset:
            weights, biases, loss = train(inputs_batch, targets_batch, weights, biases, learning_rate)
        train_losses.append(loss)

        # Validation loop
        valid_loss_avg = 0
        valid_steps = 0
        for valid_inputs, valid_targets in valid_dataset:
            valid_outputs = forward(valid_inputs, weights, biases)
            valid_loss = loss_fn(valid_targets, valid_outputs)
            valid_loss_avg += valid_loss
            valid_steps += 1
        avg_valid_loss = valid_loss_avg / valid_steps
        valid_losses.append(avg_valid_loss)

        print(f"Epoch: {epoch}, Train Loss: {loss}, Validation Loss: {avg_valid_loss}")

    return weights, biases, train_losses, valid_losses


#### Define the tensors to hold the weights and biases (create the model)
Hint: <br>
Use `tf.Variable` to create the tensors.<br>
Put the tensors in a list.

In [56]:
features = dataset.iloc[:, :-1]  # All rows, all columns except the last
labels = dataset.iloc[:, -1]  # All rows, only the last column

# Convert the pandas DataFrame into a TensorFlow Dataset
dataset_tf = tf.data.Dataset.from_tensor_slices((features.values, labels.values))

input_size = 5  # 5 features
hidden_size = 8  # Size of the hidden layer
output_size = 1  # Predict 2 classes

# Initialize weights and biases for each layer in lists
weights = [tf.Variable(tf.random.normal([input_size, hidden_size], stddev=0.1)),
           tf.Variable(tf.random.normal([hidden_size, output_size], stddev=0.1))]

biases = [tf.Variable(tf.zeros([hidden_size])),
          tf.Variable(tf.zeros([output_size]))]

learning_rate = 0.05

#### Split the dataset
The ratio of training and test is 7:1:2.

In [57]:
# Shuffle the dataset
dataset_tf = dataset_tf.shuffle(buffer_size=len(dataset_tf), seed=42)

# Calculate the number of examples
total_size = len(dataset_tf)
train_size = int(0.7 * total_size)
valid_size = int(0.1 * total_size)
# The rest is used for testing

# Create the training, validation and test sets
train_dataset = dataset_tf.take(train_size)
valid_dataset = dataset_tf.skip(train_size).take(valid_size)
test_dataset = dataset_tf.skip(train_size + valid_size)

#### Normalize the data

In [58]:
# Define a function to normalize the data
def normalize_data(features, labels):
    features = tf.cast(features, tf.float32)
    features = (features - tf.reduce_min(features)) / (tf.reduce_max(features) - tf.reduce_min(features))
    return features, labels

# Apply the normalization function to the datasets
train_dataset = train_dataset.map(normalize_data)
valid_dataset = valid_dataset.map(normalize_data)
test_dataset = test_dataset.map(normalize_data)


train_dataset = train_dataset.batch(batch_size=32, drop_remainder=True)
valid_dataset = valid_dataset.batch(batch_size=32, drop_remainder=True)

#### Train the model

In [59]:
# Train the model
weights, biases, train_losses, valid_losses = fit(train_dataset, valid_dataset, weights, biases, learning_rate, 30)

Epoch: 0, Train Loss: 0.694969117641449, Validation Loss: 0.6939039826393127
Epoch: 1, Train Loss: 0.6939254403114319, Validation Loss: 0.6951074600219727
Epoch: 2, Train Loss: 0.6937335729598999, Validation Loss: 0.6932508945465088
Epoch: 3, Train Loss: 0.6934312582015991, Validation Loss: 0.6935362815856934
Epoch: 4, Train Loss: 0.6957816481590271, Validation Loss: 0.6928173899650574
Epoch: 5, Train Loss: 0.6924941539764404, Validation Loss: 0.6926401257514954
Epoch: 6, Train Loss: 0.6928074955940247, Validation Loss: 0.6935842633247375
Epoch: 7, Train Loss: 0.7011305093765259, Validation Loss: 0.6924273371696472
Epoch: 8, Train Loss: 0.6931825876235962, Validation Loss: 0.692958652973175
Epoch: 9, Train Loss: 0.6932671070098877, Validation Loss: 0.6930678486824036
Epoch: 10, Train Loss: 0.6945281028747559, Validation Loss: 0.6933467984199524
Epoch: 11, Train Loss: 0.6926710605621338, Validation Loss: 0.6928125023841858
Epoch: 12, Train Loss: 0.6932002305984497, Validation Loss: 0.69

#### Display the training loss and validation loss against epoch graph

In [60]:
len(valid_losses)

30

#### Predict the test set

#### Display the confusion matrix and the classification report.