#### Group Information

Group No: 

- Member 1: Lai Yicheng
- Member 2: Lee Ying Shen
- Member 3: Lim Ting Juin
- Member 4: Koay Chun Keat

#### Import Libraries

In [25]:
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib
from matplotlib import pyplot as plt
print("TensorFlow version:", tf.__version__)

# Set a random seed for reproducible results 
tf.random.set_seed(42)

TensorFlow version: 2.16.1


#### Load the dataset

In [127]:
ds = pd.read_csv('classification_dataset.csv')
ds.head()

len(ds[ds['label'] == 1]), len(ds[ds['label'] == 0])

(500, 500)

In [None]:
for i in range(len(ds.columns[:-1])):
  label = ds.columns[i]
  plt.hist(ds[ds['label'] == 1][label], color="blue", label="Positive")
  plt.hist(ds[ds['label'] == 0][label], color="red", label="Negative")
  plt.title(label)
  plt.ylabel("Frequency")
  plt.xlabel(label)
  plt.legend()
  plt.show()

#### Define the loss function

In [191]:
def loss_fn(y_true, y_pred):
    """
    This function calculates the loss function
    """
    # return 0.5 * tf.reduce_mean(tf.square(y_true - y_pred)) # equivalent to formula (1/2m) * sum((y_true - y_predict)^2)
    return -y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred)


#### Define function to perform prediction

In [444]:
def sigmoid(weighted_sum):
    """
    This function calculates the sigmoid function.

    Parameters: weighted_sum (float): The weighted sum of inputs.

    Returns: result (float): The result of the sigmoid function.
    """
    result = 1 / (1 + tf.exp(-weighted_sum))
    return result


def relu(weighted_sum):
    """
    This function calculates the ReLU function.

    Parameters: weighted_sum (float): The weighted sum of the inputs and weights.

    Returns: result (float): The result of the ReLU function.
    """
    return tf.maximum(0.0, weighted_sum)


# feedforward neural network (FNN)
def forward(inputs, weights, biases):
    """
    This function calculates the forward pass (predicts the label).

    Parameters: inputs (list): The input features, weights (list): The weights of the neural network.

    Returns: prediction (float): The predicted label.
    """

    weighted_sum_input = np.dot(weights["w_hidden"], inputs) + biases["b_hidden"]
    # print("weightedsuminput: ", weighted_sum_input)
    output_hidden_activation = relu(weighted_sum_input)
    # print("hiddenactivation: ", output_hidden_activation)

    weighted_sum_activation = tf.reduce_sum(tf.multiply(weights["w_output"], output_hidden_activation)) + biases["b_output"]
    # print("weightedsumactivation: ", weighted_sum_activation)
    prediction = sigmoid(weighted_sum_activation)
    # print("prediction:", prediction)

    return prediction, weighted_sum_activation, output_hidden_activation, weighted_sum_input

#### Define function for model training
Display the training and validation loss values for each epoch of the training loop. The displayed value must be in 6 decimal places.<br>
Hint: <br>
Use `tf.GradientTape` to compute the gradients.

In [451]:
def train(x_train, y_train, weights, biases, learning_rate):
    """
    This function performs the forward pass, computes the gradient and update the weights and biases.

    Parameters: x_train (list): The input features, y_true (float): The true label, weights (list): The weights of the neural network, biases (list): The biases of the neural network, learning_rate (float): The learning rate.

    Returns: weights (list): The updated weights, biases (list): The updated biases, loss (float): The loss value.
    """

    # Perform the forward pass
    y_pred, weighted_sum_activation, output_hidden_activation, weighted_sum_input = forward(
        x_train, weights, biases
    )

    # Compute the loss
    loss = loss_fn(y_train, y_pred)

    # Perform the backward pass/ backprop
    y_pred = tf.Variable(y_pred)
    y_true = tf.Variable(y_train)
    weighted_sum_activation = tf.Variable(weighted_sum_activation)
    output_hidden_activation = tf.Variable(output_hidden_activation)
    weighted_sum_input = tf.Variable(weighted_sum_input)
    weights["w_output"] = tf.Variable(weights["w_output"])
    biases["b_output"] = tf.Variable(biases["b_output"])
    weights["w_hidden"] = tf.Variable(weights["w_hidden"])
    biases["b_hidden"] = tf.Variable(biases["b_hidden"])

    dJ_dw = {
    "dJ_dw_hidden": np.zeros((5, 5)),
    "dJ_dw_output": np.zeros((1, 5)),
    }

    dJ_db = {
    "dJ_db_hidden": np.zeros(5),
    "dJ_db_output": np.zeros(1),
    }

    # 1. Compute the gradients
    with tf.GradientTape(persistent=True) as t:
        t.watch(y_pred)
        t.watch(y_true)
        t.watch(weighted_sum_activation)
        t.watch(output_hidden_activation)
        t.watch(weighted_sum_input)
        t.watch(weights["w_output"])
        t.watch(biases["b_output"])
        t.watch(weights["w_hidden"])
        t.watch(biases["b_hidden"])

        # a) compute gradient for weights
        # a)i) compute gradient for weights for output layer

        # J --> y_pred --> weighted_sum_activation --> w[output] ( affected by the following )
        # dJ_dw[output] = d(J)_d(y_pred) * d(y_pred)_d(weighted_sum_activation) * d(weighted_sum_activation)_d(w[output])

        J = -y_true * tf.math.log(y_pred) - (1 - y_true) * tf.math.log(1 - y_pred)
        dJ_dy_pred = t.gradient(J, y_pred)
        # print("dJ_dy_pred: ", dJ_dy_pred)

        y_pred = 1 / (1 + tf.exp(-weighted_sum_activation))
        dy_pred_dweighted_sum_activation = t.gradient(y_pred, weighted_sum_activation)
        # print("dy_pred_dweighted_sum_activation: ", dy_pred_dweighted_sum_activation)

        weighted_sum_activation = (
            weights["w_output"] * output_hidden_activation + biases["b_output"]
        )
        dweighted_sum_activation_dw = t.gradient(
            weighted_sum_activation, weights["w_output"]
        )
        # print("dweighted_sum_activation_dw: ", dweighted_sum_activation_dw)

        dJ_dw["dJ_dw_output"] = (
            dJ_dy_pred * dy_pred_dweighted_sum_activation * dweighted_sum_activation_dw
        )

        # print("dJ_dw_output: ", dJ_dw["dJ_dw_output"])

        # a)ii) compute gradient for weights for hidden layer

        # J --> y_pred --> weighted_sum_activation --> w[output] --> weighted_sum_input --> w[hidden] ( affected by the following )
        # dJ_dw[hidden] = d(J)_d(y_pred) * d(y_pred)_d(weighted_sum_activation) * d(weighted_sum_activation)_d(w[output]) * d(weighted_sum_input)_d(w[hidden])

        output_hidden_activation = tf.maximum(0.0, weighted_sum_input)
        doutput_hidden_activation_dweighted_sum_input = t.gradient(
            output_hidden_activation, weighted_sum_input
        )
        # print("doutput_hidden_activation_dweighted_sum_input: ", doutput_hidden_activation_dweighted_sum_input)

        # print shape and datatype
        # print("x_train: ", x_train)
        # print("weights[w_hidden]: ", weights["w_hidden"])
        # print("biases[b_hidden]: ", biases["b_hidden"])
        weighted_sum_input = weights["w_hidden"] * x_train + biases["b_hidden"]
        dweighted_sum_input_dw = t.gradient(weighted_sum_input, weights["w_hidden"])
        # print("dweighted_sum_input_dw: ", dweighted_sum_input_dw)

        dJ_dw["dJ_dw_hidden"] = (
            dJ_dw["dJ_dw_output"]
            * doutput_hidden_activation_dweighted_sum_input
            * dweighted_sum_input_dw
        )
        
        # print("dJ_dw_hidden: ", dJ_dw["dJ_dw_hidden"])

        # b) compute gradient for biases
        # b)i) compute gradient for biases for output layer

        # J --> y_pred --> weighted_sum_activation --> b[output] ( affected by the following )
        # dJ_db[output] = d(J)_d(y_pred) * d(y_pred)_d(weighted_sum_activation) * d(weighted_sum_activation)_d(b[output])

        dweighted_sum_activation_db = t.gradient(weighted_sum_activation, biases["b_output"])
        # print("dweighted_sum_activation_db: ", dweighted_sum_activation_db)

        dJ_db["dJ_db_output"] = dJ_dy_pred * dy_pred_dweighted_sum_activation * dweighted_sum_activation_db
        # print("dJ_db_output: ", dJ_db["dJ_db_output"])

        # b)i) compute gradient for biases for hidden layer

        # J --> y_pred --> weighted_sum_activation --> w[output] --> weighted_sum_input --> b[hidden] ( affected by the following )
        # dJ_db[hidden] = d(J)_d(y_pred) * d(y_pred)_d(weighted_sum_activation) * d(weighted_sum_activation)_d(w[output]) * d(weighted_sum_input)_d(b[hidden])

        dweighted_sum_input_db = t.gradient(weighted_sum_input, biases["b_hidden"])
        # print("dweighted_sum_input_db: ", dweighted_sum_input_db)

        dJ_db["dJ_db_hidden"] = dJ_dw["dJ_dw_output"] * doutput_hidden_activation_dweighted_sum_input * dweighted_sum_input_db
        # print("dJ_db_hidden: ", dJ_db["dJ_db_hidden"])

    # 2. Update the weights and biases
    # print("weights[w_hidden]: ", weights["w_hidden"])
    # print("learning_rate: ", learning_rate)
    # print("dJ_dw[dJ_dw_hidden]: ", dJ_dw["dJ_dw_hidden"])
    weights["w_hidden"] = weights["w_hidden"] - tf.cast(learning_rate, dtype=tf.float64) * dJ_dw["dJ_dw_hidden"]
    weights["w_output"] = weights["w_output"] - tf.cast(learning_rate, dtype=tf.float64) * dJ_dw["dJ_dw_output"]
    biases["b_hidden"] = biases["b_hidden"] - tf.cast(learning_rate, dtype=tf.float64) * dJ_db["dJ_db_hidden"]
    biases["b_output"] = biases["b_output"] - tf.cast(learning_rate, dtype=tf.float64) * dJ_db["dJ_db_output"]

    return weights, biases, loss

def fit(x_train, y_train, weights, biases, learning_rate, epochs=10):
    """

    This function implements the training loop.

    """

    losses = []

    for i in range(epochs):
        for j in range(len(x_train)):
            x_train_row = x_train[j]
            y_train_row = y_train[j]
            weights, biases, loss = train(
                x_train_row, y_train_row, weights, biases, learning_rate
            )
            losses.append(loss)
        
        # show loss in 6 decimal places
        formatted_loss = f"{loss[0]:.6f}"
        print(f"Epoch: {i}, Loss: {formatted_loss}")
    
    return weights, biases, losses

#### Define the tensors to hold the weights and biases (create the model)
Hint: <br>
Use `tf.Variable` to create the tensors.<br>
Put the tensors in a list.

In [430]:
ds_tf = tf.Variable(ds)
ds_tf

# Example weights (randomly initialized)
np.random.seed(42)

weights = {
  "w_hidden": tf.Variable(np.random.randn(5, 5)),  # Weight matrix for hidden layer
  "w_output": tf.Variable(np.random.randn(1, 5)),  # Weight matrix for output layer
}

biases = {
  "b_hidden": tf.Variable(np.random.randn(1,5)),  # Bias vector for hidden layer
  "b_output": tf.Variable(np.random.randn(1)),  # Bias vector for output layer
}

learning_rate = tf.constant(0.01)

#### Split the dataset
The ratio of training and test is 7:1:2.

In [378]:
# Split the given dataset into three training, validation and test with a ratio of 7:1:2.
ds_shuffle = tf.random.shuffle(ds_tf, seed=42)
train_data, valid_data, test_data = ds_shuffle[:700], ds_shuffle[700:800], ds_shuffle[800:]

x_train, y_train = train_data[:, :-1], train_data[:, -1]
x_valid, y_valid = valid_data[:, :-1], valid_data[:, -1]
x_test, y_test = test_data[:, :-1], test_data[:, -1]

#### Normalize the data

In [379]:
norm_x_train = (x_train - tf.reduce_mean(x_train, axis=0)) / tf.math.reduce_std(x_train, axis=0)
norm_x_valid = (x_valid - tf.reduce_mean(x_valid, axis=0)) / tf.math.reduce_std(x_valid, axis=0)
norm_x_test = (x_test - tf.reduce_mean(x_test, axis=0)) / tf.math.reduce_std(x_test, axis=0)

#### Train the model

In [452]:
# Train the model
weights, biases, train_losses = fit(norm_x_train, y_train, weights, biases, learning_rate, 10)

# Validate the model
_,_, valid_losses = fit(norm_x_valid, y_valid, weights, biases, learning_rate, 10)

Epoch: 1, Loss: 0.135067


KeyboardInterrupt: 

#### Display the training loss and validation loss against epoch graph

In [None]:
# display training loss and validation loss against epoch graph



#### Predict the test set

#### Display the confusion matrix and the classification report.