# Code for 2-layer NN model

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
plt.rcParams["figure.figsize"] = (10,10)
np.random.seed(0)

# Locations of the arrays read with np.load below; set these before running.
X_train_path = None
y_train_path = None
X_test_path = None
y_test_path = None

# One DataFrame per split: the two feature columns plus a "target" column.
train_df = pd.DataFrame(
    np.load(X_train_path),
    columns=["feature_1", "feature_2"],
).assign(target=np.load(y_train_path))
test_df = pd.DataFrame(
    np.load(X_test_path),
    columns=["feature_1", "feature_2"],
).assign(target=np.load(y_test_path))

class Sigmoid:
    """Element-wise logistic sigmoid activation layer.

    ``forward`` caches its input so that ``backward`` can evaluate the
    derivative of the activation at the same point.
    """

    @staticmethod
    def sigmoid(x):
        """Return ``1 / (1 + exp(-x))`` element-wise."""
        return 1 / (1 + np.exp(-x))

    def sigmoid_prime(self, x):
        """Return the sigmoid derivative ``s * (1 - s)`` with ``s = sigmoid(x)``."""
        s = self.sigmoid(x)
        return s * (1 - s)

    def forward(self, input):
        """Store ``input`` for the backward pass and return ``sigmoid(input)``."""
        self.input = input
        return self.sigmoid(self.input)

    def backward(self, output_gradient, learning_rate=None):
        """Return the gradient of the loss with respect to this layer's input.

        ``output_gradient`` is the gradient with respect to this layer's
        output. ``learning_rate`` is accepted so the call signature matches
        layers that have trainable parameters; it is not used here.
        """
        # Chain rule: scale the upstream gradient by the activation's
        # derivative (not by the activation value itself).
        return np.multiply(output_gradient, self.sigmoid_prime(self.input))


class ReLU:
    """Rectified linear unit activation layer, applied element-wise."""

    @staticmethod
    def relu(x):
        """Return ``x`` with every negative entry replaced by zero."""
        return np.maximum(x, 0)

    @staticmethod
    def relu_prime(x):
        """Return 1.0 where ``x`` is strictly positive and 0.0 elsewhere."""
        is_positive = x > 0
        return np.asarray(is_positive, dtype=np.float64)

    def forward(self, input):
        """Remember ``input`` for the backward pass and return ``relu(input)``."""
        self.input = input
        activated = self.relu(input)
        return activated

    def backward(self, output_gradient, learning_rate=None):
        """Return the upstream gradient masked by the ReLU derivative.

        ``learning_rate`` is accepted but unused; this layer has no
        parameters to update.
        """
        slope = self.relu_prime(self.input)
        return np.multiply(output_gradient, slope)



class Dense:
    """Fully connected layer computing ``weights @ input + bias``.

    Inputs are laid out column-wise: a single sample is a column vector of
    shape ``(input_size, 1)`` and a batch is ``(input_size, n_samples)``.
    """

    def __init__(self, input_size, output_size):
        """Create a layer mapping ``input_size`` features to ``output_size``."""
        # He normal initialization as advised by He et al., 2015
        self.weights = np.random.normal(0, np.sqrt(2 / input_size),
                                        size = (output_size, input_size))
        self.bias = np.zeros((output_size, 1))

    def forward(self, input):
        """Store ``input`` for the backward pass and return the affine output."""
        self.input = input
        return np.dot(self.weights, self.input) + self.bias

    def backward(self, output_gradient, learning_rate):
        """Apply one gradient-descent step and return the input gradient.

        ``output_gradient`` is the loss gradient with respect to this layer's
        output and has the same shape as the value returned by ``forward``.
        The weights and bias are updated in place, scaled by
        ``learning_rate``. Returns the loss gradient with respect to the
        layer's input.
        """
        weights_gradient = np.dot(output_gradient, self.input.T)
        # Computed before the update so it uses the weights from the forward pass.
        input_gradient = np.dot(self.weights.T, output_gradient)
        # The bias is shared by every sample (column), so its gradient is the
        # sum over the sample axis; for one column this equals output_gradient.
        bias_gradient = np.sum(output_gradient, axis=1, keepdims=True)
        self.weights -= learning_rate * weights_gradient
        self.bias -= learning_rate * bias_gradient
        return input_gradient


# loss function
def binary_cross_entropy(y_true, y_pred, epsilon = 1e-8):
    """Return the binary cross-entropy of one prediction as a Python scalar.

    ``y_pred`` is clipped to ``[epsilon, 1 - epsilon]`` so that neither
    logarithm is evaluated at zero. The result is extracted with ``.item()``,
    so the computed loss must hold exactly one element.
    """
    prob = np.clip(y_pred, epsilon, 1 - epsilon)
    positive_term = y_true * np.log(prob)
    negative_term = (1 - y_true) * np.log(1 - prob)
    loss = -(positive_term + negative_term)
    return loss.item()


def binary_cross_entropy_prime(y_true, y_pred, epsilon = 1e-8):
    """Return the derivative of binary cross-entropy with respect to ``y_pred``.

    ``y_pred`` is clipped to ``[epsilon, 1 - epsilon]``, mirroring
    ``binary_cross_entropy``, so a prediction that has saturated to exactly
    0 or 1 yields a large finite gradient instead of ``inf``/``nan`` from a
    division by zero. Predictions already inside that range are unaffected.
    """
    # clipping for stable division
    y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
    return (1 - y_true) / (1 - y_pred) - (y_true / y_pred)


def predict(trained_network, x, thresh = 0.5):
    """Return hard 0/1 predictions from a trained network, one per row of ``x``.

    ``trained_network`` is an iterable of layers, each exposing
    ``forward(input)``; the last layer must emit a single value per sample.
    ``x`` is either a DataFrame containing the ``feature_1`` and
    ``feature_2`` columns or an array-like with one sample per row.
    A sample is labelled 1 when the network output is ``>= thresh``.
    """
    if isinstance(x, pd.DataFrame):
        # Take the rows positionally so the result follows row order whatever
        # the DataFrame's index labels are (label lookup with .loc breaks on
        # any index that is not 0..n-1).
        samples = x[["feature_1", "feature_2"]].to_numpy()
    else:
        samples = np.asarray(x)

    preds = []
    for sample in samples:
        # Layers expect a column vector of shape (n_features, 1).
        out = sample.reshape(-1, 1)

        for layer in trained_network:
            out = layer.forward(out)

        preds.append(1 if out.item() >= thresh else 0)

    return np.array(preds, dtype=int)


## Train & evaluate the model

In [None]:
# Train the network one sample at a time (a parameter update after every
# row of train_df), then report classification accuracy on test_df.

# necessary hyperparameters
N_EPOCHS = 450
LEARNING_RATE = 0.001
HIDDEN_SIZE = 100
# every column of train_df except one (the target) counts as an input feature
INPUT_SIZE = len(train_df.columns) - 1
OUTPUT_SIZE = 1

# model architecture: Dense -> ReLU -> Dense -> Sigmoid, applied in list order
network = [
              Dense(INPUT_SIZE, HIDDEN_SIZE),
              ReLU(),
              Dense(HIDDEN_SIZE, OUTPUT_SIZE),
              Sigmoid()
          ]
 
# training loop
for e in range(N_EPOCHS):
    # running sum of the per-sample losses for this epoch
    loss = 0

    for index, row in train_df.iterrows():
        # all values but the last form the feature column vector;
        # the last value of the row is the label
        x = np.array(row.values[:-1]).reshape(-1, 1)
        y = row.values[-1]

        for layer in network:
            # forward prop (x is overwritten with each layer's output)
            x = layer.forward(x)

        # loss calculation (x now holds the network's output for this sample)
        loss += binary_cross_entropy(y, x)

        # backward prop: start from the loss derivative w.r.t. the output
        grad = binary_cross_entropy_prime(y, x)

        # walk the layers last-to-first; each backward() returns the gradient
        # for the layer before it (Dense also updates its parameters in place)
        for layer in reversed(network):
            grad = layer.backward(grad, LEARNING_RATE)

    # averaging
    loss /= len(train_df)

    # logging (every 50th epoch, starting with epoch 0)
    if e % 50 == 0:
        print("Epoch: {}, Average BCE Loss = {:.4f}".format(e, loss))


# evaluate on every column of test_df except the last, passed as a plain array;
# accuracy is the fraction of predictions equal to the stored targets
test_preds = predict(network, test_df[list(test_df.columns)[:-1]].values) 
print("Test Accuracy: {}".format(np.mean((np.equal(test_df["target"].values, test_preds) * 1))))

Epoch: 0, Average BCE Loss = 0.6933
Epoch: 50, Average BCE Loss = 0.5038
Epoch: 100, Average BCE Loss = 0.4238
Epoch: 150, Average BCE Loss = 0.3623
Epoch: 200, Average BCE Loss = 0.3137
Epoch: 250, Average BCE Loss = 0.2727
Epoch: 300, Average BCE Loss = 0.2411
Epoch: 350, Average BCE Loss = 0.2186
Epoch: 400, Average BCE Loss = 0.2037
Test Accuracy: 0.875
