<a href="https://colab.research.google.com/github/omarFarooq21/AI-ML-/blob/main/xor_detector.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# This is my first attempt at implementing an ANN from scratch.
# Key takeaways: Do not use ReLU at the output layer.
#                Sigmoid (binary) / Softmax (multi-class) work fine for classification at the output layer.
#                Use Xavier initialization and do not rely on a plain NumPy Gaussian init.

# Next steps: Explore optimizers and tanh/sigmoid in the hidden layers, and go through the book after exams...
# Hopefully build a more complex neural network to solve a semi-real-world problem.
import numpy as np
# XOR truth table: one row per sample, one column per input bit.
inputs = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])
# Expected XOR result for the matching row of `inputs`, kept as a column vector.
labels = np.array([
    [0],
    [1],
    [1],
    [0],
])

def xavier_init(size):
    """Return a random array of shape `size` for weight initialisation.

    Values are drawn from a standard normal distribution and then scaled
    by sqrt(1 / size[0]), so the first entry of `size` controls the spread.
    """
    first_dim = size[0]
    scale = np.sqrt(1 / first_dim)
    return np.random.randn(*size) * scale

def model():
    """Create the initial parameters of the network.

    Returns the tuple
    (weights_1, bias_1, weights_2, bias_2, weights_output, bias_output):
    two (2, 2) hidden-layer weight matrices, a (1, 2) output weight row,
    and all-zero biases of shapes (1, 2), (1, 2) and (1, 1).

    Earlier experiments with np.random.rand / np.random.randn for every
    parameter were replaced by Xavier-scaled weights and zero biases.
    """
    # Only the weight draws consume random numbers; they are generated in
    # the order weights_1, weights_2, weights_output.
    weights_1, weights_2, weights_output = (
        xavier_init(shape) for shape in ((2, 2), (2, 2), (1, 2))
    )

    bias_1, bias_2 = np.zeros((1, 2)), np.zeros((1, 2))
    bias_output = np.zeros((1, 1))

    return weights_1, bias_1, weights_2, bias_2, weights_output, bias_output

def ReLU(x):
    """Rectified linear unit: element-wise maximum of 0 and `x`."""
    rectified = np.maximum(0, x)
    return rectified

def MSE_Loss(ypred, ytrue):
    """Return half of the summed squared error between `ypred` and `ytrue`.

    Note that the result is a sum over all elements, not a mean.
    """
    residual = ypred - ytrue
    return np.sum(residual ** 2 / 2)

def binary_cross_entropy_loss(y_pred, y_true):
    """Return the mean binary cross-entropy of `y_pred` against `y_true`.

    Predictions are clipped to [1e-15, 1 - 1e-15] first so that neither
    logarithm is ever evaluated at 0.
    """
    eps = 1e-15
    clipped = np.clip(y_pred, eps, 1 - eps)
    positive_term = y_true * np.log(clipped)
    negative_term = (1 - y_true) * np.log(1 - clipped)
    return -np.mean(positive_term + negative_term)

def forward_pass(list_params, inputs):
    """Run `inputs` through the network and return every intermediate value.

    `list_params` is unpacked as
    (weights_1, bias_1, weights_2, bias_2, weights_output, bias_output).
    Both hidden layers use ReLU and the output layer uses sigmoid.

    Returns (output, a2, a1, z1, z2, z3), where z* are pre-activations and
    a* are the corresponding hidden activations.
    """
    w_hidden1, b_hidden1, w_hidden2, b_hidden2, w_out, b_out = list_params

    # First hidden layer.
    z1 = np.dot(inputs, w_hidden1) + b_hidden1
    a1 = ReLU(z1)

    # Second hidden layer.
    z2 = np.dot(a1, w_hidden2) + b_hidden2
    a2 = ReLU(z2)

    # Output layer: the output weights are stored as a row, hence the transpose.
    z3 = np.dot(a2, w_out.T) + b_out
    output = sigmoid(z3)

    return output, a2, a1, z1, z2, z3

def sigmoid(x):
    """Numerically stable logistic function 1 / (1 + exp(-x)).

    The naive formula overflows in exp(-x) for large negative `x` and
    triggers a RuntimeWarning. Here only exp(-|x|) is evaluated, which lies
    in (0, 1] and cannot overflow:

        x >= 0:  1 / (1 + exp(-x))
        x <  0:  exp(x) / (1 + exp(x))

    Accepts scalars or arrays and returns a value of the same shape.
    """
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () turns a 0-d result back into a NumPy scalar (as the
    # naive formula returned) and leaves n-d arrays unchanged.
    return result[()]

def gradient_activation(x):
    """Derivative of ReLU: integer 1 where `x` is strictly positive, else 0."""
    is_positive = x > 0
    return is_positive.astype(int)

def gradient_sigmoid(x):
    """Derivative of the sigmoid at pre-activation `x`: s(x) * (1 - s(x))."""
    activation = sigmoid(x)
    return activation * (1 - activation)

# Loss derivative used to start backpropagation.
def derivative_loss(output, labels):
    """Return `output - labels`.

    This is the derivative of MSE_Loss (0.5 * sum of squared errors) with
    respect to `output`. For binary cross-entropy behind a sigmoid, the
    same expression is the derivative with respect to the pre-sigmoid
    logit, not with respect to `output`.
    """
    error = output - labels
    return error

## ---

def compute_gradients_and_update(outputs, list_params, inputs, labels, allowed_print, lr=0.03):
    """Backpropagate through the network and apply one gradient-descent step.

    Parameters
    ----------
    outputs : tuple
        The 6-tuple (output, a2, a1, z1, z2, z3) produced by forward_pass.
    list_params : tuple
        (weights_1, bias_1, weights_2, bias_2, weights_output, bias_output).
        The arrays are updated IN PLACE, so the caller's arrays change too.
    inputs : ndarray
        The batch that produced `outputs`.
    labels : ndarray
        Targets, broadcast-compatible with `output`.
    allowed_print : bool
        When truthy, print the binary cross-entropy of `output`, i.e. the
        loss of the parameters before this update.
    lr : float, optional
        Learning rate (step size). Defaults to 0.03, the value that used to
        be hard-coded here.

    Returns
    -------
    tuple
        The six (now updated) parameter arrays, in the same order as
        `list_params`.

    Notes
    -----
    Gradients are summed over the batch, not averaged.

    NOTE(review): dL/dz3 is computed as (output - labels) * sigmoid'(z3),
    which is the gradient of MSE_Loss, while the value printed is the
    binary cross-entropy. The exact BCE gradient with a sigmoid output
    would be just (output - labels). Left unchanged to preserve the
    existing training behaviour.
    """
    output, a2, a1, z1, z2, z3 = outputs
    weights_1, bias_1, weights_2, bias_2, weights_output, bias_output = list_params

    # Output layer: dL/dz3 = dL/doutput * doutput/dz3 (sigmoid output).
    dL_doutput = derivative_loss(output, labels)
    doutput_dz3 = gradient_sigmoid(z3)
    dL_dz3 = dL_doutput * doutput_dz3
    dweights_output = np.dot(a2.T, dL_dz3)  # dz3/dweights_output = a2
    dbias_output = np.sum(dL_dz3, axis=0, keepdims=True)

    # Second hidden layer: propagate through weights_output, then ReLU.
    dL_da2 = np.dot(dL_dz3, weights_output)  # dz3/da2 = weights_output
    da2_dz2 = gradient_activation(z2)
    dL_dz2 = dL_da2 * da2_dz2
    dweights_2 = np.dot(a1.T, dL_dz2)  # dz2/dweights_2 = a1
    dbias_2 = np.sum(dL_dz2, axis=0, keepdims=True)

    # First hidden layer.
    dL_da1 = np.dot(dL_dz2, weights_2)
    da1_dz1 = gradient_activation(z1)
    dL_dz1 = dL_da1 * da1_dz1
    dweights_1 = np.dot(inputs.T, dL_dz1)
    dbias_1 = np.sum(dL_dz1, axis=0, keepdims=True)

    # In-place gradient-descent step. dweights_output has the transposed
    # layout of weights_output (which is stored as a row), hence the `.T`.
    weights_output -= lr * dweights_output.T
    bias_output -= lr * dbias_output
    weights_2 -= lr * dweights_2
    bias_2 -= lr * dbias_2
    weights_1 -= lr * dweights_1
    bias_1 -= lr * dbias_1

    if allowed_print:
        print("Current_Loss: ", binary_cross_entropy_loss(output, labels))
    return weights_1, bias_1, weights_2, bias_2, weights_output, bias_output



# Build a freshly initialised network and run a single forward/backward step.
# The update is applied in place, so `list_params` already holds the new values.
list_params = model()
outputs = forward_pass(list_params, inputs)
# Kept as the cell's final expression so the notebook echoes the updated parameters.
compute_gradients_and_update(outputs, list_params, inputs, labels, allowed_print=True)


Current_Loss:  0.7126742416245981


(array([[ 0.33796559, -0.94396909],
        [ 0.44555933, -0.94879638]]),
 array([[-0.00162611,  0.        ]]),
 array([[-0.47575001,  0.46085631],
        [-0.53644775, -1.21953066]]),
 array([[0.        , 0.00303126]]),
 array([[-1.13334265,  1.79140554]]),
 array([[-0.00205836]]))

In [None]:
# Full-batch training: 5000 gradient steps over the four XOR samples.
for epoch in range(5000):
    outputs = forward_pass(list_params, inputs)

    # Print the loss only on every 100th epoch to keep the log short.
    report_loss = epoch % 100 == 0
    list_params = compute_gradients_and_update(outputs, list_params, inputs, labels, report_loss)

Current_Loss:  0.7067898176381358
Current_Loss:  0.597772962546051
Current_Loss:  0.5671542226008698
Current_Loss:  0.544680024481239
Current_Loss:  0.5254790268509892
Current_Loss:  0.5145486078613186
Current_Loss:  0.5072790986010847
Current_Loss:  0.5033363646520145
Current_Loss:  0.4988476867218368
Current_Loss:  0.4967166767456844
Current_Loss:  0.4950320120048292
Current_Loss:  0.4931062089642466
Current_Loss:  0.4920261492803652
Current_Loss:  0.49036456310206994
Current_Loss:  0.48956588274741536
Current_Loss:  0.47958075094874775
Current_Loss:  0.44172075755251644
Current_Loss:  0.3767077548981707
Current_Loss:  0.29409554066474847
Current_Loss:  0.22696189263771369
Current_Loss:  0.18285787619843352
Current_Loss:  0.15511374116746376
Current_Loss:  0.13542054193742373
Current_Loss:  0.12175666545726917
Current_Loss:  0.11079094453525234
Current_Loss:  0.10179763360502346
Current_Loss:  0.0946795424432097
Current_Loss:  0.08860893168911206
Current_Loss:  0.08352224402690217
Cu

In [None]:
# Decision boundary applied to the sigmoid output.
threshold = 0.5

# One (1, 2) input row per XOR truth-table entry.
new_input1, new_input2, new_input3, new_input4 = (
    np.array([bits]) for bits in ([1, 1], [1, 0], [0, 1], [0, 0])
)

# forward_pass returns (output, a2, a1, z1, z2, z3); only the output is needed.
predicted_output1, predicted_output2, predicted_output3, predicted_output4 = (
    forward_pass(list_params, sample)[0]
    for sample in (new_input1, new_input2, new_input3, new_input4)
)

# Threshold each probability into a hard 0/1 class.
predicted_class1, predicted_class2, predicted_class3, predicted_class4 = (
    (probability >= threshold).astype(int)
    for probability in (
        predicted_output1,
        predicted_output2,
        predicted_output3,
        predicted_output4,
    )
)

report = (
    ("Predicted Class for input [1, 1]:", predicted_class1),
    ("Predicted Class for input [1, 0]:", predicted_class2),
    ("Predicted Class for input [0, 1]:", predicted_class3),
    ("Predicted Class for input [0, 0]:", predicted_class4),
)
for caption, predicted in report:
    print(caption, predicted)

Predicted Class for input [1, 1]: [[0]]
Predicted Class for input [1, 0]: [[1]]
Predicted Class for input [0, 1]: [[1]]
Predicted Class for input [0, 0]: [[0]]
