In [2]:
# Task 1: Binary classification
# Input dataset: dataset-1.csv
# 
# Princess Tara Zamani
# U1139219
from numpy import *
from numpy.random import randn

# Module-level value initialised to 0; nothing in the visible code reads or
# updates it. NOTE(review): looks like a leftover -- confirm before removing.
grad_track = 0

# Forward pass: x -> (x . w1) -> sigmoid((x . w1) . w2), plus loss terms.
# Inputs
#   w1, w2 : weight matrices (applied as x.dot(w1), then .dot(w2))
#   x      : input samples
#   y      : actual labels
# Outputs
#   loss        : first value returned by binary_crossEntropy(y, y_pred)
#   loss_grad   : second value returned by binary_crossEntropy(y, y_pred)
#   y_pred      : sigmoid of the output-layer product
#   grad_y_pred : 2 * (y_pred - y)
#   matMult     : x.dot(w1), the hidden values (no activation is applied to them)
def forward(w1, w2, x, y):
    # NOTE(review): this reseeds the global RNG on every call although nothing
    # below draws random numbers (`random` is presumably numpy.random via the
    # star import). Kept for behavioural parity -- confirm whether it is needed.
    random.seed(0)

    hidden = x.dot(w1)
    y_pred = sigmoid_activation(hidden.dot(w2))
    grad_y_pred = 2 * (y_pred - y)

    # The loss helper is handed the post-sigmoid prediction.
    loss, loss_grad = binary_crossEntropy(y, y_pred)

    return loss, loss_grad, y_pred, grad_y_pred, hidden


# Backward pass: one gradient-descent step on both weight matrices.
# Inputs
#   w1, w2      : weights to be updated (modified IN PLACE and also returned)
#   hidden      : hidden (forward value) used for backprop
#   x           : input used for backprop
#   loss_grad   : loss function gradient with respect to y_pred
#   y_pred      : predicted output
#   grad_y_pred : output gradients; only used to decide whether to skip the step
#   lr          : learning rate
# Outputs
#   w1, w2      : updated weights (the same array objects that were passed in)
def backward(w1,w2,hidden,x,loss_grad,y_pred,grad_y_pred,lr=1e-4):
    # Skip the step only when EVERY output gradient is exactly zero.
    # (Testing `.all()` here would drop the whole update as soon as a single
    # sample happened to have a zero gradient.)
    if not grad_y_pred.any():
        return w1, w2

    # y_pred * (1 - y_pred) is the sigmoid derivative written in terms of its
    # output -- assumes y_pred came out of a sigmoid.
    grad_upstream = loss_grad * (y_pred * (1 - y_pred))
    grad_w2 = hidden.T.dot(grad_upstream)
    # Uses w2 as it was BEFORE this step's update.
    grad_hidden = grad_upstream.dot(w2.T)
    grad_w1 = x.T.dot(grad_hidden)

    w1 -= lr * grad_w1
    w2 -= lr * grad_w2
    return w1, w2
    

# Evaluates the current weights: loss over all of x/y, accuracy over the
# first `sample_size` rows.
# Inputs
#   w1, w2      : weights to evaluate (not modified)
#   x           : input samples
#   y           : actual labels
#   sample_size : number of leading rows scored for accuracy; it is also the
#                 accuracy denominator (assumed not to exceed the rows in x)
# Outputs
#   loss     : first value returned by binary_crossEntropy(y, y_pred)
#   accuracy : fraction of scored rows whose thresholded prediction equals y
def evaluation(w1,w2,x,y,sample_size):
    hidden_linear = matmul(x, w1)
    y_pred = sigmoid_activation(hidden_linear.dot(w2))
    loss = binary_crossEntropy(y, y_pred)[0]

    # Threshold at 0.5 (a prediction of exactly 0.5 counts as class 0), then
    # compare against the labels in one vectorised step. Both sides are
    # flattened so a column-shaped y and y_pred line up element by element.
    predicted = (y_pred[:sample_size] > 0.5).astype(float).reshape(-1)
    labels = y[:sample_size].reshape(-1)
    right_count = int((predicted == labels).sum())

    accuracy = right_count / sample_size

    return loss, accuracy


# Logistic sigmoid, applied element-wise when given an array.
# Inputs
#   x : value(s) to squash
# Outputs
#   1 / (1 + e^(-x))
def sigmoid_activation(x):
    decay = exp(-x)
    return 1 / (1 + decay)


# Binary cross-entropy in its "from scores" form, plus its gradient.
# Note that y_pred is treated as a raw score here: the gradient passes it
# through a sigmoid, and the loss is max(z, 0) - z*y + log(1 + e^(-|z|)).
# Inputs
#   y      : labels of data, one per row; reshaped to a column (n, 1)
#   y_pred : predicted output value (expected as an (n, 1) column -- TODO confirm)
# Outputs
#   loss : Binary Cross-Entropy, summed over all samples
#   grad : sigmoid(y_pred) - y, one entry per sample
def binary_crossEntropy(y, y_pred):
    # Use the same column-shaped labels for the loss and the gradient, so a
    # 1-D label vector cannot broadcast against a column y_pred into an n x n
    # matrix and silently inflate the loss.
    labels = y.reshape(y.shape[0], 1)

    loss = sum(maximum(y_pred, 0) - y_pred * labels + log(1 + exp(-abs(y_pred))))
    grad = (1 / (1 + exp(-y_pred))) - labels
    return loss, grad


# Script entry point: loads dataset-1.csv, shuffles the rows, trains the
# two-layer network for 300 epochs and prints progress every epoch.
# Inputs
#   args : command-line arguments (currently unused)
# Outputs
#   None
def main(args):
    data = genfromtxt('dataset-1.csv', delimiter=',')
    random.shuffle(data)

    sample, D_in, D_out, n_neurons = 80, 4, 1, 32

    # First `sample` rows train, the remainder tests; the first D_in columns
    # are features and the next D_out column(s) hold the label.
    x = data[0:sample, 0:D_in]
    y = data[0:sample, D_in:D_in + D_out]

    test_x = data[sample:, 0:D_in]
    test_y = data[sample:, D_in:D_in + D_out]
    test_size = len(test_x)

    w1, w2 = randn(D_in, n_neurons), randn(n_neurons, D_out)

    batched = 1  # batched = 1 => each epoch uses whole training set
                 # batched = 0 => two updates per epoch, one per half of it
    half = sample // 2

    for i in range(300):  # loops 300 epochs
        if batched == 1:
            loss, loss_grad, y_pred, grad_y_pred, hidden = forward(w1, w2, x, y)
            w1, w2 = backward(w1, w2, hidden, x, loss_grad, y_pred, grad_y_pred, lr=5e-5)
            eval_result = evaluation(w1, w2, x, y, sample)
            test_acc = evaluation(w1, w2, test_x, test_y, test_size)
            print(f"{i}-th iteration, Loss = {loss}, \tTrain acc = {eval_result[1]}, \tTest acc = {test_acc[1]}")
        else:
            # forward() returns five values and backward() needs y_pred, so
            # both halves unpack and pass the full set.
            for start, stop in ((0, half), (half, sample)):
                x_half = x[start:stop]
                y_half = y[start:stop]
                loss, loss_grad, y_pred, grad_y_pred, hidden = forward(w1, w2, x_half, y_half)
                w1, w2 = backward(w1, w2, hidden, x_half, loss_grad, y_pred, grad_y_pred, lr=1e-4)

            eval_result = evaluation(w1, w2, x, y, sample)
            print(f"{i}-th iteration, b_cE = {eval_result[0]},\t Loss = {loss},\t Accuracy = {eval_result[1]}")

    
# Run the training script when executed directly; main()'s return value
# becomes the process exit status.
if __name__ == '__main__':
    import sys

    exit_status = main(sys.argv)
    sys.exit(exit_status)
    
    

0-th iteration, Loss = 54.73064318447574, 	Train acc = 0.5125, 	Test acc = 0.6
1-th iteration, Loss = 54.67940307237551, 	Train acc = 0.5125, 	Test acc = 0.6
2-th iteration, Loss = 54.62190182247811, 	Train acc = 0.5125, 	Test acc = 0.6
3-th iteration, Loss = 54.556989229945756, 	Train acc = 0.5125, 	Test acc = 0.6
4-th iteration, Loss = 54.48321943862738, 	Train acc = 0.5125, 	Test acc = 0.6
5-th iteration, Loss = 54.39875449277129, 	Train acc = 0.5125, 	Test acc = 0.6
6-th iteration, Loss = 54.30123198642019, 	Train acc = 0.5125, 	Test acc = 0.6
7-th iteration, Loss = 54.1875836253343, 	Train acc = 0.525, 	Test acc = 0.6
8-th iteration, Loss = 54.05378881936778, 	Train acc = 0.525, 	Test acc = 0.6
9-th iteration, Loss = 53.89454816110343, 	Train acc = 0.525, 	Test acc = 0.6
10-th iteration, Loss = 53.70287350684662, 	Train acc = 0.525, 	Test acc = 0.6
11-th iteration, Loss = 53.46963189230769, 	Train acc = 0.525, 	Test acc = 0.6
12-th iteration, Loss = 53.18318358194855, 	Train acc =

206-th iteration, Loss = 40.31783776434895, 	Train acc = 1.0, 	Test acc = 1.0
207-th iteration, Loss = 40.317671448321406, 	Train acc = 1.0, 	Test acc = 1.0
208-th iteration, Loss = 40.31750775610061, 	Train acc = 1.0, 	Test acc = 1.0
209-th iteration, Loss = 40.31734663715548, 	Train acc = 1.0, 	Test acc = 1.0
210-th iteration, Loss = 40.3171880421327, 	Train acc = 1.0, 	Test acc = 1.0
211-th iteration, Loss = 40.31703192282347, 	Train acc = 1.0, 	Test acc = 1.0
212-th iteration, Loss = 40.31687823213129, 	Train acc = 1.0, 	Test acc = 1.0
213-th iteration, Loss = 40.31672692404091, 	Train acc = 1.0, 	Test acc = 1.0
214-th iteration, Loss = 40.31657795358818, 	Train acc = 1.0, 	Test acc = 1.0
215-th iteration, Loss = 40.31643127683107, 	Train acc = 1.0, 	Test acc = 1.0
216-th iteration, Loss = 40.31628685082141, 	Train acc = 1.0, 	Test acc = 1.0
217-th iteration, Loss = 40.31614463357785, 	Train acc = 1.0, 	Test acc = 1.0
218-th iteration, Loss = 40.316004584059385, 	Train acc = 1.0, 	

SystemExit: 