In [6]:
# Task 2: Regression for fix-point binary numbers
# Input dataset: dataset-2.csv
# 
# Princess Tara Zamani
# U1139219
from numpy import *
from numpy.random import randn

def mse_loss(delta):
    """Return the mean squared error of a residual array.

    Inputs
      delta : difference between predicted and actual outputs
    Outputs
      the mean of the element-wise squares of delta
    """
    return mean(square(delta))

def sigmoid_act(x):
    """Apply the logistic sigmoid 1 / (1 + e^(-x)).

    Inputs
      x : value(s) to squash; arithmetic is element-wise for arrays
    Outputs
      the sigmoid of x
    """
    decay = exp(-x)
    return 1 / (1 + decay)

def forward(w1, w2, x, y):
    """Run the network forward and prepare the gradient for backward().

    The hidden layer is sigmoid(x . w1); the output layer is linear (h . w2).

    Inputs
      w1, w2 : input->hidden and hidden->output weights
      x      : inputs
      y      : actual labels
    Outputs
      loss        : mean squared error between the prediction and y
      grad_y_pred : 2 * (prediction - y); note it is not divided by the
                    number of elements
      h           : hidden layer output
    """
    h = sigmoid_act(dot(x, w1))   # hidden = sigmoid(x . w1)
    residual = dot(h, w2) - y     # linear output minus labels
    return mse_loss(residual), 2 * residual, h

def evaluation(w1, w2, x, y):
    """Compute the mean squared error of the network on (x, y).

    Uses the same sigmoid-hidden / linear-output network as forward(), but
    returns only the loss and does not produce any gradient.

    Inputs
      w1, w2 : input->hidden and hidden->output weights
      x      : inputs to evaluate
      y      : actual labels
    Outputs
      loss   : evaluated loss
    """
    hidden = sigmoid_act(x @ w1)        # hidden = sigmoid(x . w1)
    residual = dot(hidden, w2) - y      # linear output minus labels
    return mse_loss(residual)
    
def backward(w1, w2, h, x, grad_y_pred, lr=1e-4):
    """Back-propagate the output gradient and take one gradient-descent step.

    Inputs
      w1, w2      : weights to be updated (modified in place and returned)
      h           : hidden layer output from the forward pass
      x           : input used for the forward pass
      grad_y_pred : output gradients
      lr          : learning rate
    Outputs
      w1, w2      : updated weights
    """
    # Skip the step only when *every* output gradient is exactly zero.
    # `.any()` rather than `.all()`: a single exactly-zero entry (one sample
    # predicted perfectly) must not suppress the update for all other samples.
    if not grad_y_pred.any():
        return w1, w2

    grad_w2 = h.T.dot(grad_y_pred)
    # Propagate through the output layer using w2 *before* it is updated.
    grad_h = grad_y_pred.dot(w2.T)
    # h is the sigmoid output, so h * (1 - h) is the sigmoid derivative.
    grad_w1 = x.T.dot(grad_h * h * (1 - h))
    w1 -= lr * grad_w1
    w2 -= lr * grad_w2
    return w1, w2
    

def _make_fixed_point_dataset():
    """Build the 256-row table of 8-bit patterns and their fixed-point values.

    Columns 0-7 hold the bits of each value 0..255 as produced by unpackbits
    (most significant bit first). Column 8 holds the label: the upper four
    bits read as the integer part plus the lower four bits read as sixteenths.
    """
    codes = arange(256)
    data = zeros((256, 9))
    data[:, :8] = unpackbits(codes.astype(uint8).reshape(-1, 1), axis=1)
    # hi + lo / 16 with hi = code // 16 and lo = code % 16 is exactly code / 16.
    data[:, 8] = codes / 16
    return data


def main(args):
    """Train the network to regress 8-bit patterns onto fixed-point values.

    Builds and shuffles the dataset, uses the first `sample` rows for training
    and the remaining rows for testing, then runs 300 epochs of gradient
    descent, printing the error after every epoch.

    Inputs
      args : command-line arguments (not used)
    Outputs
      None
    """
    sample, D_in, D_out, n_neurons = 128, 8, 1, 16
    random.seed(0)

    # Make the fixed-point data set and shuffle its rows in place.
    data = _make_fixed_point_dataset()
    random.shuffle(data)
    print(data)

    x, y = data[:sample, :D_in], data[:sample, D_in:D_in + 1]
    test_x, test_y = data[sample:, :D_in], data[sample:, D_in:D_in + 1]

    w1, w2 = randn(D_in, n_neurons), randn(n_neurons, D_out)
    print(w2.shape)

    batched = 1  # batched == 1 => one update per epoch on the whole training set
                 # otherwise    => two updates per epoch, one per half of the set
    half = sample // 2  # split point for the two-half mode
    for i in range(300):  # loops 300 epochs
        if batched == 1:
            _, grad_y_pred, h = forward(w1, w2, x, y)
            w1, w2 = backward(w1, w2, h, x, grad_y_pred, lr=5e-4)
            train_mse = evaluation(w1, w2, x, y)
            test_mse = evaluation(w1, w2, test_x, test_y)
            print(f"epoch {i}: Train mse = {train_mse},  \t Test mse = {test_mse}")
        else:
            # Update on each half of the training set in turn.
            for part_x, part_y in ((x[:half], y[:half]), (x[half:], y[half:])):
                _, grad_y_pred, h = forward(w1, w2, part_x, part_y)
                w1, w2 = backward(w1, w2, h, part_x, grad_y_pred, lr=1e-4)
            print("%d-th iteration, mse = %.10f" % (i, evaluation(w1, w2, x, y)))


if __name__ == '__main__':
    # Run the training script and use main()'s return value as the exit status.
    import sys
    raise SystemExit(main(sys.argv))

[[ 1.      0.      0.     ...  1.      0.      9.875 ]
 [ 0.      1.      0.     ...  1.      1.      5.1875]
 [ 1.      0.      1.     ...  1.      0.     10.625 ]
 ...
 [ 0.      1.      1.     ...  0.      1.      7.3125]
 [ 0.      0.      1.     ...  1.      1.      2.9375]
 [ 1.      0.      1.     ...  0.      0.     10.75  ]]
(16, 1)
epoch 0: Train mse = 22.568757878709626,  	 Test mse = 26.631095668653856
epoch 1: Train mse = 17.522527959920975,  	 Test mse = 19.805475043352907
epoch 2: Train mse = 16.410024338642543,  	 Test mse = 18.144060652483248
epoch 3: Train mse = 15.629826426103396,  	 Test mse = 17.185400609768365
epoch 4: Train mse = 14.915916679942821,  	 Test mse = 16.380928503672283
epoch 5: Train mse = 14.24579712796993,  	 Test mse = 15.64144091750078
epoch 6: Train mse = 13.611161388249725,  	 Test mse = 14.944594631290428
epoch 7: Train mse = 13.005971424770571,  	 Test mse = 14.28136731204937
epoch 8: Train mse = 12.425663919329413,  	 Test mse = 13.646373948

SystemExit: 