# Homework 2: Two-layer neural network
### Due Tuesday, January 29, 2019, 5pm
#### Jesse Zhu
#### ML-W2019

Using the MNIST hand-written digits dataset, we aim to categorize the digits 0-9 with a two-layer neural network,
composed of one hidden layer and one output layer. Both layers will use the sigmoid activation function, and the
network is fully connected.

Sigmoid function: 1/ (1+ e^-z)

In [1]:
#import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

In [2]:
#IMPORT DATA#################
testfile = "mnist_test.csv"
trainfile = "mnist_train.csv"

#Data is 785 columns by N rows. First column = Label (0-9), others = 0:255
test_data = np.genfromtxt(testfile, skip_header=True, delimiter=',')
train_data = np.genfromtxt(trainfile, skip_header=True, delimiter=',')

#PREPROCESSING###############

#SCALE to 0:1
test_data[:,1:] /= 255
train_data[:,1:] /= 255

In [65]:
def sigmoid(z):
    return 1/(1+np.exp(-z))

#Accuraccy function
def acc(w1, w2, data, debug = 0):
    """
    input: 
        w1: input to hidden weights [785 by hu]
        w2: hidden to output weights [(hu+1) by 10]
        data: matrix of N data points by 785 values, where the first value is the target (0-9), and the rest are
            scaled gray-scale values between 0 and 1
    output: Percentage of correct classifications, where a classification is taken using the highest perceptron
        output value
    """
    length = len(data[:,0])
    xi = np.append(np.ones([length,1]), data[:, 1:],1) #Append 1s for bias & strip label from data[]
    h_act = np.array(list(map(sigmoid, xi @ w1[:, :]))) #N by (hidden_units)
    hi = np.append(np.ones([length,1]), h_act[:,:],1) #N by (hidden_units+1)

    o_act = np.array(list(map(sigmoid, hi @ w2[:, :]))) #N by 10

    amax = o_act.argmax(axis=1) #Predicted value
    if debug:
        print(o_act.shape)
        print(amax[0:25])
    ret = np.sum(amax == data[:,0])
    return ret/length

#Confusion matrix
def cm(w1,w2, data):
    """
    input: 
        w1: input to hidden weights [785 by hu]
        w2: hidden to output weights [(hu+1) by 10]
        data: matrix of N data points by 785 values, where the first value is the target (0-9), and the rest are
            scaled gray-scale values between 0 and 1
    output: confusion matrix 11x11 including labels (actual vs predicted)
    """
    length = len(data[:,0])
    xi = np.append(np.ones([length,1]), data[:, 1:],1) #Append 1s for bias & strip label from data[]
    h_act = np.array(list(map(sigmoid, xi @ w1[:, :]))) #N by (hidden_units)
    hi = np.append(np.ones([length,1]), h_act[:,:],1) #N by (hidden_units+1)
#sigmoid
    o_act = np.array(list(map(sigmoid, hi @ w2[:, :]))) #N by 10
    #o_act= hi @ w2[:, :]
    #print(o_act[0:3,:])
    
    m = np.zeros([11,11])
    m[0,1:] = np.arange(10)
    m[1:,0] = np.arange(10)
    
    for i in range(0, length):
        actual = int(data[i, 0])
        predic = int((o_act[i,:]).argmax(axis=0))
        m[actual+1, predic+1] += 1
        
    return m.astype(int)

#Training through back-propagation
def train(w1, w2, data, lr, m, debug = 0):
    """
    input: 
        w1: input to hidden weights [785 by hu]
        w2: hidden to output weights [(hu+1) by 10]
        data: matrix of N data points by 785 values, where the first value is the target (0-9), and the rest are
            scaled gray-scale values between 0 and 1
        lr: learning rate, (0:1)
        m: momentum, (0:1)
    output: Altered input weight matricies
    """
    length = len(data[:,0])
    oh_deltas_prev = np.zeros(w2.shape) #initialize previous weights to 0 (momentum calculations)
    hi_deltas_prev = np.zeros(w1.shape)

    for i in range(0,length):
        xi = np.append([1], data[i, 1:]) #add "1" to input beginning for offset -> 1x785
        hidden = np.array(list(map(sigmoid, xi @ w1[:, :]))) #1 by (hidden_units)
        hi = np.append(np.ones([length,1]), hidden[:,:],1) #1 by (hidden_units+1)
        out = np.array(list(map(sigmoid, hi @ w2[:, :]))) #1 by 10
        
        target = np.full((10),0.1)
        tindex = int(data[i,0])
        target[tindex] = 0.9 #1x10 of 0s, with target = 0.9, else 0.1
        #Error
        err_o = out * (1-out) *(target-out) #1 by 10 array, output error
        err_h = hidden * (1-hidden) * (w2[1:,:]@err_o) #hu by 1 array, hidden error
        
        #Backpropagation Deltas
        oh_deltas = lr * np.outer(hi, err_o) +m*oh_deltas_prev #(hu+1) by 10
        hi_deltas = lr * np.outer(xi, err_h) +m*hi_deltas_prev #785 by hu
        oh_deltas_prev = oh_deltas
        hi_deltas_prev = hi_deltas
        #Update weights
        w2 += oh_deltas
        w1 += hi_deltas
    return

In [66]:
epochs = 1
learning_rate = 0.1
momentum = 0.9
hidden_units = [10, 20, 100]


for hu in hidden_units:
    print("***# Hidden = ",hu,"***************")
    w_hi = (np.random.rand(785,hu) * 0.10) - 0.05 #weights, input to hidden
    w_oh = (np.random.rand(hu+1,10) * 0.10) - 0.05 #weights, hidden to output
    acc_test = []
    acc_train = []
    
    for i in range(0,epochs):
        train(w_hi, w_oh, train_data, learning_rate, momentum)
        acc_test.append(acc(w_hi, w_oh,test_data))
        acc_train.append(acc(w_hi, w_oh,train_data))
        print(i, end='', flush=True)
    print(" ") #newline
    
    print(cm(w_hi, w_oh, test_data))
    print("final test acc = ", acc_test[len(acc_test)-1])
    print("final train acc = ", acc_train[len(acc_train)-1])

    plt.plot(acc_test)
    plt.plot(acc_train)
    plt.xlabel("epochs")
    plt.ylabel("accuracy(%)")
    title = "Training and Test accuracy, hidden units = " + str(hu)
    plt.title(title)
    plt.legend(("test", "train"),loc='lower right')
    plt.show()

***# Hidden =  10 ***************


IndexError: too many indices for array

In [62]:
test1 = np.zeros(5)
test2 = np.append(1, test1)
print(list(map(sigmoid,[0.1, 0.2, 0.3, 0.4, 0.8, 0.9])))
t3 = np.full((10),0.1)
print(1-t3)
t1 = np.array([1,2, 3])
t2 = np.array([2,5, 10])
print(t1*t2)

t4 = np.array([[2,3,4],[1,1,1]])
t5= np.ones([3])
print(t4)
print(t5)
print(t4@t5)
print(np.zeros(t4.shape))

[0.52497918747894, 0.549833997312478, 0.574442516811659, 0.598687660112452, 0.6899744811276125, 0.7109495026250039]
[0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9]
[ 2 10 30]
[[2 3 4]
 [1 1 1]]
[1. 1. 1.]
[9. 3.]
[[0. 0. 0.]
 [0. 0. 0.]]
