# d-M-1 neural network with tanh-type neurons from scratch (classification problem)

In [1]:
import numpy as np
import pandas as pd

def loaddata(path):
    """Load a whitespace-delimited numeric table and split it into X and Y.

    The file is read without a header row. Every column except the last is
    treated as a feature and the last column as the target.

    Args:
        path: Anything ``pandas.read_csv`` accepts (file path or file-like
            object).

    Returns:
        X: Array of shape (rows, cols) whose first column is all ones (the
            bias term) followed by the feature columns.
        Y: Array of shape (rows, 1) holding the last column of the file.
    """
    # Raw string: '\s' in a plain literal is an invalid escape sequence and
    # triggers a warning on recent Python versions.
    data = pd.read_csv(path, sep=r'\s+', header=None).to_numpy()
    row = data.shape[0]
    X = np.c_[np.ones((row, 1)), data[:, :-1]]
    Y = data[:, -1:]  # slice (not index) so Y stays two-dimensional
    return X, Y

# Read the training and testing sets; each call returns the bias-augmented
# feature matrix and the column of targets.
Xtrain, Ytrain = loaddata(
    'D:/Program Files/machine learning/Lin ML Tech/作业四/NNetTraining.txt'
)
Xtest, Ytest = loaddata(
    'D:/Program Files/machine learning/Lin ML Tech/作业四/NNetTesting.txt'
)

# Initialize the shallow NN, forward prop and backward prop.

In [2]:
def initialtheta(d, M, r):
    """Draw the initial weights of a d-M-1 network uniformly from [-r, r).

    Args:
        d: Number of rows of the first weight matrix (the caller in this
            file passes the number of columns of X).
        M: Number of hidden neurons.
        r: Half-width of the uniform initialization range.

    Returns:
        theta1: Array of shape (d, M), input-to-hidden weights.
        theta2: Array of shape (M + 1, 1), hidden-to-output weights; the
            extra row is for the hidden layer's bias unit.
    """
    hidden_weights = np.random.uniform(low=-r, high=r, size=(d, M))
    output_weights = np.random.uniform(low=-r, high=r, size=(M + 1, 1))
    return hidden_weights, output_weights

def derivative_tanh(s):
    """Return the derivative of tanh evaluated at s, i.e. 1 - tanh(s)^2.

    Works element-wise on arrays as well as on scalars.
    """
    activation = np.tanh(s)
    return 1 - np.square(activation)

def nnetwork(X, Y, M, r, eta, T):
    """Train a d-M-1 tanh network by stochastic gradient descent.

    Each iteration picks one example at random, runs a forward pass, and
    back-propagates the gradient of the squared error (y - tanh(s2))^2.

    Args:
        X: 2-D array of inputs, one example per row. No bias column is added
            here for the input layer, so X should already contain one (as
            the arrays returned by ``loaddata`` do).
        Y: Targets, indexed by the same row number as X.
        M: Number of hidden neurons.
        r: Half-width of the uniform weight-initialization range.
        eta: Learning rate.
        T: Number of SGD updates.

    Returns:
        theta1: Input-to-hidden weights, shape (X.shape[1], M).
        theta2: Hidden-to-output weights, shape (M + 1, 1).
    """
    row, col = X.shape
    theta1, theta2 = initialtheta(col, M, r)
    for _ in range(T):
        # Sample a single training example uniformly at random.
        randpos = np.random.randint(0, row)
        Xone = X[randpos:randpos + 1, :]  # slice keeps the (1, col) shape
        Yone = Y[randpos]

        # Forward pass: hidden layer, prepend bias unit, output neuron.
        s1 = np.dot(Xone, theta1)
        x1 = np.c_[np.ones((1, 1)), np.tanh(s1)]
        s2 = np.dot(x1, theta2)
        x2 = np.tanh(s2)[0][0]

        # Backward pass. The output neuron is tanh as well, so the chain
        # rule contributes tanh'(s2) to the output delta.
        delta2 = -2*(Yone - x2)*derivative_tanh(s2)
        # Skip theta2's first row: the hidden bias unit has no incoming
        # weights. delta1 must use theta2 *before* it is updated below.
        delta1 = delta2*theta2[1:, :].T*derivative_tanh(s1)

        theta2 -= eta*x1.T*delta2
        theta1 -= eta*Xone.T.dot(delta1)
    return theta1, theta2

def errfunction(X, Y, theta):
    """Return the 0/1 classification error rate of a tanh network.

    Args:
        X: 2-D array of inputs, one example per row. No bias column is added
            for the input layer, so X should already contain one.
        Y: Labels in {-1, +1}. May be given with the same shape as the
            network output or as a flat vector with the same number of
            elements.
        theta: Sequence of weight matrices, one per layer. A bias column of
            ones is prepended to the activations of every layer except the
            last.

    Returns:
        Number of mismatching predictions divided by the number of rows of X.
    """
    row = X.shape[0]
    x = X
    for weights in theta[:-1]:
        x = np.c_[np.ones((row, 1)), np.tanh(np.dot(x, weights))]
    output = np.tanh(np.dot(x, theta[-1]))

    # Threshold at zero; an output of exactly zero is classified as +1.
    Yhat = np.sign(output)
    Yhat[output == 0] = 1

    # Align labels with the predictions when only the rank differs (e.g. a
    # flat label vector against an (n, 1) output). Otherwise `!=` would
    # broadcast to an (n, n) matrix and silently inflate the mismatch count.
    Y = np.asarray(Y)
    if Y.ndim != Yhat.ndim and Y.size == Yhat.size:
        Y = Y.reshape(Yhat.shape)
    return np.sum(Yhat != Y)/row

# Test which number of hidden neurons gives the lowest error rate.

In [None]:
# Candidate hidden-layer sizes. For each one, the test error is accumulated
# over 500 independently trained networks and the average is printed.
M = [1, 6, 11, 16, 21]
Eout = np.zeros(len(M))
for i in range(500):
    for j, hidden_units in enumerate(M):
        theta1, theta2 = nnetwork(Xtrain, Ytrain, hidden_units, 0.1, 0.1, 50000)
        theta = [theta1, theta2]
        Eout[j] = Eout[j] + errfunction(Xtest, Ytest, theta)
print(Eout/500)

# We could keep testing many other parameters in the same way.

In [None]:
# Candidate half-widths of the uniform weight-initialization range. With 3
# hidden neurons, the test error for each candidate is accumulated over 500
# independently trained networks and the average is printed.
r = [0, 0.001, 0.1, 10, 1000]
Eout = np.zeros(len(r))
for i in range(500):
    for j, init_range in enumerate(r):
        theta1, theta2 = nnetwork(Xtrain, Ytrain, 3, init_range, 0.1, 50000)
        theta = [theta1, theta2]
        Eout[j] = Eout[j] + errfunction(Xtest, Ytest, theta)
print(Eout/500)