In [9]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split as tts
from random import seed
from random import random

seed(1)

dataset = pd.read_csv('aggregated_data.csv', header=None).values.tolist()[1:][0:500]

X = np.array(dataset)[:, 0:-1]
t = np.array(dataset)[:, -1].astype(np.float64)

X_train, X_rem, t_train, t_rem = tts(X, t, test_size = 0.6, random_state = 6590)
X_valid, X_test, t_valid, t_test = tts(X_rem, t_rem, test_size=0.5)

sc = StandardScaler()
X_train = sc.fit_transform(X_train) 
X_test = sc.transform(X_test) 
X_valid = sc.transform(X_valid)  


In [10]:
# Initialize a network with 1 hidden layer
def initialize_network(n_inputs, n_hidden, n_outputs):
    network = list()
    
    #Puts in random weights for the connections in the hidden layer
    #Includes the weights plus the bias
    hidden_layer = np.zeros(( n_hidden, n_inputs + 1))
    for i in range(n_hidden):
        for j in range(n_inputs + 1):
            hidden_layer[i,j] = random()
                    

    network.append(hidden_layer)
    
    #Random weights to the output layer
    output_layer = np.zeros(( n_output, n_hidden + 1))
    for i in range(n_output):
        for j in range(n_hidden + 1):
            output_layer[i,j] = random()
 
    network.append(output_layer)
    
    return network

#ReLy activation for hidden layers
def ReLu(z):
    if (z.size == 1):
        if z <= 0:
            return 0   
    else:
        for i in range(len(z)):
            if z[i] <= 0:
                z[i] = 0
    return z

#Sigmoid activation for output layer
def sigmoid(z):    
    return  1/ (1.0 + np.exp(-z));

# Forward propagate input to the next network layer
def forward_propagate(network, example):
    #The added one is the bias weight
    inputs = np.copy(example)
    h_output = []
    #first is input, then hidden (possibly hidden again), then output
    i = 0
    for weights in network:
        
        inputs = np.append(1, inputs)

        print(weights)
        print(inputs)
        z = weights @ inputs


        if ((network[-1][0, 1] == weights[0, 1])):
            h_output.append(sigmoid(z))
        else:
            h_output.append(ReLu(z))
        
        inputs = h_output[i]
        
        i += 1 
    return h_output #Becomes the input for the next

In [11]:
#Shuffles data at start of every training epoch
def shuffleData(X, t):
    assert X.shape[0] == len(t)
    p = np.random.permutation(len(t))
    return X[p], t[p]

#Computers the gradients of the weight vectors during back propogation for 1 hidden layer
def computeGradW_J_1hidden(network, h_outputs, X, t):
    delZ2_J = np.array([-t + h_outputs[-1]])   #(Computing Eq. 5 from Notes)
                        
    h1 = np.array([h_outputs[0]]).T
    
    #excluding the biases
    W2 = network[-1][0:, 1:].T
    W1 = network[0][0:, 1:].T

    delZ1_J = np.multiply(h1, W2) * delZ2_J  #(Computing Eq. 3 from Notes)

    X_1 = np.array([np.append(1, X)])
    h1_modified = np.array([np.append(1, h1.T[0])])

    gradW2J = delZ2_J @ h1_modified #(Computing Eq. 7 from Notes)
    gradW1J = delZ1_J @ X_1 #(Computing Eq. 2 from Notes)
    
    return [gradW1J, gradW2J]

#Update the weights using the gradients as shown in slide 23
def update_weights(network, old_network, gradW_J, alpha):
    for W in range(len(network)):
        network[W] = old_network[W] - alpha*gradW_J[W]
    return network

#Trains the network iterating through epochs and training values
def train_network(network, X, X_valid, t, t_valid, alpha, n_epoch, early_stopping, isN1N2):
    training_error = []
    validation_error = []
    
    min_training_error = 0
    
    lowest_weights_network = network
    
    for epoch in range(n_epoch):
        training_cross_entropy = 0
        validation_cross_entropy = 0

        f_x_w = np.zeros(X.shape[0])
        f_x_w_v = np.zeros(X_valid.shape[0])
        
        X, t = shuffleData(X, t)
        X_valid, t_valid = shuffleData(X_valid, t_valid)
    
        #For each epoch, forward propgate through every training example
        #Then evaluate the gradients
        #Perform back propogating, then update the weights
        for i in range(X.shape[0]):
            h_out = forward_propagate(network, X[i, :]) #The g(z) values output
            
            if (not isN1N2):
                gradW_J_all = computeGradW_J_1hidden(network, h_out, X[i, :], t[i])
            else: 
                gradW_J_all = computeGradW_J_2hidden(network, h_out, X[i, :], t[i])
                
            f_x_w[i] = h_out[-1]
            
            
            if (i < X_valid.shape[0]):
                h_out_v = forward_propagate(network, X_valid[i, :])
                f_x_w_v[i] = h_out_v[-1]
            
            
            old_network = network
            print(i)
            print("Network", network)
            print("H OUTPUT", h_out)          
            network = update_weights(network, old_network, gradW_J_all, alpha)
        
        
            
        #Calucating and storign cross entropy losses
        training_cross_entropy =  -sum(t[0:] * np.log(f_x_w[0:]) + (1 - t[0:])*np.log(1 - f_x_w[0:])) / len(t)
        training_error.append(float(training_cross_entropy))
        
        
        validation_cross_entropy =  -sum(t_valid[0:] * np.log(f_x_w_v[0:]) +(1 - t_valid[0:])*np.log(1 - f_x_w_v[0:])) /len(t_valid) 
        validation_error.append(float(validation_cross_entropy))
        
        #Early stopping
        if (validation_cross_entropy <= min(validation_error)):
            min_validation_error = validation_cross_entropy
            lowest_weights_network = network
    
            
    return training_error, validation_error, n_epoch, lowest_weights_network
        #
    

In [13]:

n_input = 8
n_output = 1
alpha = 0.005
epochs = 4

early_stopping = 10

min_validation_errors = []
min_training_errors = []

all_networks  = []
all_validation_errors = []
all_training_errors = []

#Runs through 10 nodes for 1 hidden layer

for n_hidden in range (69, 70):
    
    network = initialize_network(n_input, n_hidden, n_output)
    training_error, validation_error, epochs, network = train_network(network, X_train, X_valid, t_train, t_valid, alpha, epochs, early_stopping, False)
    
    #print("Final Networks", network)
        
    min_validation_errors.append(min(validation_error))
    min_training_errors.append(min(training_error))
    
    all_validation_errors.append(validation_error)
    all_training_errors.append(training_error)
    
    all_networks.append(network)
    
    #print(epochs)
    print(len(training_error))




ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 8 is different from 9)

In [None]:
lowest_index = np.argmin(np.array(min_validation_errors))
print("N1 with the best performance is: " + str(lowest_index + 1))


print(min_training_errors)
print(min_validation_errors)

plt.scatter(range(0, epochs), all_training_errors[lowest_index], color = 'blue', label='training err')
plt.scatter(range(0, epochs), all_validation_errors[lowest_index], color = 'm', label='validation err')
plt.title('Cost function')
plt.legend()
plt.ylabel('cost')
plt.xlabel('iteration number')
plt.title('N Hidden: ' + str(lowest_index+1))
plt.show()