## NNCL ASSIGNMENT 3

In [15]:
import numpy as np
import pandas as pd

In [16]:
class Network:
    def __init__(self, P, N, K, learning_rate):
        # P = amount of input samples
        self.P = P
        # N = amount of neurons in the input layer
        self.N = N
        # K = amount of neurons in the hidden layer
        self.K = K
        # eta = learning rate
        self.eta = learning_rate
        # W = weights form input to hidden layers
        self.W = self.initWeights()
        # V = weights from hidden layer to output, fixed to 1
        self.V = np.ones((1, self.K))

    def initWeights(self):
        
        # Generate random vectors and normalize each vector to have a norm of 1
        weights = np.random.randn(self.N, self.K)
        norms_squared = np.linalg.norm(weights, axis=1, keepdims=True)**2
        normalized_weights = weights / norms_squared

        return normalized_weights
    
    def forwardPass(self, x):
        """
        Tanh activation function. 
        """

        # Calculate the dot product of the first-layer weights and the input.
        dot_product = [0, 0]
        dot_product[0] = np.dot(self.W[:, 0], x)
        dot_product[1] = np.dot(self.W[:, 1], x)
        # Apply hyperbolic tangent element-wise and sum for sigma. 
        tanh_result = np.tanh(dot_product)
        sigma = np.sum(self.V * tanh_result)

        # Sigma is the output of the network for a given input x
        return sigma

    def stochasticGradientDescent(self, sigma, xi, tau):
        """
        Stochastic gradient descent
        """
        # Use the gradient with respect to its contribution to the error
        gradient1 = (sigma - tau) * (1 - np.tanh(np.dot(self.W[:, 0], xi))**2)
        gradient2 = (sigma - tau) * (1 - np.tanh(np.dot(self.W[:, 1], xi))**2)
        # Update the weights
        self.W[:, 0] = self.W[:, 0] - self.eta * gradient1 * xi
        self.W[:, 1] = self.W[:, 1] - self.eta * gradient2 * xi
    
    def calculateError(self, sigma, tau):
        # Error is the quadratic difference between sigma (network output) and
        # tau (target value)
        return ((sigma - tau)**2)/2


    def train(self, t_max, train_set, test_set):
        """
        Train the network using stochastic gradient descent. 
        """
        # Select a random sample from the train_set, and perform a forward pass.
        # Then, update the weights using the SGD algorithm.
        # Run for t_max * P iterations.
        # Select a random sample from the train_set, make sure that for each t,
        # all samples are used, but in random order.
        for epoch in range(t_max):
            # For each epoch, keep track of the error and print the average
            # error for the epoch.
            epoch_error = 0
            epoch_error_test = 0
            for p in np.random.permutation(len(train_set)):
                xi, tau = train_set[p]
                sigma = self.forwardPass(xi)
                epoch_error += self.calculateError(sigma, tau)
                self.stochasticGradientDescent(sigma, xi, tau)
            epoch_error /= len(train_set)

            for p in np.random.permutation(len(test_set)):
                xi, tau = test_set[p]
                sigma = self.forwardPass(xi)
                epoch_error_test += self.calculateError(sigma, tau)
            epoch_error_test /= len(test_set)

            print("Epoch: {}, Error: {} Test Error: {}".format(epoch, epoch_error, epoch_error_test))


# Inputs
xi = pd.read_csv("data/xi.csv", delimiter=',', header=None)
# Labels
tau = pd.read_csv("data/tau.csv", delimiter=',', header=None)

dataset = [(xi[i], tau[i]) for i in range(len(xi))]

# P = amount of input samples
P = len(xi)
# N = input dimensionality
N = 50
# K = amount of neurons in the hidden layer
K = 2

# Take only the first 100 samples
train_set = dataset[:100]
test_set = dataset[100:200]

network = Network(P=P, N=N, K=K, learning_rate=0.05)
network.train(t_max=100, train_set=train_set, test_set=test_set)

ZeroDivisionError: division by zero