In [18]:
from helpers.data import load_synth

import random
import math
import sys

In [19]:
class Activations:

    @staticmethod
    def sigmoid(gamma):
        if gamma < 0.0:
            return 1.0 - 1.0 / (1.0 + float(math.exp(gamma)))

        return 1.0 / (1.0 + float(math.exp(-gamma)))

    @staticmethod
    def softmax(current, values):
        denominator = 0.0

        for i in range(len(values)):
            denominator += float(math.exp(values[i]))

        return float(math.exp(current)) / denominator


In [20]:
class Losses:

    @staticmethod
    def cross_entropy(predicted, targets):
        loss = 0.0

        for i in range(0, len(predicted)):
            loss += float(targets[i]) * float(math.log(predicted[i]))
        
        return  -1.0 * loss

![Img] ("files/neural_net.png")

In [56]:
class Network:
    def __init__(self):
        self.X = None
        self.T = None
        self.initialize()
    
    def predict(self, X: list) -> [ float, float ]:
        self.X = X
        return self.forwards()

    def set_input(self, input: list) -> None:
        self.X = input

    def set_target(self, target: list) -> None:
        self.T = target
   
    def set_learning_rate(self, learning_rate: int) -> None:
        self.learning_rate = learning_rate
    
    def initialize_weight(self) -> float:
        return random.gauss(0, 1)
    
    def initialize(self) -> None:
        # First set of weights mapping X to K
        self.W = [
          [ self.initialize_weight(), self.initialize_weight(), self.initialize_weight() ],
          [ self.initialize_weight(), self.initialize_weight(), self.initialize_weight() ]
        ]

        # Bias for the first layer
        self.B = [ 0.0, 0.0, 0.0 ]

        # Linear outputs for the first hidden layer
        self.K = [ 0.0, 0.0, 0.0 ]

        # Output of the sigmoid function applied to K, H = sigmoid(K)
        self.H = [ 0.0, 0.0, 0.0 ]

        # Second set of weights mapping H to the linear output being fed to the Softmax function
        self.V = [
            [ self.initialize_weight(), self.initialize_weight() ], 
            [ self.initialize_weight(), self.initialize_weight() ], 
            [ self.initialize_weight(), self.initialize_weight() ]
        ]

        # Second set of biases
        self.C = [ 0.0, 0.0 ]

        # Linear output fed to the Softmax function
        self.O = [ 0.0, 0.0 ]

        # One hot encoded output of the softmax function
        self.Y = [ 0.0, 0.0 ]

    def forwards(self) -> None:
        # Calculate K = X * W + B
        for i in range(len(self.X)):
            for j in range(len(self.W[0])):
                self.K[j] += self.X[i] * self.W[i][j]

        for i in range(len(self.B)):
            self.K[i] += self.B[i]

        # Calculate H = sigmoid(K)
        for i in range(len(self.K)):
            self.H[i] = Activations.sigmoid(self.K[i])

        # Calculate O = H * V + C
        for i in range(len(self.H)):
            for j in range(len(self.V[0])):
                self.O[j] += self.H[i] * self.V[i][j]

        for i in range(len(self.C)):
            self.O[i] += self.C[i]

        # Calculate Y = softmax(O)
        self.Y = [ Activations.softmax(self.O[0], self.O), Activations.softmax(self.O[1], self.O) ]

        # return the predicted value
        return self.Y

    def calculate_gradients(self) -> None:
        try:
#             @todo it doesn't seemt like this value actually ever gets used 
#             self.dLdY = [0.0, 0.0]

#             # Calculate the derivative of the loss (L) wrt to Y
#             for i in range(len(self.Y)):
#                 self.dLdY[i] = -1.0 * float(self.T[i]) / float(self.Y[i])

            # Calculate the gradient of the loss (L) wrt to the linear outputs
            self.dLdO = [0.0, 0.0]
            for i in range(len(self.O)):
                self.dLdO[i] = self.Y[i] - self.T[i]

            # Calculate the gradient of the loss (L) wrt to the bias C
            self.dLdC = self.dLdO

            # Calculate the gradient of the loss (L) wrt to the hidden layer H
            self.dLdH = [0.0, 0.0, 0.0]

            for i in range(len(self.V)):
                for j in range(len(self.O)):
                    self.dLdH[i] += self.dLdO[j] * self.V[i][j]

            # Calculate the gradient of the loss (L) wrt to weights V of the hidden layer H
            self.dLdV = [
              [0.0, 0.0],
              [0.0, 0.0],
              [0.0, 0.0]
            ]

            for i in range(len(self.H)):
                for j in range(len(self.O)):
                    self.dLdV[i][j] = self.dLdO[j] * self.H[i]

            # Calculate the gradient of the loss (L) wrt to K
            self.dLdK = [ 0.0, 0.0, 0.0 ]
            for i in range(len(self.dLdH)):
                self.dLdK[i] = Activations.sigmoid(self.dLdH[i]) * Activations.sigmoid(1 - self.dLdH[i])

            # Calculate the gradient of the loss (L) wrt to the bias B
            self.dLdB = self.dLdK

            # Calculate the gradient of the loss (L) wrt to weights W
            self.dLdW = [
              [0.0, 0.0, 0.0],
              [0.0, 0.0, 0.0]
            ]

            for i in range(len(self.dLdW)):
                for j in range(len(self.dLdK)):
                    self.dLdW[i][j] = self.dLdK[j] * self.X[i]
        except:
            print(sys.exc_info())
            print(self.Y)
            print(self.T)

    def step(self):
        # Update C based on the learning rate and dL / dC
        for i in range(len(self.C)):
            self.C[i] = self.C[i] - self.learning_rate * self.dLdC[i]

        # Update V based on the learning rate and dL / dV
        for i in range(len(self.V)):
            for j in range(len(self.V[i])):
                self.V[i][j] = self.V[i][j] - self.learning_rate * self.dLdV[i][j]

        # Update B based on the learning rate and dL / dB
        for i in range(len(self.dLdB)):
            self.B[i] = self.B[i] - self.learning_rate * self.dLdB[i]
            
        # Update W based on the learning rate and dL / dW
        for i in range(len(self.W)):
            for j in range(len(self.W[i])):
                self.W[i][j] = self.W[i][j] - self.learning_rate * self.dLdW[i][j]

    def train(self, data: tuple, epochs: int):
        inputs = data[0]
        targets = data[1]
        
        for num_epoch in range(epochs):
            for i in range(len(inputs)):
                self.set_input(inputs[i])

                one_hot_encoded_target = [ 0.0, 0.0 ]
                one_hot_encoded_target[targets[i]] = 1.0

                self.set_target(one_hot_encoded_target)

                self.forwards()

                # calculate gradients
                self.calculate_gradients()

                # take a step on the gradients
                self.step()
                # return self.W, self.B, self.V, self.C
    

In [58]:
# epoch implies a full pass over the entire dataset
epochs = 50
learning_rate = 0.03

data = load_synth()
training_data = data[0]
validation_data = data[1]

network = Network()
network.set_learning_rate(learning_rate)
network.train(training_data, epochs)

# calculating the loss
print(training_data[0][0])
print(network.predict(training_data[0][0]))
print(training_data[1][0])

print(training_data[0][1])
print(training_data[1][1])

OverflowError: math range error