This is a simplification of our neural network problem. Instead of
converting a 28x28 image into a 784-neuron input layer and letting the network figure out which digit it is, we want the network to learn addition.

1. We start with the easiest case of addition: addition with 0.
- For learning purposes we create a very simple neural network [1,1,1]. This way the core concepts become more apparent.
2. Then we let it learn addition with 2 random numbers.

# Addition with 0

In [7]:
# NumPy is only needed for np.exp inside the sigmoid function
import numpy as np

class Network_1_D(object):
    """Scalar feed-forward network (one neuron per layer) trained by backpropagation.

    Every weight, bias and activation is a plain number instead of a matrix,
    which keeps the backpropagation equations easy to follow.

    The starting weights and biases are hard-coded as two-element lists, i.e.
    they describe a three-layer 1 -> 1 -> 1 network. ``sizes`` is stored and
    its length drives the backward pass, so it should have three entries.
    """

    def __init__(self, sizes):
        """
        Initializing 1 dimensional network of size
        1 -> 1 -> 1

        sizes: list with one entry per layer, e.g. ``[1, 1, 1]``. Only its
        length is used by the training code.
        """
        self.num_layers = len(sizes)
        self.sizes = sizes
        # Deterministic (non-random) starting values so runs are reproducible
        self.biases = [0.2, 0.52]
        self.weights = [0.4, 0.7]

    def _feedforward(self, x):
        """Run ``x`` through every layer and return ``(zs, activations)``.

        ``zs`` holds the weighted input of each non-input layer and
        ``activations`` holds ``x`` followed by each layer's sigmoid output.
        """
        a = x
        activations = [a]
        zs = []
        for b, w in zip(self.biases, self.weights):
            z = (w * a) + b
            zs.append(z)
            a = sigmoid(z)
            activations.append(a)
        return zs, activations

    def SGD(self, x, y, eta, epochs):
        """
        Train the neural network using stochastic gradient descent

        The cost being minimised is C = (a - y) ** 2 for the single training
        sample ``(x, y)``.

        x: input fed into the first layer.
        y: desired output of the last layer.
        eta: learning rate; each parameter moves by ``eta * gradient``.
        epochs: number of forward/backward passes to run.

        Returns the output activation of the last forward pass, i.e. the value
        computed *before* the final weight update is applied. When
        ``epochs <= 0`` no training happens and the current network output for
        ``x`` is returned (previously this raised ``UnboundLocalError``).
        """
        if epochs <= 0:
            # Nothing to train: report what the network currently predicts.
            return self._feedforward(x)[1][-1]

        for _ in range(epochs):
            # Feedforwarding until we arrive at the final layer
            zs, activations = self._feedforward(x)
            a = activations[-1]

            # Adjustments for weights and biases, filled in from the back
            nabla_b = [0] * len(self.biases)
            nabla_w = [0] * len(self.weights)

            # Mathematically the derivative will have a 2 here
            # But we can leave it out because we can just adjust the learning rate
            delta = (a - y) * sigmoid_prime(zs[-1])  # EQ (30) / (BP1)
            nabla_b[-1] = delta  # EQ (BP3)
            nabla_w[-1] = activations[-2] * delta  # EQ (BP4)

            # Propagate the error backwards through the remaining layers
            for l in range(2, self.num_layers):
                z = zs[-l]
                w = self.weights[-l+1]
                delta = w * delta * sigmoid_prime(z)  # EQ (BP2)
                nabla_b[-l] = delta  # EQ (BP3)
                nabla_w[-l] = activations[-l-1] * delta  # EQ (BP4)

            # Gradient-descent step so the next epoch delivers a better result
            self.weights = [w - (eta * nw) for w, nw in zip(self.weights, nabla_w)]
            self.biases = [b - (eta * nb) for b, nb in zip(self.biases, nabla_b)]
        return a


def sigmoid(z):
    """Logistic function: return 1 / (1 + e^(-z)) for ``z``."""
    exp_of_negated = np.exp(-z)
    return 1.0 / (1.0 + exp_of_negated)


def sigmoid_prime(z):
    """Slope of the sigmoid at ``z``, computed as s * (1 - s) with s = sigmoid(z)."""
    s = sigmoid(z)
    return s*(1-s)

As you can see, we are coming very close to 1. However, this approach comes with a number of significant caveats
and should not be considered sophisticated, but rather a good learning example.

In [10]:
# Build the tiny 1 -> 1 -> 1 network and train it on a single sample:
# input 1 should produce output 1.
net = Network_1_D([1, 1, 1])
training_data = 1
excpected_output = 1
learning_rate = 1
# SGD returns the output activation from the forward pass of the last epoch.
print("result", net.SGD(x=training_data, y=excpected_output, eta=learning_rate, epochs=200))

result 0.9590591854736482


# Learning Addition with 2 numbers

In [3]:
import sys
import os
sys.path.insert(0, os.path.abspath('..'))
from neuralnets import network
import numpy as np

def vectorized_result(j, l):
    """Return an ``(l, 1)`` column vector that is 1.0 at index ``j``
    and 0.0 everywhere else.

    This one-hot encoding turns a class index (here: the sum of two
    digits) into the desired output of a network whose last layer has
    ``l`` neurons."""
    one_hot = np.zeros(shape=(l, 1))
    one_hot[j] = 1.0
    return one_hot

def add_two_numbers():
    """Train a [2, 19, 19] network to add two random digits and print its score.

    1000 pairs of digits (0-9) are drawn as ``(2, 1)`` arrays. The first 900
    become training samples whose target is the sum one-hot encoded over 19
    classes (sums 0..18); the last 100 become test samples whose target is
    the plain integer sum. Whatever ``net.SGD`` returns is printed after the
    label "correct results".
    """
    td_size = 1000
    # Everything from this index onwards is held back for testing.
    split_at = td_size - 100

    digit_pairs = [
        np.random.choice(range(10), replace=True, size=(2, 1))
        for x in range(td_size)
    ]

    # Training samples: ((2,1) input, (19,1) one-hot encoded sum)
    training_data = []
    for pair in digit_pairs[:split_at]:
        target = vectorized_result(j=np.sum(pair), l=19)
        training_data.append((pair, target))

    # Test samples: ((2,1) input, sum as a bare number 0-18) -- not vectorized
    test_data = []
    for pair in digit_pairs[split_at:]:
        test_data.append((pair, np.sum(pair)))

    net = network.Network([2, 19, 19])
    print("training network")

    score = net.SGD(training_data=training_data, test_data=test_data,eta=1, epochs=100, mini_batch_size=10, should_print=False, mean=True)
    print("correct results", score)
    
# Run the experiment when this cell executes; it prints "training network"
# followed by the "correct results" line.
add_two_numbers()

training network
correct results 0.32550000000000007


Results vary around 30%, and I am sure that with a different structure one should be able to get it up to 99%. There are many different approaches one can take to represent these numbers (e.g. binary).

After all, note that by using perceptrons one can very easily train a network to learn addition.

[More info here](http://neuralnetworksanddeeplearning.com/chap1.html#perceptrons)
