# Neural Network

## Perceptrons

In [1]:
from linearalgebra import Vector, dot

In [2]:
def step_function(x: float) -> float:
    return 1.0 if x >= 0 else 0.0

In [3]:
def perceptron_output(weights: Vector, bias: float, x: Vector) -> float:
    ''' return 1 if the perceptron "fires",  0 if not'''
    calculation = dot(weights, x) + bias
    return step_function(calculation)

In [4]:
and_weights = [2., 2]
and_bias = -3.

In [5]:
perceptron_output(and_weights, and_bias, [1,1])

1.0

## Feed- forward neural networks

In [6]:
import math

def sigmoid(t: float) -> float:
    return 1/(1+math.exp(-t))

In [7]:
def neuron_output(weights: Vector, inputs: Vector) -> float:
    '''weights includes the bias term, inputs includes a 1'''
    return sigmoid(dot(weights, inputs))

In [8]:
from typing import List

In [9]:
def feed_forward(neural_network: List[List[Vector]],
                 input_vector: Vector) -> List[Vector]:
    '''
    Feeds the inputs vector through the neural network.
    Returns the outputs of all layers (not just the last one).'''
    
    outputs: List[Vector] = []
    
    for layer in neural_network:
        input_with_bias = input_vector + [1]  # Add a constant.
        output = [neuron_output(neuron, input_with_bias)
                  for neuron in layer]
        outputs.append(output)  # add to results.
        #  then the input to the next layers is the output of this one
        
        input_vector = output
        
    return outputs

In [10]:
xor_network = [# hidden layer
                [[20., 20, -30],
                [20., 20, -10]],
                [[-60., 60, -30]]]

In [11]:
feed_forward(xor_network, [1,0])

[[4.5397868702434395e-05, 0.9999546021312976], [0.9999999999999059]]

## Backpropagation

In [12]:
def sqerror_gradients(network: List[List[Vector]],
                      input_vector: Vector,
                      target_vector: Vector) -> List[List[Vector]]:
    """
    Given a neural network, an input vector, and a target vector,
    make a prediction and compute the gradient of the squared error
    loss with respect to the neuron weights.
    """
    # forward pass
    hidden_outputs, outputs = feed_forward(network, input_vector)

    # gradients with respect to output neuron pre-activation outputs
    output_deltas = [output * (1 - output) * (output - target)
                     for output, target in zip(outputs, target_vector)]

    # gradients with respect to output neuron weights
    output_grads = [[output_deltas[i] * hidden_output
                     for hidden_output in hidden_outputs + [1]]
                    for i, output_neuron in enumerate(network[-1])]

    # gradients with respect to hidden neuron pre-activation outputs
    hidden_deltas = [hidden_output * (1 - hidden_output) *
                         dot(output_deltas, [n[i] for n in network[-1]])
                     for i, hidden_output in enumerate(hidden_outputs)]

    # gradients with respect to hidden neuron weights
    hidden_grads = [[hidden_deltas[i] * input for input in input_vector + [1]]
                    for i, hidden_neuron in enumerate(network[0])]

    return [hidden_grads, output_grads]

In [13]:
import random

In [14]:
random.seed(0)

In [15]:
xs = [[0., 0], [0., 1], [1., 0], [1., 1]]
ys = [[0.], [1.], [1.], [0.]]

In [16]:
network = [[[random.random() for _ in range(2+1)],
            [random.random() for _ in range(2+1)]],
            [random.random() for _ in range(2+1)]]

In [17]:
from GradientDescent import gradient_step
import tqdm

In [18]:
learning_rate = 1.0

for epoch in tqdm.trange(20000, desc="neural net for xor"):
    for x, y in zip(xs, ys):
        gradients = sqerror_gradients(network, x, y)
        network = [[gradient_step(neuron, grad, -learning_rate)
                    for neuron, grad in zip(layer, layer_grad)]
                    for layer, layer_grad in zip(network, gradients)]

neural net for xor:   0%|                                                                    | 0/20000 [00:00<?, ?it/s]


TypeError: object of type 'float' has no len()

## Fizzbuzz

In [19]:
def fizz_buzz_encode(x: int) -> Vector:
    if x % 15 == 0:
        return [0, 0, 0, 1]
    elif x % 5 == 0:
        return [0, 0, 1, 0]
    elif x % 3 == 0:
        return [0, 1, 0, 0]
    else:
        return [1, 0, 0, 0]

In [20]:
print(fizz_buzz_encode(2))
print(fizz_buzz_encode(3))
print(fizz_buzz_encode(5))
print(fizz_buzz_encode(15))
print(fizz_buzz_encode(0))

[1, 0, 0, 0]
[0, 1, 0, 0]
[0, 0, 1, 0]
[0, 0, 0, 1]
[0, 0, 0, 1]


In [21]:
def binary_encode(x: int) -> Vector:
    binary: List[float] = []
    for i in range(10):
        binary.append(x % 2)
        x = x // 2
    return binary

In [22]:
print(binary_encode(2))
print(binary_encode(3))
print(binary_encode(5))
print(binary_encode(7))
print(binary_encode(15))
print(binary_encode(1001))

[0, 1, 0, 0, 0, 0, 0, 0, 0, 0]
[1, 1, 0, 0, 0, 0, 0, 0, 0, 0]
[1, 0, 1, 0, 0, 0, 0, 0, 0, 0]
[1, 1, 1, 0, 0, 0, 0, 0, 0, 0]
[1, 1, 1, 1, 0, 0, 0, 0, 0, 0]
[1, 0, 0, 1, 0, 1, 1, 1, 1, 1]


In [23]:
xs = [binary_encode(n)
      for n in range(101, 1024)]
ys = [fizz_buzz_encode(n)
      for n in range(101, 1024)]

In [24]:
import random

NUM_HIDDEN = 25

network = [# hidden layer: 10 inputs -> Num_hidden outputs
           [[random.random() for i in range(10 + 1)] for _ in range(NUM_HIDDEN)],
           [[random.random() for _ in range(NUM_HIDDEN + 1)]
            for _ in range(4)]
            ]

In [25]:
from linearalgebra import squared_distance

In [26]:
learning_rate = 1.0

with tqdm.trange(500) as t:
    for epoch in t:
        epoch_loss = 0.0
        for x, y in zip(xs, ys):
            predicted = feed_forward(network, x)[-1]
            epoch_loss += squared_distance(predicted, y)
            gradients = sqerror_gradients(network, x, y)
            
            network = [[gradient_step(neuron, grad, -learning_rate)
                       for neuron, grad in zip(layer, layer_grad)]
                        for layer, layer_grad in zip(network, gradients)]
            t.set_description(f"fizz buzz Loss: {epoch_loss:.2f}")

fizz buzz Loss: 30.14: 100%|█████████████████████████████████████████████████████████| 500/500 [20:11<00:00,  2.42s/it]


In [27]:
def argmax(xs: list) -> int:
    '''return the index of the largest value'''
    return max(range(len(xs)), key= lambda i: xs[i])

In [28]:
num_correct = 0

for n in range(1, 101):
    x = binary_encode(n)
    predicted = argmax(feed_forward(network, x)[-1])
    actual = argmax(fizz_buzz_encode(n))
    labels = [str(n), "fizz", "buzz", "fizzbuzz"]
    print(n, labels[predicted], labels[actual])
    
    if predicted == actual:
        num_correct += 1
    
print(num_correct, '/', 1000)

1 1 1
2 2 2
3 fizz fizz
4 4 4
5 buzz buzz
6 fizz fizz
7 7 7
8 8 8
9 fizz fizz
10 buzz buzz
11 11 11
12 fizz fizz
13 13 13
14 14 14
15 fizzbuzz fizzbuzz
16 16 16
17 17 17
18 fizz fizz
19 19 19
20 20 buzz
21 fizz fizz
22 22 22
23 23 23
24 fizz fizz
25 buzz buzz
26 26 26
27 fizz fizz
28 28 28
29 29 29
30 fizzbuzz fizzbuzz
31 31 31
32 32 32
33 fizz fizz
34 34 34
35 buzz buzz
36 fizz fizz
37 37 37
38 38 38
39 fizz fizz
40 buzz buzz
41 41 41
42 fizz fizz
43 43 43
44 44 44
45 fizzbuzz fizzbuzz
46 46 46
47 47 47
48 fizz fizz
49 49 49
50 buzz buzz
51 fizz fizz
52 52 52
53 53 53
54 fizz fizz
55 buzz buzz
56 56 56
57 fizz fizz
58 58 58
59 59 59
60 fizzbuzz fizzbuzz
61 61 61
62 62 62
63 fizz fizz
64 64 64
65 buzz buzz
66 fizz fizz
67 67 67
68 68 68
69 fizz fizz
70 buzz buzz
71 71 71
72 fizz fizz
73 73 73
74 74 74
75 fizzbuzz fizzbuzz
76 76 76
77 77 77
78 fizz fizz
79 79 79
80 80 buzz
81 fizz fizz
82 82 82
83 83 83
84 fizz fizz
85 fizz buzz
86 86 86
87 fizz fizz
88 88 88
89 89 89
90 fizzbuzz fizzbu