### Neural Networks

In [1]:
from scratch.linear_algebra import Vector, dot

def step_function(x:float) -> float:
    return 1.0 if x >= 0 else 0.0

def perceptron_output(weights:Vector, bias:float,x:Vector) -> float:
    calculations = dot(weights, x) + bias
    return step_function(calculations)

In [2]:
#AND_Gate
and_weights = [2. , 2] 
and_bias = - 3.
assert perceptron_output(and_weights, and_bias, [1, 1] ) == 1 
assert perceptron_output(and_weights, and_bias, [0, 1] ) == 0 
assert perceptron_output(and_weights, and_bias, [1, 0] ) == 0
assert perceptron_output(and_weights, and_bias, [0, 0] ) == 0

In [3]:
#OR_Gate
or_weights = [2.,2]
or_bias = -1
assert perceptron_output(or_weights, or_bias, [1, 1] ) == 1 
assert perceptron_output(or_weights, or_bias, [0, 1] ) == 1
assert perceptron_output(or_weights, or_bias, [1, 0] ) == 1
assert perceptron_output(or_weights, or_bias, [0, 0] ) == 0

In [4]:
#NOT_Gate
not_weights = [-2.]
not_bias = 1.
assert perceptron_output(not_weights, not_bias, [0] ) == 1
assert perceptron_output(not_weights, not_bias, [1] ) == 0

<b>Building the Feed Forward Neural Networks</b>

In [5]:
import math

def sigmoid(t:float) -> float:  # a smooth acivation function
    return 1 / (1+math.exp(-t))

def neuron_output(weights:Vector, inputs:Vector) -> float:
    return sigmoid(dot(weights,inputs))

In [6]:
from typing import List

def feed_forward(neural_network: List[List[Vector]],
                 input_vector: Vector) -> List[Vector]:
    
    outputs: List[Vector] = []
    for layer in neural_network:
        input_with_bias = input_vector + [1]
        output = [neuron_output(neuron,input_with_bias)
                  for neuron in layer]
        outputs.append(output)
        input_vector = output
    return outputs

In [7]:
xor_network = [
    [[20.,20,-30],
    [20.,20,-10]],
    [[-60.,60,-30]]]

assert 0.000 < feed_forward(xor_network,[0,0])[-1][0] < 0.001
assert 0.999 < feed_forward(xor_network,[1,0])[-1][0] < 1.000
assert 0.999 < feed_forward(xor_network,[0,1])[-1][0] < 1.000
assert 0.000 < feed_forward(xor_network,[1,1])[-1][0] < 0.001

In [15]:
def sqerror_gradient(network:List[List[Vector]],
                     input_vector: Vector,
                     target_vector: Vector) -> List[List[Vector]]:
    hidden_outputs, outputs = feed_forward(network,input_vector)
    output_deltas = [output * (1-output) * (output-target)
                     for output, target in zip(outputs,target_vector)]
    
    output_grads = [[output_deltas[i] * hidden_output
                     for hidden_output in hidden_outputs + [1]]
                    for i,output_neuron in enumerate(network[-1])]
    
    hidden_deltas = [hidden_output * (1-hidden_output) * 
                        dot(output_deltas,[n[i] for n in network[-1]])
                    for i,hidden_output in enumerate(hidden_outputs)]
    
    hidden_grads = [[hidden_deltas[i] * input for input in input_vector + [1]]
                   for i, hidden_neuron in enumerate(network[0])]
    return [hidden_grads,output_grads]

In [16]:
#Implementing xor gate with Backprop

import random
random.seed(0)

xs = [[0.,0],[0.,1],[1.,0],[1.,1]]
ys = [[0.],[1.],[1.],[0.]]

#starting with random weights

network = [
    [[random.random() for _ in range(2+1)],
      [random.random() for _ in range(2+1)]],
     [[random.random() for _ in range(2+1)]]
]

In [20]:
from scratch.gradient_descent import gradient_step
import tqdm

learning_rate = 1.0

for epoch in tqdm.trange(20000, desc='neural net for xor'):
    for x,y in zip(xs,ys):
        gradients = sqerror_gradient(network,x,y)
        network = [[gradient_step(neuron,grad,-learning_rate)
                    for neuron, grad in zip(layer, layer_grad)]
                   for layer,layer_grad in zip(network,gradients)]


neural net for xor: 100%|█████████████████████████████████████████████████████| 20000/20000 [00:01<00:00, 12002.28it/s]


In [22]:

assert feed_forward(network,[0,0])[-1][0] < 0.01
assert feed_forward(network,[0,1])[-1][0] > 0.99
assert feed_forward(network,[1,0])[-1][0] > 0.99
assert feed_forward(network,[1,1])[-1][0] < 0.01

<b>Example - </b><br>
Question - <br>
Print the numbers 1 to 100, except that if the number is divisible by 3 <br>print " fizz" ; if the number is divisible by 5, print " buzz" ;<br>
and if the number is divisible by 15, print " fizzbuzz" .<br>

In [26]:
def fizz_buzz_encode(x:int) -> Vector:
    if x % 15 == 0:
        return [0,0,0,1]
    elif x % 5 == 0:
        return [0,0,1,0]
    elif x % 3 == 0:
        return [0,1,0,0]
    else:
        return [1,0,0,0]

assert fizz_buzz_encode(2) == [1,0,0,0]
assert fizz_buzz_encode(6) == [0,1,0,0]
assert fizz_buzz_encode(10) == [0,0,1,0]
assert fizz_buzz_encode(30) == [0,0,0,1]

Using  a different approach

In [27]:
#converting numbers to binary representation

def binary_encode(x:int) -> Vector:
    binary: List[float] = []
    for i in range(10):
        binary.append(x%2)
        x = x//2
    return binary

In [28]:
# 1 2 4 8 16 32 64 128 256 512
assert binary_encode(0) == [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
assert binary_encode(1) == [1, 0, 0, 0, 0, 0, 0, 0, 0, 0]
assert binary_encode(10) == [0, 1, 0, 1, 0, 0, 0, 0, 0, 0] 
assert binary_encode(101) == [1, 0, 1, 0, 0, 1, 1, 0, 0, 0]
assert binary_encode(999) == [1, 1, 1, 0, 0, 1, 1, 1, 1, 1]

In [29]:
xs = [binary_encode(n) for n in range(101,1024)]
ys = [fizz_buzz_encode(n) for n in range(101,1024)]

In [31]:
NUM_HIDDEN = 25
network = [
    [[random.random() for _ in range(10+1)] for _ in range(NUM_HIDDEN)],
    [[random.random()for _ in range(NUM_HIDDEN + 1)] for i in range(4)]
]

In [34]:
from scratch.linear_algebra import squared_distance
learning_rate = 1.0

with tqdm.trange(500) as t:
    for epoch in t:
        epoch_loss = 0.0
        for x,y in zip(xs,ys):
            predicted = feed_forward(network,x)[-1]
            epoch_loss += squared_distance(predicted,y)
            gradients = sqerror_gradient(network,x,y)
            
            network = [[gradient_step(neuron,grad,-learning_rate)
                        for neuron,grad in zip(layer,layer_grad)]
                      for layer, layer_grad in zip(network,gradients)]
        t.set_description(f'fizz buzz (loss:{epoch_loss:.2f})')

fizz buzz (loss:42.27): 100%|████████████████████████████████████████████████████████| 500/500 [02:30<00:00,  3.32it/s]


In [36]:
#getting the predictions - 

def argmax(xs:list) -> int:
    return max(range(len(xs)),key=lambda i: xs[i])

assert argmax([0,-1]) == 0
assert argmax([-1,0]) == 1
assert argmax([-1,10,5,20,-3]) == 3

In [38]:
#finally printing the fizz-buzz

num_correct = 0

for n in range(1,101):
    x = binary_encode(n)
    predicted = argmax(feed_forward(network,x)[-1])
    actual  = argmax(fizz_buzz_encode(n))
    labels = [str(n),'fizz','buzz','fizzbuzz']
    print(n,labels[predicted],labels[actual])
    if predicted == actual:
        num_correct += 1
    print(num_correct,'/',100)

1 1 1
1 / 100
2 2 2
2 / 100
3 fizz fizz
3 / 100
4 4 4
4 / 100
5 buzz buzz
5 / 100
6 fizz fizz
6 / 100
7 7 7
7 / 100
8 8 8
8 / 100
9 fizz fizz
9 / 100
10 buzz buzz
10 / 100
11 11 11
11 / 100
12 fizz fizz
12 / 100
13 13 13
13 / 100
14 14 14
14 / 100
15 fizzbuzz fizzbuzz
15 / 100
16 16 16
16 / 100
17 17 17
17 / 100
18 fizz fizz
18 / 100
19 19 19
19 / 100
20 buzz buzz
20 / 100
21 fizz fizz
21 / 100
22 22 22
22 / 100
23 23 23
23 / 100
24 fizz fizz
24 / 100
25 buzz buzz
25 / 100
26 26 26
26 / 100
27 fizz fizz
27 / 100
28 28 28
28 / 100
29 29 29
29 / 100
30 fizzbuzz fizzbuzz
30 / 100
31 31 31
31 / 100
32 32 32
32 / 100
33 fizz fizz
33 / 100
34 34 34
34 / 100
35 buzz buzz
35 / 100
36 fizz fizz
36 / 100
37 37 37
37 / 100
38 38 38
38 / 100
39 fizz fizz
39 / 100
40 buzz buzz
40 / 100
41 41 41
41 / 100
42 fizz fizz
42 / 100
43 43 43
43 / 100
44 44 44
44 / 100
45 fizzbuzz fizzbuzz
45 / 100
46 46 46
46 / 100
47 47 47
47 / 100
48 fizz fizz
48 / 100
49 49 49
49 / 100
50 buzz buzz
50 / 100
51 fizz fizz