In [2]:
import numpy as np
streetlights = np.array([[1, 0, 1],
                         [0, 1, 1],
                         [0, 0, 1],
                         [1, 1, 1],
                         [0, 1, 1],
                         [1, 0, 1]])

walk_vs_stop = np.array([0, 1, 0, 1, 1, 0])

# streetlights -> neural network -> walk_vs_stop

In [3]:
weights = np.array([0.5, 0.48, -0.7])
alpha = 0.1

In [4]:
streetlights[0]

array([1, 0, 1])

In [5]:
# learn on 1 streetlight
input = streetlights[0]
goal_prediction = walk_vs_stop[0]

for iteration in range(20):
    prediction = input.dot(weights)
    error = (goal_prediction - prediction) ** 2
    delta = prediction - goal_prediction
    weights -= alpha * (input * delta)
    
    print("error:" + str(error) + " prediction:" + str(prediction))

error:0.03999999999999998 prediction:-0.19999999999999996
error:0.025599999999999973 prediction:-0.15999999999999992
error:0.01638399999999997 prediction:-0.1279999999999999
error:0.010485759999999964 prediction:-0.10239999999999982
error:0.006710886399999962 prediction:-0.08191999999999977
error:0.004294967295999976 prediction:-0.06553599999999982
error:0.002748779069439994 prediction:-0.05242879999999994
error:0.0017592186044416036 prediction:-0.04194304000000004
error:0.0011258999068426293 prediction:-0.03355443200000008
error:0.0007205759403792803 prediction:-0.02684354560000002
error:0.0004611686018427356 prediction:-0.021474836479999926
error:0.0002951479051793508 prediction:-0.01717986918399994
error:0.00018889465931478573 prediction:-0.013743895347199997
error:0.00012089258196146188 prediction:-0.010995116277759953
error:7.737125245533561e-05 prediction:-0.008796093022207963
error:4.951760157141604e-05 prediction:-0.007036874417766459
error:3.169126500570676e-05 prediction:-0.0

In [6]:
# all at once
weights = np.array([0.5, 0.48, -0.7])
alpha = 0.1

In [7]:
# stochastic gradient descent updates weights one example at a time:
for iteration in range(40):
    error_for_all_lights = 0
    for row_index in range(len(walk_vs_stop)):
        input = streetlights[row_index]
        goal_prediction = walk_vs_stop[row_index]
        prediction = input.dot(weights)
        error = (goal_prediction - prediction) ** 2
        error_for_all_lights += error
        
        delta = prediction - goal_prediction
        weights -= alpha * (input * delta)
        print("prediction:" + str(prediction))
        print("error:" + str(error_for_all_lights) + "\n")

prediction:-0.19999999999999996
error:0.03999999999999998

prediction:-0.19999999999999996
error:1.48

prediction:-0.5599999999999999
error:1.7935999999999999

prediction:0.616
error:1.941056

prediction:0.17279999999999995
error:2.62531584

prediction:0.17552
error:2.6561231104

prediction:0.14041599999999999
error:0.019716653055999997

prediction:0.3066464
error:0.50045586768896

prediction:-0.34513824
error:0.6195762723992576

prediction:1.006637344
error:0.619620326734632

prediction:0.4785034751999999
error:0.891578952113109

prediction:0.26700416768
error:0.9628701776715985

prediction:0.213603334144
error:0.04562638435743332

prediction:0.5347420299776
error:0.26209136302679775

prediction:-0.26067345110016
error:0.33004201113526527

prediction:1.131942884509696
error:0.34745093590800424

prediction:0.6274723921901568
error:0.48622775448852856

prediction:0.25433999330650114
error:0.5509165866836796

prediction:0.20347199464520094
error:0.041400852604896676

prediction:0.6561967

In [8]:
# learning rewards correlation. correlation can happen accidentally -> overfitting (heaviest weights don't go to best inputs)
# training on just 1 stoplight leads to overfitting but since we have the rest of our inputs we are gucci

# generalize vs memorize

# regularization forces weights with conflicting presure to move toward 0.

In [9]:
# if there's no correlation between input and output dataset, we can create another layer. our new layer (intermediate data) 
# will have correlation with our output layer 

In [None]:
# process of moving delta signal around is called "backpropagation".

# for any three-layer network we crate, there's a two-layer network that has identical behavior. we want our middle layer
# to sometimes correlate with an output and sometimes not correlate. "conditional correlation" or "sometimes correlation"
# to do this we can turn off nodes / set them to 0 when it would be negative (RELU BABY) - "nonlinearity".

# by turning off any middle node whenver it would be negative, we allow the network to sometimes subscribe to correlation
# from various inputs - this is impossible with a two-layer NN.

# without nonlinearity, two matrix multiplications might as well be 1.

In [13]:
# deep NN
np.random.seed(1)

def relu(x):
    """returns 0 for all negative values."""
    return (x > 0) * x 

alpha = 0.2
hidden_size = 4

streetlights = np.array([[1, 0, 1],
                         [0, 1, 1],
                         [0, 0, 1],
                         [1, 1, 1]])

walk_vs_stop = np.array([[1, 1, 0, 0]]).T

# randomly initialize weights
weights_0_1 = 2*np.random.random((3, hidden_size)) - 1
weights_1_2 = 2*np.random.random((hidden_size, 1)) - 1

layer_0 = streetlights[0]
layer_1 = relu(np.dot(layer_0, weights_0_1))
layer_2 = np.dot(layer_1, weights_1_2)

In [19]:
# backpropagation
def relu2deriv(output):
    """returns 1 for input > 0 and 0 otherwise."""
    return output>0

weights_0_1 = 2*np.random.random((3, hidden_size)) - 1
weights_1_2 = 2*np.random.random((hidden_size, 1)) - 1

for iteration in range(60):
    layer_2_error = 0
    for i in range(len(streetlights)):
        layer_0 = streetlights[i:i+1]
        layer_1 = relu(np.dot(layer_0, weights_0_1))
        layer_2 = np.dot(layer_1, weights_1_2)
        
        layer_2_error += np.sum((layer_2 - walk_vs_stop[i:i+1])**2)
        layer_2_delta = (layer_2 - walk_vs_stop[i:i+1])
        layer_1_delta = layer_2_delta.dot(weights_1_2.T)*relu2deriv(layer_1) # reul2deriv is like an on/off
        
        weight_delta_1_2 = layer_1.T.dot(layer_2_delta)
        weight_delta_0_1 = layer_0.T.dot(layer_1_delta)
        
        weights_1_2 -= alpha * weight_delta_1_2
        weights_0_1 -= alpha * weight_delta_0_1
        
    if (iteration % 10 == 9):
        print("error:" + str(layer_2_error))
    

error:0.805971789248886
error:0.11685362533240146
error:0.02434571722428782
error:0.006908063506375486
error:0.002980621140921236
error:0.0016097841590370096


In [None]:
# why do we need intermediate datasets?
# if we have dataset of images with cats and without cats, no individual pixel correlates with whether there's a cat
# in the picture. only different configurations of pixels correlate with whether there's a cat.

# deep learning is about creating intermediate layers (datasets) wherein each node in an intermediate layer represents
# the presence or absence of a different configuration of inputs

# this way , no individual pixel has to correlate with whether there's a cat or not. instead middle layer will attempt
# to identify different configs of pixels that may or may not correlate with a cat. The precense of many cat-like configs
# will then giv final layer the info (correlation) it needs to predict presense or absence of un gato.

In [43]:

# from mem...




















def relu(x):
    return (x > 0) * x


def relu_deriv(x):
    return x > 0



alpha = 0.2
hidden_size = 4

streetlights = np.array([[1, 0, 1],
                         [0, 1, 1],
                         [0, 0, 1],
                         [1, 1, 1]])

walk_vs_stop = np.array([[1, 1, 0, 0]]).T


weights_0_1 = 2*np.random.random((3, hidden_size)) - 1 # [-1, 1] scaling 
weights_1_2 = 2*np.random.random((hidden_size, 1)) - 1 

for iteration in range(60):
    error = 0
    for i in range(len(streetlights)):
        layer_0 = streetlights[i:i+1] # input
        layer_1 = relu(np.dot(layer_0, weights_0_1))
        layer_2 = np.dot(layer_1, weights_1_2)
        
        error += np.sum((layer_2 - walk_vs_stop[i:i+1])**2)
        layer_2_delta = layer_2 - walk_vs_stop[i:i+1]
        layer_1_delta = layer_2_delta.dot(weights_1_2.T)*relu_deriv(layer_1)
        
        weight_delta_1_2 = layer_1.T.dot(layer_2_delta)
        weight_delta_0_1 = layer_0.T.dot(layer_1_delta)
        
        weights_1_2 -= alpha * weight_delta_1_2
        weights_0_1 -= alpha * weight_delta_0_1
        
    if (iteration % 10 == 9):
        print("error:" + str(error))


error:1.0949028990945844
error:0.8016570571851256
error:0.46426088850161695
error:0.15815346101556205
error:0.00825906290291474
error:4.40363376554635e-05


In [None]:
# scratch paper below..

In [33]:
ex = np.random.random((3, hidden_size))
print(ex) # [0, 1]
ex = 2 * ex
print(ex) # [0, 2]
ex = ex - 1
print(ex) # [-1, 1]

[[0.92618143 0.91873344 0.39487561 0.96326253]
 [0.17395567 0.12632952 0.13507916 0.50566217]
 [0.02152481 0.94797021 0.82711547 0.01501898]]
[[1.85236285 1.83746687 0.78975123 1.92652506]
 [0.34791133 0.25265904 0.27015832 1.01132433]
 [0.04304961 1.89594042 1.65423094 0.03003796]]
[[ 0.85236285  0.83746687 -0.21024877  0.92652506]
 [-0.65208867 -0.74734096 -0.72984168  0.01132433]
 [-0.95695039  0.89594042  0.65423094 -0.96996204]]


In [28]:
streetlights = np.array([[1, 0, 1],
                         [0, 1, 1],
                         [0, 0, 1],
                         [1, 1, 1]])

print(streetlights[0])
print(streetlights[0:1])

[1 0 1]
[[1 0 1]]


In [29]:
print(streetlights[1])
print(streetlights[1:2])

[0 1 1]
[[0 1 1]]


In [34]:
print(streetlights[1:3])

[[0 1 1]
 [0 0 1]]


In [35]:
print(streetlights[1:4])

[[0 1 1]
 [0 0 1]
 [1 1 1]]
