### Gradient Descent: Generalized

In [85]:
def neural_network(X, W):
    return X.dot(W)

def error_function(P, GT):
    return (P-GT)**2

# Calculating the weight derivative
def weight_derivative(P, GT, X):
    dE_dP = (P-GT)
    dP_dW = X
    dE_dW = dE_dP * dP_dW
    return dE_dW

def weight_delta(P, GT, X):
    return np.multiply(X,(P-GT))

def update_weights(alpha, weight_deltas, W):
    for weight in range(len(W)):
        W[weight] -= alpha * weight_deltas[weight]
    return W

def error_optimization(X, W, GT, epochs, alpha):
    errors = []
    weights = []
    for epoch in range(epochs):
        P = neural_network(X, W)
        E = error_function(P, GT)
        errors.append(E)
        weights.append(W.copy())
        WD = weight_delta(P, GT, X)
        W = update_weights(alpha, WD, W)
        print(f"Epoch {epoch} | Prediction {P} | Error {E}")
        print(f"Weights {W} | Weight Deltas {WD}")
    return errors,weights

In [86]:
import numpy as np
W = [0.1, 0.2, -.1] 
X = np.array([8.5,0.65,1.2])
W = np.array(W)
GT = 1
alpha = 0.01


errors, weights = error_optimization(X, W, GT, 300, 0.01)


Epoch 0 | Prediction 0.8600000000000001 | Error 0.01959999999999997
Weights [ 0.1119   0.20091 -0.09832] | Weight Deltas [-1.19  -0.091 -0.168]
Epoch 1 | Prediction 0.9637574999999999 | Error 0.0013135188062500048
Weights [ 0.11498061  0.20114558 -0.09788509] | Weight Deltas [-0.30806125 -0.02355763 -0.043491  ]
Epoch 2 | Prediction 0.9906177228125002 | Error 8.802712522307997e-05
Weights [ 0.11577811  0.20120656 -0.0977725 ] | Weight Deltas [-0.07974936 -0.00609848 -0.01125873]
Epoch 3 | Prediction 0.997571162993086 | Error 5.899249206154892e-06
Weights [ 0.11598456  0.20122235 -0.09774336] | Weight Deltas [-0.02064511 -0.00157874 -0.0029146 ]
Epoch 4 | Prediction 0.9993712348198351 | Error 3.953456517877471e-07
Weights [ 0.116038    0.20122644 -0.09773581] | Weight Deltas [-0.0053445  -0.0004087  -0.00075452]
Epoch 5 | Prediction 0.9998372284139849 | Error 2.6494589213863218e-08
Weights [ 0.11605184  0.20122749 -0.09773386] | Weight Deltas [-0.00138356 -0.0001058  -0.00019533]
Epoch 

### Gradient Descent: Freezing one weight

In [83]:
def frozen_update_weights(alpha, weight_deltas, W):
    for weight in range(len(W)):
        W[weight] -= alpha * weight_deltas[weight]
    W[0] = 0
    return W

def error_optimization(X, W, GT, epochs, alpha):
    errors = []
    weights = []
    for epoch in range(epochs):
        P = neural_network(X, W)
        E = error_function(P, GT)
        errors.append(E)
        weights.append(W.copy())
        WD = weight_delta(P, GT, X)
        W = frozen_update_weights(alpha, WD, W)
        print(f"Epoch {epoch} | Prediction {P} | Error {E}")
        print(f"Weights {W} | Weight Deltas {WD}")
    return errors,weights

In [84]:
W = [0, 0.2, -.1] 
X = np.array(X)
W = np.array(W)
GT = 1
alpha = 0.03


errors, ws = error_optimization(X, W, GT, 100, 0.03)

Epoch 0 | Prediction 0.010000000000000009 | Error 0.9801
Weights [ 0.        0.219305 -0.06436 ] | Weight Deltas [-8.415  -0.6435 -1.188 ]
Epoch 1 | Prediction 0.06531625000000002 | Error 0.8736337125140625
Weights [ 0.          0.23753133 -0.03071139] | Weight Deltas [-7.94481188 -0.60754444 -1.1216205 ]
Epoch 2 | Prediction 0.11754170453124999 | Error 0.7787326432416116
Weights [0.         0.25473927 0.00105711] | Weight Deltas [-7.50089551 -0.57359789 -1.05894995]
Epoch 3 | Prediction 0.1668490617905664 | Error 0.6941404858392594
Weights [0.         0.27098571 0.03105055] | Weight Deltas [-7.08178297 -0.54154811 -0.99978113]
Epoch 4 | Prediction 0.21340137046301852 | Error 0.6187374039894574
Weights [0.         0.28632439 0.0593681 ] | Weight Deltas [-6.68608835 -0.51128911 -0.94391836]
Epoch 5 | Prediction 0.25735256888839736 | Error 0.5515252069366626
Weights [0.         0.30080601 0.08610341] | Weight Deltas [-6.31250316 -0.48272083 -0.89117692]
Epoch 6 | Prediction 0.29884799410

This is an extremely important point. When one weight finds the minima, all weights find the minima. The fact that we converged with one frozen weight points to an extremely damaging property of neural networks. An input variable might be a very powerful predictor, but if the neural network accidentally figures out how to predict accurately without using a particular input variable, then it will never learn to incoporate it into production.

The way the neural network reaches the minima is by moving the error-weights curve because it is unable to move the frozen weight.

### Gradient Descent with Multiple Outputs

In [99]:
W = np.array([0.3, 0.2, 0.9])

X = np.array([0.65])
GT = [0.1,1,0.1]

def neural_network(X,W):
    return X*W

def error_function(P, GT):
    return (P-GT)**2

def error_optimization(X, W, GT, epochs, alpha):
    errors = []
    weights = []
    for epoch in range(epochs):
        P = neural_network(X, W)
        E = error_function(P, GT)
        errors.append(E)
        weights.append(W.copy())
        WD = weight_delta(P, GT, X)
        W = update_weights(alpha, WD, W)
        print(f"Epoch {epoch} | Prediction {P} | Error {E}")
        print(f"Weights {W} | Weight Deltas {WD}")
    return errors,weights

In [101]:
alpha = 0.01
errors, ws = error_optimization(X, W, GT, 1000, 0.03)

Epoch 0 | Prediction [0.12652998 0.7570412  0.23544255] | Error [0.00070384 0.05902898 0.01834468]
Weights [0.19414418 1.16941646 0.35957818] | Weight Deltas [ 0.01724449 -0.15792322  0.08803766]
Epoch 1 | Prediction [0.12619372 0.7601207  0.23372582] | Error [0.00068611 0.05754208 0.01788259]
Weights [0.1936334  1.1740941  0.35697053] | Weight Deltas [ 0.01702592 -0.15592155  0.08692178]
Epoch 2 | Prediction [0.12586171 0.76316117 0.23203084] | Error [0.00066883 0.05609263 0.01743214]
Weights [0.1931291  1.17871246 0.35439593] | Weight Deltas [ 0.01681011 -0.15394524  0.08582005]
Epoch 3 | Prediction [0.12553391 0.7661631  0.23035735] | Error [0.00065198 0.0546797  0.01699304]
Weights [0.19263119 1.18327228 0.35185396] | Weight Deltas [ 0.01659704 -0.15199398  0.08473228]
Epoch 4 | Prediction [0.12521027 0.76912698 0.22870507] | Error [0.00063556 0.05330235 0.016565  ]
Weights [0.19213959 1.18777431 0.34934421] | Weight Deltas [ 0.01638668 -0.15006746  0.0836583 ]
Epoch 5 | Prediction

Weights [0.15442966 1.53311782 0.15682513] | Weight Deltas [ 0.0002497  -0.00228671  0.00127477]
Epoch 333 | Prediction [0.10037928 0.99652658 0.10193633] | Error [1.43854406e-07 1.20646426e-05 3.74937979e-06]
Weights [0.15442227 1.53318555 0.15678737] | Weight Deltas [ 0.00024653 -0.00225772  0.00125862]
Epoch 334 | Prediction [0.10037447 0.99657061 0.10191179] | Error [1.40230808e-07 1.17607422e-05 3.65493537e-06]
Weights [0.15441497 1.53325242 0.15675009] | Weight Deltas [ 0.00024341 -0.00222911  0.00124266]
Epoch 335 | Prediction [0.10036973 0.99661407 0.10188756] | Error [1.36698485e-07 1.14644968e-05 3.56286994e-06]
Weights [0.15440776 1.53331845 0.15671328] | Weight Deltas [ 0.00024032 -0.00220085  0.00122691]
Epoch 336 | Prediction [0.10036504 0.99665699 0.10186363] | Error [1.33255140e-07 1.11757136e-05 3.47312358e-06]
Weights [0.15440064 1.53338364 0.15667694] | Weight Deltas [ 0.00023728 -0.00217296  0.00121136]
Epoch 337 | Prediction [0.10036041 0.99669936 0.10184001] | Err

Epoch 599 | Prediction [0.10001275 0.99988328 0.10006507] | Error [1.62448012e-10 1.36240333e-08 4.23399819e-09]
Weights [0.15386551 1.53828424 0.15394499] | Weight Deltas [ 8.28458117e-06 -7.58693223e-05  4.22949670e-05]
Epoch 600 | Prediction [0.10001258 0.99988476 0.10006424] | Error [1.58356053e-10 1.32808528e-08 4.12734655e-09]
Weights [0.15386527 1.53828649 0.15394374] | Weight Deltas [ 8.17957410e-06 -7.49076786e-05  4.17588783e-05]
Epoch 601 | Prediction [0.10001242 0.99988622 0.10006343] | Error [1.54367168e-10 1.29463168e-08 4.02338140e-09]
Weights [0.15386503 1.53828871 0.1539425 ] | Weight Deltas [ 8.07589800e-06 -7.39582238e-05  4.12295845e-05]
Epoch 602 | Prediction [0.10001227 0.99988766 0.10006263] | Error [1.50478760e-10 1.26202076e-08 3.92203506e-09]
Weights [0.15386479 1.5382909  0.15394128] | Weight Deltas [ 7.97353599e-06 -7.30208033e-05  4.07069996e-05]
Epoch 603 | Prediction [0.10001211 0.99988908 0.10006183] | Error [1.46688299e-10 1.23023129e-08 3.82324157e-09]

Epoch 891 | Prediction [0.10000031 0.99999718 0.10000157] | Error [9.45001719e-14 7.92544932e-12 2.46302526e-12]
Weights [0.15384662 1.53845726 0.15384854] | Weight Deltas [ 1.99815722e-07 -1.82989135e-06  1.02011184e-06]
Epoch 892 | Prediction [0.1000003  0.99999722 0.10000155] | Error [9.21197745e-14 7.72581245e-12 2.40098326e-12]
Weights [0.15384661 1.53845732 0.15384851] | Weight Deltas [ 1.97283057e-07 -1.80669747e-06  1.00718192e-06]
Epoch 893 | Prediction [0.1000003  0.99999726 0.10000153] | Error [8.97993378e-14 7.53120430e-12 2.34050407e-12]
Weights [0.15384661 1.53845737 0.15384848] | Weight Deltas [ 1.94782495e-07 -1.78379758e-06  9.94415894e-07]
Epoch 894 | Prediction [0.1000003  0.99999729 0.10000151] | Error [8.75373513e-14 7.34149820e-12 2.28154831e-12]
Weights [0.1538466  1.53845742 0.15384845] | Weight Deltas [ 1.92313627e-07 -1.76118795e-06  9.81811672e-07]
Epoch 895 | Prediction [0.10000029 0.99999732 0.10000149] | Error [8.53323429e-14 7.15657067e-12 2.22407760e-12]

### Gradient Descent: Multiple Inputs and Outputs

In [107]:
def neural_network(X, W):
    return X.dot(W.T)

def error_function(P, GT):
    return (P-GT)**2

# Calculating the weight derivative
def weight_derivative(P, GT, X):
    dE_dP = (P-GT)
    dP_dW = X
    dE_dW = dE_dP * dP_dW
    return dE_dW

def weight_delta(P, GT, X):
    return np.multiply(X,(P-GT))

def update_weights(alpha, weight_deltas, W):
    for weight in range(len(W)):
        W[weight] -= alpha * weight_deltas[weight]
    return W

def error_optimization(X, W, GT, epochs, alpha):
    errors = []
    weights = []
    for epoch in range(epochs):
        P = neural_network(X, W)
        E = error_function(P, GT)
        errors.append(E)
        weights.append(W.copy())
        WD = weight_delta(P, GT, X)
        W = update_weights(alpha, WD, W)
        print(f"Epoch {epoch} | Prediction {P} | Error {E}")
        print(f"Weights {W} | Weight Deltas {WD}")
    return errors,weights

In [108]:
W = np.array([[0.1, 0.1, -0.3],
            [0.1, 0.2, 0.0],
            [0.0, 1.3, 0.1]])

X = np.array([8.5,0.65,1.2])
W = np.array(W)
GT = np.array([0.1,1,0.1])
alpha = 0.01


errors, weights = error_optimization(X, W, GT, 1000, 0.01)



Epoch 0 | Prediction [0.555 0.98  0.965] | Error [2.07025e-01 4.00000e-04 7.48225e-01]
Weights [[ 6.13250e-02  6.13250e-02 -3.38675e-01]
 [ 1.00130e-01  2.00130e-01  1.30000e-04]
 [-1.03800e-02  1.28962e+00  8.96200e-02]] | Weight Deltas [ 3.8675 -0.013   1.038 ]
Epoch 1 | Prediction [0.15471375 0.9813455  0.857567  ] | Error [2.99359444e-03 3.47990370e-04 5.73907759e-01]
Weights [[ 5.66743313e-02  5.66743313e-02 -3.43325669e-01]
 [ 1.00251254e-01  2.00251254e-01  2.51254250e-04]
 [-1.94708040e-02  1.28052920e+00  8.05291960e-02]] | Weight Deltas [ 0.46506687 -0.01212542  0.9090804 ]
Epoch 2 | Prediction [0.10657933 0.98260048 0.76347718] | Error [4.32875627e-05 3.02743244e-04 4.40201967e-01]
Weights [[ 5.61150883e-02  5.61150883e-02 -3.43884912e-01]
 [ 1.00364351e-01  2.00364351e-01  3.64351120e-04]
 [-2.74325301e-02  1.27256747e+00  7.25674699e-02]] | Weight Deltas [ 0.05592429 -0.01130969  0.79617261]
Epoch 3 | Prediction [0.10079116 0.98377103 0.68107331] | Error [6.25940862e-07 2.

 [-0.08357488  1.21642512  0.01642512]] | Weight Deltas [-2.35922393e-16 -1.09184017e-10  5.82867088e-16]
Epoch 268 | Prediction [0.1 1.  0.1] | Error [7.70371978e-34 2.45470576e-20 2.22637502e-31]
Weights [[ 0.05603865  0.05603865 -0.34396135]
 [ 0.10193237  0.20193237  0.00193237]
 [-0.08357488  1.21642512  0.01642512]] | Weight Deltas [-2.35922393e-16 -1.01838754e-10  5.66213743e-16]
Epoch 269 | Prediction [0.1 1.  0.1] | Error [7.70371978e-34 2.13553388e-20 2.22637502e-31]
Weights [[ 0.05603865  0.05603865 -0.34396135]
 [ 0.10193237  0.20193237  0.00193237]
 [-0.08357488  1.21642512  0.01642512]] | Weight Deltas [-2.35922393e-16 -9.49875290e-11  5.66213743e-16]
Epoch 270 | Prediction [0.1 1.  0.1] | Error [7.70371978e-34 1.85786204e-20 2.09733771e-31]
Weights [[ 0.05603865  0.05603865 -0.34396135]
 [ 0.10193237  0.20193237  0.00193237]
 [-0.08357488  1.21642512  0.01642512]] | Weight Deltas [-2.35922393e-16 -8.85972185e-11  5.49560397e-16]
Epoch 271 | Prediction [0.1 1.  0.1] | Err

Epoch 534 | Prediction [0.1 1.  0.1] | Error [7.70371978e-34 1.97215226e-31 1.92592994e-32]
Weights [[ 0.05603865  0.05603865 -0.34396135]
 [ 0.10193237  0.20193237  0.00193237]
 [-0.08357488  1.21642512  0.01642512]] | Weight Deltas [-2.35922393e-16 -2.88657986e-16  1.66533454e-16]
Epoch 535 | Prediction [0.1 1.  0.1] | Error [7.70371978e-34 1.97215226e-31 1.92592994e-32]
Weights [[ 0.05603865  0.05603865 -0.34396135]
 [ 0.10193237  0.20193237  0.00193237]
 [-0.08357488  1.21642512  0.01642512]] | Weight Deltas [-2.35922393e-16 -2.88657986e-16  1.66533454e-16]
Epoch 536 | Prediction [0.1 1.  0.1] | Error [7.70371978e-34 1.97215226e-31 1.92592994e-32]
Weights [[ 0.05603865  0.05603865 -0.34396135]
 [ 0.10193237  0.20193237  0.00193237]
 [-0.08357488  1.21642512  0.01642512]] | Weight Deltas [-2.35922393e-16 -2.88657986e-16  1.66533454e-16]
Epoch 537 | Prediction [0.1 1.  0.1] | Error [7.70371978e-34 1.97215226e-31 1.92592994e-32]
Weights [[ 0.05603865  0.05603865 -0.34396135]
 [ 0.1019

Epoch 782 | Prediction [0.1 1.  0.1] | Error [7.70371978e-34 1.23259516e-32 1.92592994e-32]
Weights [[ 0.05603865  0.05603865 -0.34396135]
 [ 0.10193237  0.20193237  0.00193237]
 [-0.08357488  1.21642512  0.01642512]] | Weight Deltas [-2.35922393e-16 -7.21644966e-17  1.66533454e-16]
Epoch 783 | Prediction [0.1 1.  0.1] | Error [7.70371978e-34 1.23259516e-32 1.92592994e-32]
Weights [[ 0.05603865  0.05603865 -0.34396135]
 [ 0.10193237  0.20193237  0.00193237]
 [-0.08357488  1.21642512  0.01642512]] | Weight Deltas [-2.35922393e-16 -7.21644966e-17  1.66533454e-16]
Epoch 784 | Prediction [0.1 1.  0.1] | Error [7.70371978e-34 1.23259516e-32 1.92592994e-32]
Weights [[ 0.05603865  0.05603865 -0.34396135]
 [ 0.10193237  0.20193237  0.00193237]
 [-0.08357488  1.21642512  0.01642512]] | Weight Deltas [-2.35922393e-16 -7.21644966e-17  1.66533454e-16]
Epoch 785 | Prediction [0.1 1.  0.1] | Error [7.70371978e-34 1.23259516e-32 1.92592994e-32]
Weights [[ 0.05603865  0.05603865 -0.34396135]
 [ 0.1019