# GD WITH MULTIPLE INPUTS AND OUTPUTS

## GD with multiple inputs

In [9]:
import numpy as np

def neural_network(input, weights):
    pred = np.sum(list(input * weights))  # vector * vector
    return pred

toes = np.array([8.5, 9.5, 9.9, 9.0])
wlrec = np.array([0.65, 0.8, 0.8, 0.9])
nfans = np.array([1.2, 1.3, 0.5, 1.0])

win_or_lose_binary = [1, 1, 0, 1]
target_pred = win_or_lose_binary[0]

alpha = 0.01
weights = np.array([0.1, 0.2, -.1])
input = np.array([toes[0], wlrec[0], nfans[0]])

for iteration in range(3):
    pred = neural_network(input, weights)
    
    error = (pred - target_pred) ** 2
    delta = pred - target_pred
    weight_deltas = delta * input
    
    print('----')
    print(f'Iteration {iteration+1}')
    print(f'Pred {pred:.4f} Error {error:.4f}')
    print(f'Delta {delta:.4f}')
    print(f'Weights {weights}')
    print(f'Weight deltas {weight_deltas}')
    
    weights -= alpha * weight_deltas

----
Iteration 1
Pred 0.8600 Error 0.0196
Delta -0.1400
Weights [ 0.1  0.2 -0.1]
Weight deltas [-1.19  -0.091 -0.168]
----
Iteration 2
Pred 0.9638 Error 0.0013
Delta -0.0362
Weights [ 0.1119   0.20091 -0.09832]
Weight deltas [-0.30806125 -0.02355763 -0.043491  ]
----
Iteration 3
Pred 0.9906 Error 0.0001
Delta -0.0094
Weights [ 0.11498061  0.20114558 -0.09788509]
Weight deltas [-0.07974936 -0.00609848 -0.01125873]


## Freezing one weight

* Frozen weight still find the bottom of the bowl
* Curves are a measure of each individual weight relative to the global error
* Thus, because `error` is shared, when one weight finds the bottom of the bowl, all the weights do too.
* But the NN learns without incorporating frozen weight into its predictions
* And instead of the point moving on the curve, the curve itself moves   
(means that the data point can move horizontally only if the weight is updated)
* Weights are 100% determined by input data

In [17]:
import numpy as np

def neural_network(input, weights):
    pred = weights.dot(input)
    return pred

toes = np.array([8.5, 9.5, 9.9, 9.0])
wlrec = np.array([0.65, 0.8, 0.8, 0.9])
nfans = np.array([1.2, 1.3, 0.5, 1.0])

win_or_lose_binary = [1, 1, 0, 1]
target_pred = win_or_lose_binary[0]

alpha = 0.3
weights = np.array([0.1, 0.2, -.1])
input = np.array([toes[0], wlrec[0], nfans[0]])

for iteration in range(3):
    pred = neural_network(input, weights)
    
    error = (pred - target_pred) ** 2
    delta = pred - target_pred
    weight_deltas = delta * input
    weight_deltas[0] = 0              # FREEZE THIS WEIGHT
    
    print('----')
    print(f'Iteration {iteration+1}')
    print(f'Pred {pred:.4f} Error {error:.4f}')
    print(f'Delta {delta:.4f}')
    print(f'Weights {weights}')
    print(f'Weight deltas {weight_deltas}')
    
    weights -= alpha * weight_deltas

----
Iteration 1
Pred 0.8600 Error 0.0196
Delta -0.1400
Weights [ 0.1  0.2 -0.1]
Weight deltas [ 0.    -0.091 -0.168]
----
Iteration 2
Pred 0.9382 Error 0.0038
Delta -0.0618
Weights [ 0.1     0.2273 -0.0496]
Weight deltas [ 0.         -0.04015375 -0.07413   ]
----
Iteration 3
Pred 0.9727 Error 0.0007
Delta -0.0273
Weights [ 0.1         0.23934612 -0.027361  ]
Weight deltas [ 0.         -0.01771784 -0.03270986]


## GD with multiple outputs

Same mechanism called **_Stochastic Gradient Descent_** is used to perform learning accross varieties of architectures

In [16]:
weights = [0.3, 0.2, 0.9]

def neural_network(input, weights):
    pred = [(input*w) for w in weights]
    return pred

# labels
wlrec = np.array([0.65, 1.0, 1.0, 0.9])

# input data
hurt = np.array([0.1, 0.0, 0.0, 0.1])
win  = np.array([  1,   1,   0,   1])
sad  = np.array([0.1, 0.0, 0.1, 0.2])

# an instance and its label
input = wlrec[0]
target_pred = np.array([hurt[0], win[0], sad[0]])

pred = neural_network(input, weights)

error = [0, 0, 0]
delta = [0, 0, 0]

error = (pred - target_pred) ** 2
delta = pred - target_pred 
weight_deltas = [(input*w) for w in weights]

alpha = 0.1
weights -= np.array([(alpha*wd) for wd in weight_deltas])

print('----')
print(f'Weight deltas {weight_deltas}')
print(f'Weights {weights}')

----
Weight deltas [0.195, 0.13, 0.5850000000000001]
Weights [0.2805 0.187  0.8415]


## GD with multiple inputs and outputs

In [46]:
import numpy as np

def neural_network(input, weights):
    preds = [(row.dot(input)) for row in weights]
    return preds

                     #toes - %win - #fans
weights = np.array([ [0.1, 0.1, -0.3],    # hurt ?
                     [0.1, 0.2,  0.0],    # win?
                     [0.0, 1.3,  0.1] ])  # sad?

# training data
toes  = np.array([8.5,  9.5, 9.9, 9.0])
wlrec = np.array([0.65, 0.8, 0.8, 0.9])
nfans = np.array([1.2,  1.3, 0.5, 1.0])

# training labels
hurt = np.array([0.1, 0.0, 0.0, 0.1])
win  = np.array([  1,   1,   0,   1])
sad  = np.array([0.1, 0.0, 0.1, 0.2])

alpha = 0.01

# test input
input = np.array([toes[0], wlrec[0], nfans[0]])
target_preds = np.array([hurt[0], win[0], sad[0]])

errors = np.array([0, 0, 0])
deltas = np.array([0, 0, 0])

# learning
preds = neural_network(input, weights)
errors = (preds - target_preds) ** 2
deltas = preds - target_preds
weight_deltas = deltas * input
weights -= np.array([(alpha * wd) for wd in weight_deltas])


print('----')
print(f"Errors        {[f'{e:.3f}' for e in errors]}")
print(f"Predictions   {[f'{p:.3f}' for p in preds]}")
print(f"Deltas        {[f'{d:.3f}' for d in deltas]}")
print(f"Weight deltas {[f'{wd:.3f}' for wd in weight_deltas]}")
print(f"Weights\n{weights}")

----
Errors        ['0.207', '0.000', '0.748']
Predictions   ['0.555', '0.980', '0.965']
Deltas        ['0.455', '-0.020', '0.865']
Weight deltas ['3.868', '-0.013', '1.038']
Weights
[[ 0.061325  0.10013  -0.31038 ]
 [ 0.061325  0.20013  -0.01038 ]
 [-0.038675  1.30013   0.08962 ]]


## Visualizing weights

* If a **weight is high**, it means that the model believes there is a high degree of **_correlation_** between the corresponding node and the target prediction.   
The node can be one pixel in an image, or one linguistic feature in a sentence.
* If we **print the weights** into an image of same dimensions as input images, we can see which pixels have the highest correlation with a particular output node   
That's because if the `weights` vector is similar to the `input` vector for a given label, then dot product will show a close similarity
