In [75]:
%reload_ext autoreload
import colabexts
from colabexts.jcommon import *

jpath=os.path.dirname(colabexts.__file__)
jcom = f'{jpath}/jcommon.ipynb'
%run $jcom

# Derivative of a function

In [76]:
# Your own way of computing the derivative of a function

def fn(x):
    return x * x;

e  = 0.001
x  = 2
f1 = fn(x-e)
f2 = fn(x+e)
f2, f1, f2-f1, (f2-f1)/(2*e)

(4.004001, 3.9960010000000006, 0.007999999999999119, 3.9999999999995595)

# Auto Differentition and Gradients

In [77]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

tf.__version__, keras.__version__

('2.8.0', '2.8.0')

# Gradient Computations

To differentiate automatically, tf needs to remember what operations happen in what order during the forward pass. Then, during the backward pass, tf traverses this list of operations in reverse order to compute gradients.

In [78]:
x = tf.Variable(3.0)

with tf.GradientTape() as tape:
    y = x**2

dy_dx = tape.gradient(y, x)
print( f"derivative of 'x' w.r.t. 'y' @ 3 is {dy_dx}")


derivative of 'x' w.r.t. 'y' @ 3 is 6.0


GradientTape works on any length of computation and on any tensor

In [102]:
tf.random.set_seed(1)
#w = tf.Variable(tf.random.normal((3, 2)), name='w')
w = tf.Variable([[.1, .2, .3], [.1,0.,.2]])
w = tf.Variable(w, name='w')
b = tf.Variable(tf.zeros((1,2) , dtype=tf.float32), name='b')
x = [[1.], [2.], [3.]]

with tf.GradientTape(persistent=False) as tape:
    y = w @ x  + b
    loss = tf.reduce_mean(y**2)
    print( loss)
[dl_dw, dl_db] = tape.gradient(loss, [w, b])
  
print(f'''
Loss: {loss} 
Y: 
{y}
Shape of w  {w.shape}  ; b.shape = {b.shape} 
Shape of w' {dl_dw.shape}
W-tanspose: 
==========
{w.numpy()}

Derivative:
==========
{dl_dw.numpy()}
{dl_db.numpy()}
''' )

# persistent is se to False - therefoire, calling gradient gives error

try:
    [dl_dw, dl_db] = tape.gradient(loss, [w, b])
except Exception as e:
    print(f"ERROR: {e}")

#if you make persistent as True make sure to delete te tape to reduce memory usage

tf.Tensor(1.2250001, shape=(), dtype=float32)

Loss: 1.2250001430511475 
Y: 
[[1.4000001  1.4000001 ]
 [0.70000005 0.70000005]]
Shape of w  (2, 3)  ; b.shape = (1, 2) 
Shape of w' (2, 3)
W-tanspose: 
[[0.1 0.2 0.3]
 [0.1 0.  0.2]]

Derivative:
[[1.4000001  2.8000002  4.2000003 ]
 [0.70000005 1.4000001  2.1000001 ]]
[[1.0500001 1.0500001]]

ERROR: A non-persistent GradientTape can only be used to compute one set of gradients (or jacobians)


# More on Gradients

## Keras Model Dense and Counting # of parameters

Notice here, input size is 3; and,
first layer has 2 nodes
similarly, second layer has 2 nodes

The weight matrix must be 2x3 + 2 for the bias = 8 parameters
For the 2nd layer, The weight matrix must be 2x2 + 2 for the bias = 6 parameters


In [157]:
# A simple MLP
model = tf.keras.Sequential()
model.add(Dense(2, activation='relu', input_shape=[3] ) )
model.add(Dense(2, activation='relu' ) )
model.summary()
print(model.layers[0].get_weights())
print(model.layers[1].get_weights())


Model: "sequential_47"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_49 (Dense)            (None, 2)                 8         
                                                                 
 dense_50 (Dense)            (None, 2)                 6         
                                                                 
Total params: 14
Trainable params: 14
Non-trainable params: 0
_________________________________________________________________
[array([[-0.43570435,  0.7112912 ],
       [ 0.68243146, -0.49273908],
       [-0.21341771,  0.23050165]], dtype=float32), array([0., 0.], dtype=float32)]
[array([[0.7621423 , 0.6043643 ],
       [0.8276237 , 0.96114886]], dtype=float32), array([0., 0.], dtype=float32)]


# Simple Network with one node

### Forward pass and Backard pass

In [218]:
#%%writefile /tmp/t.py

import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

tf.__version__, keras.__version__

# A Simple Network with One Node weights

model = tf.keras.Sequential()
model.add(Dense(1, activation='relu', input_dim=1, name="w1"))
lr =0.1
opt=tf.keras.optimizers.SGD(learning_rate=lr, momentum=0, nesterov=False)
model.compile(optimizer=opt, loss='mse')

# ~~~ Set custom weights
wts, bias =( np.array([[0.5]]), np.array([0]) )
model.layers[0].set_weights((wts, bias) )

# ~~~ Print the weights
wts11=model.layers[0].get_weights()
print('Layer Weights:', np.array(wts11).shape, wts11[0], wts11[1] )

data=np.array([[1]]).reshape(1,-1)
y = np.array([10])
yhat = model.predict(data, verbose=0)
print("Yhat: " , yhat, "must be equal to = 0.5")

print ("Lets do backward pass - see notes")
f = model.fit(data, y, epochs=1, verbose=1)

print("Loss: is: " , f.history['loss'][-1], " must be equal to = (10-0.5)^2 = 90.25")

# ~~~ Print the weights - see lecture on youtube
wts11=model.layers[0].get_weights()
print('Layer Weights:', np.array(wts11).shape, wts11[0], wts11[1] )

'''#--- Do one pass through NN

print('Call Predict on untrained model'  )

f = model.fit(data, y, epochs=1000, verbose=0)
for i in range(10):
    wts11= model.layers[0].get_weights()
    f = model.fit(data, y, epochs=1000, verbose=0)
    wts12= model.layers[0].get_weights()
    yhat = model.predict(data, verbose=0)
    print(f\' ''Fit {i}: {data} y: {y}  y^: {yhat}; Loss: {f.history['loss'][-1]}
weights:   {wts11[0][0], wts11[1][0] }; 
after fit: {wts12[0][0], wts12[1][0] }\n\n' '' 
         
         )
    if ( isclose(wts11[0][0], wts12[0][0], abs_tol=1e-9)):
        print(f"*** CHANGING LR: {model.optimizer.learning_rate.numpy()}")
        K.set_value(model.optimizer.learning_rate, model.optimizer.learning_rate*4)
    
print(f"#--- Weights after {i} pass");
wts1=model.layers[0].get_weights()
op=model.layers[0].output

yhat = model.predict(data, verbose=0)
wts11= np.array(model.layers[0].get_weights())
print('Weights:' , wts11.shape, wts11,  "Output: ", yhat )
'''
;

Layer Weights: (2, 1) [[0.5]] [0.]
Yhat:  [[0.5]] must be equal to = 0.5
Lets do backward pass - see notes
Loss: is:  90.25  must be equal to = (10-0.5)^2 = 90.25
Layer Weights: (2, 1) [[2.4]] [1.9]


''

In [229]:
loss = (10-0.5)**2
dl_dw= np.sqrt(loss) * -data.flatten() * lr
w1 = wts - dl_dw
w1, wts, dl_dw

(array([[1.45]]), array([[0.5]]), array([-0.95]))

In [220]:
def f1(x, y= 10, b =0):
    return (y - (0.5 * x +b)) **2

e = 1e-9
x = 1
(f1(x+e) - f1(x-e))/(2*e)


-9.499999009676685

In [225]:
-2 * np.sqrt(f1(x)) *lr 

-1.9000000000000001

In [238]:
2 * np.sqrt(loss) * -data.flatten() * lr

array([-1.9])

## Little more extensions

In [210]:
# A Simple Network with One Node weights
from keras import backend as K
from math import isclose

model = tf.keras.Sequential()
model.add(Dense(1, activation='relu', input_dim=1))
opt=tf.keras.optimizers.SGD(learning_rate=1, momentum=0.0, nesterov=False)
#model.compile(optimizer=opt, loss='binary_crossentropy')
model.compile(optimizer=opt, loss='mse')

# ~~~ Set custom weights
wts1=( np.array([[0.5]]),  np.array([0]) )
print('Weights(1):' , wts1[0].shape, wts1[0] )
model.layers[0].set_weights(wts1)
wts11=model.layers[0].get_weights()
print('Layer Weights:' , wts11[0].shape, wts11[1].shape, wts11[0], wts11[1] )

#--- Do one pass through NN
data=np.array([[1]]).reshape(1,-1)
y = np.array([10])

print('Call Predict on untrained model'  )
yhat = model.predict(data, verbose=0)
print("Before Fit: " , yhat)

f = model.fit(data, y, epochs=1000, verbose=0)
for i in range(10):
    wts11= model.layers[0].get_weights()
    f = model.fit(data, y, epochs=1000, verbose=0)
    wts12= model.layers[0].get_weights()
    yhat = model.predict(data, verbose=0)
    print(f'''Fit {i}: {data} y: {y}  y^: {yhat}; Loss: {f.history['loss'][-1]}
weights:   {wts11[0][0], wts11[1][0] }; 
after fit: {wts12[0][0], wts12[1][0] }\n\n''' 
         
         )
    if ( isclose(wts11[0][0], wts12[0][0], abs_tol=1e-9)):
        print(f"*** CHANGING LR: {model.optimizer.learning_rate.numpy()}")
        K.set_value(model.optimizer.learning_rate, model.optimizer.learning_rate*4)
    
print(f"#--- Weights after {i} pass");
wts1=model.layers[0].get_weights()
op=model.layers[0].output

yhat = model.predict(data, verbose=0)
wts11= np.array(model.layers[0].get_weights())
print('Weights:' , wts11.shape, wts11,  "Output: ", yhat )



Weights(1): (1, 1) [[0.5]]
Layer Weights: (1, 1) (1,) [[0.5]] [0.]
Call Predict on untrained model
Before Fit:  [[0.5]]
Fit 0: [[1]] y: [10]  y^: [[0.]]; Loss: 100.0
weights:   (array([-37.5], dtype=float32), -38.0); 
after fit: (array([-37.5], dtype=float32), -38.0)


*** CHANGING LR: 1
Fit 1: [[1]] y: [10]  y^: [[0.]]; Loss: 100.0
weights:   (array([-37.5], dtype=float32), -38.0); 
after fit: (array([-37.5], dtype=float32), -38.0)


*** CHANGING LR: 4
Fit 2: [[1]] y: [10]  y^: [[0.]]; Loss: 100.0
weights:   (array([-37.5], dtype=float32), -38.0); 
after fit: (array([-37.5], dtype=float32), -38.0)


*** CHANGING LR: 16
Fit 3: [[1]] y: [10]  y^: [[0.]]; Loss: 100.0
weights:   (array([-37.5], dtype=float32), -38.0); 
after fit: (array([-37.5], dtype=float32), -38.0)


*** CHANGING LR: 64
Fit 4: [[1]] y: [10]  y^: [[0.]]; Loss: 100.0
weights:   (array([-37.5], dtype=float32), -38.0); 
after fit: (array([-37.5], dtype=float32), -38.0)


*** CHANGING LR: 256
Fit 5: [[1]] y: [10]  y^: [[0.]

# Simple NW with predefined weights and one pass

In [156]:
# A Simple Network with predefined weights

model = Sequential()
model.add(Dense(2, activation='relu', input_dim=2))
model.add(Dense(1))
opt=tf.keras.optimizers.SGD(learning_rate=0.5, momentum=0.0, nesterov=False)
#model.compile(optimizer=opt, loss='binary_crossentropy')
model.compile(optimizer='sgd', loss='mse')

# ~~~ Set custom weights
wts0=( np.array([[0.5, 0.1], [0.1, 0.5]]),  np.array([0,0]) )
print('Weights(0):' , wts0[0].shape, wts0[1].shape, "\n", wts0[0], wts0[1] )

model.layers[0].set_weights(wts0)
wts01=model.layers[0].get_weights()
print('Weights:' , wts01[0].shape, wts01[1].shape, wts01[0], wts01[1] )

wts1=( np.array([[0.5], [0.5]]),  np.array([0]) )
print('Weights(1):' , wts1[0].shape, wts1[1].shape, "\n", wts1[0], wts1[1] )
model.layers[1].set_weights(wts1)
wts11=model.layers[1].get_weights()
print('Weights:' , wts11[0].shape, wts11[1].shape, wts11[0], wts11[1] )

#--- Do one pass through NN
data=np.array([[1,1]])
y =np.array([10])
f=model.fit(data, y, epochs=1, verbose=1)

print("#--- Weights after one pass");
wts1=model.layers[0].get_weights()
print('Weights:' , wts1[0].shape, wts1[1].shape, wts1[0], wts1[1] )
wts11=model.layers[1].get_weights()
print('Weights:' , wts11[0].shape, wts11[1].shape, wts11[0], wts11[1] )


Weights(0): (2, 2) (2,) 
 [[0.5 0.1]
 [0.1 0.5]] [0 0]
Weights: (2, 2) (2,) [[0.5 0.1]
 [0.1 0.5]] [0. 0.]
Weights(1): (2, 1) (1,) 
 [[0.5]
 [0.5]] [0]
Weights: (2, 1) (1,) [[0.5]
 [0.5]] [0.]
#--- Weights after one pass
Weights: (2, 2) (2,) [[0.594 0.194]
 [0.194 0.594]] [0.094 0.094]
Weights: (2, 1) (1,) [[0.6128]
 [0.6128]] [0.188]
