In [8]:
# Notebook setup: IPython magics plus an import-path tweak.
# Load the autoreload extension; mode 2 re-imports modules before each cell
# runs, so edits to imported .py files are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2
# Ask the inline plotting backend for 'retina' (high-DPI) figures.
%config InlineBackend.figure_format = 'retina'
import sys
# Make packages from a local virtualenv importable by this kernel.
# NOTE(review): the path is relative (resolved against the kernel's working
# directory) and pinned to python3.7 — confirm it matches the environment the
# notebook is run from.
sys.path.append('.venv/lib/python3.7/site-packages/')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [29]:
import tensorflow as tf
import keras

### Gradient

In [21]:
# Compare the gradient produced by tf.GradientTape with the closed-form
# derivative of c = sqrt(a^2 + b^2) with respect to `a`.
a = tf.random.normal(shape=(2, 2))
b = tf.random.normal(shape=(2, 2))

# Alternative: wrap `a` in a tf.Variable. Variables are watched by the tape
# automatically, which would make the explicit `tape.watch(a)` unnecessary.
# a = tf.Variable(a)

with tf.GradientTape() as tape:
    # `a` is a plain tensor, so the tape must be told to record the
    # operations applied to it.
    tape.watch(a)
    # Only the forward computation needs to be recorded.
    c = tf.sqrt(tf.square(a) + tf.square(b))

# Request the gradient after leaving the tape context so the backward-pass
# ops are not themselves recorded on the tape.
dc_da = tape.gradient(c, a)

# The label says "Numerical", but this value comes from automatic
# differentiation via the tape, not from finite differences.
print('Numerical Gradient')
print(dc_da)

# Closed form for comparison: dc/da = a / sqrt(a^2 + b^2).
print('\nAnalytical Gradient')
print(a / tf.sqrt(tf.square(a) + tf.square(b)))

Numerical Gradient
tf.Tensor(
[[ 0.79789436  0.7079957 ]
 [ 0.30566776 -0.71412086]], shape=(2, 2), dtype=float32)

Analytical Gradient
tf.Tensor(
[[ 0.79789436  0.7079957 ]
 [ 0.30566776 -0.7141209 ]], shape=(2, 2), dtype=float32)


### Layer

In [30]:
class Linear(keras.layers.Layer):
    """Affine layer computing ``y = x @ w + b``.

    Weights are created lazily in ``build`` from the last dimension of the
    first input the layer is called on.

    Args:
        units: Size of the last dimension of the output. Defaults to 32.
        **kwargs: Keyword arguments forwarded unchanged to
            ``keras.layers.Layer`` (for example ``name`` or ``dtype``).
    """

    def __init__(self, units=32, **kwargs):
        # Forward base-layer options instead of silently rejecting them.
        super().__init__(**kwargs)
        self.units = units

    def build(self, input_shape):
        """Create the kernel ``w`` and bias ``b``.

        Args:
            input_shape: Shape of the layer input; only its last entry is
                used, as the number of rows of ``w``.
        """
        self.w = self.add_weight(
            shape=(input_shape[-1], self.units),
            initializer="random_normal",
            trainable=True,
        )
        self.b = self.add_weight(
            shape=(self.units,), initializer="random_normal", trainable=True
        )

    def call(self, inputs):
        """Return ``inputs @ w + b``."""
        return tf.matmul(inputs, self.w) + self.b

    def get_config(self):
        """Return the constructor arguments needed to re-create this layer."""
        config = super().get_config()
        config.update({"units": self.units})
        return config

In [47]:
# Train the `Linear` layer on MNIST with a hand-written loop.

# Load the training split only; the test split is discarded here.
(x_train, y_train), _ = tf.keras.datasets.mnist.load_data()

# Flatten each image to a 784-long float vector scaled by 1/255. Using -1 for
# the first dimension avoids hard-coding the number of training samples.
dataset = tf.data.Dataset.from_tensor_slices(
    (x_train.reshape(-1, 784).astype("float32") / 255, y_train)
)
dataset = dataset.shuffle(buffer_size=1024).batch(64)

# Instantiate our linear layer (defined above) with 10 units.
linear_layer = Linear(10)

# Loss that takes integer targets and raw (un-normalised) logits.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# Instantiate an optimizer.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)

# Metric object; it is never reset, so it accumulates across both epochs.
accuracy = tf.keras.metrics.SparseCategoricalAccuracy()

# Iterate over the batches of the dataset.
for epoch in range(2):
    for step, (x, y) in enumerate(dataset):

        # Record only the forward pass and the loss on the tape.
        with tf.GradientTape() as tape:
            logits = linear_layer(x)
            loss = loss_fn(y, logits)

        # Everything below runs outside the tape context so that metric,
        # backward-pass and optimizer ops are not recorded on the tape.

        # Update the state of the `accuracy` metric.
        accuracy.update_state(y, logits)

        # Get gradients of the loss wrt the weights.
        gradients = tape.gradient(loss, linear_layer.trainable_weights)

        # Update the weights of our linear layer.
        optimizer.apply_gradients(zip(gradients, linear_layer.trainable_weights))

        # Logging.
        if step % 200 == 0:
            print("Epoch:", epoch, "Step:", step)
            print("Total running accuracy so far: %.3f" % accuracy.result())

Epoch: 0 Step: 0
Total running accuracy so far: 0.125
Epoch: 0 Step: 200
Total running accuracy so far: 0.769
Epoch: 0 Step: 400
Total running accuracy so far: 0.821
Epoch: 0 Step: 600
Total running accuracy so far: 0.843
Epoch: 0 Step: 800
Total running accuracy so far: 0.855
Epoch: 1 Step: 0
Total running accuracy so far: 0.863
Epoch: 1 Step: 200
Total running accuracy so far: 0.871
Epoch: 1 Step: 400
Total running accuracy so far: 0.876
Epoch: 1 Step: 600
Total running accuracy so far: 0.880
Epoch: 1 Step: 800
Total running accuracy so far: 0.884
