In [1]:
from datetime import datetime
import os
import tensorflow as tf
import numpy as np

In [2]:
logdir = os.path.join("script1", "fail1")
train_writer = tf.summary.create_file_writer(os.path.join(logdir, "train"))
test_writer = tf.summary.create_file_writer(os.path.join(logdir, "test"))

In [4]:

tf.keras.backend.clear_session()


# get the data
(train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.mnist.load_data()


def preprocess_images(images):
    return images.reshape(-1, 784).astype(np.float32) / 255


def preprocess_labels(labels):
    return labels.reshape(-1).astype(np.int32)


train_images = preprocess_images(train_images)
test_images = preprocess_images(test_images)
train_labels = preprocess_labels(train_labels)
test_labels = preprocess_labels(test_labels)

train_data = tf.data.Dataset.from_tensor_slices((train_images, train_labels)).shuffle(60000).batch(128).repeat()
#test_data = tf.data.Dataset.from_tensor_slices((test_images, test_labels)).batch(128)


# define the model first, from input to output

# this is a super deep model, cool!
n_units = 100
n_layers = 8
w_range = 0.4

# just set up a "chain" of hidden layers
# model is represented by a list where each element is a layer,
# and each layer is in turn a list of the layer variables (w, b)

# first layer goes from n_input to n_hidden
w_input = tf.Variable(tf.random.uniform([784, n_units], -w_range, w_range),
                      name="w0")
b_input = tf.Variable(tf.zeros(n_units), name="b0")
layers = [[w_input, b_input]]

# all other hidden layers go from n_hidden to n_hidden
for layer in range(n_layers - 1):
    w = tf.Variable(tf.random.uniform([n_units, n_units], -w_range, w_range),
                    name="w" + str(layer+1))
    b = tf.Variable(tf.zeros(n_units), name="b" + str(layer+1))
    layers.append([w, b])

# finally add the output layer
w_out = tf.Variable(tf.random.uniform([n_units, 10], -w_range, w_range),
                    name="wout")
b_out = tf.Variable(tf.zeros(10), name="bout")

#fix1
layers.append([w_out, b_out])

# flatten the layers to get a list of variables
all_variables = [variable for layer in layers for variable in layer]


def model_forward(inputs):
    x = inputs
    for w, b in layers[:-1]:
        x = tf.nn.relu(tf.matmul(x, w) + b)
    #fix2
    logits = tf.matmul(x, layers[-1][0]) + layers[-1][1]

    return logits


lr = 0.1
train_steps = 2000
for step, (img_batch, lbl_batch) in enumerate(train_data):
    if step > train_steps:
        break

    with tf.GradientTape() as tape:
        # here we just run all the layers in sequence via a for-loop
        logits = model_forward(img_batch)
        xent = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=lbl_batch))

    grads = tape.gradient(xent, all_variables)
    for grad, var in zip(grads, all_variables):
        var.assign_sub(lr*grad)

    if not step % 100:
        preds = tf.argmax(logits, axis=1, output_type=tf.int32)
        acc = tf.reduce_mean(tf.cast(tf.equal(preds, lbl_batch), tf.float32))
        print("Loss: {} Accuracy: {}".format(xent, acc))
    
#         with train_writer.as_default():
#             tf.summary.scalar("accuracy", acc, step=step)
#             tf.summary.scalar("loss", xent, step=step)
#             tf.summary.scalar("gradients", xent, step=step)
    #Check the gradients
    #Visualisation cannot happen with tensorboard, since it is giving nan values
    print("Loss")
    print(xent)
    print("Gradients Start")
#     print(tf.norm(grads))
    for grad, var in zip(grads, all_variables):
        print(tf.norm(grad))
    print("Gradients End")
    
    if step >= 1:
        break


test_preds = model_forward(test_images)
test_preds = tf.argmax(test_preds, axis=1, output_type=tf.int32)
acc = tf.reduce_mean(tf.cast(tf.equal(test_preds, test_labels), tf.float32))
print("Final test accuracy: {}".format(acc))


Loss: 141.92880249023438 Accuracy: 0.109375
Loss
tf.Tensor(141.9288, shape=(), dtype=float32)
Gradients Start
tf.Tensor(141.42628, shape=(), dtype=float32)
tf.Tensor(20.889233, shape=(), dtype=float32)
tf.Tensor(179.99341, shape=(), dtype=float32)
tf.Tensor(13.409813, shape=(), dtype=float32)
tf.Tensor(212.08914, shape=(), dtype=float32)
tf.Tensor(9.176125, shape=(), dtype=float32)
tf.Tensor(236.01181, shape=(), dtype=float32)
tf.Tensor(5.355973, shape=(), dtype=float32)
tf.Tensor(241.79071, shape=(), dtype=float32)
tf.Tensor(3.3743954, shape=(), dtype=float32)
tf.Tensor(264.14432, shape=(), dtype=float32)
tf.Tensor(2.0200324, shape=(), dtype=float32)
tf.Tensor(277.11722, shape=(), dtype=float32)
tf.Tensor(1.3508981, shape=(), dtype=float32)
tf.Tensor(323.43802, shape=(), dtype=float32)
tf.Tensor(1.0104355, shape=(), dtype=float32)
tf.Tensor(300.51187, shape=(), dtype=float32)
tf.Tensor(0.62285787, shape=(), dtype=float32)
Gradients End
Loss
tf.Tensor(59642696.0, shape=(), dtype=float3