In [1]:
import tensorflow as tf
from tensorflow.compat import v1
import numpy as np

%matplotlib inline


TensorFlow: Static Graphs
-------------------------

A fully-connected ReLU network with one hidden layer and no biases, trained to
predict y from x by minimizing squared Euclidean distance.

This implementation uses basic TensorFlow operations to set up a computational
graph, then executes the graph many times to actually train the network.

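One of the main differences between TensorFlow's graph mode (the TF 1.x style
used here through `tf.compat.v1` with eager execution disabled) and PyTorch is
that graph-mode TensorFlow uses static computational graphs while PyTorch uses
dynamic computational graphs.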

In TensorFlow we first set up the computational graph, then execute the same
graph many times.



In [2]:
# Switch to TF 1.x-style graph execution: ops below only *describe* the
# computation; nothing runs until `sess.run` is called.
v1.disable_eager_execution()

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N = 64
D_in = 1000
H = 100
D_out = 10

# Fixed seed so that weights, data and the printed losses are reproducible
# under "Restart Kernel -> Run All".
SEED = 0

# Build the model in its own graph rather than the process-wide default graph.
# Re-running this cell then starts from a clean graph instead of piling
# duplicate placeholders/variables onto the one left by the previous run.
graph = tf.Graph()
with graph.as_default():
    # Placeholders are fed with real data at execution time; the batch
    # dimension is left open (None).
    x = v1.placeholder(tf.float32, shape=(None, D_in))
    y = v1.placeholder(tf.float32, shape=(None, D_out))

    # Weights persist across `sess.run` calls. Distinct op-level seeds keep
    # the two initial matrices reproducible but drawn from different streams.
    w1 = tf.Variable(tf.random.normal((D_in, H), seed=SEED))
    w2 = tf.Variable(tf.random.normal((H, D_out), seed=SEED + 1))

    # Forward pass: linear -> ReLU -> linear (no biases).
    h = tf.matmul(x, w1)
    h_relu = tf.nn.relu(h)
    y_pred = tf.matmul(h_relu, w2)

    # Sum of squared errors over every element of the batch.
    loss = tf.reduce_sum(tf.square(y - y_pred))

    # Symbolic gradients of the loss with respect to both weight matrices.
    grad_w1, grad_w2 = tf.gradients(loss, (w1, w2))

    # One step of plain gradient descent. The assign ops must be fetched in
    # `sess.run` for the weights to actually change.
    learning_rate = 1e-6
    new_w1 = w1.assign(w1 - learning_rate * grad_w1)
    new_w2 = w2.assign(w2 - learning_rate * grad_w2)

    # Created once here so the training session does not add ops to the graph.
    init_op = v1.global_variables_initializer()

# Synthetic training data. A local RandomState avoids touching NumPy's global
# RNG, and casting to float32 up front matches the placeholder dtype so no
# float64 -> float32 conversion happens on every step.
rng = np.random.RandomState(SEED)
x_value = rng.randn(N, D_in).astype(np.float32)
y_value = rng.randn(N, D_out).astype(np.float32)

with v1.Session(graph=graph) as sess:
    sess.run(init_op)

    for t in range(500):
        # Fetching the assign ops alongside the loss performs the update.
        loss_value, _, _ = sess.run(
            [loss, new_w1, new_w2], feed_dict={x: x_value, y: y_value}
        )
        if t % 100 == 99:
            print(t, loss_value)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
99 405.4677
199 1.955998
299 0.026078273
399 0.0007728277
499 0.000104765175
