# Eager Execution
https://www.tensorflow.org/get_started/eager

In [1]:
import tensorflow as tf
import tensorflow.contrib.eager as tfe

# Switch TensorFlow to eager mode right after import, before any other
# TensorFlow call is made in this notebook.
tf.enable_eager_execution()

# Collect the values first, then report them.
tf_version = tf.VERSION
is_eager = tf.executing_eagerly()
print("TensorFlow version: {}".format(tf_version))
print("Eager execution: {}".format(is_eager))

  from ._conv import register_converters as _register_converters


TensorFlow version: 1.8.0
Eager execution: True


In [2]:
# create model
class MNISTModel(tf.keras.Model):
  """Stack of two 10-unit dense layers, the second of which is applied twice."""

  def __init__(self):
    super(MNISTModel, self).__init__()
    self.dense1 = tf.keras.layers.Dense(units=10)
    self.dense2 = tf.keras.layers.Dense(units=10)

  def call(self, input):
    """Run the model."""
    # dense2 is listed twice on purpose: the second pass reuses its variables.
    activations = input
    for layer in (self.dense1, self.dense2, self.dense2):
      activations = layer(activations)
    return activations

model = MNISTModel()

In [6]:
# Download data.
import dataset  # download dataset.py file

# Input-pipeline settings, named so they can be tuned in one place.
SHUFFLE_BUFFER_SIZE = 60000  # presumably the size of the training set -- TODO confirm
NUM_REPEATS = 4              # how many times the dataset is repeated
BATCH_SIZE = 32              # examples per batch

dataset_train = (
    dataset.train('./datasets')
    .shuffle(SHUFFLE_BUFFER_SIZE)
    .repeat(NUM_REPEATS)
    .batch(BATCH_SIZE)
)


In [7]:
def loss(model, x, y):
  """Sparse softmax cross-entropy between labels `y` and the model output for `x`.

  The output of `model(x)` is passed to the loss as logits.
  """
  logits = model(x)
  return tf.losses.sparse_softmax_cross_entropy(logits=logits, labels=y)

def grad(model, inputs, targets):
  """Return the gradients of the loss with respect to `model.variables`."""
  with tf.GradientTape() as recorder:
    # The loss is evaluated inside the tape context so that it is recorded.
    batch_loss = loss(model, inputs, targets)
  # As in the original, the variable list is read after the forward pass.
  params = model.variables
  return recorder.gradient(batch_loss, params)

optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)

# Peek at one batch to report the loss before any training happens.
# The built-in next() is used instead of calling the iterator's Python 2-style
# .next() method directly, so this relies only on the iterator protocol.
x, y = next(iter(dataset_train))
print("Initial loss: {:.3f}".format(loss(model, x, y)))

# The global step does not change between iterations, so look it up once
# instead of on every pass through the loop.
global_step = tf.train.get_or_create_global_step()

# Training loop
for (i, (x, y)) in enumerate(dataset_train):
  # Calculate derivatives of the loss with respect to the model's variables.
  grads = grad(model, x, y)
  # Apply the gradients to the model
  optimizer.apply_gradients(zip(grads, model.variables),
                            global_step=global_step)
  if i % 200 == 0:
    # Reported on the batch just trained on, after the update was applied.
    print("Loss at step {:04d}: {:.3f}".format(i, loss(model, x, y)))

# Reported on the last batch the loop processed.
print("Final loss: {:.3f}".format(loss(model, x, y)))

Initial loss: 2.611
Loss at step 0000: 2.668
Loss at step 0200: 2.145
Loss at step 0400: 2.047
Loss at step 0600: 1.950
Loss at step 0800: 1.909
Loss at step 1000: 1.511
Loss at step 1200: 1.546
Loss at step 1400: 1.536
Loss at step 1600: 1.585
Loss at step 1800: 1.469
Loss at step 2000: 1.266
Loss at step 2200: 1.170
Loss at step 2400: 1.338
Loss at step 2600: 1.114
Loss at step 2800: 1.282
Loss at step 3000: 1.143
Loss at step 3200: 0.817
Loss at step 3400: 0.808
Loss at step 3600: 0.708
Loss at step 3800: 1.055
Loss at step 4000: 0.870
Loss at step 4200: 0.810
Loss at step 4400: 0.769
Loss at step 4600: 0.863
Loss at step 4800: 0.701
Loss at step 5000: 0.787
Loss at step 5200: 0.749
Loss at step 5400: 0.668
Loss at step 5600: 0.867
Loss at step 5800: 0.723
Loss at step 6000: 0.536
Loss at step 6200: 0.528
Loss at step 6400: 0.535
Loss at step 6600: 0.684
Loss at step 6800: 0.756
Loss at step 7000: 0.629
Loss at step 7200: 0.488
Loss at step 7400: 0.926
Final loss: 0.670


## Linear regression with the tf.GradientTape function

In [4]:
# A toy dataset of points around 3 * x + 2
NUM_EXAMPLES = 1000
# Inputs are drawn before the noise, keeping the order of the random draws.
training_inputs = tf.random_normal(shape=[NUM_EXAMPLES])
noise = tf.random_normal(shape=[NUM_EXAMPLES])
# Noisy targets: the line 3 * x + 2 plus the noise drawn above.
training_outputs = (training_inputs * 3 + 2) + noise

def prediction(input, weight, bias):
  """Evaluate the linear model `input * weight + bias`."""
  scaled = input * weight
  return scaled + bias

# Mean-squared error of the linear model over the toy dataset
def loss(weights, biases):
  """Return the mean of the squared differences between predictions and targets.

  Predictions are computed from the module-level `training_inputs` and
  compared against the module-level `training_outputs`.
  """
  residuals = prediction(training_inputs, weights, biases) - training_outputs
  squared_residuals = tf.square(residuals)
  return tf.reduce_mean(squared_residuals)

# Derivatives of the loss with respect to the weight and the bias
def grad(weights, biases):
  """Return `[d loss / d weights, d loss / d biases]`."""
  with tf.GradientTape() as recorder:
    # The loss is evaluated inside the tape context so that it is recorded.
    current_loss = loss(weights, biases)
  wrt = [weights, biases]
  return recorder.gradient(current_loss, wrt)

train_steps = 200
learning_rate = 0.01
# Start with arbitrary values for W and B on the same batch of data
W = tfe.Variable(5.)
B = tfe.Variable(10.)

print("Initial loss: {:.3f}".format(loss(W, B)))

for step in range(train_steps):
  # Both gradients come from one grad() call made before either variable
  # is modified; each variable then takes a plain gradient-descent step.
  for variable, gradient in zip((W, B), grad(W, B)):
    variable.assign_sub(gradient * learning_rate)
  if step % 20 == 0:
    print("Loss at step {:03d}: {:.3f}".format(step, loss(W, B)))

print("Final loss: {:.3f}".format(loss(W, B)))
print("W = {}, B = {}".format(W.numpy(), B.numpy()))

Initial loss: 68.071
Loss at step 000: 65.429
Loss at step 020: 29.849
Loss at step 040: 13.919
Loss at step 060: 6.787
Loss at step 080: 3.592
Loss at step 100: 2.161
Loss at step 120: 1.520
Loss at step 140: 1.233
Loss at step 160: 1.104
Loss at step 180: 1.047
Final loss: 1.022
W = 2.9865317344665527, B = 2.1900746822357178


In [8]:
class Model(tf.keras.Model):
  """Linear model `inputs * W + B` holding two scalar variables."""

  def __init__(self):
    super(Model, self).__init__()
    # Starting values for the weight and the bias.
    self.W = tfe.Variable(5., name='weight')
    self.B = tfe.Variable(10., name='bias')

  def predict(self, inputs):
    """Return the model output for `inputs`."""
    scaled = inputs * self.W
    return scaled + self.B

# A toy dataset of points around 3 * x + 2
NUM_EXAMPLES = 2000
# Inputs are drawn before the noise, keeping the order of the random draws.
training_inputs = tf.random_normal(shape=[NUM_EXAMPLES])
noise = tf.random_normal(shape=[NUM_EXAMPLES])
# Noisy targets: the line 3 * x + 2 plus the noise drawn above.
training_outputs = (training_inputs * 3 + 2) + noise

# The loss function to be optimized
def loss(model, inputs, targets):
  """Mean-squared error between `model.predict(inputs)` and `targets`."""
  residuals = model.predict(inputs) - targets
  squared_residuals = tf.square(residuals)
  return tf.reduce_mean(squared_residuals)

def grad(model, inputs, targets):
  """Return the gradients of the loss with respect to `model.W` and `model.B`."""
  with tf.GradientTape() as recorder:
    # The loss is evaluated inside the tape context so that it is recorded.
    current_loss = loss(model, inputs, targets)
  wrt = [model.W, model.B]
  return recorder.gradient(current_loss, wrt)

# Define:
# 1. A model.
# 2. Derivatives of a loss function with respect to model parameters.
# 3. A strategy for updating the variables based on the derivatives.
model = Model()
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)

print("Initial loss: {:.3f}".format(loss(model, training_inputs, training_outputs)))

# Neither the global step nor the list of trained variables changes between
# iterations, so both are obtained once instead of on every pass.
global_step = tf.train.get_or_create_global_step()
trained_variables = [model.W, model.B]

# Training loop
for i in range(300):
  # grad() returns the gradients in the same [W, B] order used here.
  grads = grad(model, training_inputs, training_outputs)
  optimizer.apply_gradients(zip(grads, trained_variables),
                            global_step=global_step)
  if i % 20 == 0:
    # Loss over the full toy dataset, measured after this step's update.
    print("Loss at step {:03d}: {:.3f}".format(i, loss(model, training_inputs, training_outputs)))

print("Final loss: {:.3f}".format(loss(model, training_inputs, training_outputs)))
print("W = {}, B = {}".format(model.W.numpy(), model.B.numpy()))

Initial loss: 69.625
Loss at step 000: 66.865
Loss at step 020: 29.978
Loss at step 040: 13.743
Loss at step 060: 6.595
Loss at step 080: 3.447
Loss at step 100: 2.060
Loss at step 120: 1.448
Loss at step 140: 1.179
Loss at step 160: 1.060
Loss at step 180: 1.007
Loss at step 200: 0.984
Loss at step 220: 0.974
Loss at step 240: 0.969
Loss at step 260: 0.967
Loss at step 280: 0.966
Final loss: 0.966
W = 2.971496105194092, B = 2.0467147827148438


In [9]:
# Show the GPU count reported by TensorFlow as the cell's output.
tfe.num_gpus()

0