In [2]:
from __future__ import absolute_import, division, print_function
import tensorflow as tf
import numpy as np
tfe = tf.contrib.eager

In [6]:
# Enable eager execution; the cells below evaluate ops immediately and print concrete values.
tf.enable_eager_execution()
# Bare last expression so the cell displays whether eager mode is active.
tf.executing_eagerly()

1

# Basic Usage

In [4]:
# Multiply a 1x1 matrix (given as a nested Python list) by itself and print the result.
x = [[2.0]]
square = tf.matmul(x, x)
print("square: {}".format(square))

square: [[4.]]


In [5]:
a = tf.constant([[1, 2], [3, 4]])
# Python operators work on tensors: 1 is added to every element (see the printed output).
b = a + 1
print(a)
print(b)
# Tensors are passed straight to a NumPy function; the printed result carries no tf.Tensor wrapper.
c = np.multiply(a, b)
print(c)

tf.Tensor(
[[1 2]
 [3 4]], shape=(2, 2), dtype=int32)
tf.Tensor(
[[2 3]
 [4 5]], shape=(2, 2), dtype=int32)
[[ 2  6]
 [12 20]]


In [6]:
def fizzbuzz(max_num):
    """Play FizzBuzz over ``range(max_num)`` using tensor arithmetic.

    For every integer in the range, prints 'FizzBuzz', 'Fizz' or 'Buzz' when
    it is divisible by both 3 and 5, by 3 only, or by 5 only; otherwise the
    number itself is printed as a tensor.

    Returns:
        A scalar tensor that was incremented once per iteration.
    """
    counter = tf.constant(0)
    for value in range(max_num):
        num = tf.constant(value)
        # int() turns the scalar remainder tensors into plain Python integers.
        divisible_by_3 = int(num % 3) == 0
        divisible_by_5 = int(num % 5) == 0
        if divisible_by_3 and divisible_by_5:
            print('FizzBuzz')
        elif divisible_by_3:
            print('Fizz')
        elif divisible_by_5:
            print('Buzz')
        else:
            print(num)
        counter += 1
    return counter
# Run fizzbuzz with max_num=5 and print the counter tensor it returns.
counter = fizzbuzz(5)
print(counter)

FizzBuzz
tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(2, shape=(), dtype=int32)
Fizz
tf.Tensor(4, shape=(), dtype=int32)
tf.Tensor(5, shape=(), dtype=int32)


# Build a model

In [7]:
class MySimpleLayer(tf.keras.layers.Layer):
    """Bias-free linear layer that multiplies its input by a ``kernel`` matrix.

    Args:
        output_units: size of the last dimension of the layer's output.
    """

    def __init__(self, output_units):
        # Initialise the Keras base class before setting attributes or adding
        # variables; the original skipped this call.
        super(MySimpleLayer, self).__init__()
        self.output_units = output_units

    def build(self, input_shape):
        """Create the kernel once the last dimension of the input is known.

        Args:
            input_shape: shape of the input the layer is being built for;
                only its last entry is used.
        """
        self.kernel = self.add_variable(
            "kernel", [input_shape[-1], self.output_units])

    def call(self, input):
        """Return ``tf.matmul(input, self.kernel)``."""
        return tf.matmul(input, self.kernel)

In [8]:
class MNISTModel(tf.keras.Model):
    """Keras model built from two 10-unit Dense layers."""

    def __init__(self):
        super(MNISTModel, self).__init__()
        self.dense1 = tf.keras.layers.Dense(units=10)
        self.dense2 = tf.keras.layers.Dense(units=10)

    def call(self, input):
        """Apply ``dense1`` once and then ``dense2`` twice to ``input``."""
        hidden = self.dense1(input)
        # dense2 is applied two times in a row, so both steps go through the
        # same layer object. NOTE(review): confirm the repetition is intended.
        return self.dense2(self.dense2(hidden))

# Training a model

In [9]:
w = tfe.Variable([[1.0]])
# Compute loss inside the tape context so it can be differentiated afterwards.
with tf.GradientTape() as tape:
    loss = w * w
# Gradient of w * w with respect to w at w = 1.0; the printed value is [[2.]].
grad = tape.gradient(loss, [w])
print(grad)

[<tf.Tensor: id=72, shape=(1, 1), dtype=float32, numpy=array([[2.]], dtype=float32)>]


In [10]:
# A toy dataset of points around 3 * x + 2
# NOTE(review): no random seed is set in this notebook, so the losses printed
# below will differ from run to run.
NUM_EXAMPLES = 1000
training_inputs = tf.random_normal([NUM_EXAMPLES])
noise = tf.random_normal([NUM_EXAMPLES])
# Targets are 3 * x + 2 plus the random noise drawn above.
training_outputs = training_inputs * 3 + 2 + noise

def prediction(input, weight, bias):
    """Evaluate the linear model ``input * weight + bias``."""
    scaled = input * weight
    return scaled + bias

def loss(weights, biases):
    """Mean-squared error of the linear model on the toy dataset.

    Reads the module-level ``training_inputs`` and ``training_outputs`` and
    uses ``prediction`` to evaluate the model.
    """
    predicted = prediction(training_inputs, weights, biases)
    squared_error = tf.square(predicted - training_outputs)
    return tf.reduce_mean(squared_error)

def grad(weights, biases):
    """Differentiate ``loss(weights, biases)`` with respect to both arguments.

    Returns:
        The result of ``tape.gradient`` for ``[weights, biases]``, i.e. the
        derivative for the weight followed by the derivative for the bias.
    """
    with tf.GradientTape() as tape:
        current_loss = loss(weights, biases)
    sources = [weights, biases]
    return tape.gradient(current_loss, sources)

# Hyper-parameters for the hand-written gradient-descent loop below.
train_steps = 200
learning_rate = 0.01
# Start with arbitrary values for W and B on the same batch of data
W = tfe.Variable(5.)
B = tfe.Variable(10.)

print("Initial loss: {:.3f}".format(loss(W, B)))

for i in range(train_steps):
    dW, dB = grad(W, B)
    # Gradient-descent update: subtract the scaled gradient from each variable in place.
    W.assign_sub(dW * learning_rate)
    B.assign_sub(dB * learning_rate)
    # Report the loss every 20th step.
    if i % 20 == 0:
        print("Loss at step {:03d}: {:.3f}".format(i, loss(W, B)))

print("Final loss: {:.3f}".format(loss(W, B)))
print("W = {}, B = {}".format(W.numpy(), B.numpy()))

Initial loss: 70.052
Loss at step 000: 67.285
Loss at step 020: 30.252
Loss at step 040: 13.902
Loss at step 060: 6.681
Loss at step 080: 3.491
Loss at step 100: 2.082
Loss at step 120: 1.459
Loss at step 140: 1.183
Loss at step 160: 1.061
Loss at step 180: 1.007
Final loss: 0.984
W = 3.06354093552, B = 2.1071972847


# For the MNIST model

In [26]:
# Load MNIST through Keras; only the training split is unpacked into inputs and labels here.
dataset_train, dataset_test = tf.keras.datasets.mnist.load_data()
mnist_x, mnist_y = dataset_train

In [38]:
# NOTE(review): this rebinds mnist_x / mnist_y to Dataset objects, so re-running
# this cell without first re-running the loading cell passes Datasets back into
# from_tensor_slices.
mnist_x = tf.data.Dataset.from_tensor_slices(mnist_x)
mnist_y = tf.data.Dataset.from_tensor_slices(mnist_y)

In [69]:
from mnist import dataset

In [29]:
def loss(model, x, y):
    """Sparse softmax cross-entropy between ``model(x)``, used as logits, and labels ``y``."""
    logits = model(x)
    return tf.losses.sparse_softmax_cross_entropy(labels=y, logits=logits)

In [30]:
def grad(model, inputs, target):
    """Gradients of ``loss(model, inputs, target)`` with respect to ``model.variables``."""
    with tf.GradientTape() as tape:
        batch_loss = loss(model, inputs, target)
    model_vars = model.variables
    return tape.gradient(batch_loss, model_vars)

In [32]:
# Gradient-descent optimizer with a fixed learning rate of 0.001.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)

# TF variables and optimizers

In [70]:
class LinearModel(tf.keras.Model):
    """Scalar linear model ``inputs * W + b`` with two trainable variables."""

    def __init__(self):
        # Name this class in super() so that tf.keras.Model.__init__ itself
        # runs; super(tf.keras.Model, self) starts the lookup after
        # tf.keras.Model and therefore skips its initializer.
        super(LinearModel, self).__init__()
        # Starting values for the weight and bias variables.
        self.W = tfe.Variable(5., name="weight")
        self.b = tfe.Variable(10., name="bias")

    def predict(self, inputs):
        """Return ``inputs * self.W + self.b``.

        NOTE(review): this shares its name with the built-in Keras
        ``Model.predict`` and replaces it for this class - confirm that is
        acceptable for callers.
        """
        return inputs * self.W + self.b
    
# Synthetic regression data: targets are 3 * x + 2 plus random noise.
# These assignments rebind training_inputs / training_outputs from the earlier toy-dataset cell.
num_examples = 2000
training_inputs = tf.random_normal([num_examples])
noise = tf.random_normal([num_examples])
training_outputs = training_inputs * 3 + 2 + noise

In [71]:
def loss(model, inputs, y):
    """Mean-squared error between ``model.predict(inputs)`` and the targets ``y``."""
    predicted = model.predict(inputs)
    squared_diff = tf.square(predicted - y)
    return tf.reduce_mean(squared_diff)

In [72]:
def grad(model, inputs, y):
    """Gradients of ``loss(model, inputs, y)`` with respect to ``model.W`` and ``model.b``, in that order."""
    with tf.GradientTape() as tape:
        current_loss = loss(model, inputs, y)
    parameters = [model.W, model.b]
    return tape.gradient(current_loss, parameters)

In [73]:
# Instantiate the linear model that is trained in the cells below.
model = LinearModel()

In [74]:
# Rebinds `optimizer` (created earlier with learning_rate=0.001) using a learning rate of 0.01.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
# Loss of the untrained model on the full synthetic dataset.
print("init loss: {}".format(loss(model, training_inputs, training_outputs)))

init loss: 67.8505935669


In [75]:
# Look the global-step variable up once; the original repeated this
# loop-invariant call on each of the 3000 iterations.
global_step = tf.train.get_or_create_global_step()
for i in range(3000):
    grads = grad(model, training_inputs, training_outputs)
    # Pair each gradient with its variable, in the same [W, b] order that grad() uses.
    optimizer.apply_gradients(zip(grads, [model.W, model.b]), global_step=global_step)
    # Report the loss every 20th step.
    if i % 20 == 0:
        print("step {}, loss is {}".format(i, loss(model, training_inputs, training_outputs)))
print("final loss: {}".format(loss(model, training_inputs, training_outputs)))
print("model W: {}, b: {}".format(model.W.numpy(), model.b.numpy()))

step 0, loss is 65.2383117676
step 20, loss is 29.9568004608
step 40, loss is 14.0671339035
step 60, loss is 6.90828132629
step 80, loss is 3.68180131912
step 100, loss is 2.22712731361
step 120, loss is 1.57106149197
step 140, loss is 1.27507555485
step 160, loss is 1.14149880409
step 180, loss is 1.0811984539
step 200, loss is 1.05396926403
step 220, loss is 1.04166996479
step 240, loss is 1.03611302376
step 260, loss is 1.03360176086
step 280, loss is 1.0324665308
step 300, loss is 1.03195333481
step 320, loss is 1.03172123432
step 340, loss is 1.03161621094
step 360, loss is 1.03156864643
step 380, loss is 1.03154695034
step 400, loss is 1.03153729439
step 420, loss is 1.03153300285
step 440, loss is 1.03153073788
step 460, loss is 1.03153014183
step 480, loss is 1.03152966499
step 500, loss is 1.03152954578
step 520, loss is 1.03152954578
step 540, loss is 1.03152942657
step 560, loss is 1.03152930737
step 580, loss is 1.03152906895
step 600, loss is 1.03152918816
step 620, loss i

# Use objects for state during eager execution

In [76]:
# Create a 1000x1000 variable under the cpu:0 device scope, then drop this name's reference to it.
with tf.device("cpu:0"):
    v = tfe.Variable(tf.random_normal([1000, 1000]))
    # Rebinding v removes the only reference this cell holds to the variable.
    # NOTE(review): presumably this demonstrates that the variable's memory can then be reclaimed - confirm.
    v = None

In [78]:
x = tfe.Variable(10.)
# Register x with the checkpoint object under the keyword name "x".
checkpoint = tfe.Checkpoint(x = x)
x.assign(2.)
# save() returns the path that is handed to restore() below.
save_path = checkpoint.save('./ckpt/')
x.assign(11.)  # Change the variable after saving.
# Restore values from the checkpoint
checkpoint.restore(save_path)
print(x.numpy())  # => 2.0

2.0


In [81]:
# Running mean: each call records one value and result() reports the mean so far
# (2.5 for the values 0 and 5, as the output shows).
metrics = tfe.metrics.Mean("loss")
metrics(0)
metrics(5)
metrics.result()

<tf.Tensor: id=338632, shape=(), dtype=float64, numpy=2.5>

# Advanced automatic differentiation topics

In [8]:
def line_search_step(fn, init_x, rate=1.0):
    """Take one backtracking line-search step on ``fn`` starting from ``init_x``.

    The step moves against the gradient of ``fn`` at ``init_x``; the step
    size starts at ``rate`` and is halved after every trial.

    Args:
        fn: callable mapping a tensor to the value being reduced.
        init_x: starting point; it is watched explicitly on the tape.
        rate: initial step size.

    Returns:
        A tuple ``(x, value)`` holding the last point tried and ``fn`` at
        that point. When the loop body never runs (the gradient norm is
        zero), this is ``(init_x, fn(init_x))``.
    """
    with tf.GradientTape() as tape:
        # Explicitly ask the tape to record operations involving init_x.
        tape.watch(init_x)
        value = fn(init_x)
    grad = tape.gradient(value, init_x)
    # Sum of squared gradient entries, used as the required decrease per unit of rate.
    grad_norm = tf.reduce_sum(grad * grad)
    init_value = value
    # Default result for the case where no step is taken; without it the
    # return statement raised UnboundLocalError when the loop was skipped.
    x = init_x
    while value > init_value - rate * grad_norm:
        x = init_x - rate * grad
        value = fn(x)
        rate /= 2.0
    return x, value

In [9]:
def fun(x):
    """Linear test function ``2 * x + 3`` used to try out the line search."""
    doubled = 2 * x
    return doubled + 3

In [10]:
# Run a single line-search step on fun starting from x = 3.
x = tfe.Variable(3.)
line_search_step(fun, x)

(<tf.Tensor: id=28, shape=(), dtype=float32, numpy=1.0>,
 <tf.Tensor: id=30, shape=(), dtype=float32, numpy=5.0>)

# Additional functions to compute gradients

In [11]:
def square(x):
    """Return ``x`` multiplied by itself."""
    product = x * x
    return product

In [16]:
# A callable produced by tfe.gradients_function returns a list of gradients
# (see the list-valued outputs below). Each higher-order derivative therefore
# differentiates element [0] of the previous result, not the list itself.
grad = tfe.gradients_function(square)
gradgrad = tfe.gradients_function(lambda x: grad(x)[0])
gradgradgrad = tfe.gradients_function(lambda x: gradgrad(x)[0])

In [15]:
# Second derivative of square evaluated at 3.
gradgrad(3)

[<tf.Tensor: id=68, shape=(), dtype=int32, numpy=2>]

In [17]:
# Third derivative of square evaluated at 3; the recorded output is [None].
gradgradgrad(3)

[None]

In [18]:
@tf.custom_gradient
def log1pexp(x):
    """Compute ``log(1 + exp(x))`` together with a hand-written gradient.

    Returns:
        The pair expected by ``tf.custom_gradient``: the forward value and a
        function mapping the upstream gradient ``dy`` to the gradient with
        respect to ``x``.
    """
    e = tf.exp(x)
    one_plus_e = 1 + e

    def backward(dy):
        # Derivative written as 1 - 1 / (1 + exp(x)).
        return dy * (1 - 1 / one_plus_e)

    return tf.log(one_plus_e), backward

# Gradient function for log1pexp; it uses the custom gradient defined on log1pexp.
grad_log1pexp = tfe.gradients_function(log1pexp)

# The gradient computation works at x = 0.
print(grad_log1pexp(0.))  # => [0.5]

# And the gradient computation also works at x = 100.
print(grad_log1pexp(100.))  # => [1.0]

[<tf.Tensor: id=91, shape=(), dtype=float32, numpy=0.5>]
[<tf.Tensor: id=102, shape=(), dtype=float32, numpy=1.0>]


# Performance Test

In [27]:
import time

In [28]:
def measure(x, steps):
    """Time ``steps`` repetitions of ``tf.matmul(x, x)``.

    Args:
        x: tensor that is multiplied by itself.
        steps: number of timed multiplications.

    Returns:
        Difference between two ``time.time()`` readings taken around the
        loop, in seconds.
    """
    # One multiplication is run before the clock starts and is not timed.
    tf.matmul(x, x)
    began = time.time()
    for _ in range(steps):
        # Convert each result to a NumPy array inside the timed region.
        tf.matmul(x, x).numpy()
    return time.time() - began
    

In [29]:
# Benchmark configuration: shape of the random matrix and number of timed multiplications.
shape = [1000, 1000]
steps = 200

In [30]:
# Run the benchmark inside the /cpu:0 device scope and print the elapsed seconds.
with tf.device("/cpu:0"):
    x = tf.random_normal(shape)
    period = measure(x, steps)
    print("cpu: {}s".format(period))

cpu: 5.12560606003s


# Use eager execution in a graph environment

In [5]:
def eager_fun(x):
    """Multiply the matrix ``x`` by itself, print the product and return it."""
    # Fixed typo: the original called the non-existent tf.matml.
    res = tf.matmul(x, x)
    print(res)
    return res

with tf.Session() as sess:
    v = tf.placeholder(dtype=tf.float32)
    # Wrap eager_fun with tfe.py_func, passing v as its input and float32 as the output dtype.
    pf = tfe.py_func(eager_fun, [v], tf.float32)
    # NOTE(review): the run call below is commented out, so eager_fun is never invoked by this cell.
#     sess.run(pf, feed_dict={v: [[2.0]]})