# 3.3 - Using `tf.function`

In [1]:
!wget -nc --no-cache -O init.py -q https://raw.githubusercontent.com/rramosp/2021.deeplearning/main/content/init.py
import init; init.init(force_download=False); 

In [2]:
try:
    %tensorflow_version 2.x
    print ("Using TF2 in Google Colab")
except:
    pass
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline
%load_ext tensorboard

from sklearn.datasets import *
from local.lib import mlutils
tf.__version__

'2.4.0'

## `tf.function` automatically converts pythonic code to a computational graph, using Tensors

In [3]:
def f(x):
    """Plain-Python reference implementation: evaluate x**2 + x*3."""
    squared = x**2
    tripled = x*3
    return squared + tripled
f(2)

10

In [4]:
@tf.function
def f(x):
    """Same polynomial as the plain-Python `f`, compiled through `tf.function`."""
    squared = x**2
    tripled = x*3
    return squared + tripled
f(2)

<tf.Tensor: shape=(), dtype=int32, numpy=10>

and it also works when the argument is a `tf.Variable` (or any tensor) instead of a Python number

In [5]:
x = tf.Variable(3.)
f(x)

<tf.Tensor: shape=(), dtype=float32, numpy=18.0>

a `tf.function` is **traced** (converted to computation graph) the first time it is executed, then it is cached

In [6]:
@tf.function
def f47(x):
    # Plain Python print: it only shows up in the output of calls that trace
    # the function (see the outputs of the following cells).
    print('Tracing!')
    # tf.print: shows up in the output of every call, traced or cached.
    tf.print('Executing')  
    return x**2 + x*47

In [7]:
f47(2)

Tracing!
Executing


<tf.Tensor: shape=(), dtype=int32, numpy=98>

In [8]:
f47(2)

Executing


<tf.Tensor: shape=(), dtype=int32, numpy=98>

observe that if the type changes, the function is **traced** again since a different computational graph must be created

In [9]:
f47(2.1)

Tracing!
Executing


<tf.Tensor: shape=(), dtype=float32, numpy=103.11>

observe the actual code generated by `tf.autograph`

In [10]:
print(tf.autograph.to_code(f47.python_function))


def tf__f47(x):
    with ag__.FunctionScope('f47', 'fscope', ag__.ConversionOptions(recursive=True, user_requested=True, optional_features=(), internal_convert_user_code=True)) as fscope:
        do_return = False
        retval_ = ag__.UndefinedReturnValue()
        ag__.ld(print)('Tracing!')
        ag__.converted_call(ag__.ld(tf).print, ('Executing',), None, fscope)
        try:
            do_return = True
            retval_ = ((ag__.ld(x) ** 2) + (ag__.ld(x) * 47))
        except:
            do_return = False
            raise
        return fscope.ret(retval_, do_return)



## performance of `tf.function`

In [11]:
x = tf.Variable(7.0)
@tf.function
def f1(x):
    """Graph-compiled entry point that delegates to a plain nested helper.

    Only this outer function carries the `tf.function` decorator; the helper
    below is ordinary Python called from inside it.
    """
    def ff1(v):
        squared = v**2
        tripled = v*3
        return squared + tripled

    return ff1(x)

In [12]:
def f2(xval):
    """Plain-Python wrapper that defines a new `tf.function` on every call.

    The decorated helper is re-created each time `f2` runs, so a traced graph
    is not shared between successive calls of `f2`.
    """
    @tf.function
    def ff2(v):
        squared = v**2
        tripled = v*3
        return squared + tripled

    return ff2(xval)

In [13]:
# NOTE(review): `f3` is defined again in the next cell (tf.pow / tf.multiply
# form); that later definition is the one bound to the name `f3` afterwards.
@tf.function
def f3(x):
    """Graph-compiled x**2 + x*3 written with Python operators."""
    squared = x**2
    tripled = x*3
    return squared + tripled

In [14]:
@tf.function
def f3(x):
    """Graph-compiled x**2 + x*3 written with explicit TensorFlow ops."""
    squared = tf.pow(x,2)
    tripled = tf.multiply(x,3)
    return squared + tripled

def f4(x):
    """Undecorated pure-Python baseline: x**2 + x*3 with no TensorFlow involved."""
    squared = x**2
    tripled = x*3
    return squared + tripled


In [15]:
f1(10.), f2(10.), f3(10.), f4(10.)

(<tf.Tensor: shape=(), dtype=float32, numpy=130.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=130.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=130.0>,
 130.0)

In [16]:
%timeit f1(10.)

99.8 µs ± 3.89 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [17]:
%timeit f2(10.)







































































































































5.65 ms ± 111 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [18]:
%timeit f3(10.)

95.8 µs ± 710 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [19]:
%timeit f3.python_function(10.)

28.9 µs ± 574 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [20]:
%timeit f4(10.)

137 ns ± 0.54 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)


## Underlying `concrete` functions are actual TF graphs with no polymorphism, tied to specific input types

`tf.function` maps python polymorphism to a set of different underlying concrete functions

In [21]:
@tf.function
def f(x):
    """Add the argument to itself; what `+` means depends on the input type."""
    doubled = x+x
    return doubled

In [22]:
f(10), f(10.), f("a")

(<tf.Tensor: shape=(), dtype=int32, numpy=20>,
 <tf.Tensor: shape=(), dtype=float32, numpy=20.0>,
 <tf.Tensor: shape=(), dtype=string, numpy=b'aa'>)

observe that each concrete function is a distinct object (note the different addresses, `at 0x...`, in their representations)

In [23]:
fs = f.get_concrete_function(tf.TensorSpec(shape=None, dtype=tf.string))
fs, fs(tf.constant("aa"))

(<ConcreteFunction f(x) at 0x7FE8A9F4CD90>,
 <tf.Tensor: shape=(), dtype=string, numpy=b'aaaa'>)

In [24]:
fi = f.get_concrete_function(tf.TensorSpec(shape=None, dtype=tf.int32))
fi, fi(tf.constant(1))

(<ConcreteFunction f(x) at 0x7FE893B72F10>,
 <tf.Tensor: shape=(), dtype=int32, numpy=2>)

In [25]:
ff = f.get_concrete_function(tf.TensorSpec(shape=None, dtype=tf.float32))
ff, ff(tf.constant(1.))

(<ConcreteFunction f(x) at 0x7FE893B72EE0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=2.0>)

## `tf.function` with `keras` layers

In [26]:
# numpy is already imported as `np` in the setup cell at the top of the
# notebook, so it is not imported again here.
np.random.seed(0)  # fixed seed so the sample shown below is reproducible
data = np.random.randn(3, 2)  # 3 rows x 2 columns of standard-normal draws
data

array([[ 1.76405235,  0.40015721],
       [ 0.97873798,  2.2408932 ],
       [ 1.86755799, -0.97727788]])

In [27]:
# Layers are created once at notebook level and shared by every model
# function below.
# `input_shape` must be a shape tuple: `(2)` is just the int 2, `(2,)` is the
# one-element tuple meaning "2 features per sample".
inputer = tf.keras.layers.InputLayer(input_shape=(2,))
denser1 = tf.keras.layers.Dense(4, activation='relu')
denser2 = tf.keras.layers.Dense(1, activation='sigmoid')

observe that, in eager mode, each layer's operations are executed immediately as the Python code runs, so the `print` calls show concrete values on every call

In [28]:
def model_1(data):
    """Eager forward pass through inputer -> denser1 -> denser2.

    Prints the activations after each dense layer and returns the final output.
    """
    hidden = denser1(inputer(data))
    print('After the first layer:', hidden)
    prediction = denser2(hidden)
    print('After the second layer:', prediction)
    return prediction

print('Model output:\n', model_1(data))
print("--")
print('Model output:\n', model_1(data))

After the first layer: tf.Tensor(
[[0.         0.3713722  0.         1.2488008 ]
 [0.         2.1803753  0.         2.5198963 ]
 [0.         0.         0.         0.05428201]], shape=(3, 4), dtype=float32)
After the second layer: tf.Tensor(
[[0.2602086 ]
 [0.14438558]
 [0.4877198 ]], shape=(3, 1), dtype=float32)
Model output:
 tf.Tensor(
[[0.2602086 ]
 [0.14438558]
 [0.4877198 ]], shape=(3, 1), dtype=float32)
--
After the first layer: tf.Tensor(
[[0.         0.3713722  0.         1.2488008 ]
 [0.         2.1803753  0.         2.5198963 ]
 [0.         0.         0.         0.05428201]], shape=(3, 4), dtype=float32)
After the second layer: tf.Tensor(
[[0.2602086 ]
 [0.14438558]
 [0.4877198 ]], shape=(3, 1), dtype=float32)
Model output:
 tf.Tensor(
[[0.2602086 ]
 [0.14438558]
 [0.4877198 ]], shape=(3, 1), dtype=float32)


however, with `tf.function`, **FIRST** the function is **traced** resulting in a computational graph, which is what is **THEN** used in subsequent calls

In [29]:
@tf.function
def model_2(data):
    """Graph-compiled forward pass through inputer -> denser1 -> denser2.

    The `print` calls are plain Python: they appear only while the function
    is traced and show graph tensors rather than values.
    """
    hidden = denser1(inputer(data))
    print('After the first layer:', hidden)
    prediction = denser2(hidden)
    print('After the second layer:', prediction)
    return prediction


print('Model\'s output:', model_2(data))
print('--')
print('Model\'s output:', model_2(data))

After the first layer: Tensor("dense/Relu:0", shape=(3, 4), dtype=float32)
After the second layer: Tensor("dense_1/Sigmoid:0", shape=(3, 1), dtype=float32)
Model's output: tf.Tensor(
[[0.26020858]
 [0.14438558]
 [0.4877198 ]], shape=(3, 1), dtype=float32)
--
Model's output: tf.Tensor(
[[0.26020858]
 [0.14438558]
 [0.4877198 ]], shape=(3, 1), dtype=float32)


`tf.function` usually requires less compute time: in eager mode the operations are dispatched one by one from Python every time the function is called, whereas the traced graph is built once and then reused

In [30]:
def model_1(data):
    """Print-free eager forward pass (inputer -> denser1 -> denser2), used for timing."""
    return denser2(denser1(inputer(data)))

@tf.function
def model_2(data):
    """Print-free graph-compiled forward pass (inputer -> denser1 -> denser2), used for timing."""
    return denser2(denser1(inputer(data)))

In [31]:
%timeit model_1(data)

322 µs ± 15.9 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [32]:
%timeit model_2(data)

147 µs ± 1.15 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


### including graphs in upstream functions. 

observe how we compute the gradient of a computational graph:

- with `model_1` the graph is generated eagerly each time the function is called
- with `model_2` the graph is only generated in the first call

In [33]:
def g1(data):
    """Gradient of the eager `model_1` output with respect to `denser1`'s variables."""
    with tf.GradientTape() as tape:
        prediction = model_1(data)

    grads = tape.gradient(prediction, denser1.variables)
    return grads

def g2(data):
    """Gradient of the graph-compiled `model_2` output with respect to `denser1`'s variables."""
    with tf.GradientTape() as tape:
        prediction = model_2(data)

    grads = tape.gradient(prediction, denser1.variables)
    return grads

g2(data), g1(data)



([<tf.Tensor: shape=(2, 4), dtype=float32, numpy=
  array([[ 0.        ,  0.10590069,  0.        , -0.8391187 ],
         [ 0.        ,  0.08137974,  0.        , -0.09928459]],
        dtype=float32)>,
  <tf.Tensor: shape=(4,), dtype=float32, numpy=array([ 0.        ,  0.07268029,  0.        , -0.51218534], dtype=float32)>],
 [<tf.Tensor: shape=(2, 4), dtype=float32, numpy=
  array([[ 0.        ,  0.1059007 ,  0.        , -0.8391187 ],
         [ 0.        ,  0.08137975,  0.        , -0.09928459]],
        dtype=float32)>,
  <tf.Tensor: shape=(4,), dtype=float32, numpy=array([ 0.        ,  0.0726803 ,  0.        , -0.51218534], dtype=float32)>])

In [34]:
%timeit g1(data)

781 µs ± 71.7 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [35]:
%timeit g2(data)

593 µs ± 12.7 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


however, even in `g2` the gradient graph is still computed eagerly.

if we wrap either function, now everything is a cached computational graph.

In [36]:
fg1 = tf.function(g1)
%timeit fg1(data)

169 µs ± 2.3 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [37]:
fg2 = tf.function(g2)
%timeit fg2(data)

173 µs ± 6.25 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
