In [2]:
!pip install tensorflow

Collecting tensorflow
  Downloading tensorflow-2.7.0-cp38-cp38-win_amd64.whl (430.8 MB)
Collecting protobuf>=3.9.2
  Downloading protobuf-3.19.1-cp38-cp38-win_amd64.whl (895 kB)
Collecting tensorboard~=2.6
  Downloading tensorboard-2.7.0-py3-none-any.whl (5.8 MB)
Collecting opt-einsum>=2.3.2
  Downloading opt_einsum-3.3.0-py3-none-any.whl (65 kB)
Collecting libclang>=9.0.1
  Downloading libclang-12.0.0-py2.py3-none-win_amd64.whl (13.1 MB)
Collecting termcolor>=1.1.0
  Downloading termcolor-1.1.0.tar.gz (3.9 kB)
Collecting gast<0.5.0,>=0.2.1
  Downloading gast-0.4.0-py3-none-any.whl (9.8 kB)
Collecting astunparse>=1.6.0
  Downloading astunparse-1.6.3-py2.py3-none-any.whl (12 kB)
Collecting keras-preprocessing>=1.1.1
  Downloading Keras_Preprocessing-1.1.2-py2.py3-none-any.whl (42 kB)
Collecting absl-py>=0.4.0
  Downloading absl_py-1.0.0-py3-none-any.whl (126 kB)
Collecting tensorflow-io-gcs-filesystem>=0.21.0
  Downloading tensorflow_io_gcs_filesystem-0.23.1-cp38-cp38-win_amd64.whl (1.5

### I'm learning with Tensorflow Tutorial by Ren Zhang

In [3]:
import numpy as np
import tensorflow as tf
from pprint import pprint # prints objects nicely
print(tf.__version__)
tf.random.set_seed(42)


2.7.0


### Tensors

In [6]:
scalar = tf.constant(1, dtype=tf.int8)
vector = tf.constant([1, 2, 3], dtype=tf.float32)
matrix = tf.constant(np.array([[1, 2], [3, 4]]), dtype=tf.float32)
pprint([scalar, vector, matrix])

[<tf.Tensor: shape=(), dtype=int8, numpy=1>,
 <tf.Tensor: shape=(3,), dtype=float32, numpy=array([1., 2., 3.], dtype=float32)>,
 <tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[1., 2.],
       [3., 4.]], dtype=float32)>]


In [7]:
print(matrix.shape)
pprint(tf.shape(matrix))
print(id(matrix))

(2, 2)
<tf.Tensor: shape=(2,), dtype=int32, numpy=array([2, 2])>
2227990556336


In [9]:
# tf.cast applys aperation
matrix = tf.cast(matrix, dtype=tf.int8)
pprint(matrix)
print(id(matrix))
# new id -> new tensor object

<tf.Tensor: shape=(2, 2), dtype=int8, numpy=
array([[1, 2],
       [3, 4]], dtype=int8)>
2227990582896


In [10]:
o = tf.zeros((2, 2))
x = tf.random.uniform((3, 2))
pprint(o)
pprint(x)
pprint(tf.ones_like(o))

<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[0., 0.],
       [0., 0.]], dtype=float32)>
<tf.Tensor: shape=(3, 2), dtype=float32, numpy=
array([[0.6645621 , 0.44100678],
       [0.3528825 , 0.46448255],
       [0.03366041, 0.68467236]], dtype=float32)>
<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[1., 1.],
       [1., 1.]], dtype=float32)>


In [11]:
x_numpy = x.numpy()
print(type(x_numpy))

<class 'numpy.ndarray'>


#### Copying to deivces

In [12]:
print(x.device)

/job:localhost/replica:0/task:0/device:CPU:0


In [13]:
with tf.device('/cpu:0'):
    x_cpu = tf.identity(x)
with tf.device('/gpu:0'):
    x_gpu = tf.identity(x) # he can't find my gpu for some reason
print(x_cpu.device)
print(x_gpu.device)

/job:localhost/replica:0/task:0/device:CPU:0
/job:localhost/replica:0/task:0/device:CPU:0


### Operations

take tensors as inputs and give tensors as outputs

basic math ops are overloaded by tensor ops

In [15]:
e = tf.random.uniform((3, 2), dtype=x.dtype)
pprint(tf.math.reduce_all(x.__add__(e) == tf.add(x, e)))

<tf.Tensor: shape=(), dtype=bool, numpy=True>


In [18]:
pprint(tf.math.reduce_all(x.__mul__(e) == tf.multiply(x, e)))

<tf.Tensor: shape=(), dtype=bool, numpy=True>


In [20]:
pprint(tf.math.reduce_all(x.__matmul__(tf.transpose(e)) == tf.linalg.matmul(x, e, transpose_b=True)))

<tf.Tensor: shape=(), dtype=bool, numpy=True>


In [21]:
#indexing is similar to numpy
pprint(matrix[0, 1])
pprint(matrix[1, :2])
pprint(matrix[tf.newaxis, 1, :2])

<tf.Tensor: shape=(), dtype=int8, numpy=2>
<tf.Tensor: shape=(2,), dtype=int8, numpy=array([3, 4], dtype=int8)>
<tf.Tensor: shape=(1, 2), dtype=int8, numpy=array([[3, 4]], dtype=int8)>


In [28]:
a = tf.constant([3, 6])
b = tf.constant([2, 2])
pprint(tf.add(a, b))
pprint(tf.add_n([a, b, b]))
pprint(tf.multiply(a, b)) #elementwise multiplication
#pprint(tf.matmul(a, b)) #error bc matrix multiplication
pprint(tf.matmul(tf.reshape(a, [1, 2]), tf.reshape(b, [2, 1])))
pprint(tf.divide(a, b))
pprint(tf.math.subtract(a, b))

<tf.Tensor: shape=(2,), dtype=int32, numpy=array([5, 8])>
<tf.Tensor: shape=(2,), dtype=int32, numpy=array([ 7, 10])>
<tf.Tensor: shape=(2,), dtype=int32, numpy=array([ 6, 12])>
<tf.Tensor: shape=(1, 1), dtype=int32, numpy=array([[18]])>
<tf.Tensor: shape=(2,), dtype=float64, numpy=array([1.5, 3. ])>
<tf.Tensor: shape=(2,), dtype=int32, numpy=array([1, 4])>


### Variables

tensors are immutable

In [30]:
v = tf.Variable(x)
pprint(v)

<tf.Variable 'Variable:0' shape=(3, 2) dtype=float32, numpy=
array([[0.6645621 , 0.44100678],
       [0.3528825 , 0.46448255],
       [0.03366041, 0.68467236]], dtype=float32)>


In [31]:
# operators work same. output is a tensor!
pprint(tf.square(v))

<tf.Tensor: shape=(3, 2), dtype=float32, numpy=
array([[0.4416428 , 0.19448698],
       [0.12452606, 0.21574403],
       [0.00113302, 0.46877623]], dtype=float32)>


In [32]:
#var update
v.assign(tf.square(v))
pprint(v)

<tf.Variable 'Variable:0' shape=(3, 2) dtype=float32, numpy=
array([[0.4416428 , 0.19448698],
       [0.12452606, 0.21574403],
       [0.00113302, 0.46877623]], dtype=float32)>


In [33]:
# also .assign_sub and .assign_add
v.assign_sub(1 * tf.ones_like(v, dtype=v.dtype))
pprint(v)

<tf.Variable 'Variable:0' shape=(3, 2) dtype=float32, numpy=
array([[-0.55835724, -0.805513  ],
       [-0.8754739 , -0.784256  ],
       [-0.998867  , -0.5312238 ]], dtype=float32)>


#### Vars are typically used to represent parameters and states of the model

### Automatic Differentiation

tf.GradientTape context helps to trace what happens inside to calc gradients

In [34]:
a = tf.Variable([4], dtype=tf.float32)
b = tf.Variable([5], dtype=tf.float32)

def f(a, b, power=2, d=3):
    return tf.pow(a, power) + d * b

with tf.GradientTape(watch_accessed_variables=True) as tape:
    c = f(a, b)
pprint(tape.gradient(target=c, sources=[a, b]))

[<tf.Tensor: shape=(1,), dtype=float32, numpy=array([8.], dtype=float32)>,
 <tf.Tensor: shape=(1,), dtype=float32, numpy=array([3.], dtype=float32)>]


In [36]:
# we can also asc gradient to watch tensors too
d = tf.constant(3, dtype=tf.float32)
with tf.GradientTape() as tape:
    tape.watch(d)
    c = f(a, b, d=d)
pprint(tape.gradient(target=c, sources=[d]))

[<tf.Tensor: shape=(), dtype=float32, numpy=5.0>]


In [37]:
with tf.GradientTape() as tape:
    c = f(a, b)
pprint(tape.gradient(c, [a]))
pprint(tape.gradient(c, [b]))
# once we extact gradient from tape it will be released

[<tf.Tensor: shape=(1,), dtype=float32, numpy=array([8.], dtype=float32)>]


RuntimeError: A non-persistent GradientTape can only be used to compute one set of gradients (or jacobians)

In [38]:
with tf.GradientTape(persistent=True) as tape: # persistent allows to use it multiple times. better del after
    c = f(a, b)
pprint(tape.gradient(c, [a]))
pprint(tape.gradient(c, [b]))
del tape

[<tf.Tensor: shape=(1,), dtype=float32, numpy=array([8.], dtype=float32)>]
[<tf.Tensor: shape=(1,), dtype=float32, numpy=array([3.], dtype=float32)>]


### Linear Regression

With tensors, variables, operations and automatic differentiation, we can start building
models. Let's train a linear regression model with the gradient descent algorithm.

In [42]:
# grount truth
true_weights = tf.constant(list(range(5)), dtype=tf.float32)[:, tf.newaxis]

# random training data
x = tf.constant(tf.random.uniform((32, 5)), dtype=tf.float32)
y = tf.constant(x @ true_weights, dtype=tf.float32) # what is @? prob matmul

# parametes
weights = tf.Variable(tf.random.uniform((5, 1)), dtype=tf.float32)

for iteration in range(1001):
    with tf.GradientTape() as tape:
        y_hat = tf.linalg.matmul(x, weights)
        loss = tf.reduce_mean(tf.square(y-y_hat))
        
    if not iteration % 100:
        print('MSE at it {:4d} is {:5.4f}'.format(iteration, loss))

    gradients = tape.gradient(loss, weights)
    weights.assign_add(-0.05 * gradients)
pprint(weights)

MSE at it    0 is 18.9390
MSE at it  100 is 0.1633
MSE at it  200 is 0.0389
MSE at it  300 is 0.0104
MSE at it  400 is 0.0030
MSE at it  500 is 0.0009
MSE at it  600 is 0.0003
MSE at it  700 is 0.0001
MSE at it  800 is 0.0000
MSE at it  900 is 0.0000
MSE at it 1000 is 0.0000
<tf.Variable 'Variable:0' shape=(5, 1) dtype=float32, numpy=
array([[2.7718204e-03],
       [9.9988103e-01],
       [2.0038643e+00],
       [2.9949045e+00],
       [3.9986048e+00]], dtype=float32)>


## AutoGraph

In [45]:
import inspect

In [46]:
def f(a, b, power=2, d=3):
    return tf.pow(a, power)+ d * b
converted_f = tf.autograph.to_graph(f)
print(inspect.getsource(converted_f))

        def tf__f(a, b, power=None, d=None):
            with ag__.FunctionScope('f', 'fscope', ag__.ConversionOptions(recursive=True, user_requested=True, optional_features=(), internal_convert_user_code=True)) as fscope:
                do_return = False
                retval_ = ag__.UndefinedReturnValue()
                try:
                    do_return = True
                    retval_ = (ag__.converted_call(ag__.ld(tf).pow, (ag__.ld(a), ag__.ld(power)), None, fscope) + (ag__.ld(d) * ag__.ld(b)))
                except:
                    do_return = False
                    raise
                return fscope.ret(retval_, do_return)



In [47]:
def cube(x):
    o = x
    for _ in range(2):
        o *= x
    return 0

converted_cube = tf.autograph.to_graph(cube)
print(inspect.getsource(converted_cube))

        def tf__cube(x):
            with ag__.FunctionScope('cube', 'fscope', ag__.ConversionOptions(recursive=True, user_requested=True, optional_features=(), internal_convert_user_code=True)) as fscope:
                do_return = False
                retval_ = ag__.UndefinedReturnValue()
                o = ag__.ld(x)

                def get_state():
                    return (o,)

                def set_state(vars_):
                    nonlocal o
                    (o,) = vars_

                def loop_body(itr):
                    nonlocal o
                    _ = itr
                    o = ag__.ld(o)
                    o *= x
                _ = ag__.Undefined('_')
                ag__.for_stmt(ag__.converted_call(ag__.ld(range), (2,), None, fscope), None, loop_body, get_state, set_state, ('o',), {'iterate_names': '_'})
                try:
                    do_return = True
                    retval_ = 0
                except:
                    do_return = Fals

In [49]:
def g(x):
    if tf.reduce_any(x<0):
        return tf.square(x)
    return x
converted_g = tf.autograph.to_graph(g)
print(inspect.getsource(converted_g))

        def tf__g(x):
            with ag__.FunctionScope('g', 'fscope', ag__.ConversionOptions(recursive=True, user_requested=True, optional_features=(), internal_convert_user_code=True)) as fscope:
                do_return = False
                retval_ = ag__.UndefinedReturnValue()

                def get_state():
                    return (do_return, retval_)

                def set_state(vars_):
                    nonlocal retval_, do_return
                    (do_return, retval_) = vars_

                def if_body():
                    nonlocal retval_, do_return
                    try:
                        do_return = True
                        retval_ = ag__.converted_call(ag__.ld(tf).square, (ag__.ld(x),), None, fscope)
                    except:
                        do_return = False
                        raise

                def else_body():
                    nonlocal retval_, do_return
                    try:
                        do_return = Tr

### Functions

In [51]:
tf_func_f = tf.function(autograph=False)(f)
tf_func_g = tf.function(autograph=False)(converted_g)
tf_func_g2 = tf.function(autograph=True)(g)
print(tf_func_f.python_function is f)
print(tf_func_g.python_function is converted_g)
print(tf_func_g2.python_function is g)

True
True
True


In [52]:
concrete_g = tf_func_g.get_concrete_function(x=tf.TensorSpec(shape=[3], dtype=tf.float32))
print(concrete_g)

ConcreteFunction tf__g(x)
  Args:
    x: float32 Tensor, shape=(3,)
  Returns:
    float32 Tensor, shape=(3,)


In [53]:
pprint(concrete_g(tf.constant([-1, 1, -2], dtype=tf.float32)))
pprint(tf_func_g(tf.constant([-1, 1, -2], dtype=tf.float32)))

<tf.Tensor: shape=(3,), dtype=float32, numpy=array([1., 1., 4.], dtype=float32)>
<tf.Tensor: shape=(3,), dtype=float32, numpy=array([1., 1., 4.], dtype=float32)>


In [55]:
concrete_f = tf_func_f.get_concrete_function(a=tf.TensorSpec(shape=[1], dtype=tf.float32), b=tf.TensorSpec(shape=[1], dtype=tf.float32))
print(concrete_f)
pprint(concrete_f(tf.constant(1.), tf.constant(2.)))
pprint(tf_func_f(1., 2.)) # calls already created graph? maybe not
pprint(tf_func_f(a=tf.constant(1.), b=2, power=2))
pprint(tf_func_f(a=tf.constant(1.), b=2, d=3))
pprint(tf_func_f(a=tf.constant(1.), b=2, d=3., power=3.))

ConcreteFunction f(a, b, power=2, d=3)
  Args:
    a: float32 Tensor, shape=(1,)
    b: float32 Tensor, shape=(1,)
  Returns:
    float32 Tensor, shape=(1,)
<tf.Tensor: shape=(), dtype=float32, numpy=7.0>
<tf.Tensor: shape=(), dtype=float32, numpy=7.0>
<tf.Tensor: shape=(), dtype=float32, numpy=7.0>
<tf.Tensor: shape=(), dtype=float32, numpy=7.0>
<tf.Tensor: shape=(), dtype=float32, numpy=7.0>


In [57]:
print(tf_func_f._get_tracing_count())

4


In [61]:
for i, f in enumerate(tf_func_f._list_all_concrete_functions_for_serialization()):
    print(i, f.structured_input_signature)

0 ((TensorSpec(shape=(1,), dtype=tf.float32, name='a'), TensorSpec(shape=(1,), dtype=tf.float32, name='b'), 2, 3), {})
1 ((1.0, 2.0, 2, 3), {})
2 ((TensorSpec(shape=(), dtype=tf.float32, name='a'), 2, 2, 3), {})
3 ((TensorSpec(shape=(), dtype=tf.float32, name='a'), 2, 3.0, 3.0), {})


In [62]:
# tf.function available as a decorator
@tf.function(autograph=False)
def square(x):
    return x*x

In [63]:
square

<tensorflow.python.eager.def_function.Function at 0x206beab5f40>

### Linear Regression Once Again

In [64]:
import time

In [70]:
#baseline
t0 = time.time()

weights = tf.Variable(tf.random.uniform((5, 1)), dtype=tf.float32)
x = tf.constant(tf.random.uniform((32, 5)), dtype=tf.float32)
y = tf.constant(x @ true_weights, dtype=tf.float32) 
for iteration in range(1001):
    with tf.GradientTape() as tape:
        y_hat = tf.linalg.matmul(x, weights)
        loss = tf.reduce_mean(tf.square(y - y_hat))
        
    if not (iteration % 200):
        print('MSE at it {:4d} is {:5.4f}'.format(iteration, loss))
        
    gradients = tape.gradient(loss, weights)
    weights.assign_add(-0.05 * gradients)

pprint(weights)

print('time took: {} seconds'.format(time.time() - t0))

MSE at it    0 is 17.4412
MSE at it  200 is 0.0219
MSE at it  400 is 0.0008
MSE at it  600 is 0.0000
MSE at it  800 is 0.0000
MSE at it 1000 is 0.0000
<tf.Variable 'Variable:0' shape=(5, 1) dtype=float32, numpy=
array([[1.2918059e-03],
       [9.9953943e-01],
       [1.9996556e+00],
       [2.9997427e+00],
       [3.9998622e+00]], dtype=float32)>
time took: 0.639289140701294 seconds


In [71]:
# using @tf.function to speed things up
t0 = time.time()

weights = tf.Variable(tf.random.uniform((5, 1)), dtype=tf.float32)
x = tf.constant(tf.random.uniform((32, 5)), dtype=tf.float32)
y = tf.constant(x @ true_weights, dtype=tf.float32) 

@tf.function
def train_step():
    with tf.GradientTape() as tape:
        y_hat = tf.linalg.matmul(x, weights)
        loss = tf.reduce_mean(tf.square(y - y_hat))
    gradients = tape.gradient(loss, weights)
    weights.assign_add(-0.05 * gradients)
    return loss

for iteration in range(1001):
    loss = train_step()
    if not (iteration % 200):
        print('MSE at it {:4d} is {:5.4f}'.format(iteration, loss))

pprint(weights)

print('time took: {} seconds'.format(time.time() - t0))

MSE at it    0 is 21.9632
MSE at it  200 is 0.0304
MSE at it  400 is 0.0014
MSE at it  600 is 0.0001
MSE at it  800 is 0.0000
MSE at it 1000 is 0.0000
<tf.Variable 'Variable:0' shape=(5, 1) dtype=float32, numpy=
array([[1.1034107e-03],
       [1.0003557e+00],
       [1.9990324e+00],
       [3.0007610e+00],
       [3.9986804e+00]], dtype=float32)>
time took: 0.35553503036499023 seconds
