# Tensorflow Basics

Here we introduce Tensors, Operations, Variables and Automatic Differentiation .

In [1]:
import numpy as np
import tensorflow as tf
from pprint import pprint

## 1 Tensors
**Tensors** are multidimensional arrays. Each tensor has *shape* and *data* type property.  

In [2]:
scaler = tf.constant(1, dtype=tf.int8)
vector = tf.constant([1, 2, 3], dtype=tf.float32)
matrix = tf.constant([[1, 2], [3, 4]], dtype=tf.float32)

pprint([scaler, vector, matrix])

[<tf.Tensor: id=0, shape=(), dtype=int8, numpy=1>,
 <tf.Tensor: id=1, shape=(3,), dtype=float32, numpy=array([1., 2., 3.], dtype=float32)>,
 <tf.Tensor: id=2, shape=(2, 2), dtype=float32, numpy=
array([[1., 2.],
       [3., 4.]], dtype=float32)>]


Data type convertion needs the `tf.cast` funtion

In [3]:
tf.cast(matrix, dtype=tf.int8)

<tf.Tensor: id=3, shape=(2, 2), dtype=int8, numpy=
array([[1, 2],
       [3, 4]], dtype=int8)>

Note that the `shape` property of a tensor and result from `tf.shape` function on a tensor is different.

In [4]:
print(tf.shape(matrix))
print(matrix.shape)

tf.Tensor([2 2], shape=(2,), dtype=int32)
(2, 2)


Indexing tensors works pretty much as you would imagined with arrays.

In [5]:
print(matrix[:, 1])
print(matrix[1, :])

tf.Tensor([2. 4.], shape=(2,), dtype=float32)
tf.Tensor([3. 4.], shape=(2,), dtype=float32)


There are handy functions to create special tensors.

In [6]:
o = tf.zeros((5, 4))
o

<tf.Tensor: id=15, shape=(5, 4), dtype=float32, numpy=
array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]], dtype=float32)>

In [7]:
b = tf.ones_like(o)
b

<tf.Tensor: id=18, shape=(5, 4), dtype=float32, numpy=
array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]], dtype=float32)>

In [8]:
a = tf.random.uniform((5, 4))
print(a)
print(a.shape)
print(type(a))

tf.Tensor(
[[0.6121652  0.7685324  0.5049937  0.5713525 ]
 [0.4032278  0.48369217 0.24434793 0.14046729]
 [0.21229136 0.2735753  0.0347935  0.5860987 ]
 [0.3452642  0.5200219  0.41229272 0.8110796 ]
 [0.6535505  0.14351332 0.51372063 0.89075303]], shape=(5, 4), dtype=float32)
(5, 4)
<class 'tensorflow.python.framework.ops.EagerTensor'>


Converting between numpy ndarrary and Tensor is pretty strightforwad.

In [9]:
b = a.numpy()
print(b)
print(type(b))

[[0.6121652  0.7685324  0.5049937  0.5713525 ]
 [0.4032278  0.48369217 0.24434793 0.14046729]
 [0.21229136 0.2735753  0.0347935  0.5860987 ]
 [0.3452642  0.5200219  0.41229272 0.8110796 ]
 [0.6535505  0.14351332 0.51372063 0.89075303]]
<class 'numpy.ndarray'>


In [10]:
c = tf.convert_to_tensor(b)
print(c)
print(type(c))

tf.Tensor(
[[0.6121652  0.7685324  0.5049937  0.5713525 ]
 [0.4032278  0.48369217 0.24434793 0.14046729]
 [0.21229136 0.2735753  0.0347935  0.5860987 ]
 [0.3452642  0.5200219  0.41229272 0.8110796 ]
 [0.6535505  0.14351332 0.51372063 0.89075303]], shape=(5, 4), dtype=float32)
<class 'tensorflow.python.framework.ops.EagerTensor'>


Moving Tensors between devices is also easy, well if you do have gpu. 

In [11]:
a = a.cpu()
a.device

W1218 16:01:08.288960 140541290612480 deprecation.py:323] From <ipython-input-11-db019b1b2704>:1: _EagerTensorBase.cpu (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.identity instead.


'/job:localhost/replica:0/task:0/device:CPU:0'

In [12]:
# a.gpu()
# a.device

## Operations 

They works pretty much as you'd expect.

In [13]:
a + b == tf.add(a, b)

<tf.Tensor: id=32, shape=(5, 4), dtype=bool, numpy=
array([[ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True]])>

In [14]:
a * b == tf.multiply(a, b)

<tf.Tensor: id=37, shape=(5, 4), dtype=bool, numpy=
array([[ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True]])>

In [15]:
a @ tf.transpose(b) == tf.matmul(a, b, transpose_b=True)

<tf.Tensor: id=44, shape=(5, 5), dtype=bool, numpy=
array([[ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True]])>

## Variables  

Tensors are immutable, they can't be updated. So, we need the updatable Tensors that is called `Variable` for parameters in the model. It works pretty much like Tensors, just updatable.

In [16]:
v = tf.Variable(a)
print(v)

<tf.Variable 'Variable:0' shape=(5, 4) dtype=float32, numpy=
array([[0.6121652 , 0.7685324 , 0.5049937 , 0.5713525 ],
       [0.4032278 , 0.48369217, 0.24434793, 0.14046729],
       [0.21229136, 0.2735753 , 0.0347935 , 0.5860987 ],
       [0.3452642 , 0.5200219 , 0.41229272, 0.8110796 ],
       [0.6535505 , 0.14351332, 0.51372063, 0.89075303]], dtype=float32)>


In [17]:
tf.square(v)

<tf.Tensor: id=54, shape=(5, 4), dtype=float32, numpy=
array([[0.37474623, 0.59064204, 0.25501862, 0.32644367],
       [0.16259266, 0.23395811, 0.05970591, 0.01973106],
       [0.04506762, 0.07484345, 0.00121059, 0.34351164],
       [0.11920737, 0.2704228 , 0.16998528, 0.65785015],
       [0.42712826, 0.02059607, 0.2639089 , 0.79344094]], dtype=float32)>

In [18]:
v.assign(tf.square(v))
print(v)

<tf.Variable 'Variable:0' shape=(5, 4) dtype=float32, numpy=
array([[0.37474623, 0.59064204, 0.25501862, 0.32644367],
       [0.16259266, 0.23395811, 0.05970591, 0.01973106],
       [0.04506762, 0.07484345, 0.00121059, 0.34351164],
       [0.11920737, 0.2704228 , 0.16998528, 0.65785015],
       [0.42712826, 0.02059607, 0.2639089 , 0.79344094]], dtype=float32)>


In [19]:
v.assign_sub(1 * tf.ones_like(v, dtype=tf.float32))
print(v)

<tf.Variable 'Variable:0' shape=(5, 4) dtype=float32, numpy=
array([[-0.6252538 , -0.40935796, -0.7449814 , -0.6735563 ],
       [-0.83740735, -0.7660419 , -0.9402941 , -0.98026896],
       [-0.9549324 , -0.92515653, -0.9987894 , -0.65648836],
       [-0.8807926 , -0.7295772 , -0.8300147 , -0.34214985],
       [-0.57287174, -0.9794039 , -0.73609114, -0.20655906]],
      dtype=float32)>


## Automatic Differentiation 

Tensorflow can compute the partial derivatives of a computation with respect to the inputs for us. 

`tf.GradientTape` is a context in which all the operations around the variables are recorded. As a result, after the computation, we can ask if for the gradients.

In [20]:
def f(a, b, d=3):
    return tf.pow(a, 2) + d * b


a = tf.Variable([4], dtype=tf.float32)
b = tf.Variable([5], dtype=tf.float32)

with tf.GradientTape() as tape:
    c = f(a, b)

pprint(tape.gradient(c, [a, b]))

[<tf.Tensor: id=93, shape=(1,), dtype=float32, numpy=array([8.], dtype=float32)>,
 <tf.Tensor: id=100, shape=(1,), dtype=float32, numpy=array([3.], dtype=float32)>]


Note that by default the context only keep track of `Variable`s. If you so wish to keep track of other stuff, you need just ask for it.

In [21]:
d = tf.constant(3, dtype=tf.float32)
with tf.GradientTape() as tape:
    tape.watch(d)
    c = f(a, b, d)

pprint(tape.gradient(c, [d]))

[<tf.Tensor: id=122, shape=(), dtype=float32, numpy=5.0>]


Also note the tape is not persistent, it will be deleted once used. Unless specificlly asked to be persistent. But then, you will need to mannually delete it to free up resources

In [22]:
with tf.GradientTape() as tape:
    c = f(a, b)

print(tape.gradient(c, [a]))
print(tape.gradient(c, [b]))

[<tf.Tensor: id=140, shape=(1,), dtype=float32, numpy=array([8.], dtype=float32)>]


RuntimeError: GradientTape.gradient can only be called once on non-persistent tapes.

In [23]:
with tf.GradientTape(persistent=True) as tape:
    c = f(a, b)

print(tape.gradient(c, [a]))
print(tape.gradient(c, [b]))

del tape

[<tf.Tensor: id=162, shape=(1,), dtype=float32, numpy=array([8.], dtype=float32)>]
[<tf.Tensor: id=184, shape=(1,), dtype=float32, numpy=array([3.], dtype=float32)>]


## Linear Regression

With all this we can start writing and training models already.

In [24]:
true_weights = tf.constant([1,2,3,4,5], dtype=tf.float32)[:, tf.newaxis]
x = tf.constant(tf.random.uniform((5, 5)), dtype=tf.float32)
y = tf.constant(x @ true_weights, dtype=tf.float32)

weights = tf.Variable(tf.random.uniform((5, 1)), dtype=tf.float32)

learning_rate = .5
max_epochs = 500


def forward_pass(x, y, weights):
    y_hat = x @ weights
    mse = tf.reduce_mean(tf.square(y - y_hat))
    return mse

In [25]:
for it in range(max_epochs):
    with tf.GradientTape() as tape:
        loss = forward_pass(x, y, weights)
    if not (it % 100):
        print('mse loss at iteration {} is {:5.4f}'.format(it, loss))
    gradients = tape.gradient(loss, weights)
    weights.assign_add(- learning_rate * gradients)

mse loss at iteration 0 is 25.3439
mse loss at iteration 100 is 0.0022
mse loss at iteration 200 is 0.0002
mse loss at iteration 300 is 0.0001
mse loss at iteration 400 is 0.0001


In [26]:
weights

<tf.Variable 'Variable:0' shape=(5, 1) dtype=float32, numpy=
array([[1.1119765],
       [1.9741836],
       [2.8355412],
       [4.088153 ],
       [4.9799294]], dtype=float32)>