# Tensorflow Basics

Here we introduce Tensors, Operations, Variables, and Automatic Differentiation.

In [1]:
import numpy as np
import tensorflow as tf
from pprint import pprint

## 1 Tensors
**Tensors** are multidimensional arrays. Each tensor has a *shape* and a *data type* (`dtype`) property.

In [2]:
# A rank-0 tensor (scalar), a rank-1 tensor (vector) and a rank-2 tensor (matrix),
# each created with an explicit dtype.
scaler = tf.constant(1, dtype=tf.int8)
vector = tf.constant([1, 2, 3], dtype=tf.float32)
matrix = tf.constant([[1, 2], [3, 4]], dtype=tf.float32)

pprint([scaler, vector, matrix])

[<tf.Tensor: id=0, shape=(), dtype=int8, numpy=1>,
 <tf.Tensor: id=1, shape=(3,), dtype=float32, numpy=array([1., 2., 3.], dtype=float32)>,
 <tf.Tensor: id=2, shape=(2, 2), dtype=float32, numpy=
array([[1., 2.],
       [3., 4.]], dtype=float32)>]


Data type conversion is done with the `tf.cast` function.

In [3]:
# tf.cast returns a new tensor with the requested dtype (float32 -> int8 here).
tf.cast(matrix, dtype=tf.int8)

<tf.Tensor: id=3, shape=(2, 2), dtype=int8, numpy=
array([[1, 2],
       [3, 4]], dtype=int8)>

Note that the `shape` property of a tensor and the result of calling the `tf.shape` function on it are different: the property prints as `(2, 2)`, whereas `tf.shape` returns the shape as a tensor.

In [4]:
print(tf.shape(matrix))  # the shape returned as an int32 tensor
print(matrix.shape)  # the shape property, printed as (2, 2)

tf.Tensor([2 2], shape=(2,), dtype=int32)
(2, 2)


Indexing tensors works pretty much as you would expect from arrays.

In [5]:
print(matrix[:, 1])  # every row, column index 1
print(matrix[1, :])  # row index 1, every column

tf.Tensor([2. 4.], shape=(2,), dtype=float32)
tf.Tensor([3. 4.], shape=(2,), dtype=float32)


There are handy functions to create special tensors.

In [6]:
# A 5x4 tensor filled with zeros.
o = tf.zeros((5, 4))
o

<tf.Tensor: id=15, shape=(5, 4), dtype=float32, numpy=
array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]], dtype=float32)>

In [7]:
# A tensor of ones with the same shape as `o`.
b = tf.ones_like(o)
b

<tf.Tensor: id=18, shape=(5, 4), dtype=float32, numpy=
array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]], dtype=float32)>

In [8]:
# A 5x4 tensor of uniformly distributed random samples. No seed is set in this
# notebook, so the values will differ between runs.
a = tf.random.uniform((5, 4))
print(a)
print(a.shape)
print(type(a))

tf.Tensor(
[[0.51489913 0.14878559 0.87637496 0.16338205]
 [0.5680398  0.6436268  0.85502243 0.16321206]
 [0.15703404 0.23542702 0.26656926 0.20130396]
 [0.9828062  0.09003305 0.04659343 0.64389896]
 [0.40983427 0.8913517  0.30197513 0.29110909]], shape=(5, 4), dtype=float32)
(5, 4)
<class 'tensorflow.python.framework.ops.EagerTensor'>


Converting between a NumPy `ndarray` and a Tensor is pretty straightforward.

In [9]:
# Tensor -> NumPy array. Note that this rebinds `b`, which previously held the
# tensor of ones.
b = a.numpy()
print(b)
print(type(b))

[[0.51489913 0.14878559 0.87637496 0.16338205]
 [0.5680398  0.6436268  0.85502243 0.16321206]
 [0.15703404 0.23542702 0.26656926 0.20130396]
 [0.9828062  0.09003305 0.04659343 0.64389896]
 [0.40983427 0.8913517  0.30197513 0.29110909]]
<class 'numpy.ndarray'>


In [10]:
# NumPy array -> Tensor.
c = tf.convert_to_tensor(b)
print(c)
print(type(c))

tf.Tensor(
[[0.51489913 0.14878559 0.87637496 0.16338205]
 [0.5680398  0.6436268  0.85502243 0.16321206]
 [0.15703404 0.23542702 0.26656926 0.20130396]
 [0.9828062  0.09003305 0.04659343 0.64389896]
 [0.40983427 0.8913517  0.30197513 0.29110909]], shape=(5, 4), dtype=float32)
<class 'tensorflow.python.framework.ops.EagerTensor'>


Moving Tensors between devices is also easy, provided you have a GPU.

In [11]:
# `Tensor.cpu()` is deprecated (TensorFlow's own warning says to use
# tf.identity instead), so copy the tensor under an explicit device scope.
with tf.device('CPU:0'):
    a = tf.identity(a)
a.device

Instructions for updating:
Use tf.identity instead.


'/job:localhost/replica:0/task:0/device:CPU:0'

In [12]:
# `Tensor.gpu()` is deprecated (TensorFlow's own warning says to use
# tf.identity instead), so copy the tensor under an explicit device scope.
# Like the original call, this needs a GPU to be available.
with tf.device('GPU:0'):
    a = tf.identity(a)
a.device

Instructions for updating:
Use tf.identity instead.


'/job:localhost/replica:0/task:0/device:GPU:0'

## 2 Operations 

They work pretty much as you'd expect.

In [13]:
# The `+` operator and tf.add produce the same element-wise result.
# (`b` is the NumPy copy of `a` made earlier.)
a + b == tf.add(a, b)

<tf.Tensor: id=33, shape=(5, 4), dtype=bool, numpy=
array([[ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True]])>

In [14]:
# The `*` operator is the element-wise product, the same as tf.multiply.
a * b == tf.multiply(a, b)

<tf.Tensor: id=38, shape=(5, 4), dtype=bool, numpy=
array([[ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True]])>

In [15]:
# The `@` operator is matrix multiplication; transpose_b=True asks tf.matmul to
# transpose its second argument, so both sides compute a (5, 5) product.
a @ tf.transpose(b) == tf.matmul(a, b, transpose_b=True)

<tf.Tensor: id=45, shape=(5, 5), dtype=bool, numpy=
array([[ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True]])>

## 3 Variables  

Tensors are immutable; they can't be updated. So, for the parameters of a model we need an updatable kind of tensor, called a `Variable`. It works pretty much like a Tensor, except that it can be updated.

In [16]:
# Create a Variable whose initial value is the tensor `a`.
v = tf.Variable(a)
print(v)

<tf.Variable 'Variable:0' shape=(5, 4) dtype=float32, numpy=
array([[0.51489913, 0.14878559, 0.87637496, 0.16338205],
       [0.5680398 , 0.6436268 , 0.85502243, 0.16321206],
       [0.15703404, 0.23542702, 0.26656926, 0.20130396],
       [0.9828062 , 0.09003305, 0.04659343, 0.64389896],
       [0.40983427, 0.8913517 , 0.30197513, 0.29110909]], dtype=float32)>


In [17]:
# Operations accept Variables as inputs; the result is a regular Tensor.
tf.square(v)

<tf.Tensor: id=56, shape=(5, 4), dtype=float32, numpy=
array([[0.26512113, 0.02213715, 0.7680331 , 0.0266937 ],
       [0.32266918, 0.41425547, 0.73106337, 0.02663818],
       [0.02465969, 0.05542588, 0.07105917, 0.04052328],
       [0.96590805, 0.00810595, 0.00217095, 0.4146059 ],
       [0.16796413, 0.79450786, 0.09118898, 0.0847445 ]], dtype=float32)>

In [18]:
# assign() replaces the value held by the Variable.
v.assign(tf.square(v))
print(v)

<tf.Variable 'Variable:0' shape=(5, 4) dtype=float32, numpy=
array([[0.26512113, 0.02213715, 0.7680331 , 0.0266937 ],
       [0.32266918, 0.41425547, 0.73106337, 0.02663818],
       [0.02465969, 0.05542588, 0.07105917, 0.04052328],
       [0.96590805, 0.00810595, 0.00217095, 0.4146059 ],
       [0.16796413, 0.79450786, 0.09118898, 0.0847445 ]], dtype=float32)>


In [19]:
# Subtract 1 from every element of the Variable.
v.assign_sub(tf.ones_like(v))
print(v)

<tf.Variable 'Variable:0' shape=(5, 4) dtype=float32, numpy=
array([[-0.7348789 , -0.97786283, -0.23196691, -0.9733063 ],
       [-0.67733085, -0.5857445 , -0.26893663, -0.97336185],
       [-0.9753403 , -0.9445741 , -0.92894083, -0.9594767 ],
       [-0.03409195, -0.99189407, -0.9978291 , -0.58539414],
       [-0.8320359 , -0.20549214, -0.90881103, -0.9152555 ]],
      dtype=float32)>


## 4 Automatic Differentiation 

Tensorflow can compute the partial derivatives of a computation with respect to the inputs for us. 

`tf.GradientTape` is a context in which all the operations involving the variables are recorded. As a result, after the computation, we can ask it for the gradients.

In [20]:
def f(a, b, d=3):
    """Return ``a**2 + d * b``, the toy function differentiated in this section.

    Args:
        a: Value that is squared (via ``tf.pow``).
        b: Value that is multiplied by ``d``.
        d: Multiplier applied to ``b``; defaults to 3.

    Returns:
        The sum of the squared ``a`` and the scaled ``b``.
    """
    squared = tf.pow(a, 2)
    scaled = d * b
    return squared + scaled


# Note: these rebind `a` and `b` to fresh one-element Variables.
a = tf.Variable([4], dtype=tf.float32)
b = tf.Variable([5], dtype=tf.float32)

# Run the computation inside the tape so it is recorded.
with tf.GradientTape() as tape:
    c = f(a, b)

# For c = a**2 + 3*b: dc/da = 2*a = 8 and dc/db = 3.
pprint(tape.gradient(c, [a, b]))

[<tf.Tensor: id=97, shape=(1,), dtype=float32, numpy=array([8.], dtype=float32)>,
 <tf.Tensor: id=104, shape=(1,), dtype=float32, numpy=array([3.], dtype=float32)>]


Note that by default the context only keeps track of `Variable`s. If you want to keep track of other tensors, you just need to ask for it.

In [21]:
# `d` is a constant tensor, not a Variable, so it has to be watched explicitly.
d = tf.constant(3, dtype=tf.float32)
with tf.GradientTape() as tape:
    tape.watch(d)
    c = f(a, b, d)

# For c = a**2 + d*b: dc/dd = b = 5.
pprint(tape.gradient(c, [d]))

[<tf.Tensor: id=126, shape=(), dtype=float32, numpy=5.0>]


Also note that the tape is not persistent: it is released once used, unless it is specifically asked to be persistent. In that case, you need to manually delete it to free up resources.

In [22]:
with tf.GradientTape() as tape:
    c = f(a, b)

print(tape.gradient(c, [a]))

# A non-persistent tape can only be asked for gradients once, so the second
# call raises RuntimeError. The error is the point of this cell: catch it and
# show the message so the notebook still runs from top to bottom.
try:
    print(tape.gradient(c, [b]))
except RuntimeError as err:
    print('RuntimeError: {}'.format(err))

[<tf.Tensor: id=144, shape=(1,), dtype=float32, numpy=array([8.], dtype=float32)>]


RuntimeError: GradientTape.gradient can only be called once on non-persistent tapes.

In [23]:
# A persistent tape can be asked for gradients more than once.
with tf.GradientTape(persistent=True) as tape:
    c = f(a, b)

for variable in (a, b):
    print(tape.gradient(c, [variable]))

# Drop the persistent tape explicitly once it is no longer needed.
del tape

[<tf.Tensor: id=166, shape=(1,), dtype=float32, numpy=array([8.], dtype=float32)>]
[<tf.Tensor: id=188, shape=(1,), dtype=float32, numpy=array([3.], dtype=float32)>]


## 5 Linear Regression Example

With all this we can start writing and training models already.

In [24]:
# Ground-truth weights as a (5, 1) column vector; the model should recover them.
true_weights = tf.constant([1, 2, 3, 4, 5], dtype=tf.float32)[:, tf.newaxis]

# Random inputs and their noiseless targets. These are already float32 tensors,
# so they do not need to be wrapped in tf.constant again.
x = tf.random.uniform((5, 5), dtype=tf.float32)
y = x @ true_weights

# Trainable parameters, randomly initialised.
weights = tf.Variable(tf.random.uniform((5, 1)), dtype=tf.float32)

# Gradient-descent hyperparameters.
learning_rate = .5
max_epochs = 1000


def forward_pass(x, weights):
    """Compute the linear model's predictions as the matrix product ``x @ weights``.

    Args:
        x: Input matrix, one example per row.
        weights: Weight matrix (a column vector in this notebook).

    Returns:
        The predictions ``x @ weights``.
    """
    return x @ weights

In [25]:
for step in range(max_epochs):
    # Record the forward pass so the tape can differentiate the loss.
    with tf.GradientTape() as tape:
        y_hat = forward_pass(x, weights)
        loss = tf.reduce_mean(tf.square(y - y_hat))

    # Report the loss (computed before this step's update) every 100 iterations.
    if step % 100 == 0:
        print('mse loss at iteration {} is {:5.4f}'.format(step, loss))

    # Plain gradient descent: weights <- weights - learning_rate * gradient.
    gradients = tape.gradient(loss, weights)
    weights.assign_sub(learning_rate * gradients)

mse loss at iteration 0 is 46.4647
mse loss at iteration 100 is 0.0013
mse loss at iteration 200 is 0.0002
mse loss at iteration 300 is 0.0001
mse loss at iteration 400 is 0.0000
mse loss at iteration 500 is 0.0000
mse loss at iteration 600 is 0.0000
mse loss at iteration 700 is 0.0000
mse loss at iteration 800 is 0.0000
mse loss at iteration 900 is 0.0000


In [26]:
# Show the learned weights; they should be close to the true weights 1..5.
weights

<tf.Variable 'Variable:0' shape=(5, 1) dtype=float32, numpy=
array([[1.0025035],
       [1.9981416],
       [3.0003157],
       [3.9995728],
       [5.000189 ]], dtype=float32)>