In [1]:
### Eager execution:

# Creating and using Tensor
# Using GPU acceleration
# Datasets

In [2]:
from __future__ import absolute_import, division, print_function

# import TensorFlow
import tensorflow as tf

# Enable eager execution (a more interactive front-end): the operations
# in the cells below return Tensors holding concrete values when called.
tf.enable_eager_execution()

W0830 17:02:35.616378 140736020308864 __init__.py:690] 

  TensorFlow's `tf-nightly` package will soon be updated to TensorFlow 2.0.

  Please upgrade your code to TensorFlow 2.0:
    * https://www.tensorflow.org/beta/guide/migration_guide

  Or install the latest stable TensorFlow 1.X release:
    * `pip install -U "tensorflow==1.*"`

  Otherwise your code may be broken by the change.

  


In [3]:
### TENSOR
# A Tensor is a multi-dimensional array with a specific shape and data type
# A Tensor can be stored on the GPU.
# TF offers many operations that take Tensors as input and output Tensors
# These operations automatically convert native Python data types to Tensors

# Each operation accepts plain Python values and returns a Tensor
basic_results = [
    tf.add(1, 2),                     # scalar addition
    tf.add([1, 2], [3, 4]),           # element-wise addition of two vectors
    tf.square(5),
    tf.reduce_sum([1, 2, 3]),         # sum of the elements across dimensions
    tf.encode_base64('hello world'),  # encode a string into base64 format
]
for result in basic_results:
    print(result)

# The standard operators (+, -, *, /) are supported on Tensors as well
sum_of_squares = tf.square(2) + tf.square(3)
print(sum_of_squares)

tf.Tensor(3, shape=(), dtype=int32)
tf.Tensor([4 6], shape=(2,), dtype=int32)
tf.Tensor(25, shape=(), dtype=int32)
tf.Tensor(6, shape=(), dtype=int32)
tf.Tensor(b'aGVsbG8gd29ybGQ', shape=(), dtype=string)
tf.Tensor(13, shape=(), dtype=int32)


In [4]:
# Matrix product of a (1, 1) matrix with a (1, 2) matrix
x = tf.matmul([[1]], [[1, 2]])

# Report the static properties carried by the resulting Tensor
for label, value in (('Shape: ', x.shape), ('Data type', x.dtype)):
    print(label, value)

Shape:  (1, 2)
Data type <dtype: 'int32'>


In [5]:
# The differences between numpy arrays and Tensors are:
# 1/ Tensors can make use of accelerator memory (GPU)
# 2/ Tensors are IMMUTABLE

# Conversion between numpy arrays and Tensors is automatic (and cheap: they often share the same memory representation)

import numpy as np

# 3x3 numpy array filled with ones
ndarray = np.ones(shape=(3, 3))

# A TensorFlow operation fed a numpy array returns a Tensor
tensor = tf.multiply(ndarray, 42)  # element-wise multiplication
print('TensorFlow converts numpy array into Tensor automatically: \n')
print(tensor)

# A numpy function fed a Tensor returns a numpy array
tensor_plus_one = np.add(tensor, 1)
print('Similarly for numpy:  \n')
print(tensor_plus_one)

# Explicit conversion from Tensor to numpy array
tensor_as_ndarray = tensor.numpy()
print('the .numpy() method converts a Tensor to a numpy array')
print(tensor_as_ndarray)

TensorFlow converts numpy array into Tensor automatically: 

tf.Tensor(
[[42. 42. 42.]
 [42. 42. 42.]
 [42. 42. 42.]], shape=(3, 3), dtype=float64)
Similarly for numpy:  

[[43. 43. 43.]
 [43. 43. 43.]
 [43. 43. 43.]]
the .numpy() method converts a Tensor to a numpy array
[[42. 42. 42.]
 [42. 42. 42.]
 [42. 42. 42.]]


In [6]:
### GPU Acceleration
# Many tensorflow operations can be accelerated by using GPU for computation(it is done automatically)

# 3x3 Tensor of random values
x = tf.random_uniform((3, 3))
print(x)

# Ask TensorFlow whether a GPU can be used
gpu_available = tf.test.is_gpu_available()
print('\nGPU available: ', gpu_available)

# x.device provides the string name of the device hosting the content of the tensor
x_is_on_gpu0 = x.device.endswith('GPU:0')
print('Is the tensor x on GPU #0 ?', x_is_on_gpu0)

tf.Tensor(
[[0.47963166 0.9232795  0.27804363]
 [0.24093306 0.05942643 0.17866504]
 [0.92106414 0.41756856 0.06134248]], shape=(3, 3), dtype=float32)

GPU available:  False
Is the tensor x on GPU #0 ? False


In [7]:
# We can explicitly decide which device (GPU, CPU) an operation is executed on
# This is called Explicit Device Placement

import time

def time_matmul(x, n_iter=10):
    ''' Time repeated matrix multiplications of x with itself.

        Calls tf.matmul(x, x) n_iter times in a row and prints the total
        elapsed wall-clock time in seconds. Nothing is returned.

        x      : value passed as both operands of tf.matmul
        n_iter : number of multiplications to run (default: 10)
    '''
    t0 = time.time()
    # `_` is used as loop variable so the builtin iter() is not shadowed
    for _ in range(n_iter):
        tf.matmul(x, x)
    duration = time.time() - t0
    print('{} matmul duration: '.format(n_iter), duration)
    
# Compare the running time on CPU and on GPU (if available)

# Force execution on CPU
print('On CPU')
with tf.device('CPU:0'):
    x = tf.random_uniform((1000, 1000))
    assert x.device.endswith('CPU:0')
    time_matmul(x)

# Force execution on GPU if available
if tf.test.is_gpu_available():
    print('On GPU')
    with tf.device('GPU:0'):
        x = tf.random_uniform((1000, 1000))
        assert x.device.endswith('GPU:0')
        # Benchmark the 1000x1000 tensor created on the GPU, i.e. the same
        # workload as in the CPU branch, so that both timings are comparable
        time_matmul(x)
else:
    print('GPU not available')

On CPU
10 matmul duration:  0.27369070053100586
GPU not available


In [8]:
### Datasets 
# 1/ How to create dataset
# 2/ Build pipeline to feed data to a model

In [9]:
# Create a source dataset

# There are many other ways to create a Dataset object; two are shown here
# 1st method: build the dataset from an in-memory list
ds_tensors = tf.data.Dataset.from_tensor_slices([1, 2, 3, 4, 5, 6])

# 2nd method: build the dataset from the lines of a text file
import tempfile

# Create a temporary text file. NamedTemporaryFile hands back an already-open
# file object that is closed when the `with` block exits, so no OS-level file
# descriptor is left open (mkstemp() returns a raw descriptor that the caller
# has to close). delete=False keeps the file on disk so it can be read below.
with tempfile.NamedTemporaryFile(mode='w', delete=False) as f:
    # NOTE: lines 2 and 3 start with four spaces and the text ends with a
    # whitespace-only fourth line; this whitespace is part of the file content
    # and therefore shows up in the dataset elements.
    f.write('Line 1\n    Line 2\n    Line 3\n    ')
    filename = f.name

ds_file = tf.data.TextLineDataset(filename)

In [10]:
# Now we can apply transformation to the source dataset

# map(): This transformation applies f() to each element of this dataset,
# and returns a new dataset containing the transformed elements,
# in the same order as they appeared in the input.

# shuffle(buffer_size): Randomly shuffles the elements of this dataset. 
# This dataset fills a buffer with buffer_size elements, then randomly samples elements from this buffer,
# replacing the selected elements with new elements.

# batch(batch_size): Combines consecutive elements of this dataset into batches.
# The tensors in the resulting element will have an additional outer dimension, which will be batch_size 

# Square every element, shuffle with a buffer of 2 elements, then batch by 2
ds_tensors = (
    ds_tensors
    .map(tf.square)
    .shuffle(2)
    .batch(2)
)

# Group the lines of the text file two by two
ds_file = ds_file.batch(2)

In [11]:
# Iterate over the source dataset 
# When eager execution is enabled Dataset objects support iteration.

# Print a header followed by every batch, first for ds_tensors then for ds_file
for header, dataset in (('Elements in ds_tensor: ', ds_tensors),
                        ('\n Elements in ds_file: ', ds_file)):
    print(header)
    for batch in dataset:
        print(batch)

Elements in ds_tensor: 
tf.Tensor([4 1], shape=(2,), dtype=int32)
tf.Tensor([ 9 25], shape=(2,), dtype=int32)
tf.Tensor([36 16], shape=(2,), dtype=int32)

 Elements in ds_file: 
tf.Tensor([b'Line 1' b'    Line 2'], shape=(2,), dtype=string)
tf.Tensor([b'    Line 3' b'    '], shape=(2,), dtype=string)
