## Introduction to TensorFlow

In [None]:
import h5py
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.python.framework.ops import EagerTensor
from tensorflow.python.ops.resource_variable_ops import ResourceVariable
import time

### Checking TensorFlow Version

In [None]:
# Evaluate the installed TensorFlow version string so the notebook displays it.
tf.__version__

## Basic Optimization with GradientTape

### Datasets
Since TensorFlow Datasets are generators, you can't directly access their contents unless you iterate over them in a for loop, or explicitly create a Python iterator using `iter` and consume its
elements using `next`. You can also inspect the `shape` and `dtype` of each element using the `element_spec` attribute.

In [None]:
# Open the HDF5 files holding the training and test splits in read-only mode.
# NOTE(review): these file handles are never closed in the visible cells --
# confirm whether they can be closed once the tf.data datasets are built.
train_dataset = h5py.File('datasets/train_signs.h5', "r")
test_dataset = h5py.File('datasets/test_signs.h5', "r")

In [None]:
# Wrap the training images and labels in tf.data datasets (sliced along the first axis).
x_train = tf.data.Dataset.from_tensor_slices(train_dataset['train_set_x'])
y_train = tf.data.Dataset.from_tensor_slices(train_dataset['train_set_y'])

# Same for the test split.
x_test = tf.data.Dataset.from_tensor_slices(test_dataset['test_set_x'])
y_test = tf.data.Dataset.from_tensor_slices(test_dataset['test_set_y'])

In [None]:
# Show which Python class the training-image dataset object is.
type(x_train)

In [None]:
# element_spec describes the shape and dtype of one dataset element.
x_train.element_spec

In [None]:
# Pull the first element through a Python iterator and print its shape.
print(next(iter(x_train)).shape)

In [None]:
# Gather every distinct label value that occurs in the training labels.
unique_labels = {element.numpy() for element in y_train}
print(unique_labels)

In [None]:
# Draw the first 25 training images in a 5x5 grid, each titled with its label.
images_iter, labels_iter = iter(x_train), iter(y_train)
plt.figure(figsize=(7, 7))
for i in range(25):
    # Fetch the next image/label pair before drawing; the two iterators
    # advance in lockstep so image i is always paired with label i.
    image = next(images_iter).numpy().astype("uint8")
    label = next(labels_iter).numpy().astype("uint8")

    ax = plt.subplot(5, 5, i + 1)
    plt.imshow(image)
    plt.title(label)
    plt.axis("off")

There's one more difference between TensorFlow datasets and NumPy arrays: if you need to transform a dataset, you invoke its `map` method, which applies the function passed as an argument to each of the elements.

In [None]:
def normalize(image):
    """
    Scale an image by 1/255 and flatten it into a 1-D vector.

    Arguments:
    image -- tensor to transform (cast to float32 before scaling)

    Returns:
    A float32 tensor of rank 1 holding image / 255.
    """
    scaled = tf.cast(image, tf.float32) / 255
    # -1 lets TensorFlow infer the flattened length from the input.
    return tf.reshape(scaled, [-1,])

In [None]:
# Apply normalize to every image of both splits via Dataset.map.
new_train = x_train.map(normalize)
new_test = x_test.map(normalize)

In [None]:
# Check the element spec of the mapped (normalized) training dataset.
new_train.element_spec

In [None]:
# Print the first normalized training example.
print(next(iter(new_train)))

In [None]:
# Print the shape of the first element of each normalized split.
print(f"new_train shape: {next(iter(new_train)).shape}")
print(f"new_test shape: {next(iter(new_test)).shape}")

### Linear Function
$Y = WX + b$

In [None]:
# Build a constant tensor from a (3, 1) array of standard-normal samples and print it.
X = tf.constant(np.random.randn(3,1), name="X")
print(X)

In [None]:
def linear_function():
    """
    Compute Y = WX + b on randomly initialised constant tensors.

    NumPy's global random generator is re-seeded with 1 on every call, so the
    same values are drawn each time.
        X -> (3,1)
        W -> (4,3)
        b -> (4,1)

    Returns:
    Y -- tensor holding WX + b
    """
    np.random.seed(1)

    # Draw order (X, then W, then b) determines the sampled values.
    X = tf.constant(np.random.randn(3, 1), name="X")
    W = tf.constant(np.random.randn(4, 3), name="W")
    b = tf.constant(np.random.randn(4, 1), name="b")

    # Method-style chaining such as W.matmul(X).add(b) was noted by the
    # original author as not working; the overloaded operators are used
    # here in place of tf.matmul / tf.add.
    return W @ X + b

In [None]:
result = linear_function()
print(result)

# The result must be an eager tensor, i.e. produced through the TensorFlow API.
assert type(result) == EagerTensor, "Use the TensorFlow API"
# Reference values for the samples drawn after np.random.seed(1) in linear_function.
assert np.allclose(result, [[-2.15657382], [ 2.95891446], [-1.08926781], [-0.84538042]]), "Error"
# "\033[92m" is the ANSI escape sequence for bright green text.
print("\033[92mAll test passed")

### Computing the Sigmoid

In [None]:
def sigmoid(Z):
    """
    Compute the sigmoid of Z.

    Arguments:
    Z -- input value or tensor; it is cast to float32 first

    Returns:
    A float32 tensor holding sigmoid(Z).
    """
    return tf.keras.activations.sigmoid(tf.cast(Z, tf.float32))

In [None]:
# Show the type, dtype and a few sample values of the sigmoid output.
# print() joins its arguments with a single space and applies str() to each,
# which yields the same text as explicit string concatenation.
result = sigmoid(-1)
print("type:", type(result))
print("dtype:", result.dtype)
print("sigmoid(-1) =", result)
print("sigmoid(0) =", sigmoid(0.0))
print("sigmoid(12) =", sigmoid(12))

def sigmoid_test(target):
    """
    Check a sigmoid implementation.

    Arguments:
    target -- the function under test; called with 0, -1 and 12

    Raises:
    AssertionError if the result is not a float32 eager tensor or if any of
    the sample values is off.
    """
    result = target(0)
    assert isinstance(result, EagerTensor)
    assert result.dtype == tf.float32

    # Exercise `target` (not the module-level `sigmoid`) so the function that
    # was passed in is the one being verified, and compare with a tolerance
    # because the outputs are float32.
    assert np.isclose(result, 0.5), "Error"
    assert np.isclose(target(-1), 0.26894143), "Error"
    assert np.isclose(target(12), 0.99999386), "Error"

    print("\033[92mAll test passed")

sigmoid_test(sigmoid)

### Using One Hot Encodings
Define a function that one-hot encodes a single label, then apply it to every label with the dataset's `map` method.

In [None]:
def one_hot_matrix(label, depth=6):
    """
    One-hot encode a single label.

    Arguments:
    label -- the categorical label to encode
    depth -- number of classes, i.e. the length of the encoded vector

    Returns:
    one_hot -- tensor of shape (depth,) with the one-hot encoding
    """
    encoded = tf.one_hot(label, depth, axis=0)
    # Force a flat (depth,) vector regardless of how the label was wrapped.
    return tf.reshape(encoded, (depth,))

In [None]:
def one_hot_matrix_test(target):
    """
    Check a one-hot encoder against a scalar-tensor label and a list label.

    Arguments:
    target -- the function under test, called as target(label, depth)
    """
    depth = 4
    # (label, expected encoding, message shown when the values are wrong)
    cases = (
        (tf.constant(1), [0., 1., 0., 0.], "Wrong output. Use tf.one_hot"),
        ([2], [0., 0., 1., 0.], "Wrong output. Use tf.reshape as instructed"),
    )

    for number, (label, expected, message) in enumerate(cases, start=1):
        result = target(label, depth)
        print(f"Test {number}:", result)
        assert result.shape[0] == depth, "Use the parameter depth"
        assert np.allclose(result, expected), message

    print("\033[92mAll test passed")

one_hot_matrix_test(one_hot_matrix)

In [None]:
# One-hot encode every label of both splits (one_hot_matrix's default depth of 6 applies).
new_y_test = y_test.map(one_hot_matrix)
new_y_train = y_train.map(one_hot_matrix)

In [None]:
# Print the first one-hot encoded test label.
print(next(iter(new_y_test)))

### Initialize Parameters
Now you'll initialize a vector of numbers with the Glorot initializer. The function you'll be calling is `tf.keras.initializers.GlorotNormal`, which draws samples from a truncated normal distribution centered on 0, with `stddev = sqrt(2 / (fan_in + fan_out))`, where `fan_in` is the number of input units and `fan_out` is the number of output units, both in the weight tensor. 

To initialize with zeros or ones you could use `tf.zeros()` or `tf.ones()` instead. 

 - `tf.keras.initializers.GlorotNormal(seed=1)`
 - `tf.Variable(initializer(shape=()))`

In [None]:
def initialize_parameters():
    """
    Create the weights and biases of the three-layer network.

    Every tensor, biases included, is drawn from one GlorotNormal initializer
    seeded with 1 and wrapped in a tf.Variable.

    Returns:
    parameters -- dictionary with keys "W1", "b1", "W2", "b2", "W3", "b3"
    """
    initializer = tf.keras.initializers.GlorotNormal(seed=1)

    # Kept in this order so the initializer is called in the same sequence
    # (W1, b1, W2, b2, W3, b3) on every run.
    layer_shapes = (
        ("W1", (25, 12288)),
        ("b1", (25, 1)),
        ("W2", (12, 25)),
        ("b2", (12, 1)),
        ("W3", (6, 12)),
        ("b3", (6, 1)),
    )

    return {
        name: tf.Variable(initializer(shape=shape))
        for name, shape in layer_shapes
    }

In [None]:
def initialize_parameters_test(target):
    """
    Check the variables returned by a parameter-initialisation function.

    For every returned entry this verifies the variable type, the shape, and
    that mean and standard deviation fall in the ranges accepted for a
    GlorotNormal initialisation.

    Arguments:
    target -- zero-argument function returning the parameter dictionary
    """
    expected_shapes = {
        "W1": (25, 12288),
        "b1": (25, 1),
        "W2": (12, 25),
        "b2": (12, 1),
        "W3": (6, 12),
        "b3": (6, 1),
    }

    for key, variable in target().items():
        shape = tuple(variable.shape)
        print(f"{key} shape: {shape}")
        assert type(variable) == ResourceVariable, "All parameter must be created using tf.Variable"
        assert shape == expected_shapes[key], f"{key}: wrong shape"

        values = variable.numpy()
        assert np.abs(np.mean(values)) < 0.5,  f"{key}: Use the GlorotNormal initializer"
        spread = np.std(values)
        assert 0 < spread < 1, f"{key}: Use the GlorotNormal initializer"

    print("\033[92mAll test passed")

initialize_parameters_test(initialize_parameters)

## Building Neural Network in TensorFlow
- Implement forward propagation
- Retrieve the gradients and train the model

In [None]:
# Module-level parameter dictionary; forward_propagation_test reads this global.
parameters = initialize_parameters()

### Forward Propagation

In [None]:
def forward_propagation(X, parameters):
    """
    Run the forward pass LINEAR -> RELU -> LINEAR -> RELU -> LINEAR.

    Arguments:
    X -- input data, of shape (input size, number of examples)
    parameters -- python dictionary containing the parameters "W1", "b1", "W2", "b2", "W3", "b3"
                  (shapes as created by initialize_parameters)

    Returns:
    Z3 -- the output of the last LINEAR unit (no activation applied)
    """
    W1, b1 = parameters["W1"], parameters["b1"]
    W2, b2 = parameters["W2"], parameters["b2"]
    W3, b3 = parameters["W3"], parameters["b3"]
    relu = tf.keras.activations.relu

    A1 = relu(tf.matmul(W1, X) + b1)
    A2 = relu(tf.matmul(W2, A1) + b2)

    # The final layer stays linear; the loss is expected to handle the logits.
    return tf.matmul(W3, A2) + b3

In [None]:
def forward_propagation_test(target, examples):
    """
    Check a forward-propagation function on the first minibatch of two examples.

    Arguments:
    target -- function under test, called as target(X, parameters) where
              `parameters` is the module-level parameter dictionary
    examples -- dataset of flattened examples; batched in pairs here
    """
    expected = [[-0.13430887,  0.14086473],
                [ 0.21588647, -0.02582335],
                [ 0.7059658,   0.6484556 ],
                [-1.1260961,  -0.9329492 ],
                [-0.20181894, -0.3382722 ],
                [ 0.9558965,   0.94167566]]

    # Only the first minibatch is checked.
    for minibatch in examples.batch(2).take(1):
        # Batches arrive as (examples, features); the network wants (features, examples).
        forward_pass = target(tf.transpose(minibatch), parameters)
        print(forward_pass)
        assert type(forward_pass) == EagerTensor, "Your output is not a tensor"
        assert forward_pass.shape == (6, 2), "Last layer must use W3 and b3"
        assert np.allclose(forward_pass, expected), "Output does not match"

    print("\033[92mAll test passed")

forward_propagation_test(forward_propagation, new_train)

### Compute Cost
```
tf.keras.metrics.categorical_crossentropy(
    y_true, y_pred, from_logits=False, label_smoothing=0.0, axis=-1
)
```
`from_logits`: whether `y_pred` is expected to be a logits tensor. By default (`from_logits=False`), `y_pred` is assumed to encode a probability distribution.

In [None]:
def compute_cost(logits, labels):
    """
    Compute the mean categorical cross-entropy cost.

    Arguments:
    logits -- output of the last LINEAR unit, of shape (6, num_examples)
    labels -- "true" labels vector, same shape as logits

    Returns:
    cost - Tensor of the cost function
    """
    # Keras expects (num_examples, num_classes), so both inputs are transposed.
    # from_logits=True because the inputs are raw linear outputs rather than
    # probabilities (the Keras default, from_logits=False, assumes probabilities).
    per_example_loss = tf.keras.losses.categorical_crossentropy(
        tf.transpose(labels),
        tf.transpose(logits),
        from_logits=True,
    )

    return tf.reduce_mean(per_example_loss)

In [None]:
def compute_cost_test(target, Y):
    """
    Check a cost function on fixed logits and the first two labels of Y.

    Arguments:
    target -- function under test, called as target(logits, labels)
    Y -- dataset of one-hot labels; batched in pairs here
    """
    logits = tf.constant([[ 2.4048107,   5.0334096 ],
                          [-0.7921977,  -4.1523376 ],
                          [ 0.9447198,  -0.46802214],
                          [ 1.158121,    3.9810789 ],
                          [ 4.768706,    2.3220146 ],
                          [ 6.1481323,   3.909829  ]])

    # Only the first minibatch is used; labels are transposed to match the
    # (classes, examples) layout of the logits.
    for minibatch in Y.batch(2).take(1):
        result = target(logits, tf.transpose(minibatch))

    print(result)
    assert(type(result) == EagerTensor), "Use the TensorFlow API"
    expected = (0.25361037 + 0.5566767) / 2.0
    assert (np.abs(result - expected) < 1e-7), "Test does not match. Did you get the mean of your cost functions?"

    print("\033[92mAll test passed")

compute_cost_test(compute_cost, new_y_train )

### Train the Model
- `dataset = dataset.prefetch(8)` (`prefetch` is a method of `tf.data.Dataset`)

What this does is prevent a memory bottleneck that can occur when reading from disk. `prefetch()` sets aside some data and keeps it ready for when it's needed. It does this by creating a source dataset from your input data, applying a transformation to preprocess the data, then iterating over the dataset the specified number of elements at a time. This works because the iteration is streaming, so the data doesn't need to fit into memory.