# Initialization

In [2]:
num_GPU = 0

import tensorflow as tf
print('Tensorflow version: ', tf.__version__)

gpus = tf.config.experimental.list_physical_devices("GPU")
print('Number of GPUs available :', len(gpus))

tf.config.experimental.set_visible_devices(gpus[num_GPU], 'GPU')
tf.config.experimental.set_memory_growth(gpus[num_GPU], True)
print('Only GPU number', num_GPU, 'used')

Tensorflow version:  2.0.0
Number of GPUs available : 4
Only GPU number 0 used


In [3]:
import os
import copy
import sys
import time
from datetime import datetime

import numpy as np
import pickle
import scipy
import scipy.fftpack as fp
import matplotlib
import matplotlib.pyplot as plt  
from matplotlib.mlab import psd
from tqdm import tqdm_notebook

In [33]:
from IPython.display import display, HTML

display(HTML(data="""
<style>
    div#notebook-container    { width: 80%; }
    div#menubar-container     { width: 100%; }
    div#maintoolbar-container { width: 100%; }
</style>
"""))

# Tensorflow for beginners

In [5]:
from tensorflow import keras
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

In [7]:
# Load dataset, contains 4 Numpy arrays
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Convert Numpy arrays to Tensors
x_train = tf.convert_to_tensor(x_train, dtype=tf.float32) # [60000, 28, 28]
y_train = tf.convert_to_tensor(y_train, dtype=tf.int32) # [60000]

x_test = tf.convert_to_tensor(x_test, dtype=tf.float32) # [10000, 28, 28]
y_test = tf.convert_to_tensor(y_test, dtype=tf.int32) # [10000]

# Scale the dataset and add a channel dimension
x_train = x_train/255.0 
x_train = tf.expand_dims(x_train, axis=-1) # [60000, 28, 28, 1]

x_test = x_test/255.0 
x_test = tf.expand_dims(x_test, axis=-1) # [10000, 28, 28, 1]

In [8]:
my_model = tf.keras.models.Sequential([
    
    Conv2D(filters=6, kernel_size=8, activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(filters=15, kernel_size=4, activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    
    Flatten(),
    
    Dense(128, activation='relu'),
    Dense(10, activation='softmax') # Outputs a probability distribution
])

In [9]:
my_model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy', # Only one correct class
              metrics=['accuracy']) # Percentage of good predictions

my_model.fit(x_train, y_train, epochs=3, batch_size=1024)

my_model.evaluate(x_test,  y_test, verbose=2)

Train on 60000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
10000/1 - 1s - loss: 0.1376 - accuracy: 0.9435


[0.1858606786608696, 0.9435]

# Tensorflow for experts

In [10]:
BATCH_SIZE = 1024

train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(BATCH_SIZE)

test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(BATCH_SIZE)

In [11]:
from tensorflow.keras.layers import Layer

class Linear(Layer):
    """y = Wx + b"""

    def __init__(self, units=32): # Called when creating the layer
        super(Linear, self).__init__()
        self.units = units # units = number of neurons = output shape

    def build(self, input_shape): # Called the first time the layer is used
        self.W = self.add_weight(shape=(input_shape[-1], self.units), 
                               initializer='random_normal', trainable=True)
        self.b = self.add_weight(shape=(self.units,),
                               initializer='random_normal', trainable=True)

    def call(self, inputs): # What the layer actually does
        return tf.matmul(inputs, self.W) + self.b

In [14]:
class DoubleDense(Layer):
    """" Linear-relu + Linear-Softmax """
    
    def __init__(self, nb_classes): # Called when creating the layer
        super(DoubleDense, self).__init__()
        self.nb_classes = nb_classes
        
    def build(self, input_shape):  # Called the first time the layer is used
        self.linear_1 = Linear(units=128)
        self.linear_2 = Linear(units=self.nb_classes)

    def call(self, inputs): # What the layer actually does
        x = tf.nn.relu(self.linear_1(inputs))
        x = tf.nn.softmax(self.linear_2(x)) # Outputs a probability distribution
        return x

In [15]:
class ConvPool2D(Layer):
    """ Conv2D-relu + MaxPooling2D """
    
    def __init__(self, nb_kernels, kernel_size): # Called at layer creation
        super(ConvPool2D, self).__init__()
        self.nb_kernels = nb_kernels
        self.kernel_size = kernel_size
        
    def build(self, input_shape):  # Called the first time the layer is used
        self.conv_2D = Conv2D(filters=self.nb_kernels,
                              kernel_size=self.kernel_size, 
                              activation='relu')
        self.pool_2D = MaxPooling2D(pool_size=(2, 2))

    def call(self, inputs): # What the layer actually does
        x = self.conv_2D(inputs)
        x = self.pool_2D(x)
        return x

In [18]:
from tensorflow.keras import Model

class MyModel(Model):
    def __init__(self, nb_classes):  # Called when creating the model
        super(MyModel, self).__init__()
        self.nb_classes = nb_classes
    
    def build(self, input_shape):  # Called the first time the layer is used    
        self.conv_pool_1 = ConvPool2D(nb_kernels=6, kernel_size=8)
        self.conv_pool_2 = ConvPool2D(nb_kernels=15, kernel_size=4)
        self.flatten = Flatten()
        self.double_dense = DoubleDense(nb_classes=self.nb_classes)
    
    def call(self, inputs): # What the model actually does
        self.x_0 = self.conv_pool_1(inputs)
        self.x_1 = self.conv_pool_2(self.x_0)
        self.x_2 = self.flatten(self.x_1)
        self.predictions = self.double_dense(self.x_2)
        return self.predictions

In [19]:
loss_function = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam()

my_model = MyModel(nb_classes=10)
my_model.compile(optimizer, loss_function)

In [23]:
my_model.fit(train_ds, epochs=3);

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [24]:
my_model.summary()

Model: "my_model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv_pool2d (ConvPool2D)     multiple                  390       
_________________________________________________________________
conv_pool2d_1 (ConvPool2D)   multiple                  1455      
_________________________________________________________________
flatten (Flatten)            multiple                  0         
_________________________________________________________________
double_dense (DoubleDense)   multiple                  18698     
Total params: 20,543
Trainable params: 20,543
Non-trainable params: 0
_________________________________________________________________


# Building a custom training loop

In [25]:
# Define the loss function
loss_function = tf.keras.losses.SparseCategoricalCrossentropy()

# Define the optimizer
optimizer = tf.keras.optimizers.Adam()

# Define the metrics
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')

In [26]:
x = tf.Variable(3.0)
with tf.GradientTape() as tape:
    y = tf.square(x)
dy_dx = tape.gradient(y, x)
print(dy_dx)

tf.Tensor(6.0, shape=(), dtype=float32)


In [27]:
# One SGD step with a given batch
def train_step(images, labels):
    
    # Open a GradientTape
    with tf.GradientTape() as tape:
        
        #Forward pass
        predictions = my_model(images)
        # Loss for this batch
        loss = loss_function(labels, predictions)
    
    # Get gradients of loss w.r.t. the weights
    gradients = tape.gradient(loss, my_model.trainable_variables)
    # Update the weights according to our optimizer
    optimizer.apply_gradients(zip(gradients, my_model.trainable_variables))
    
    # Save loss and accuracy
    train_loss(loss)
    train_accuracy(labels, predictions)

In [28]:
# Test the model on a given batch
def test_step(images, labels):
    
    # Forward pass 
    predictions = my_model(images)
    # Loss for this batch
    t_loss = loss_function(labels, predictions)
    
    # Save loss and accuracy
    test_loss(t_loss)
    test_accuracy(labels, predictions)

In [29]:
my_model = MyModel(nb_classes=10)
start = time.time()

# Iterate over 3 epochs
for epoch in range(3):
    # Train over every batch in the training dataset
    for images, labels in train_ds:
        train_step(images, labels)
        
    # Test over every batch in the testing dataset
    for test_images, test_labels in test_ds:
        test_step(test_images, test_labels)
    
    # Print result
    template = 'Epoch {:.0f},   Loss: {:.3f}, Accuracy: {:.3f}    '+ \
               'Test Loss: {:.3f}, Test Accuracy: {:.3f}'
    print(template.format(epoch+1, train_loss.result(), train_accuracy.result()*100,
                          test_loss.result(), test_accuracy.result()*100))

    # Reset the metrics for the next epoch
    train_loss.reset_states()
    train_accuracy.reset_states()
    test_loss.reset_states()
    test_accuracy.reset_states()

# Display elapsed time
end = time.time()
print('TIME = ', end - start)

Epoch 1,   Loss: 1.376, Accuracy: 67.325    Test Loss: 0.508, Test Accuracy: 84.890
Epoch 2,   Loss: 0.419, Accuracy: 87.733    Test Loss: 0.325, Test Accuracy: 90.670
Epoch 3,   Loss: 0.306, Accuracy: 91.063    Test Loss: 0.251, Test Accuracy: 92.750
TIME =  5.570061445236206


In [30]:
# Without @tf.function
my_model.x_2

<tf.Tensor: id=37538, shape=(784, 135), dtype=float32, numpy=
array([[1.6669028 , 3.6323135 , 0.        , ..., 2.73629   , 3.6377664 ,
        1.6620849 ],
       [0.47391742, 3.3834014 , 1.136492  , ..., 3.8753707 , 4.171783  ,
        3.6308408 ],
       [1.7861953 , 2.916195  , 0.        , ..., 3.0124679 , 3.9319437 ,
        2.3233078 ],
       ...,
       [1.1755902 , 3.4892776 , 0.        , ..., 4.403638  , 4.303809  ,
        2.549237  ],
       [1.625002  , 1.8590158 , 0.        , ..., 2.4259222 , 3.6148438 ,
        3.53547   ],
       [1.6129595 , 4.1189694 , 0.21221375, ..., 5.450149  , 4.1386952 ,
        5.3056626 ]], dtype=float32)>

 ### Build a **graph** to speed up training
 
 Adding the @tf.function decorator before the train_step and test_step function

In [48]:
# One SGD step with a given batch
@tf.function
def train_step(images, labels):
    
    # Open a GradientTape
    with tf.GradientTape() as tape:
        
        #Forward pass
        predictions = my_model(images)
        # Loss for this batch
        loss = loss_function(labels, predictions)
    
    # Get gradients of loss w.r.t. the weights
    gradients = tape.gradient(loss, my_model.trainable_variables)
    # Update the weights according to our optimizer
    optimizer.apply_gradients(zip(gradients, my_model.trainable_variables))
    
    # Save loss and accuracy
    train_loss(loss)
    train_accuracy(labels, predictions)

In [49]:
# Test the model on a given batch
@tf.function
def test_step(images, labels):
    
    # Forward pass 
    predictions = my_model(images)
    # Loss for this batch
    t_loss = loss_function(labels, predictions)
    
    # Save loss and accuracy
    test_loss(t_loss)
    test_accuracy(labels, predictions)

In [50]:
my_model = MyModel(nb_classes=10)
start = time.time()

# Iterate over 3 epochs
for epoch in range(3):
    # Train over every batch in the training dataset
    for images, labels in train_ds:
        train_step(images, labels)
        
    # Test over every batch in the testing dataset
    for test_images, test_labels in test_ds:
        test_step(test_images, test_labels)
    
    # Print result
    template = 'Epoch {:.0f},   Loss: {:.3f}, Accuracy: {:.3f}    '+ \
               'Test Loss: {:.3f}, Test Accuracy: {:.3f}'
    print(template.format(epoch+1, train_loss.result(), train_accuracy.result()*100,
                          test_loss.result(), test_accuracy.result()*100))

    # Reset the metrics for the next epoch
    train_loss.reset_states()
    train_accuracy.reset_states()
    test_loss.reset_states()
    test_accuracy.reset_states()

# Display elapsed time
end = time.time()
print('TIME = ', end - start)

Epoch 1,   Loss: 0.791, Accuracy: 76.390    Test Loss: 0.297, Test Accuracy: 91.380
Epoch 2,   Loss: 0.265, Accuracy: 91.920    Test Loss: 0.195, Test Accuracy: 93.950
Epoch 3,   Loss: 0.186, Accuracy: 94.363    Test Loss: 0.146, Test Accuracy: 95.510
TIME =  3.81099271774292


The training should be much quicker!

In [27]:
# With @tf.function
my_model.x_2

<tf.Tensor 'my_model_2/flatten/Reshape:0' shape=(784, 135) dtype=float32>

But we loose access to the value of the model’s attributes 

### Adding regularization

Using Keras layers' parameters

In [51]:
class ConvPool2D(Layer):
    """ Conv2D-relu + MaxPooling2D """

    def __init__(self, nb_kernels, kernel_size): # Called at layer creation
        super(ConvPool2D, self).__init__()
        self.nb_kernels = nb_kernels
        self.kernel_size = kernel_size
        
    def build(self, input_shape):  # Called the first time the layer is used
        self.conv_2D = Conv2D(filters=self.nb_kernels,
                              kernel_size=self.kernel_size, 
                              activation='relu',
                              kernel_regularizer=tf.keras.regularizers.l2(l=1.))
        self.pool_2D = MaxPooling2D(pool_size=(2, 2))

    def call(self, inputs): # What the layer actually does
        x = self.conv_2D(inputs)
        x = self.pool_2D(x)
        return x

Using the layers' *loss* property

In [52]:
from tensorflow.keras.layers import Layer

class Linear(Layer):
    """y = Wx + b"""

    def __init__(self, units=32): # Called when creating the layer
        super(Linear, self).__init__()
        self.units = units # units = number of neurons = output shape

    def build(self, input_shape): # Called the first time the layer is used
        self.W = self.add_weight(shape=(input_shape[-1], self.units), 
                               initializer='random_normal', trainable=True)
        self.b = self.add_weight(shape=(self.units,),
                               initializer='random_normal', trainable=True)

    def call(self, inputs): # What the layer actually does
        self.l1_reg = tf.reduce_sum(tf.abs(self.W)) + tf.reduce_sum(tf.abs(self.b))
        self.add_loss(self.l1_reg)
        return tf.matmul(inputs, self.W) + self.b

And adding those losses to the training loop

In [53]:
# One SGD step with a given batch
@tf.function
def train_step(images, labels):
    
    # Open a GradientTape
    with tf.GradientTape() as tape:
        
        #Forward pass
        predictions = my_model(images)
        # Loss for this batch
        loss = loss_function(labels, predictions)
        # Add extra losses created during this forward pass:
        loss += 1e-3 * sum(my_model.losses)
    
    # Get gradients of loss w.r.t. the weights
    gradients = tape.gradient(loss, my_model.trainable_variables)
    # Update the weights according to our optimizer
    optimizer.apply_gradients(zip(gradients, my_model.trainable_variables))
    
    # Save loss and accuracy
    train_loss(loss)
    train_accuracy(labels, predictions)

In [54]:
# Test the model on a given batch
@tf.function
def test_step(images, labels):
    
    # Forward pass 
    predictions = my_model(images)
    # Loss for this batch
    t_loss = loss_function(labels, predictions)
    # Add extra losses created during this forward pass:
    t_loss += 1e-3 * sum(my_model.losses)
    
    # Save loss and accuracy
    test_loss(t_loss)
    test_accuracy(labels, predictions)

In [33]:
my_model = MyModel(nb_classes=10)
start = time.time()

# Iterate over 3 epochs
for epoch in range(3):
    # Train over every batch in the training dataset
    for images, labels in train_ds:
        train_step(images, labels)
        
    # Test over every batch in the testing dataset
    for test_images, test_labels in test_ds:
        test_step(test_images, test_labels)
    
    # Print result
    template = 'Epoch {:.0f},   Loss: {:.3f}, Accuracy: {:.3f}    '+ \
               'Test Loss: {:.3f}, Test Accuracy: {:.3f}'
    print(template.format(epoch+1, train_loss.result(), train_accuracy.result()*100,
                          test_loss.result(), test_accuracy.result()*100))

    # Reset the metrics for the next epoch
    train_loss.reset_states()
    train_accuracy.reset_states()
    test_loss.reset_states()
    test_accuracy.reset_states()

# Display elapsed time
end = time.time()
print('TIME = ', end - start)

Epoch 1,   Loss: 1.436, Accuracy: 69.803    Test Loss: 0.726, Test Accuracy: 88.010
Epoch 2,   Loss: 0.651, Accuracy: 89.240    Test Loss: 0.551, Test Accuracy: 91.790
Epoch 3,   Loss: 0.540, Accuracy: 91.537    Test Loss: 0.477, Test Accuracy: 93.130
TIME =  4.245409965515137


### Building a custom loss function

In [34]:
from tensorflow.keras.losses import Loss

class CustomLoss(Loss):
    """ Custom Sparse Cross Entropy loss with L1 regularization """
    
    def __init__(self, tuning_param): # Called when creating the layer
        super(CustomLoss, self).__init__()
        self.tuning_param = tuning_param
        self.SCE = tf.keras.losses.SparseCategoricalCrossentropy()
        
    def call(self, y_true, y_pred): # What the loss function actually does
        return self.SCE(y_true, y_pred) + self.tuning_param * sum(my_model.losses)

In [35]:
cust_loss_function = CustomLoss(tuning_param=1e-3)

In [36]:
# One SGD step with a given batch
@tf.function
def train_step(images, labels):
    
    # Open a GradientTape
    with tf.GradientTape() as tape:
        
        #Forward pass
        predictions = my_model(images)
        # Loss for this batch
        loss = cust_loss_function(labels, predictions)
        
    # Get gradients of loss w.r.t. the weights
    gradients = tape.gradient(loss, my_model.trainable_variables)
    # Update the weights according to our optimizer
    optimizer.apply_gradients(zip(gradients, my_model.trainable_variables))
    
    # Save loss and accuracy
    train_loss(loss)
    train_accuracy(labels, predictions)

In [37]:
# Test the model on a given batch
@tf.function
def test_step(images, labels):
    
    # Forward pass 
    predictions = my_model(images)
    # Loss for this batch
    t_loss = cust_loss_function(labels, predictions)
    # Add extra losses created during this forward pass:
    t_loss += 1e-3 * sum(my_model.losses)
    
    # Save loss and accuracy
    test_loss(t_loss)
    test_accuracy(labels, predictions)

In [38]:
my_model = MyModel(nb_classes=10)
start = time.time()

# Iterate over 3 epochs
for epoch in range(3):
    # Train over every batch in the training dataset
    for images, labels in train_ds:
        train_step(images, labels)
        
    # Test over every batch in the testing dataset
    for test_images, test_labels in test_ds:
        test_step(test_images, test_labels)
    
    # Print result
    template = 'Epoch {:.0f},   Loss: {:.3f}, Accuracy: {:.3f}    '+ \
               'Test Loss: {:.3f}, Test Accuracy: {:.3f}'
    print(template.format(epoch+1, train_loss.result(), train_accuracy.result()*100,
                          test_loss.result(), test_accuracy.result()*100))

    # Reset the metrics for the next epoch
    train_loss.reset_states()
    train_accuracy.reset_states()
    test_loss.reset_states()
    test_accuracy.reset_states()

# Display elapsed time
end = time.time()
print('TIME = ', end - start)

Epoch 1,   Loss: 1.351, Accuracy: 69.843    Test Loss: 0.925, Test Accuracy: 90.310
Epoch 2,   Loss: 0.579, Accuracy: 90.955    Test Loss: 0.730, Test Accuracy: 93.110
Epoch 3,   Loss: 0.477, Accuracy: 92.930    Test Loss: 0.652, Test Accuracy: 94.040
TIME =  4.375347375869751
