In [1]:
import numpy as np
import tensorflow as tf

2023-03-30 06:21:05.283087: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-30 06:21:12.545353: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/nvidia/lib:/usr/local/nvidia/lib64
2023-03-30 06:21:12.545830: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/nvidia/lib:/usr/local/nvidia/lib64


In [2]:
# Two-layer MLP: a 256-unit ReLU hidden layer followed by a 10-unit linear output
net = tf.keras.models.Sequential()
net.add(tf.keras.layers.Dense(256, activation=tf.nn.relu))
net.add(tf.keras.layers.Dense(10))

2023-03-29 13:49:08.762190: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [5]:
# Random minibatch: 2 examples with 20 features each
X = tf.random.uniform(shape=(2, 20))
net(X).shape

TensorShape([2, 10])

# Layers and Modules

## A Custom Module
The basic functionality that each module must provide:

1. Ingest input data as arguments to its forward propagation method.

2. Generate an output by having the forward propagation method return a value. Note that the output may have a different shape from the input. For example, the first fully connected layer in our model above ingests an input of arbitrary dimension but returns an output of dimension 256.

3. Calculate the gradient of its output with respect to its input, which can be accessed via its backpropagation method. Typically this happens automatically.

4. Store and provide access to those parameters necessary to execute the forward propagation computation.

5. Initialize model parameters as needed.

In [6]:
class MLP(tf.keras.Model):
    """MLP with one 256-unit ReLU hidden layer and a 10-unit output layer."""

    def __init__(self):
        # The parent tf.keras.Model constructor must run before any
        # sub-layer is attached to the instance
        super().__init__()
        self.hidden = tf.keras.layers.Dense(units=256, activation=tf.nn.relu)
        self.out = tf.keras.layers.Dense(units=10)

    def call(self, X):
        """Forward propagation: hidden layer first, then the output layer."""
        hidden_activations = self.hidden(X)
        return self.out(hidden_activations)

In [7]:
# Instantiate the custom model and check the shape of its output for X
net = MLP()
net(X).shape

TensorShape([2, 10])

## The Sequential Module

In [8]:
class MySequential(tf.keras.Model):
    """Minimal sequential container: applies the given layers one after another."""

    def __init__(self, *args):
        super().__init__()
        # Keep the layers in the order in which they were passed in
        self.modules = args

    def call(self, X):
        """Feed X through every stored layer, chaining outputs to inputs."""
        output = X
        for layer in self.modules:
            output = layer(output)
        return output

In [9]:
# Same architecture as before, assembled with the hand-written container
net = MySequential(
    tf.keras.layers.Dense(256, activation=tf.nn.relu),
    tf.keras.layers.Dense(10),
)
net(X).shape

TensorShape([2, 10])

## Executing Code in the Forward Propagation Method

We want a layer that calculates the function $f(\mathbf{x}, \mathbf{w})=c \cdot \mathbf{w}^T \mathbf{x}$,
where $\mathbf{x}$ is the input, $\mathbf{w}$ is our parameter, and $c$ is some specified constant that is not updated during optimization.

In [10]:
class FixedHiddenMLP(tf.keras.Model):
    """Model mixing a constant weight matrix, a Dense layer and Python control flow."""

    def __init__(self):
        super().__init__()
        self.flatten = tf.keras.layers.Flatten()
        # A tf.constant is not a trainable variable, so these random
        # weights stay fixed during training
        self.rand_weight = tf.constant(tf.random.uniform((20, 20)))
        self.dense = tf.keras.layers.Dense(20, activation=tf.nn.relu)

    def call(self, X):
        """Return a scalar: the sum of the repeatedly halved hidden activations."""
        flat = self.flatten(X)
        # Fixed affine map (constant weights, constant offset 1) followed by ReLU
        projected = tf.nn.relu(tf.matmul(flat, self.rand_weight) + 1)
        hidden = self.dense(projected)

        # Control flow: halve until the L1 norm is at most 1
        while tf.reduce_sum(tf.math.abs(hidden)) > 1:
            hidden /= 2
        return tf.reduce_sum(hidden)

In [11]:
# Instantiate the model and evaluate it on X; the result is a scalar tensor
net = FixedHiddenMLP()
net(X)

<tf.Tensor: shape=(), dtype=float32, numpy=0.6434067>

We can mix and match various ways of assembling modules together.

In [12]:
class NestMLP(tf.keras.Model):
    """Model that nests a Sequential sub-network in front of a Dense layer."""

    def __init__(self):
        super().__init__()
        # Inner block: 64-unit and 32-unit ReLU layers
        self.net = tf.keras.Sequential([
            tf.keras.layers.Dense(64, activation=tf.nn.relu),
            tf.keras.layers.Dense(32, activation=tf.nn.relu),
        ])
        self.dense = tf.keras.layers.Dense(16, activation=tf.nn.relu)

    def call(self, X):
        """Run the nested block, then the final 16-unit ReLU layer."""
        nested_out = self.net(X)
        return self.dense(nested_out)

In [13]:
# Chain a nested model, a plain Dense layer and the fixed-weight model
chimera = tf.keras.Sequential([
    NestMLP(),
    tf.keras.layers.Dense(20),
    FixedHiddenMLP(),
])
chimera(X)

<tf.Tensor: shape=(), dtype=float32, numpy=0.574469>

# Parameter Management

- Accessing parameters for debugging, diagnostics, and visualizations.

- Sharing parameters across different model components.

In [14]:
# Small MLP used in this section: Flatten -> Dense(4, ReLU) -> Dense(1)
net = tf.keras.models.Sequential()
net.add(tf.keras.layers.Flatten())
net.add(tf.keras.layers.Dense(4, activation=tf.nn.relu))
net.add(tf.keras.layers.Dense(1))

X = tf.random.uniform(shape=(2, 4))
net(X).shape

TensorShape([2, 1])

## Parameter Access
When a model is defined via the `Sequential` class, we can first access any layer by indexing into the model's `layers` list. Each layer’s parameters are conveniently located in its `weights` attribute.

In [23]:
# Variables (kernel and bias) of the layer at index 2 of the Sequential model
net.layers[2].weights

[<tf.Variable 'dense_13/kernel:0' shape=(4, 1) dtype=float32, numpy=
 array([[-0.9095521 ],
        [-1.0702254 ],
        [ 0.16246808],
        [-0.49569738]], dtype=float32)>,
 <tf.Variable 'dense_13/bias:0' shape=(1,) dtype=float32, numpy=array([0.], dtype=float32)>]

### Targeted Parameters - access the underlying numerical values

In [25]:
# Python type of the second variable (index 1, the bias) of that layer
type(net.layers[2].weights[1])

tensorflow.python.ops.resource_variable_ops.ResourceVariable

In [26]:
# Read the variable's current numerical value as a plain tensor
tf.convert_to_tensor(net.layers[2].weights[1])

<tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.], dtype=float32)>

### All Parameters at Once

In [27]:
# All parameters of the model at once, returned as a list of NumPy arrays
net.get_weights()

[array([[ 0.27454692, -0.71919805, -0.1618939 , -0.06235301],
        [ 0.7343103 ,  0.38332385,  0.802774  ,  0.82870907],
        [-0.44232085,  0.19790345,  0.19437641,  0.23844534],
        [ 0.23141378,  0.37618   ,  0.7395182 ,  0.10106611]],
       dtype=float32),
 array([0., 0., 0., 0.], dtype=float32),
 array([[-0.9095521 ],
        [-1.0702254 ],
        [ 0.16246808],
        [-0.49569738]], dtype=float32),
 array([0.], dtype=float32)]

## Tied Parameters
Often, we want to share parameters across multiple layers.

This example shows that the parameters of the second and third layer are tied. They are not just equal, they are represented by the same exact tensor. Thus, if we change one of the parameters, the other one changes, too.

In [29]:
# One Dense layer object reused at two positions, so both positions
# use the very same kernel and bias variables
shared = tf.keras.layers.Dense(4, activation=tf.nn.relu)
net = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    shared,
    shared,
    tf.keras.layers.Dense(1)
])
net(X)
# Four entries were passed in, but the shared layer is listed only once
print(len(net.layers) == 3)

True


When parameters are tied, what happens to the gradients? Because the second and the third layer use the same parameters, the gradients contributed by these two layers are added together during backpropagation.

# Parameter Initialization
By default, Keras initializes weight matrices uniformly by drawing from a range that is computed according to the input and output dimension, and the bias parameters are all set to zero.

In [30]:
# Fresh model that relies on the default initializers
net = tf.keras.models.Sequential()
net.add(tf.keras.layers.Flatten())
net.add(tf.keras.layers.Dense(4, activation=tf.nn.relu))
net.add(tf.keras.layers.Dense(1))

X = tf.random.uniform(shape=(2, 4))
net(X)

<tf.Tensor: shape=(2, 1), dtype=float32, numpy=
array([[0.29267216],
       [0.3371871 ]], dtype=float32)>

## Built-in Initialization

In [34]:
# Hidden layer: kernel ~ N(0, 0.01^2), bias = 0
net = tf.keras.models.Sequential()
net.add(tf.keras.layers.Flatten())
net.add(tf.keras.layers.Dense(
    4,
    activation=tf.nn.relu,
    kernel_initializer=tf.random_normal_initializer(mean=0, stddev=0.01),
    bias_initializer=tf.zeros_initializer(),
))
net.add(tf.keras.layers.Dense(1))

net(X)

<tf.Tensor: shape=(2, 1), dtype=float32, numpy=
array([[-0.00501987],
       [ 0.00057448]], dtype=float32)>

In [42]:
# Kernel and bias of the hidden Dense layer (index 1) after initialization
net.layers[1].weights[0], net.layers[1].weights[1]

(<tf.Variable 'dense_26/kernel:0' shape=(4, 4) dtype=float32, numpy=
 array([[-0.01251589, -0.00526345,  0.02432222, -0.00017998],
        [ 0.01613509,  0.00407495, -0.01105246,  0.00467974],
        [ 0.00284544,  0.01083667,  0.00735553,  0.0048871 ],
        [ 0.01196324, -0.01146888, -0.00694788,  0.00693432]],
       dtype=float32)>,
 <tf.Variable 'dense_26/bias:0' shape=(4,) dtype=float32, numpy=array([0., 0., 0., 0.], dtype=float32)>)

We can also initialize all the weights of a layer to a given constant value (say, 1); the biases below are still initialized to zero.



In [44]:
# Hidden layer: every kernel entry set to the constant 1, bias = 0
net = tf.keras.models.Sequential()
net.add(tf.keras.layers.Flatten())
net.add(tf.keras.layers.Dense(
    4,
    activation=tf.nn.relu,
    kernel_initializer=tf.keras.initializers.Constant(1),
    bias_initializer=tf.zeros_initializer(),
))
net.add(tf.keras.layers.Dense(1))

net(X)

<tf.Tensor: shape=(2, 1), dtype=float32, numpy=
array([[-0.20678014],
       [-0.13935864]], dtype=float32)>

In [45]:
# Kernel and bias of the hidden Dense layer (index 1) after constant initialization
net.layers[1].weights[0], net.layers[1].weights[1]

(<tf.Variable 'dense_28/kernel:0' shape=(4, 4) dtype=float32, numpy=
 array([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]], dtype=float32)>,
 <tf.Variable 'dense_28/bias:0' shape=(4,) dtype=float32, numpy=array([0., 0., 0., 0.], dtype=float32)>)

In [48]:
# Different initializers per layer: Glorot-uniform for the hidden kernel,
# the constant 42 for the output kernel
net = tf.keras.models.Sequential()
net.add(tf.keras.layers.Flatten())
net.add(tf.keras.layers.Dense(
    4,
    activation=tf.nn.relu,
    kernel_initializer=tf.keras.initializers.GlorotUniform(),
))
net.add(tf.keras.layers.Dense(
    1,
    kernel_initializer=tf.keras.initializers.Constant(42),
))

net(X)

<tf.Tensor: shape=(2, 1), dtype=float32, numpy=
array([[ 1.6291482],
       [17.009798 ]], dtype=float32)>

In [50]:
# Kernel of the hidden layer (index 1) and kernel of the output layer (index 2)
net.layers[1].weights[0], net.layers[2].weights[0]

(<tf.Variable 'dense_32/kernel:0' shape=(4, 4) dtype=float32, numpy=
 array([[-0.73607093, -0.03272218, -0.6821911 ,  0.384692  ],
        [-0.1882773 , -0.15246272,  0.6166542 , -0.5230375 ],
        [-0.1902287 , -0.41124925, -0.3214807 ,  0.5151908 ],
        [ 0.58740205,  0.18837911, -0.6111795 , -0.0971604 ]],
       dtype=float32)>,
 <tf.Variable 'dense_33/kernel:0' shape=(4, 1) dtype=float32, numpy=
 array([[42.],
        [42.],
        [42.],
        [42.]], dtype=float32)>)

## Custom Initialization

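Define an initializer for any weight parameter $w$ using the following strange distribution:

$$
w \sim \begin{cases}
U(5, 10) & \text{with probability } \frac{1}{4} \\
0 & \text{with probability } \frac{1}{2} \\
U(-10, -5) & \text{with probability } \frac{1}{4}
\end{cases}
$$

![image.png](attachment:image.png)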

In [59]:
# Define a subclass of Initializer and implement the __call__ method so that
# it returns the desired tensor for a given shape and data type.
class MyInit(tf.keras.initializers.Initializer):
    """Initializer that samples from U(-10, 10) and zeroes entries with |w| < 5."""

    def __call__(self, shape, dtype=None):
        """Return a tensor of the requested shape and dtype.

        Args:
            shape: Shape of the tensor to create.
            dtype: Floating-point dtype of the result; float32 when None.

        Returns:
            A tensor whose entries are either 0 or have magnitude in [5, 10).
        """
        # tf.random.uniform needs a concrete dtype, so resolve the None default
        dtype = tf.float32 if dtype is None else tf.dtypes.as_dtype(dtype)
        data = tf.random.uniform(
            shape=shape, minval=-10, maxval=10, dtype=dtype)
        # Build the keep-mask in the dtype of `data`; a hard-coded float32 mask
        # cannot be multiplied with float16/float64 samples
        keep = tf.cast(tf.abs(data) >= 5, data.dtype)
        return data * keep


# Use the custom initializer for the hidden layer's kernel
net = tf.keras.models.Sequential()
net.add(tf.keras.layers.Flatten())
net.add(tf.keras.layers.Dense(
    4,
    activation=tf.nn.relu,
    kernel_initializer=MyInit(),
))
net.add(tf.keras.layers.Dense(1))

net(X)
net.layers[1].weights[0]

<tf.Variable 'dense_38/kernel:0' shape=(4, 4) dtype=float32, numpy=
array([[ 0.       ,  0.       ,  0.       , -8.417983 ],
       [-0.       , -7.755871 ,  7.3948402, -6.292293 ],
       [ 0.       , -0.       , -0.       ,  0.       ],
       [-0.       , -0.       ,  8.931616 ,  8.792221 ]], dtype=float32)>

setting parameters directly:

In [60]:
# Add 1 to every entry of the hidden layer's kernel (sliced assignment over all entries)
net.layers[1].weights[0][:].assign(net.layers[1].weights[0] + 1)
# Overwrite the single entry at position [0, 0] with 42
net.layers[1].weights[0][0, 0].assign(42)
# Display the updated kernel
net.layers[1].weights[0]

<tf.Variable 'dense_38/kernel:0' shape=(4, 4) dtype=float32, numpy=
array([[42.      ,  1.      ,  1.      , -7.417983],
       [ 1.      , -6.755871,  8.39484 , -5.292293],
       [ 1.      ,  1.      ,  1.      ,  1.      ],
       [ 1.      ,  1.      ,  9.931616,  9.792221]], dtype=float32)>

# Lazy Initialization

 The trick here is that the framework defers initialization, waiting until the first time we pass data through the model, to infer the sizes of each layer on the fly.

In [61]:
# No input shape is given, so no weights are created at this point
net = tf.keras.models.Sequential()
net.add(tf.keras.layers.Dense(256, activation=tf.nn.relu))
net.add(tf.keras.layers.Dense(10))

At this point, the network cannot possibly know the dimensions of the input layer’s weights because the input dimension remains unknown.

In [62]:
# Per-layer weights before any data has passed through the model
[layer.get_weights() for layer in net.layers]

[[], []]

In [64]:
# Passing data through the model triggers weight creation
X = tf.random.uniform(shape=(2, 20))
net(X)
[weight.shape for weight in net.get_weights()]

[(20, 256), (256,), (256, 10), (10,)]

# Custom Layers
##  Layers without Parameters
Construct a custom layer that does not have any parameters of its own

In [65]:
class CenteredLayer(tf.keras.Model):
    """Parameter-free layer that subtracts the overall mean from its input."""

    def __init__(self):
        super().__init__()

    def call(self, X):
        """Return X shifted by the mean taken over all of its entries."""
        overall_mean = tf.reduce_mean(X)
        return X - overall_mean

In [66]:
# Apply the parameter-free layer to a random (2, 4) input
layer = CenteredLayer()
layer(tf.random.uniform((2, 4)))

<tf.Tensor: shape=(2, 4), dtype=float32, numpy=
array([[-0.15093398,  0.23805869,  0.0578934 ,  0.42996526],
       [ 0.37997603, -0.0455035 , -0.51968384, -0.38977182]],
      dtype=float32)>

Incorporate our layer as a component in constructing more complex models

In [69]:
# A Dense layer followed by the custom centering layer
net = tf.keras.Sequential()
net.add(tf.keras.layers.Dense(128))
net.add(CenteredLayer())

In [72]:
# Mean of the network output; CenteredLayer subtracts the mean as its last step
y = net(tf.random.uniform((4, 8)))
tf.reduce_mean(y)

<tf.Tensor: shape=(), dtype=float32, numpy=0.0>

## Layers with Parameters

In [74]:
# IPython introspection: display the docstring of tf.keras.Model.build.
# NOTE(review): interactive-only helper; consider removing it from the final notebook.
?tf.keras.Model.build

In [95]:
class MyDense(tf.keras.Model):
    """Fully connected layer with ReLU activation and lazily created weights."""

    def __init__(self, units):
        super().__init__()
        # Number of output units; the input size is taken from the input shape in build()
        self.units = units

    def build(self, X_shape):
        """Create the weight matrix and bias from the input shape."""
        in_features = X_shape[-1]
        self.w = self.add_weight(
            name='weight',
            shape=[in_features, self.units],
            initializer=MyInit(),
        )
        self.b = self.add_weight(
            name='bias',
            shape=[self.units],
            initializer=tf.zeros_initializer(),
        )

    def call(self, X):
        """Return relu(X @ w + b)."""
        return tf.nn.relu(tf.matmul(X, self.w) + self.b)

In [96]:
# Instantiate the custom layer, call it once on a (2, 5) input, then inspect its weights
dense = MyDense(3)
dense(tf.random.uniform((2,5)))
dense.get_weights()

[array([[ 8.789167 ,  9.616621 ,  9.173519 ],
        [-0.       ,  6.547964 , -6.5558147],
        [-7.203214 , -8.105307 , -0.       ],
        [-8.957229 , -5.525341 ,  0.       ],
        [ 6.7042274, -0.       ,  0.       ]], dtype=float32),
 array([0., 0., 0.], dtype=float32)]

construct models using custom layers. 

In [97]:
# Stack two custom dense layers and run a forward pass
net = tf.keras.models.Sequential()
net.add(MyDense(8))
net.add(MyDense(1))
net(tf.random.uniform((2, 64)))

<tf.Tensor: shape=(2, 1), dtype=float32, numpy=
array([[0.],
       [0.]], dtype=float32)>

# File I/O

In [98]:
# Write a small integer tensor to disk in NumPy's .npy format
x = tf.range(4)
np.save('x-file.npy', x)

In [99]:
# Read the array back from disk. The file holds a plain integer array, so
# pickle support is not needed; leaving allow_pickle at its default (False)
# avoids unpickling arbitrary objects from the file.
x2 = np.load('x-file.npy')
x2

array([0, 1, 2, 3], dtype=int32)

In [100]:
class MLP(tf.keras.Model):
    """MLP that flattens its input, then applies Dense(256, ReLU) and Dense(10)."""

    def __init__(self):
        super().__init__()
        self.flatten = tf.keras.layers.Flatten()
        self.dense = tf.keras.layers.Dense(units=256, activation=tf.nn.relu)
        self.out = tf.keras.layers.Dense(units=10)

    def call(self, X):
        """Forward propagation: flatten -> hidden layer -> output layer."""
        flat = self.flatten(X)
        hidden = self.dense(flat)
        return self.out(hidden)

In [101]:
# Build the model and record its output Y on a random input X for later comparison
net = MLP()
X = tf.random.uniform((2,20))
Y = net(X)

In [103]:
# Store the parameters of the model on disk under the path './model/mlp.params'.
net.save_weights('./model/mlp.params')

In [105]:
# To recover the model, instantiate a clone of the original MLP model and
# read the previously stored parameters from './model/mlp.params'.
clone = MLP()
clone.load_weights('./model/mlp.params')

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x15f3e93d0>

In [108]:
# Evaluate the clone on the same input X as the original model
Y_clone = clone(X)

# Element-wise comparison of the clone's output with the original output
Y_clone == Y

<tf.Tensor: shape=(2, 10), dtype=bool, numpy=
array([[ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True],
       [ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True]])>

# GPUs

By default, tensors are created in main memory, and computations on them are performed by the CPU.

In [6]:
def cpu():
    """Return the TensorFlow device context for the CPU ('/CPU:0')."""
    device_name = '/CPU:0'
    return tf.device(device_name)

def gpu(i):
    """Return the TensorFlow device context for the GPU with index `i`."""
    device_name = f'/GPU:{i}'
    return tf.device(device_name)

In [8]:
# Device contexts for the CPU and for GPUs 0 and 1
cpu(), gpu(0), gpu(1)

(<tensorflow.python.eager.context._EagerDeviceContext at 0x7f048a29dec0>,
 <tensorflow.python.eager.context._EagerDeviceContext at 0x7f048a296cc0>,
 <tensorflow.python.eager.context._EagerDeviceContext at 0x7f048a29f080>)

We can query the number of available GPUs.

In [13]:
def num_gpus():
    """Return the number of physical GPU devices listed by TensorFlow."""
    gpu_devices = tf.config.list_physical_devices('GPU')
    return len(gpu_devices)
num_gpus()

2

In [18]:
def try_gpu(i=0):
    """Return the device context for GPU `i` if it exists, otherwise the CPU context.

    Negative or out-of-range indices fall back to the CPU instead of
    producing an invalid device string such as '/GPU:-1'.
    """
    if 0 <= i < num_gpus():
        return gpu(i)
    return cpu()

def try_all_gpu():
    """Return device contexts for all available GPUs, or [cpu()] if there is none."""
    devices = [gpu(i) for i in range(num_gpus())]
    # Fall back to the CPU so callers always get at least one usable device,
    # mirroring the fallback in try_gpu()
    return devices if devices else [cpu()]

try_all_gpu()

[<tensorflow.python.eager.context._EagerDeviceContext at 0x7f03e02feb40>,
 <tensorflow.python.eager.context._EagerDeviceContext at 0x7f03e0315980>]

## Tensors and GPUs

In [19]:
# Create a tensor without specifying a device and query where it is stored
x = tf.constant([1,2,3])
x.device

'/job:localhost/replica:0/task:0/device:GPU:0'

In [20]:
# Specify a storage device when creating a tensor:
# create the tensor X on the first GPU (try_gpu() falls back to the CPU
# when that GPU is not available).
with try_gpu():
    X = tf.ones((2,3))
X

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[1., 1., 1.],
       [1., 1., 1.]], dtype=float32)>