# Chapter 6 - Builder's Guide
## Parameter Management
We start by focusing on an MLP with one hidden layer.

In [1]:
import tensorflow as tf

net = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(4, activation=tf.nn.relu),
    tf.keras.layers.Dense(1),
])

X = tf.random.uniform((2, 4))
net(X).shape

2022-07-17 14:16:17.011708: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


TensorShape([2, 1])

### Parameter Access
When a model defined via the Sequential class, we can first access any layer by indexing into the model as though it were a list. We can inspect the parameters of the second fully connected layer as follows:

In [2]:
net.layers[2].weights

[<tf.Variable 'dense_1/kernel:0' shape=(4, 1) dtype=float32, numpy=
 array([[ 0.562533  ],
        [-0.12379932],
        [-0.48799145],
        [-0.16038698]], dtype=float32)>,
 <tf.Variable 'dense_1/bias:0' shape=(1,) dtype=float32, numpy=array([0.], dtype=float32)>]

Each parameter is represented as an instance of the parameter class. To do anything useful with the parametrs, we first need to access the underlying numerical values. The following code extracts the bias from the second neural network layer, which returns a parameter class instance:

In [3]:
type(net.layers[2].weights[1]), tf.convert_to_tensor(net.layers[2].weights[1])

(tensorflow.python.ops.resource_variable_ops.ResourceVariable,
 <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.], dtype=float32)>)

To get all parameters at once:

In [4]:
net.get_weights()

[array([[ 0.41053194, -0.7718975 ,  0.35929936, -0.12054759],
        [-0.29666138, -0.33360726, -0.03594857, -0.33871835],
        [ 0.45185965,  0.7802716 ,  0.12379384,  0.61790067],
        [ 0.5794429 , -0.15028208, -0.37281528,  0.46897858]],
       dtype=float32),
 array([0., 0., 0., 0.], dtype=float32),
 array([[ 0.562533  ],
        [-0.12379932],
        [-0.48799145],
        [-0.16038698]], dtype=float32),
 array([0.], dtype=float32)]

Often, we want to share parameters across multiple layers. Let’s see how to do this elegantly. In the following we allocate a fully connected layer and then use its parameters specifically to set those of another layer. Here we need to run the forward propagation net(X) before accessing the parameters.

In [5]:
# tf.keras behaves a bit differently. It removes the duplicate layer
# automatically
shared = tf.keras.layers.Dense(4, activation=tf.nn.relu)
net = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    shared,
    shared,
    tf.keras.layers.Dense(1),
])
net(X)
# Check whether the parameters are different
print(len(net.layers) == 3)

True


The parameters of the second and third layers are tied. They are not just equal, they are represented by the same exact tensor.

## Parameter Initialization


In [6]:
import tensorflow as tf

net = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(4, activation=tf.nn.relu),
    tf.keras.layers.Dense(1),
])

X = tf.random.uniform((2, 4))
net(X).shape

TensorShape([2, 1])

### Built-in Initialization
The code below initializes all weights parameters as Gaussian random variable with standard deviation 0.01, while bias parameters cleared to zero.

In [7]:
net = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(
        4, activation=tf.nn.relu,
        kernel_initializer=tf.random_normal_initializer(mean=0, stddev=0.01),
        bias_initializer=tf.zeros_initializer()),
    tf.keras.layers.Dense(1)])

net(X)
net.weights[0], net.weights[1]

(<tf.Variable 'dense_6/kernel:0' shape=(4, 4) dtype=float32, numpy=
 array([[ 0.00723212, -0.01530714,  0.01126629,  0.00255094],
        [ 0.01806535, -0.00322238, -0.01142395,  0.01356027],
        [-0.00162665,  0.02979071,  0.01495478, -0.02412402],
        [ 0.01075977, -0.00342412, -0.00296703,  0.00470037]],
       dtype=float32)>,
 <tf.Variable 'dense_6/bias:0' shape=(4,) dtype=float32, numpy=array([0., 0., 0., 0.], dtype=float32)>)

In [8]:
#we also can initialize all the parameters to a given constrant
net = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(
        4, activation=tf.nn.relu,
        kernel_initializer=tf.keras.initializers.Constant(1),
        bias_initializer=tf.zeros_initializer()),
    tf.keras.layers.Dense(1),
])

net(X)
net.weights[0], net.weights[1]

(<tf.Variable 'dense_8/kernel:0' shape=(4, 4) dtype=float32, numpy=
 array([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]], dtype=float32)>,
 <tf.Variable 'dense_8/bias:0' shape=(4,) dtype=float32, numpy=array([0., 0., 0., 0.], dtype=float32)>)

In [9]:
#initializing first layer with Xavier, second to a constant
net = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(
        4,
        activation=tf.nn.relu,
        kernel_initializer=tf.keras.initializers.GlorotUniform()),
    tf.keras.layers.Dense(
        1, kernel_initializer=tf.keras.initializers.Constant(42)),
])

net(X)
print(net.layers[1].weights[0])
print(net.layers[2].weights[0])

<tf.Variable 'dense_10/kernel:0' shape=(4, 4) dtype=float32, numpy=
array([[ 0.59155446,  0.7373546 ,  0.8280507 ,  0.62458056],
       [-0.8335741 ,  0.23231083, -0.8103054 , -0.46738032],
       [-0.29961276,  0.5847798 , -0.5695137 , -0.80882126],
       [-0.75137496,  0.18924707,  0.11201453, -0.5980407 ]],
      dtype=float32)>
<tf.Variable 'dense_11/kernel:0' shape=(4, 1) dtype=float32, numpy=
array([[42.],
       [42.],
       [42.],
       [42.]], dtype=float32)>


### Custom Initialization
Initialize parameters with regards to this distribution
$$
\begin{aligned}
    w \sim \begin{cases}
        U(5, 10) & \text{ with probability } \frac{1}{4} \\
            0    & \text{ with probability } \frac{1}{2} \\
        U(-10, -5) & \text{ with probability } \frac{1}{4}
    \end{cases}
\end{aligned}
$$

In [10]:
class MyInit(tf.keras.initializers.Initializer):
    def __call__(self, shape, dtype=None):
        data=tf.random.uniform(shape, -10, 10, dtype=dtype)
        factor=(tf.abs(data) >= 5)
        factor=tf.cast(factor, tf.float32)
        return data * factor

net = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(
        4,
        activation=tf.nn.relu,
        kernel_initializer=MyInit()),
    tf.keras.layers.Dense(1),
])

net(X)
print(net.layers[1].weights[0])

<tf.Variable 'dense_12/kernel:0' shape=(4, 4) dtype=float32, numpy=
array([[ 0.       ,  0.       ,  0.       , -0.       ],
       [ 0.       , -0.       ,  0.       ,  8.9689865],
       [-7.665801 , -0.       ,  0.       ,  0.       ],
       [-0.       , -8.497915 , -0.       ,  0.       ]], dtype=float32)>


In [11]:
net.layers[1].weights[0][:].assign(net.layers[1].weights[0] + 1)
net.layers[1].weights[0][0, 0].assign(42)
net.layers[1].weights[0]

<tf.Variable 'dense_12/kernel:0' shape=(4, 4) dtype=float32, numpy=
array([[42.       ,  1.       ,  1.       ,  1.       ],
       [ 1.       ,  1.       ,  1.       ,  9.9689865],
       [-6.665801 ,  1.       ,  1.       ,  1.       ],
       [ 1.       , -7.4979153,  1.       ,  1.       ]], dtype=float32)>

## Lazy Initialization


In [12]:
import tensorflow as tf

net = tf.keras.models.Sequential([
    tf.keras.layers.Dense(256, activation=tf.nn.relu),
    tf.keras.layers.Dense(10),
])



In [13]:
X = tf.random.uniform((2, 20))
net(X)
[w.shape for w in net.get_weights()]

[(20, 256), (256,), (256, 10), (10,)]

Lazy initialization can be convenient, allowing the framework to infer parameter shapes automatically, making it easy to modify architectures and eliminating one common source of errors.
We can pass data through the model to make the framework finally initialize parameters.

### Custom Layers
### Layers without Parameters
The following CenteredLayer class simply subtracts the mean from its input. 

In [14]:
import tensorflow as tf
from d2l import tensorflow as d2l


class CenteredLayer(tf.keras.Model):
    def __init__(self):
        super().__init__()

    def call(self, inputs):
        return inputs - tf.reduce_mean(inputs)

In [16]:
layer = CenteredLayer()
layer(tf.constant([1.0,2,3,4,5]))



<tf.Tensor: shape=(5,), dtype=float32, numpy=array([-2., -1.,  0.,  1.,  2.], dtype=float32)>

Now we will incorporate our layer as a component in constructing more complex models.


In [17]:
net = tf.keras.Sequential([tf.keras.layers.Dense(128), CenteredLayer()])



### Layers with Parameters
To create fully-connected layer, we need weight and bias parameters. To use ReLu activation, set in_units (number of inputs) and units  (number of outputs).

In [18]:
class MyDense(tf.keras.Model):
    def __init__(self, units):
        super().__init__()
        self.units = units

    def build(self, X_shape):
        self.weight = self.add_weight(name='weight',
            shape=[X_shape[-1], self.units],
            initializer=tf.random_normal_initializer())
        self.bias = self.add_weight(
            name='bias', shape=[self.units],
            initializer=tf.zeros_initializer())

    def call(self, X):
        linear = tf.matmul(X, self.weight) + self.bias
        return tf.nn.relu(linear)

In [19]:
#Next, we instantiate the MyDense class and access its model parameters.
dense = MyDense(3)
dense(tf.random.uniform((2, 5)))
dense.get_weights()

[array([[ 0.04538563,  0.02833788, -0.00999232],
        [-0.0711128 , -0.01901866, -0.01374371],
        [-0.023159  , -0.05815957, -0.03409592],
        [ 0.06743631,  0.05994912,  0.02385817],
        [ 0.0240874 , -0.02597427,  0.04005754]], dtype=float32),
 array([0., 0., 0.], dtype=float32)]

In [20]:
net = tf.keras.models.Sequential([MyDense(8), MyDense(1)])
net(tf.random.uniform((2, 64)))


<tf.Tensor: shape=(2, 1), dtype=float32, numpy=
array([[0.00407525],
       [0.00578758]], dtype=float32)>

We can design custom layers via the basic layer class. This allows us to define flexible new layers that behave differently from any existing layers in the library.
Once defined, custom layers can be invoked in arbitrary contexts and architectures.
Layers can have local parameters, which can be created through built-in functions

### File I/O
So far we discussed how to process data and how to build, train, and test deep learning models. However, at some point, we will hopefully be happy enough with the learned models that we will want to save the results for later use in various contexts (perhaps even to make predictions in deployment). Additionally, when running a long training process, the best practice is to periodically save intermediate results (checkpointing) to ensure that we do not lose several days worth of computation if we trip over the power cord of our serve

To create the variable and to save it to the existing directory use np.save()
To load it for the further calculations, use np.load()

In [22]:
import numpy as np
import tensorflow as tf

x = tf.range(4)
np.save('x-file.npy', x)

x2 = np.load('x-file.npy', allow_pickle=True)
x2

OSError: [Errno 30] Read-only file system: 'x-file.npy'

In [None]:
#to save list of tensors
y = tf.zeros(4)
np.save('xy-files.npy', [x, y])
x2, y2 = np.load('xy-files.npy', allow_pickle=True)
(x2, y2)

In [None]:
mydict = {'x': x, 'y': y}
np.save('mydict.npy', mydict)
mydict2 = np.load('mydict.npy', allow_pickle=True)
mydict2

### Loading and Saving Network Parameters

In [24]:
class MLP(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.flatten = tf.keras.layers.Flatten()
        self.hidden = tf.keras.layers.Dense(units=256, activation=tf.nn.relu)
        self.out = tf.keras.layers.Dense(units=10)

    def call(self, inputs):
        x = self.flatten(inputs)
        x = self.hidden(x)
        return self.out(x)

net = MLP()
X = tf.random.uniform((2, 20))
Y = net(X)

In [25]:
#to save weights
net.save_weights('mlp.params')
#to recover the model
clone = MLP()
clone.load_weights('mlp.params')


2022-07-18 10:49:30.201583: W tensorflow/core/framework/op_kernel.cc:1745] OP_REQUIRES failed at save_restore_v2_ops.cc:112 : PERMISSION_DENIED: mlp.params_temp; Read-only file system


PermissionDeniedError: mlp.params_temp; Read-only file system [Op:SaveV2]

For GPUs note:
if we want to operate on several tensorflow elements, they need to be on the same device.


In [26]:
def try_gpu(i=0):  #@save
    """Return gpu(i) if exists, otherwise return cpu()."""
    if num_gpus() >= i + 1:
        return gpu(i)
    return cpu()

def try_all_gpus():  #@save
    """Return all available GPUs, or [cpu(),] if no GPU exists."""
    return [gpu(i) for i in range(num_gpus())]

try_gpu(), try_gpu(10), try_all_gpus()

NameError: name 'num_gpus' is not defined

In [None]:
#if we want to perform on several elements in different locations, we need to define where it will be stored. for example,
with try_gpu(1):
    Z = X
print(X)
print(Z)
Y + Z


In [None]:
strategy = tf.distribute.MirroredStrategy()
with strategy.scope():
    net = tf.keras.models.Sequential([
        tf.keras.layers.Dense(1)])