In [16]:
import tensorflow as tf

# Layers and Blocks

In [17]:
# Two-layer MLP assembled with the built-in Sequential container:
# a 256-unit ReLU hidden layer followed by a 10-unit linear output layer.
net = tf.keras.models.Sequential([
    tf.keras.layers.Dense(256, activation=tf.nn.relu),
    tf.keras.layers.Dense(10),
])

# Random mini-batch: 2 examples with 20 features each.
X = tf.random.uniform((2, 20))
net(X)

<tf.Tensor: shape=(2, 10), dtype=float32, numpy=
array([[-0.01428078,  0.09322949, -0.09008395,  0.2979754 , -0.2543663 ,
        -0.19645092, -0.00395768,  0.01467552, -0.36376938,  0.15041693],
       [-0.13024715,  0.33287895, -0.05019459,  0.29030573,  0.05229899,
        -0.20493284, -0.06210567,  0.20211479, -0.36138535, -0.06127165]],
      dtype=float32)>

## A Custom Block

In [18]:
class MLP(tf.keras.Model):
    """Two-layer perceptron: a 256-unit ReLU hidden layer, then a 10-unit linear output."""

    def __init__(self):
        # The base-class constructor must run before sub-layers are attached.
        super().__init__()
        self.hidden = tf.keras.layers.Dense(256, activation=tf.nn.relu)
        self.out = tf.keras.layers.Dense(10)

    def call(self, X):
        """Forward pass: apply the hidden layer, then the output layer."""
        hidden_activations = self.hidden(X)
        return self.out(hidden_activations)


In [19]:
net = MLP()
net(X)

<tf.Tensor: shape=(2, 10), dtype=float32, numpy=
array([[ 0.5574007 , -0.20781834, -0.18789206, -0.01031101,  0.01155946,
         0.4410808 , -0.26910755, -0.08603245, -0.11692932,  0.31184253],
       [ 0.4820602 , -0.12271253, -0.05463037, -0.01741287,  0.17176928,
         0.33521494,  0.04367429, -0.00612284, -0.16062528,  0.01674779]],
      dtype=float32)>

## The Sequential Block

In [20]:
class MySequential(tf.keras.Model):
    """Minimal sequential container: applies its blocks in the order they were given."""

    def __init__(self, *args):
        super().__init__()
        # Every positional argument is one block; keep them in call order.
        self.modules = list(args)

    def call(self, X):
        """Feed X through each stored block in turn and return the final result."""
        output = X
        for layer in self.modules:
            output = layer(output)
        return output

In [21]:
# Same 256 -> 10 architecture as above, now chained by the hand-written container.
net = MySequential(
    tf.keras.layers.Dense(units=256, activation=tf.nn.relu),
    tf.keras.layers.Dense(10))
net(X)

<tf.Tensor: shape=(2, 10), dtype=float32, numpy=
array([[-0.17051524, -0.46032685,  0.10198548,  0.20917   ,  0.36131424,
         0.00181762,  0.30198005,  0.3457077 ,  0.33865118,  0.13388312],
       [-0.08400035, -0.21153684,  0.06554966,  0.00304362,  0.20790848,
        -0.05397642,  0.07291701,  0.23324075,  0.15164194,  0.03614782]],
      dtype=float32)>

## Executing Code in the Forward Propagation Function

In [22]:
class FixedHiddenMLP(tf.keras.Model):
    """MLP combining a Dense layer, a constant random projection and Python control flow."""

    def __init__(self):
        super().__init__()
        self.flatten = tf.keras.layers.Flatten()
        # Sampled once at construction and held as a constant tensor
        # (tf.constant), not created through a layer.
        self.rand_weight = tf.constant(tf.random.uniform((20, 20)))
        self.dense = tf.keras.layers.Dense(20, activation=tf.nn.relu)

    def call(self, inputs):
        """Return a scalar: the sum of the rescaled Dense-layer output."""
        flat = self.flatten(inputs)
        # Constant projection plus a bias of 1, passed through ReLU.
        projected = tf.matmul(flat, self.rand_weight) + 1
        X = self.dense(tf.nn.relu(projected))
        # Halve the activations until their absolute sum is at most 1.
        while tf.reduce_sum(tf.math.abs(X)) > 1:
            X = X / 2
        return tf.reduce_sum(X)


In [23]:
net = FixedHiddenMLP()
net(X)

<tf.Tensor: shape=(), dtype=float32, numpy=0.7464899>

In [24]:
class NestMLP(tf.keras.Model):
    """Block that nests a two-layer Sequential (64 -> 32, ReLU) in front of a 16-unit ReLU Dense layer."""

    def __init__(self):
        super().__init__()
        # Inner stack, built in one go instead of layer-by-layer.
        self.net = tf.keras.Sequential([
            tf.keras.layers.Dense(64, activation=tf.nn.relu),
            tf.keras.layers.Dense(32, activation=tf.nn.relu),
        ])
        self.dense = tf.keras.layers.Dense(16, activation=tf.nn.relu)

    def call(self, inputs):
        """Run the inner stack first, then the final Dense layer."""
        features = self.net(inputs)
        return self.dense(features)

# Blocks compose freely: a custom nested block, a plain Dense layer and the
# fixed-weight block are stacked in a single Sequential.
chimera = tf.keras.Sequential()
chimera.add(NestMLP())
# 20 outputs here match the (20, 20) rand_weight that FixedHiddenMLP multiplies by.
chimera.add(tf.keras.layers.Dense(20))
chimera.add(FixedHiddenMLP())
chimera(X)

<tf.Tensor: shape=(), dtype=float32, numpy=0.70286834>

# Parameter Management

In [25]:
# Small MLP used by the parameter-management cells below:
# layers[0] is Flatten, layers[1] the 4-unit hidden Dense, layers[2] the 1-unit output Dense.
net = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(4, activation=tf.nn.relu),
    tf.keras.layers.Dense(1),
])

# Rebinds X to a batch of 2 examples with 4 features each.
X = tf.random.uniform((2, 4))
net(X)

<tf.Tensor: shape=(2, 1), dtype=float32, numpy=
array([[-0.15454806],
       [ 0.5617555 ]], dtype=float32)>

## Parameter Access

In [26]:
print(net.layers[2].weights)

[<tf.Variable 'dense_33/kernel:0' shape=(4, 1) dtype=float32, numpy=
array([[-0.20560753],
       [ 0.43194103],
       [ 0.8268703 ],
       [-0.5161398 ]], dtype=float32)>, <tf.Variable 'dense_33/bias:0' shape=(1,) dtype=float32, numpy=array([0.], dtype=float32)>]


## Targeted Parameters

In [27]:
print(type(net.layers[2].weights[1]))
print(net.layers[2].weights[1])
print(tf.convert_to_tensor(net.layers[2].weights[1]))

<class 'tensorflow.python.ops.resource_variable_ops.ResourceVariable'>
<tf.Variable 'dense_33/bias:0' shape=(1,) dtype=float32, numpy=array([0.], dtype=float32)>
tf.Tensor([0.], shape=(1,), dtype=float32)


## All Parameters at Once

In [28]:
print(net.layers[1].weights)
print(net.get_weights())

[<tf.Variable 'dense_32/kernel:0' shape=(4, 4) dtype=float32, numpy=
array([[ 0.82369334,  0.07516652,  0.67686516,  0.45998412],
       [ 0.83738893,  0.6420091 , -0.4622448 , -0.21757215],
       [ 0.50858706,  0.3512649 , -0.7855154 ,  0.6934604 ],
       [ 0.3726321 ,  0.67377204,  0.47829133,  0.0514257 ]],
      dtype=float32)>, <tf.Variable 'dense_32/bias:0' shape=(4,) dtype=float32, numpy=array([0., 0., 0., 0.], dtype=float32)>]
[array([[ 0.82369334,  0.07516652,  0.67686516,  0.45998412],
       [ 0.83738893,  0.6420091 , -0.4622448 , -0.21757215],
       [ 0.50858706,  0.3512649 , -0.7855154 ,  0.6934604 ],
       [ 0.3726321 ,  0.67377204,  0.47829133,  0.0514257 ]],
      dtype=float32), array([0., 0., 0., 0.], dtype=float32), array([[-0.20560753],
       [ 0.43194103],
       [ 0.8268703 ],
       [-0.5161398 ]], dtype=float32), array([0.], dtype=float32)]


In [29]:
net.get_weights()[1]

array([0., 0., 0., 0.], dtype=float32)

## Collecting Parameters from Nested Blocks

In [30]:
def block1(name):
    """Build a Sequential called `name`: a Flatten layer followed by a 4-unit ReLU Dense layer."""
    stages = [
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(4, activation=tf.nn.relu),
    ]
    return tf.keras.Sequential(stages, name=name)

def block2():
    """Stack four `block1` sub-networks, named block-0 through block-3, in one Sequential."""
    sub_blocks = [block1(name=f'block-{idx}') for idx in range(4)]
    return tf.keras.Sequential(sub_blocks)

# Nest once more: the four-block stack from block2() followed by a single-output Dense layer.
rgnet = tf.keras.Sequential()
rgnet.add(block2())
rgnet.add(tf.keras.layers.Dense(1))
rgnet(X)


<tf.Tensor: shape=(2, 1), dtype=float32, numpy=
array([[-0.07062218],
       [-0.01365906]], dtype=float32)>

In [31]:
print(rgnet.summary())

Model: "sequential_9"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 sequential_10 (Sequential)  (2, 4)                    80        
                                                                 
 dense_38 (Dense)            (2, 1)                    5         
                                                                 
Total params: 85
Trainable params: 85
Non-trainable params: 0
_________________________________________________________________
None


In [32]:
rgnet.layers[0].layers[1].layers[1].weights[1]

<tf.Variable 'dense_35/bias:0' shape=(4,) dtype=float32, numpy=array([0., 0., 0., 0.], dtype=float32)>

## Parameter Initialization

## Built-in Initialization

In [33]:
# Hidden kernel drawn from a normal distribution (mean 0, stddev 0.01); hidden bias starts at zero.
net = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(
        4, activation=tf.nn.relu,
        kernel_initializer=tf.random_normal_initializer(mean=0, stddev=0.01),
        bias_initializer=tf.zeros_initializer()),
    tf.keras.layers.Dense(1)])

# Run one forward pass before inspecting the weights (see "Deferred Initialization":
# layers report no weights until the network has been called on an input).
net(X)
net.weights[0], net.weights[1]

(<tf.Variable 'dense_39/kernel:0' shape=(4, 4) dtype=float32, numpy=
 array([[ 0.00282042,  0.00914793,  0.02491274, -0.00587755],
        [ 0.02280997,  0.00609103,  0.00287951, -0.01323005],
        [-0.00394876,  0.02440245, -0.01391268,  0.00293388],
        [-0.0174959 , -0.01561125,  0.0021481 , -0.00578594]],
       dtype=float32)>,
 <tf.Variable 'dense_39/bias:0' shape=(4,) dtype=float32, numpy=array([0., 0., 0., 0.], dtype=float32)>)

In [34]:
# Same architecture, but every hidden-kernel entry is initialised to the constant 1.
net = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(
        4, activation=tf.nn.relu,
        kernel_initializer=tf.keras.initializers.Constant(1),
        bias_initializer=tf.zeros_initializer()),
    tf.keras.layers.Dense(1),
])

# Forward pass first, then display the hidden layer's kernel and bias.
net(X)
net.weights[0], net.weights[1]


(<tf.Variable 'dense_41/kernel:0' shape=(4, 4) dtype=float32, numpy=
 array([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]], dtype=float32)>,
 <tf.Variable 'dense_41/bias:0' shape=(4,) dtype=float32, numpy=array([0., 0., 0., 0.], dtype=float32)>)

In [35]:
# Different initializers per layer: Glorot-uniform for the hidden kernel,
# the constant 42 for the output kernel.
net = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(
        4,
        activation=tf.nn.relu,
        kernel_initializer=tf.keras.initializers.GlorotUniform()),
    tf.keras.layers.Dense(
        1, kernel_initializer=tf.keras.initializers.Constant(42)),
])

# Forward pass first, then print the two kernels (index 0 of each layer's weights).
net(X)
print(net.layers[1].weights[0])
print(net.layers[2].weights[0])


<tf.Variable 'dense_43/kernel:0' shape=(4, 4) dtype=float32, numpy=
array([[-0.48692226,  0.0603531 , -0.11693698, -0.45452198],
       [-0.19843203, -0.6525388 , -0.6270709 , -0.50121003],
       [-0.44447276,  0.24397904, -0.06346905, -0.53539574],
       [ 0.8624938 , -0.73247886, -0.06466162,  0.6293157 ]],
      dtype=float32)>
<tf.Variable 'dense_44/kernel:0' shape=(4, 1) dtype=float32, numpy=
array([[42.],
       [42.],
       [42.],
       [42.]], dtype=float32)>


## Custom Initialization

In [36]:
class MyInit(tf.keras.initializers.Initializer):
    """Initializer sampling from U(-10, 10) and zeroing entries whose magnitude is below 5."""

    def __call__(self, shape, dtype=None, **kwargs):
        """Return a tensor of the requested shape and dtype.

        Args:
            shape: Shape of the tensor to create.
            dtype: Floating-point dtype of the result; tf.float32 when omitted.
            **kwargs: Extra keyword arguments a caller may forward to an
                initializer; accepted for compatibility and ignored.

        Returns:
            A tensor whose entries are either 0 or lie in [-10, -5] U [5, 10).
        """
        # Settle the dtype once so the sample and the mask always agree.
        if dtype is None:
            dtype = tf.float32
        data = tf.random.uniform(shape, -10, 10, dtype=dtype)
        # Cast the boolean mask to the sample's own dtype: a hard-coded float32
        # mask cannot be multiplied with float64/float16 samples.
        keep = tf.cast(tf.abs(data) >= 5, data.dtype)
        return data * keep

# Plug the custom initializer into the hidden layer's kernel.
net = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(
        4,
        activation=tf.nn.relu,
        kernel_initializer=MyInit()),
    tf.keras.layers.Dense(1),
])

# Forward pass first, then print the hidden kernel produced by MyInit.
net(X)
print(net.layers[1].weights[0])


<tf.Variable 'dense_45/kernel:0' shape=(4, 4) dtype=float32, numpy=
array([[-6.841991 ,  0.       ,  5.939474 , -0.       ],
       [ 9.488342 ,  0.       ,  0.       ,  6.6017456],
       [ 0.       ,  7.2634506, -0.       ,  5.2517843],
       [ 9.268473 ,  9.497379 ,  0.       ,  0.       ]], dtype=float32)>


In [37]:
# Parameters can be overwritten directly: first shift every hidden-kernel entry by 1 ...
net.layers[1].weights[0][:].assign(net.layers[1].weights[0] + 1)
# ... then set the single entry at row 0, column 0 to 42.
net.layers[1].weights[0][0, 0].assign(42)
net.layers[1].weights[0]


<tf.Variable 'dense_45/kernel:0' shape=(4, 4) dtype=float32, numpy=
array([[42.       ,  1.       ,  6.939474 ,  1.       ],
       [10.488342 ,  1.       ,  1.       ,  7.6017456],
       [ 1.       ,  8.263451 ,  1.       ,  6.2517843],
       [10.268473 , 10.497379 ,  1.       ,  1.       ]], dtype=float32)>

## Tied Parameters

In [38]:

# One Dense instance placed at two positions of the network, so both positions
# use the same layer object.
shared = tf.keras.layers.Dense(4, activation=tf.nn.relu)
net = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    shared,
    shared,
    tf.keras.layers.Dense(1),
])

net(X)

# Four entries were passed in, but the shared layer is listed only once,
# so the model reports 3 layers.
print(len(net.layers) == 3)


True


# Deferred Initialization

## Instantiating a Network

In [39]:
net = tf.keras.models.Sequential([
    tf.keras.layers.Dense(256, activation=tf.nn.relu),
    tf.keras.layers.Dense(10),
])

In [40]:
[net.layers[i].get_weights() for i in range(len(net.layers))]

[[], []]

In [41]:
# The first forward pass supplies the input width (20); afterwards the
# weights exist and their shapes can be listed.
X = tf.random.uniform((2, 20))
net(X)
[w.shape for w in net.get_weights()]

[(20, 256), (256,), (256, 10), (10,)]

# Custom Layers

## Layers without Parameters

In [42]:
class CenteredLayer(tf.keras.Model):
    """Parameter-free block that subtracts the mean of its input from every element."""

    def __init__(self):
        super().__init__()

    def call(self, inputs):
        """Return `inputs` shifted by the mean taken over all of its elements."""
        overall_mean = tf.reduce_mean(inputs)
        return inputs - overall_mean

In [43]:
layer = CenteredLayer()
layer(tf.constant([1, 2, 3, 4, 5]))

<tf.Tensor: shape=(5,), dtype=int32, numpy=array([-2, -1,  0,  1,  2], dtype=int32)>

In [44]:
net = tf.keras.Sequential([tf.keras.layers.Dense(128), CenteredLayer()])

In [45]:
Y = net(tf.random.uniform((4, 8)))
tf.reduce_mean(Y)

<tf.Tensor: shape=(), dtype=float32, numpy=-3.259629e-09>

##  Layers with Parameters

In [46]:
class MyDense(tf.keras.Model):
    """Hand-written fully connected block with a ReLU activation."""

    def __init__(self, units):
        """Remember the number of output units; weights are created later in build()."""
        super().__init__()
        self.units = units

    def build(self, X_shape):
        """Create the weight matrix and bias once the input's last dimension is known."""
        in_features = X_shape[-1]
        self.weight = self.add_weight(
            name='weight',
            shape=[in_features, self.units],
            initializer=tf.random_normal_initializer())
        self.bias = self.add_weight(
            name='bias',
            shape=[self.units],
            initializer=tf.zeros_initializer())

    def call(self, X):
        """Compute relu(X @ weight + bias)."""
        return tf.nn.relu(tf.matmul(X, self.weight) + self.bias)

In [47]:
# Calling the block on a (2, 5) batch creates its parameters: the listing
# below shows a 5x3 weight matrix and a length-3 bias.
dense = MyDense(3)
dense(tf.random.uniform((2, 5)))
dense.get_weights()

[array([[ 0.15277301,  0.05416867, -0.03340849],
        [-0.03892843, -0.03945602,  0.07088908],
        [ 0.07437005,  0.08346223,  0.06517704],
        [ 0.00791574,  0.05780813, -0.02969188],
        [ 0.003831  ,  0.04087809,  0.0210499 ]], dtype=float32),
 array([0., 0., 0.], dtype=float32)]

In [48]:
dense(tf.random.uniform((2, 5)))

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[0.13314623, 0.08451811, 0.07346228],
       [0.03696066, 0.08522607, 0.01726006]], dtype=float32)>

In [49]:
net = tf.keras.models.Sequential([MyDense(8), MyDense(1)])
net(tf.random.uniform((2, 64)))

<tf.Tensor: shape=(2, 1), dtype=float32, numpy=
array([[0.        ],
       [0.00945512]], dtype=float32)>

# File I/O

## Loading and Saving Tensors

In [51]:
import numpy as np

In [52]:
# Write a single tensor to disk in NumPy's .npy format.
x = tf.range(4)
np.save('x-file.npy', x)

In [53]:
# The file holds a plain int32 array, so NumPy's default allow_pickle=False is
# sufficient; unpickling is left disabled because it is not needed here.
x2 = np.load('x-file.npy')
x2

array([0, 1, 2, 3], dtype=int32)

In [54]:
y = tf.zeros(4)
# Saving [x, y] as one list stores a single array with one common dtype, so
# x's int32 values come back as floats (see the output below).
np.save('xy-files.npy', [x, y])
# The stored array is purely numeric, so it loads with the default
# allow_pickle=False; unpickling is left disabled because it is not needed.
x2, y2 = np.load('xy-files.npy')
(x2, y2)

(array([0., 1., 2., 3.]), array([0., 0., 0., 0.]))

In [55]:
# A dict is not a numeric array, so np.save stores it as a pickled object.
mydict = {'x': x, 'y': y}
np.save('mydict.npy', mydict)
# SECURITY NOTE: allow_pickle=True unpickles the file's contents, which can run
# arbitrary code; only load files you created yourself this way.
# The result is an object array wrapping the dict (see the output below).
mydict2 = np.load('mydict.npy', allow_pickle=True)
mydict2

array({'x': <tf.Tensor: shape=(4,), dtype=int32, numpy=array([0, 1, 2, 3], dtype=int32)>, 'y': <tf.Tensor: shape=(4,), dtype=float32, numpy=array([0., 0., 0., 0.], dtype=float32)>},
      dtype=object)

## Loading and Saving Model Parameters

In [56]:
class MLP(tf.keras.Model):
    """MLP used for the save/load demo: Flatten -> 256-unit ReLU Dense -> 10-unit Dense."""

    def __init__(self):
        super().__init__()
        self.flatten = tf.keras.layers.Flatten()
        self.hidden = tf.keras.layers.Dense(256, activation=tf.nn.relu)
        self.out = tf.keras.layers.Dense(10)

    def call(self, inputs):
        """Flatten the input, apply the hidden layer, then the output layer."""
        return self.out(self.hidden(self.flatten(inputs)))

net = MLP()
X = tf.random.uniform((2, 20))
# Keep the output so the reloaded clone can be compared against it later.
Y = net(X)

In [57]:
net.save_weights('mlp.params')

In [58]:
# Instantiate a second MLP and load into it the parameters saved from `net`.
clone = MLP()
clone.load_weights('mlp.params')

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f105cac10d0>

In [59]:
Y_clone = clone(X)
Y_clone == Y

<tf.Tensor: shape=(2, 10), dtype=bool, numpy=
array([[ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True],
       [ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True]])>

# GPUs

## Computing Devices

In [60]:
tf.device('/CPU:0'), tf.device('/GPU:0'), tf.device('/GPU:1')

(<tensorflow.python.eager.context._EagerDeviceContext at 0x7f105c9c2780>,
 <tensorflow.python.eager.context._EagerDeviceContext at 0x7f105c9c28c0>,
 <tensorflow.python.eager.context._EagerDeviceContext at 0x7f105c9c2dc0>)

In [61]:
len(tf.config.experimental.list_physical_devices('GPU'))

0

In [62]:
def try_gpu(i=0):
    """Return gpu(i) if exists, otherwise return cpu().

    Args:
        i: Zero-based index of the requested GPU.

    Returns:
        A `tf.device` context for '/GPU:{i}' when that GPU is present,
        otherwise a `tf.device` context for '/CPU:0'.
    """
    num_gpus = len(tf.config.experimental.list_physical_devices('GPU'))
    # Bound the index on both sides: a negative i used to pass the
    # "enough GPUs" check and yield an invalid device such as '/GPU:-1'.
    if 0 <= i < num_gpus:
        return tf.device(f'/GPU:{i}')
    return tf.device('/CPU:0')

def try_all_gpus():
    """Return a device context for every visible GPU, or a one-element CPU list when there is none."""
    gpu_count = len(tf.config.experimental.list_physical_devices('GPU'))
    if gpu_count == 0:
        return [tf.device('/CPU:0')]
    return [tf.device(f'/GPU:{idx}') for idx in range(gpu_count)]

try_gpu(), try_gpu(10), try_all_gpus()

(<tensorflow.python.eager.context._EagerDeviceContext at 0x7f105c9d2a00>,
 <tensorflow.python.eager.context._EagerDeviceContext at 0x7f105c9c40f0>,
 [<tensorflow.python.eager.context._EagerDeviceContext at 0x7f105c9c40a0>])

## Tensors and GPUs

In [63]:
x = tf.constant([1, 2, 3])
x.device

'/job:localhost/replica:0/task:0/device:CPU:0'

### Storage on the GPU

In [64]:
# Create the tensor under the device scope chosen by try_gpu()
# (GPU 0 when present, otherwise the CPU).
with try_gpu():
    X = tf.ones((2, 3))
X

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[1., 1., 1.],
       [1., 1., 1.]], dtype=float32)>

In [65]:
# Ask for a second GPU; try_gpu(1) falls back to the CPU scope when it does not exist.
with try_gpu(1):
    Y = tf.random.uniform((2, 3))
Y

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[9.8908198e-01, 3.0840039e-02, 3.8397312e-04],
       [1.2076485e-01, 9.5040560e-02, 6.9281840e-01]], dtype=float32)>

### Copying

In [66]:
# NOTE(review): `Z = X` binds a second name to the same tensor object; it
# presumably triggers no device transfer -- confirm, and consider
# tf.identity(X) inside the scope if a real copy on that device is intended.
with try_gpu(1):
    Z = X
print(X)
print(Z)

tf.Tensor(
[[1. 1. 1.]
 [1. 1. 1.]], shape=(2, 3), dtype=float32)
tf.Tensor(
[[1. 1. 1.]
 [1. 1. 1.]], shape=(2, 3), dtype=float32)


In [67]:
Y + Z

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[1.989082 , 1.03084  , 1.000384 ],
       [1.1207649, 1.0950406, 1.6928184]], dtype=float32)>

In [68]:
with try_gpu(1):
    Z2 = Z
# Plain assignment creates no new tensor, so both names refer to the same object.
Z2 is Z

True

## Neural Networks and GPUs

In [69]:
# Build the model inside the distribution strategy's scope; the log line below
# lists the devices the strategy picked up on this machine.
strategy = tf.distribute.MirroredStrategy()
with strategy.scope():
    net = tf.keras.models.Sequential([
        tf.keras.layers.Dense(1)])

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:CPU:0',)


In [70]:
net(X)

<tf.Tensor: shape=(2, 1), dtype=float32, numpy=
array([[2.0643282],
       [2.0643282]], dtype=float32)>

In [71]:
net.layers[0].weights[0].device, net.layers[0].weights[1].device

('/job:localhost/replica:0/task:0/device:CPU:0',
 '/job:localhost/replica:0/task:0/device:CPU:0')