<a href="https://colab.research.google.com/github/nugratar/pengantar-deep-learning/blob/main/Minggu3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Tugas Minggu 3 Deep Learning**

---

Nama: Anugrah Lestari

NIM: H071191059

Materi: [Chapter 5. Deep Learning Computation](https://d2l.ai/chapter_deep-learning-computation/index.html)

In [1]:
import numpy as np
import tensorflow as tf

## **5.1. Layers and Blocks**

In [2]:
net = tf.keras.models.Sequential([
    tf.keras.layers.Dense(256, activation=tf.nn.relu),
    tf.keras.layers.Dense(10),
])

X = tf.random.uniform((2, 20))
net(X)

<tf.Tensor: shape=(2, 10), dtype=float32, numpy=
array([[ 0.22587827,  0.2966476 , -0.11187722,  0.11894083,  0.19762437,
         0.0049703 , -0.03003908, -0.56404465, -0.13440756,  0.48708752],
       [-0.01162184,  0.45450974,  0.14661303, -0.13723812,  0.08274566,
         0.24840318, -0.09571005, -0.18770775, -0.07173897,  0.24179551]],
      dtype=float32)>

### **5.1.1. A Custom Block**

In [3]:
class MLP(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.hidden = tf.keras.layers.Dense(units=256, activation=tf.nn.relu)
        self.out = tf.keras.layers.Dense(units=10)
    def call(self, X):
        return self.out(self.hidden((X)))

In [4]:
net = MLP()
net(X)

<tf.Tensor: shape=(2, 10), dtype=float32, numpy=
array([[ 0.21027564,  0.33801153,  0.38639066,  0.11583588,  0.07167374,
         0.11505774, -0.0871055 , -0.27603048, -0.21334934, -0.3078347 ],
       [ 0.38026607,  0.30476686,  0.09281956,  0.07465142, -0.1670453 ,
        -0.00394521, -0.0274213 , -0.2785929 , -0.08020512, -0.20211133]],
      dtype=float32)>

### **5.1.2. The Sequential Block**

In [5]:
class MySequential(tf.keras.Model):
    def __init__(self, *args):
        super().__init__()
        self.modules = []
        for block in args:
            self.modules.append(block)

    def call(self, X):
        for module in self.modules:
            X = module(X)
        return X

In [6]:
net = MySequential(
    tf.keras.layers.Dense(units=256, activation=tf.nn.relu),
    tf.keras.layers.Dense(10))
net(X)

<tf.Tensor: shape=(2, 10), dtype=float32, numpy=
array([[ 0.02752724,  0.35556522,  0.07982898, -0.13700488,  0.05839343,
         0.42277822, -0.25712827, -0.5057378 , -0.42922506,  0.34451753],
       [-0.14693944,  0.18772082,  0.3060857 ,  0.05661932, -0.05371741,
         0.48677617, -0.4293428 , -0.33396488, -0.20798959,  0.14965913]],
      dtype=float32)>

### **5.1.3. Executing Code in the Forward Propagation Function**

In [7]:
class FixedHiddenMLP(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.flatten = tf.keras.layers.Flatten()
        self.rand_weight = tf.constant(tf.random.uniform((20, 20)))
        self.dense = tf.keras.layers.Dense(20, activation=tf.nn.relu)

    def call(self, inputs):
        X = self.flatten(inputs)
        X = tf.nn.relu(tf.matmul(X, self.rand_weight) + 1)
        X = self.dense(X)
        while tf.reduce_sum(tf.math.abs(X)) > 1:
            X /= 2
        return tf.reduce_sum(X)

In [8]:
net = FixedHiddenMLP()
net(X)

<tf.Tensor: shape=(), dtype=float32, numpy=0.7870227>

In [9]:
class NestMLP(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.net = tf.keras.Sequential()
        self.net.add(tf.keras.layers.Dense(64, activation=tf.nn.relu))
        self.net.add(tf.keras.layers.Dense(32, activation=tf.nn.relu))
        self.dense = tf.keras.layers.Dense(16, activation=tf.nn.relu)

    def call(self, inputs):
        return self.dense(self.net(inputs))

chimera = tf.keras.Sequential()
chimera.add(NestMLP())
chimera.add(tf.keras.layers.Dense(20))
chimera.add(FixedHiddenMLP())
chimera(X)

<tf.Tensor: shape=(), dtype=float32, numpy=0.6802695>

## **5.2. Parameter Management**

In [10]:
net = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(4, activation=tf.nn.relu),
    tf.keras.layers.Dense(1),
])

X = tf.random.uniform((2, 4))
net(X)

<tf.Tensor: shape=(2, 1), dtype=float32, numpy=
array([[ 0.07015495],
       [-0.21873581]], dtype=float32)>

### **5.2.1. Parameter Access**

In [11]:
print(net.layers[2].weights)

[<tf.Variable 'dense_13/kernel:0' shape=(4, 1) dtype=float32, numpy=
array([[ 0.6596291],
       [-0.79066  ],
       [ 0.3811915],
       [-1.0496095]], dtype=float32)>, <tf.Variable 'dense_13/bias:0' shape=(1,) dtype=float32, numpy=array([0.], dtype=float32)>]


**5.2.1.1. Targeted Parameters**

In [12]:
print(type(net.layers[2].weights[1]))
print(net.layers[2].weights[1])
print(tf.convert_to_tensor(net.layers[2].weights[1]))

<class 'tensorflow.python.ops.resource_variable_ops.ResourceVariable'>
<tf.Variable 'dense_13/bias:0' shape=(1,) dtype=float32, numpy=array([0.], dtype=float32)>
tf.Tensor([0.], shape=(1,), dtype=float32)


In [13]:
net.get_weights()[1]

array([0., 0., 0., 0.], dtype=float32)

**5.2.1.2. All Parameters at Once**

In [14]:
print(net.layers[1].weights)
print(net.get_weights())

[<tf.Variable 'dense_12/kernel:0' shape=(4, 4) dtype=float32, numpy=
array([[-0.56770664, -0.739567  ,  0.23949689,  0.40469986],
       [-0.8239748 , -0.14566529,  0.0324102 ,  0.2339856 ],
       [-0.63778126, -0.2838819 , -0.11342788, -0.69773626],
       [-0.8218518 , -0.792279  ,  0.72759134, -0.14758343]],
      dtype=float32)>, <tf.Variable 'dense_12/bias:0' shape=(4,) dtype=float32, numpy=array([0., 0., 0., 0.], dtype=float32)>]
[array([[-0.56770664, -0.739567  ,  0.23949689,  0.40469986],
       [-0.8239748 , -0.14566529,  0.0324102 ,  0.2339856 ],
       [-0.63778126, -0.2838819 , -0.11342788, -0.69773626],
       [-0.8218518 , -0.792279  ,  0.72759134, -0.14758343]],
      dtype=float32), array([0., 0., 0., 0.], dtype=float32), array([[ 0.6596291],
       [-0.79066  ],
       [ 0.3811915],
       [-1.0496095]], dtype=float32), array([0.], dtype=float32)]


**5.2.1.3. Collecting Parameters from Nested Blocks**

In [15]:
def block1(name):
    return tf.keras.Sequential([
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(4, activation=tf.nn.relu)],
        name=name)

def block2():
    net = tf.keras.Sequential()
    for i in range(4):
        net.add(block1(name=f'block-{i}'))
    return net

rgnet = tf.keras.Sequential()
rgnet.add(block2())
rgnet.add(tf.keras.layers.Dense(1))
rgnet(X)

<tf.Tensor: shape=(2, 1), dtype=float32, numpy=
array([[0.0394949],
       [0.       ]], dtype=float32)>

In [16]:
print(rgnet.summary())

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 sequential_5 (Sequential)   (2, 4)                    80        
                                                                 
 dense_18 (Dense)            (2, 1)                    5         
                                                                 
Total params: 85
Trainable params: 85
Non-trainable params: 0
_________________________________________________________________
None


In [17]:
rgnet.layers[0].layers[1].layers[1].weights[1]

<tf.Variable 'dense_15/bias:0' shape=(4,) dtype=float32, numpy=array([0., 0., 0., 0.], dtype=float32)>

### **5.2.2. Parameter Initialization**

**5.2.2.1. Built-in Initialization**

In [18]:
net = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(
        4, activation=tf.nn.relu,
        kernel_initializer=tf.random_normal_initializer(mean=0, stddev=0.01),
        bias_initializer=tf.zeros_initializer()),
    tf.keras.layers.Dense(1)])

net(X)
net.weights[0], net.weights[1]

(<tf.Variable 'dense_19/kernel:0' shape=(4, 4) dtype=float32, numpy=
 array([[ 4.7141714e-03,  1.3045545e-02, -4.5143887e-03,  2.0845432e-02],
        [-6.6234265e-03, -1.6503291e-04, -2.0403141e-02,  5.7090030e-05],
        [-2.8411844e-03, -7.8593828e-03, -8.9509673e-03,  8.2817087e-03],
        [-5.1554879e-03, -3.5459911e-03, -6.4757061e-03,  1.2355015e-02]],
       dtype=float32)>,
 <tf.Variable 'dense_19/bias:0' shape=(4,) dtype=float32, numpy=array([0., 0., 0., 0.], dtype=float32)>)

In [19]:
net = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(
        4, activation=tf.nn.relu,
        kernel_initializer=tf.keras.initializers.Constant(1),
        bias_initializer=tf.zeros_initializer()),
    tf.keras.layers.Dense(1),
])

net(X)
net.weights[0], net.weights[1]

(<tf.Variable 'dense_21/kernel:0' shape=(4, 4) dtype=float32, numpy=
 array([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]], dtype=float32)>,
 <tf.Variable 'dense_21/bias:0' shape=(4,) dtype=float32, numpy=array([0., 0., 0., 0.], dtype=float32)>)

In [20]:
net = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(
        4,
        activation=tf.nn.relu,
        kernel_initializer=tf.keras.initializers.GlorotUniform()),
    tf.keras.layers.Dense(
        1, kernel_initializer=tf.keras.initializers.Constant(42)),
])

net(X)
print(net.layers[1].weights[0])
print(net.layers[2].weights[0])

<tf.Variable 'dense_23/kernel:0' shape=(4, 4) dtype=float32, numpy=
array([[-0.4237068 , -0.6215355 ,  0.3314541 ,  0.11717027],
       [-0.5593823 , -0.38212284,  0.21387392, -0.41744643],
       [ 0.53974086,  0.76091236,  0.8532389 , -0.0862143 ],
       [ 0.00585443, -0.53170145,  0.7941012 ,  0.71333903]],
      dtype=float32)>
<tf.Variable 'dense_24/kernel:0' shape=(4, 1) dtype=float32, numpy=
array([[42.],
       [42.],
       [42.],
       [42.]], dtype=float32)>


**5.2.2.2. Custom Initialization**

In [21]:
class MyInit(tf.keras.initializers.Initializer):
    def __call__(self, shape, dtype=None):
        data=tf.random.uniform(shape, -10, 10, dtype=dtype)
        factor=(tf.abs(data) >= 5)
        factor=tf.cast(factor, tf.float32)
        return data * factor

net = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(
        4,
        activation=tf.nn.relu,
        kernel_initializer=MyInit()),
    tf.keras.layers.Dense(1),
])

net(X)
print(net.layers[1].weights[0])

<tf.Variable 'dense_25/kernel:0' shape=(4, 4) dtype=float32, numpy=
array([[ 0.       ,  6.081764 , -0.       ,  9.076174 ],
       [-9.709287 , -8.627005 ,  7.5660515,  6.1013584],
       [ 6.8232307, -5.0418687,  0.       , -0.       ],
       [ 0.       , -6.1968184, -9.94524  ,  0.       ]], dtype=float32)>


In [22]:
net.layers[1].weights[0][:].assign(net.layers[1].weights[0] + 1)
net.layers[1].weights[0][0, 0].assign(42)
net.layers[1].weights[0]

<tf.Variable 'dense_25/kernel:0' shape=(4, 4) dtype=float32, numpy=
array([[42.       ,  7.081764 ,  1.       , 10.076174 ],
       [-8.709287 , -7.6270046,  8.5660515,  7.1013584],
       [ 7.8232307, -4.0418687,  1.       ,  1.       ],
       [ 1.       , -5.1968184, -8.94524  ,  1.       ]], dtype=float32)>

### **5.2.3. Tied Parameters**

In [23]:
shared = tf.keras.layers.Dense(4, activation=tf.nn.relu)
net = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    shared,
    shared,
    tf.keras.layers.Dense(1),
])

net(X)

print(len(net.layers) == 3)

True


## **5.3. Deferred Initialization**

### **5.3.1. Instantiating a Network**

In [24]:
net = tf.keras.models.Sequential([
    tf.keras.layers.Dense(256, activation=tf.nn.relu),
    tf.keras.layers.Dense(10),
])

In [25]:
[net.layers[i].get_weights() for i in range(len(net.layers))]

[[], []]

In [26]:
X = tf.random.uniform((2, 20))
net(X)
[w.shape for w in net.get_weights()]

[(20, 256), (256,), (256, 10), (10,)]

## **5.4. Custom Layers**

### **5.4.1. Layers without Parameters**

In [27]:
class CenteredLayer(tf.keras.Model):
    def __init__(self):
        super().__init__()

    def call(self, inputs):
        return inputs - tf.reduce_mean(inputs)

In [28]:
layer = CenteredLayer()
layer(tf.constant([1, 2, 3, 4, 5]))

<tf.Tensor: shape=(5,), dtype=int32, numpy=array([-2, -1,  0,  1,  2], dtype=int32)>

In [29]:
net = tf.keras.Sequential([tf.keras.layers.Dense(128), CenteredLayer()])

In [30]:
Y = net(tf.random.uniform((4, 8)))
tf.reduce_mean(Y)

<tf.Tensor: shape=(), dtype=float32, numpy=-9.313226e-10>

### **5.4.2. Layers with Parameters**

In [31]:
class MyDense(tf.keras.Model):
    def __init__(self, units):
        super().__init__()
        self.units = units

    def build(self, X_shape):
        self.weight = self.add_weight(name='weight',
            shape=[X_shape[-1], self.units],
            initializer=tf.random_normal_initializer())
        self.bias = self.add_weight(
            name='bias', shape=[self.units],
            initializer=tf.zeros_initializer())

    def call(self, X):
        linear = tf.matmul(X, self.weight) + self.bias
        return tf.nn.relu(linear)

In [32]:
dense = MyDense(3)
dense(tf.random.uniform((2, 5)))
dense.get_weights()

[array([[-0.05425806,  0.04985551,  0.03630913],
        [ 0.00110864,  0.00778777,  0.02191167],
        [-0.00183461,  0.04339891, -0.02057284],
        [ 0.08061808, -0.02027545, -0.06995308],
        [ 0.01988108,  0.0402371 ,  0.07179581]], dtype=float32),
 array([0., 0., 0.], dtype=float32)]

In [33]:
dense(tf.random.uniform((2, 5)))

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[0.02231781, 0.09039012, 0.04925231],
       [0.00529003, 0.05870747, 0.06614687]], dtype=float32)>

In [34]:
net = tf.keras.models.Sequential([MyDense(8), MyDense(1)])
net(tf.random.uniform((2, 64)))

<tf.Tensor: shape=(2, 1), dtype=float32, numpy=
array([[0.03050093],
       [0.0381741 ]], dtype=float32)>

## **5.5. File I/O**

### **5.5.1. Loading and Saving Tensors**

In [35]:
x = tf.range(4)
np.save('x-file.npy', x)

In [36]:
x2 = np.load('x-file.npy', allow_pickle=True)
x2

array([0, 1, 2, 3], dtype=int32)

In [37]:
y = tf.zeros(4)
np.save('xy-files.npy', [x, y])
x2, y2 = np.load('xy-files.npy', allow_pickle=True)
(x2, y2)

(array([0., 1., 2., 3.]), array([0., 0., 0., 0.]))

In [38]:
mydict = {'x': x, 'y': y}
np.save('mydict.npy', mydict)
mydict2 = np.load('mydict.npy', allow_pickle=True)
mydict2

array({'x': <tf.Tensor: shape=(4,), dtype=int32, numpy=array([0, 1, 2, 3], dtype=int32)>, 'y': <tf.Tensor: shape=(4,), dtype=float32, numpy=array([0., 0., 0., 0.], dtype=float32)>},
      dtype=object)

### **5.5.2. Loading and Saving Model Parameters**

In [39]:
class MLP(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.flatten = tf.keras.layers.Flatten()
        self.hidden = tf.keras.layers.Dense(units=256, activation=tf.nn.relu)
        self.out = tf.keras.layers.Dense(units=10)

    def call(self, inputs):
        x = self.flatten(inputs)
        x = self.hidden(x)
        return self.out(x)

net = MLP()
X = tf.random.uniform((2, 20))
Y = net(X)

In [40]:
net.save_weights('mlp.params')

In [41]:
clone = MLP()
clone.load_weights('mlp.params')

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f7b0d3e3850>

In [42]:
Y_clone = clone(X)
Y_clone == Y

<tf.Tensor: shape=(2, 10), dtype=bool, numpy=
array([[ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True],
       [ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True]])>

## **5.6. GPUs**

### **5.6.1. Computing Devices**

In [43]:
tf.device('/CPU:0'), tf.device('/GPU:0'), tf.device('/GPU:1')

(<tensorflow.python.eager.context._EagerDeviceContext at 0x7f7b0d3a50a0>,
 <tensorflow.python.eager.context._EagerDeviceContext at 0x7f7b0d3a5820>,
 <tensorflow.python.eager.context._EagerDeviceContext at 0x7f7b0d3a58c0>)

In [44]:
len(tf.config.experimental.list_physical_devices('GPU'))

0

In [45]:
def try_gpu(i=0):
    """Return gpu(i) if exists, otherwise return cpu()."""
    if len(tf.config.experimental.list_physical_devices('GPU')) >= i + 1:
        return tf.device(f'/GPU:{i}')
    return tf.device('/CPU:0')

def try_all_gpus():
    """Return all available GPUs, or [cpu(),] if no GPU exists."""
    num_gpus = len(tf.config.experimental.list_physical_devices('GPU'))
    devices = [tf.device(f'/GPU:{i}') for i in range(num_gpus)]
    return devices if devices else [tf.device('/CPU:0')]

try_gpu(), try_gpu(10), try_all_gpus()

(<tensorflow.python.eager.context._EagerDeviceContext at 0x7f7b0d498280>,
 <tensorflow.python.eager.context._EagerDeviceContext at 0x7f7b0d4b4e60>,
 [<tensorflow.python.eager.context._EagerDeviceContext at 0x7f7b0d552780>])

### **5.6.2. Tensors and GPUs**

In [46]:
x = tf.constant([1, 2, 3])
x.device

'/job:localhost/replica:0/task:0/device:CPU:0'

**5.6.2.1. Storage on the GPU**

In [47]:
with try_gpu():
    X = tf.ones((2, 3))
X

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[1., 1., 1.],
       [1., 1., 1.]], dtype=float32)>

In [48]:
with try_gpu(1):
    Y = tf.random.uniform((2, 3))
Y

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[0.36515582, 0.17878664, 0.0017612 ],
       [0.42501247, 0.1452688 , 0.30965602]], dtype=float32)>

**5.6.2.2. Copying**

In [49]:
with try_gpu(1):
    Z = X
print(X)
print(Z)

tf.Tensor(
[[1. 1. 1.]
 [1. 1. 1.]], shape=(2, 3), dtype=float32)
tf.Tensor(
[[1. 1. 1.]
 [1. 1. 1.]], shape=(2, 3), dtype=float32)


In [50]:
Y + Z

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[1.3651558, 1.1787866, 1.0017612],
       [1.4250125, 1.1452688, 1.309656 ]], dtype=float32)>

In [51]:
with try_gpu(1):
    Z2 = Z
Z2 is Z

True

### **5.6.3. Neural Networks and GPUs**

In [52]:
strategy = tf.distribute.MirroredStrategy()
with strategy.scope():
    net = tf.keras.models.Sequential([
        tf.keras.layers.Dense(1)])

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:CPU:0',)


In [53]:
net(X)

<tf.Tensor: shape=(2, 1), dtype=float32, numpy=
array([[-1.132022],
       [-1.132022]], dtype=float32)>

In [54]:
net.layers[0].weights[0].device, net.layers[0].weights[1].device

('/job:localhost/replica:0/task:0/device:CPU:0',
 '/job:localhost/replica:0/task:0/device:CPU:0')