<a href="https://colab.research.google.com/github/prasanth5reddy/D2L/blob/master/Deep%20Learning%20Computation/parameter_management.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Installing Libraries

In [1]:
!pip install mxnet



Importing Libraries

In [0]:
from mxnet import nd,init
from mxnet.gluon import nn

Traditional MLP

In [3]:
# Two-layer perceptron: a 256-unit ReLU hidden layer followed by a 10-unit output layer.
net = nn.Sequential()
net.add(nn.Dense(256, activation='relu'),
        nn.Dense(10))
net.initialize()  # no initializer is passed, so the library default applies

# Feed a batch of two random 20-feature examples through the network.
x = nd.random.uniform(shape=(2, 20))
net(x)


[[ 0.09543004  0.04614332 -0.00286654 -0.07790349 -0.05130243  0.02942037
   0.08696642 -0.0190793  -0.04122177  0.05088576]
 [ 0.0769287   0.03099705  0.00856576 -0.04467199 -0.06926839  0.09132434
   0.06786595 -0.06187842 -0.03436673  0.04234694]]
<NDArray 2x10 @cpu(0)>

Parameter Access

In [4]:
# A Sequential container supports len() and integer indexing over its layers.
print(len(net))
# Every layer exposes its own parameters through `.params`.
first_params, second_params = net[0].params, net[1].params
print(first_params, second_params)

2
dense0_ (
  Parameter dense0_weight (shape=(256, 20), dtype=float32)
  Parameter dense0_bias (shape=(256,), dtype=float32)
) dense1_ (
  Parameter dense1_weight (shape=(10, 256), dtype=float32)
  Parameter dense1_bias (shape=(10,), dtype=float32)
)


Targeted Parameters

In [5]:
# `.bias` is the Parameter object itself; `.data()` returns the values it holds.
output_bias = net[1].bias
print(output_bias, output_bias.data())

Parameter dense1_bias (shape=(10,), dtype=float32) 
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
<NDArray 10 @cpu(0)>


In [6]:
# The same kind of lookup works by parameter name on the layer's `.params`.
first_layer_weight = net[0].params['dense0_weight']
print(first_layer_weight, first_layer_weight.data())

Parameter dense0_weight (shape=(256, 20), dtype=float32) 
[[ 0.06700657 -0.00369488  0.0418822  ... -0.05517294 -0.01194733
  -0.00369594]
 [-0.03296221 -0.04391347  0.03839272 ...  0.05636378  0.02545484
  -0.007007  ]
 [-0.0196689   0.01582889 -0.00881553 ...  0.01509629 -0.01908049
  -0.02449339]
 ...
 [ 0.00010955  0.0439323  -0.04911506 ...  0.06975312  0.0449558
  -0.03283203]
 [ 0.04106557  0.05671307 -0.00066976 ...  0.06387014 -0.01292654
   0.00974177]
 [ 0.00297424 -0.0281784  -0.06881659 ... -0.04047417  0.00457048
   0.05696651]]
<NDArray 256x20 @cpu(0)>


In [7]:
net[0].weight.grad()
# No backward pass has been run yet, so the gradient is still all zeros.


[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
<NDArray 256x20 @cpu(0)>

All Parameters at Once

In [8]:
# Called on a single layer, collect_params() lists only that layer's parameters ...
first_layer_params = net[0].collect_params()
print(first_layer_params)

# ... called on the container, it lists the parameters of every layer.
network_params = net.collect_params()
print(network_params)

dense0_ (
  Parameter dense0_weight (shape=(256, 20), dtype=float32)
  Parameter dense0_bias (shape=(256,), dtype=float32)
)
sequential0_ (
  Parameter dense0_weight (shape=(256, 20), dtype=float32)
  Parameter dense0_bias (shape=(256,), dtype=float32)
  Parameter dense1_weight (shape=(10, 256), dtype=float32)
  Parameter dense1_bias (shape=(10,), dtype=float32)
)


In [9]:
# The collected parameters can be indexed by full parameter name.
all_params = net.collect_params()
all_params['dense1_bias'].data()


[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
<NDArray 10 @cpu(0)>

In [10]:
# collect_params() also accepts a pattern that selects parameters by name.
weights_only = net.collect_params('.*weight')
first_layer_only = net.collect_params('dense0.*')
print(weights_only, first_layer_only)

sequential0_ (
  Parameter dense0_weight (shape=(256, 20), dtype=float32)
  Parameter dense1_weight (shape=(10, 256), dtype=float32)
) sequential0_ (
  Parameter dense0_weight (shape=(256, 20), dtype=float32)
  Parameter dense0_bias (shape=(256,), dtype=float32)
)


Rube Goldberg strikes again

In [11]:
def block1():
  """Return a new Sequential holding Dense(32, relu) followed by Dense(16, relu)."""
  stack = nn.Sequential()
  stack.add(nn.Dense(32, activation='relu'),
            nn.Dense(16, activation='relu'))
  return stack

def block2():
  """Return a new Sequential containing four separately built block1() stacks."""
  container = nn.Sequential()
  # The list is built left to right, so the sub-blocks are created in the
  # same order as they are added.
  container.add(*[block1() for _ in range(4)])
  return container

# Nest the four-block container inside an outer network and finish with a
# 10-unit output layer, then run the earlier input batch through it.
rgnet = nn.Sequential()
rgnet.add(block2(), nn.Dense(10))
rgnet.initialize()
rgnet(x)


[[ 1.0116727e-08 -9.4839003e-10 -1.1526797e-08  1.4917443e-08
  -1.5690811e-09 -3.9257650e-09 -4.1441655e-09  9.3013472e-09
   3.2393586e-09 -4.8612452e-09]
 [ 9.0111598e-09 -1.9115812e-10 -8.9595842e-09  1.0745880e-08
   1.4963460e-10 -2.2272872e-09 -3.9153973e-09  7.0595711e-09
   3.4854222e-09 -4.5807327e-09]]
<NDArray 2x10 @cpu(0)>

In [12]:
# Print the network itself to display its nested layer structure.
# (Printing `rgnet.collect_params` without calling it only shows the repr of
# the bound method, not a parameter listing.)
print(rgnet)

<bound method Block.collect_params of Sequential(
  (0): Sequential(
    (0): Sequential(
      (0): Dense(20 -> 32, Activation(relu))
      (1): Dense(32 -> 16, Activation(relu))
    )
    (1): Sequential(
      (0): Dense(16 -> 32, Activation(relu))
      (1): Dense(32 -> 16, Activation(relu))
    )
    (2): Sequential(
      (0): Dense(16 -> 32, Activation(relu))
      (1): Dense(32 -> 16, Activation(relu))
    )
    (3): Sequential(
      (0): Dense(16 -> 32, Activation(relu))
      (1): Dense(32 -> 16, Activation(relu))
    )
  )
  (1): Dense(16 -> 10, linear)
)>


In [13]:
# Flat listing of every parameter in the nested network.
rgnet_params = rgnet.collect_params()
print(rgnet_params)

sequential1_ (
  Parameter dense2_weight (shape=(32, 20), dtype=float32)
  Parameter dense2_bias (shape=(32,), dtype=float32)
  Parameter dense3_weight (shape=(16, 32), dtype=float32)
  Parameter dense3_bias (shape=(16,), dtype=float32)
  Parameter dense4_weight (shape=(32, 16), dtype=float32)
  Parameter dense4_bias (shape=(32,), dtype=float32)
  Parameter dense5_weight (shape=(16, 32), dtype=float32)
  Parameter dense5_bias (shape=(16,), dtype=float32)
  Parameter dense6_weight (shape=(32, 16), dtype=float32)
  Parameter dense6_bias (shape=(32,), dtype=float32)
  Parameter dense7_weight (shape=(16, 32), dtype=float32)
  Parameter dense7_bias (shape=(16,), dtype=float32)
  Parameter dense8_weight (shape=(32, 16), dtype=float32)
  Parameter dense8_bias (shape=(32,), dtype=float32)
  Parameter dense9_weight (shape=(16, 32), dtype=float32)
  Parameter dense9_bias (shape=(16,), dtype=float32)
  Parameter dense10_weight (shape=(10, 16), dtype=float32)
  Parameter dense10_bias (shape=(10,), dtype=float32)
)

In [14]:
# Nested containers are indexed level by level:
# outer network -> block2 container -> second block1 stack -> its first Dense layer.
second_stack = rgnet[0][1]
second_stack[0].bias.data()


[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0.]
<NDArray 32 @cpu(0)>

Parameter Initialization

Built-in Initialization

In [15]:
# force_reinit=True makes initialize() run again even though the parameters
# were already initialized earlier.
gaussian_init = init.Normal(sigma=0.01)
net.initialize(init=gaussian_init, force_reinit=True)
net[0].weight.data()[0]


[-0.008166   -0.00159167 -0.00273115  0.00684697  0.01204039  0.01359703
  0.00776908 -0.00640936  0.00256858  0.00545601  0.0018105  -0.00914027
  0.00133803  0.01070259 -0.00368285  0.01432678  0.00558631 -0.01479764
  0.00879013  0.00460165]
<NDArray 20 @cpu(0)>

In [16]:
# Re-initialize with the constant 1 and inspect the first row of the first
# layer's weight matrix.
ones_init = init.Constant(1)
net.initialize(init=ones_init, force_reinit=True)
net[0].weight.data()[0]


[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
<NDArray 20 @cpu(0)>

In [17]:
# Initializers can be applied to one layer or even to one parameter:
# the second layer gets the constant 55, the first layer's weight gets Xavier.
second_layer, first_weight_param = net[1], net[0].weight
second_layer.initialize(init=init.Constant(55), force_reinit=True)
first_weight_param.initialize(init=init.Xavier(), force_reinit=True)
print(second_layer.weight.data()[0, 0])
print(first_weight_param.data()[0])


[55.]
<NDArray 1 @cpu(0)>

[-0.14511706 -0.01173057 -0.03754489 -0.14020921  0.00900492  0.01712246
  0.12447387 -0.04094418 -0.12105145  0.00079902 -0.0277361  -0.10213967
 -0.14027238 -0.02196661 -0.04641148  0.11977354  0.03604397 -0.14493202
 -0.06514931  0.13826048]
<NDArray 20 @cpu(0)>


Custom Initialization

In [0]:
class MyInit(init.Initializer):
  """Initializer that draws from U(-10, 10) and zeroes entries with magnitude below 5."""
  def _init_weight(self, name, data):
    """Fill `data` in place; `name` is only used in the progress message."""
    print('Init', name, data.shape)
    data[:] = nd.random.uniform(low=-10, high=10, shape=data.shape)
    # Multiplying by the comparison mask keeps entries with |value| >= 5
    # and turns the rest into zero.
    keep_mask = data.abs() >= 5
    data *= keep_mask

In [19]:
# Re-initialize with the custom scheme (it prints a line for each weight it
# fills), then inspect the first row of the first layer's weights.
net.initialize(init=MyInit(), force_reinit=True)
net[0].weight.data()[0]

Init dense0_weight (256, 20)
Init dense1_weight (10, 256)



[-5.44481    6.536484  -0.         0.         0.         7.7452965
  7.739216   7.6021366  0.        -0.        -7.3307705 -0.
  9.611603   0.         7.4357147  0.         0.        -0.
  8.446959   0.       ]
<NDArray 20 @cpu(0)>

In [20]:
# Parameter values can also be edited directly through the array that data() returns.
first_weight = net[0].weight.data()
first_weight[:] += 1      # shift every entry by one
first_weight[0, 0] = 42   # overwrite a single entry
first_weight[0]


[42.         7.536484   1.         1.         1.         8.7452965
  8.739216   8.602137   1.         1.        -6.3307705  1.
 10.611603   1.         8.435715   1.         1.         1.
  9.446959   1.       ]
<NDArray 20 @cpu(0)>

Tied Parameters

In [21]:
net = nn.Sequential()
# Keep a handle on the shared layer so that its parameters can be passed to
# the layer that is tied to it.
shared = nn.Dense(8, activation='relu')
net.add(nn.Dense(8, activation='relu'))
net.add(shared)
net.add(nn.Dense(8, activation='relu', params=shared.params))  # reuses shared's parameters
net.add(nn.Dense(10))

net.initialize()
x = nd.random.uniform(shape=(2, 20))
net(x)


[[-3.26720801e-05 -1.50486550e-04 -1.03853330e-04  3.90124587e-05
  -6.71521339e-05  1.49683037e-04 -1.34553018e-04  5.02496259e-05
   1.36569170e-05  9.94169895e-05]
 [-6.49217909e-05 -1.80481613e-04 -4.07774714e-05  8.86480120e-05
  -3.06234469e-05  1.19763485e-04 -1.26902116e-04  4.91021055e-05
  -6.76310265e-06  8.90009178e-05]]
<NDArray 2x10 @cpu(0)>

In [22]:
# Fetch the weight arrays of the shared layer and of the layer tied to it.
shared_weight = net[1].weight.data()
tied_weight = net[2].weight.data()
# First check: the two rows hold the same values.
print(shared_weight[0] == tied_weight[0])
# Second check: write through one layer and compare again. If the rows still
# match, both layers use the very same parameter rather than equal copies.
shared_weight[0, 0] = 100
print(shared_weight[0] == tied_weight[0])


[1. 1. 1. 1. 1. 1. 1. 1.]
<NDArray 8 @cpu(0)>

[1. 1. 1. 1. 1. 1. 1. 1.]
<NDArray 8 @cpu(0)>
