# Feed-forward fully connected network
<img src="img/FeedForward.png" height="480" width="480" />


# Activation Functions
<img src="img/activation.png" width="600" height="600">

In [1]:
from mxnet import gluon, nd

In [2]:
net = gluon.nn.Sequential()

In [3]:
with net.name_scope(): #Returns a name space object managing a child :py:class:`Block` and parameter names.
    net.add(gluon.nn.Dense(units=128, activation='relu'))
    net.add(gluon.nn.Dense(units=64, activation='relu'))
    net.add(gluon.nn.Dense(units=10))

In [4]:
net

Sequential(
  (0): Dense(None -> 128, Activation(relu))
  (1): Dense(None -> 64, Activation(relu))
  (2): Dense(None -> 10, linear)
)

In [5]:
net.collect_params()

sequential0_ (
  Parameter sequential0_dense0_weight (shape=(128, 0), dtype=float32)
  Parameter sequential0_dense0_bias (shape=(128,), dtype=float32)
  Parameter sequential0_dense1_weight (shape=(64, 0), dtype=float32)
  Parameter sequential0_dense1_bias (shape=(64,), dtype=float32)
  Parameter sequential0_dense2_weight (shape=(10, 0), dtype=float32)
  Parameter sequential0_dense2_bias (shape=(10,), dtype=float32)
)

In [6]:
net.initialize(force_reinit=True)

In [7]:
net[0].weight.data()
#error because no data is assigned

DeferredInitializationError: Parameter 'sequential0_dense0_weight' has not been initialized yet because initialization was deferred. Actual initialization happens during the first forward pass. Please pass one batch of data through the network before accessing Parameters. You can also avoid deferred initialization by specifying in_units, num_features, etc., for network layers.

# Binding data to the network
- You can see that the network has no values attached to it yet.
- Next we are creating a dataet and computing the network with the assigned data.

In [10]:
x = nd.random_normal(loc=0, scale=2, shape=(4,1,28,28))
y = net(x)
y.shape

(4, 10)

In [11]:
y


[[-0.05293976 -0.10608105  0.01933832  0.16411468 -0.08766328 -0.09690822
  -0.14190705 -0.13602577  0.19117233  0.21519439]
 [ 0.0975407  -0.07235308  0.13949803 -0.03645501  0.00467792 -0.08024731
  -0.10674612 -0.03932514  0.42517149  0.20621806]
 [-0.2611475  -0.1007035  -0.05576909  0.09544102 -0.19598499 -0.14384021
   0.09350166 -0.07735793 -0.04939874  0.12983452]
 [ 0.39649001 -0.07397338  0.1046305   0.11360808  0.18582129 -0.10736685
  -0.26358518 -0.02380536  0.31710705  0.13270037]]
<NDArray 4x10 @cpu(0)>

In [12]:
(net[0].weight.data().shape)

(128, 784)

In [13]:
net.collect_params()

sequential0_ (
  Parameter sequential0_dense0_weight (shape=(128, 784), dtype=float32)
  Parameter sequential0_dense0_bias (shape=(128,), dtype=float32)
  Parameter sequential0_dense1_weight (shape=(64, 128), dtype=float32)
  Parameter sequential0_dense1_bias (shape=(64,), dtype=float32)
  Parameter sequential0_dense2_weight (shape=(10, 64), dtype=float32)
  Parameter sequential0_dense2_bias (shape=(10,), dtype=float32)
)

# Convolutoinal Neural Networks - Feature extrction
<img src="img/featureextraction.png" width=480, height=480>

# Convolution
<img src="img/conv.png" height="480" width="600">

# Pooling
<img src="img/pooling.png" height="480" width="480">

In [15]:
conv_net = gluon.nn.Sequential()
conv_net.add(
    # Feature Extraction layers 
    gluon.nn.Conv2D(channels=6, kernel_size=5, activation='relu'),
    gluon.nn.MaxPool2D(pool_size=2, strides=2),
    gluon.nn.Conv2D(channels=16, kernel_size=3, activation='relu'),
    gluon.nn.MaxPool2D(pool_size=2, strides=2),
    
    gluon.nn.Flatten(),
    
    #Dense Layers
    gluon.nn.Dense(120, activation="relu"),
    gluon.nn.Dense(84, activation="relu"),
    gluon.nn.Dense(10))
conv_net

Sequential(
  (0): Conv2D(None -> 6, kernel_size=(5, 5), stride=(1, 1))
  (1): MaxPool2D(size=(2, 2), stride=(2, 2), padding=(0, 0), ceil_mode=False)
  (2): Conv2D(None -> 16, kernel_size=(3, 3), stride=(1, 1))
  (3): MaxPool2D(size=(2, 2), stride=(2, 2), padding=(0, 0), ceil_mode=False)
  (4): Flatten
  (5): Dense(None -> 120, Activation(relu))
  (6): Dense(None -> 84, Activation(relu))
  (7): Dense(None -> 10, linear)
)

# Challenge
- Create a random normal dataset with dimentions: $4\times 1\times 28\times 28$
- Print network parameters
- Initialize the convolutoinal network
- Calculate a forward pass ```conv_net(x)```
- Print network parameters and compare them with intial parameters

In [17]:
x = nd.random.uniform(low=-1, high=1, shape=(4,1,28,28), )
x


[[[[ 0.18196833  0.90744281  0.71544528 ..., -0.10524338 -0.48263186
     0.45539272]
   [ 0.69807661  0.48447597 -0.93339074 ..., -0.88806832 -0.34659821
    -0.82693505]
   [-0.53451174 -0.52565342  0.2289294  ..., -0.25582463  0.89509785
    -0.83428907]
   ..., 
   [-0.04555839  0.30440664 -0.79965723 ...,  0.87727594 -0.78141713
    -0.85595369]
   [ 0.58397961  0.32714021  0.28375471 ...,  0.11080635  0.14695954
     0.3635664 ]
   [-0.68463123  0.25241089  0.78305233 ...,  0.47739434  0.95705974
     0.33531511]]]


 [[[ 0.24307644 -0.14142084 -0.21319914 ...,  0.3416034   0.22959232
     0.3424679 ]
   [-0.64672887 -0.68806112 -0.58323634 ..., -0.06946385 -0.56647813
     0.17255282]
   [-0.60991514  0.01296473 -0.90403956 ..., -0.39649004  0.71677685
     0.73780239]
   ..., 
   [-0.40322983  0.31495166 -0.9697268  ...,  0.37385547 -0.97515726
    -0.83280432]
   [-0.95366049 -0.24307543 -0.9619965  ...,  0.49953234 -0.66279256
     0.26014793]
   [-0.06715506  0.59362316  0.

In [18]:
conv_net = gluon.nn.Sequential()
conv_net.add(
    # Feature Extraction layers 
    gluon.nn.Conv2D(channels=6, kernel_size=5, activation='relu'),
    gluon.nn.MaxPool2D(pool_size=2, strides=2),
    gluon.nn.Conv2D(channels=16, kernel_size=3, activation='relu'),
    gluon.nn.MaxPool2D(pool_size=2, strides=2),
    
    gluon.nn.Flatten(),
    
    #Dense Layers
    gluon.nn.Dense(120, activation="relu"),
    gluon.nn.Dense(84, activation="relu"),
    gluon.nn.Dense(10))
conv_net

Sequential(
  (0): Conv2D(None -> 6, kernel_size=(5, 5), stride=(1, 1))
  (1): MaxPool2D(size=(2, 2), stride=(2, 2), padding=(0, 0), ceil_mode=False)
  (2): Conv2D(None -> 16, kernel_size=(3, 3), stride=(1, 1))
  (3): MaxPool2D(size=(2, 2), stride=(2, 2), padding=(0, 0), ceil_mode=False)
  (4): Flatten
  (5): Dense(None -> 120, Activation(relu))
  (6): Dense(None -> 84, Activation(relu))
  (7): Dense(None -> 10, linear)
)

In [19]:
y = conv_net(x)
y.shape

RuntimeError: Parameter 'conv4_weight' has not been initialized. Note that you should initialize parameters and create Trainer with Block.collect_params() instead of Block.params because the later does not include Parameters of nested child Blocks