## Avaliando [Crash Course](https://mxnet.apache.org/versions/master/api/python/docs/tutorials/getting-started/crash-course/2-create-nn.html)


### Step2

In [55]:
import mxnet as mx
from mxnet import np, npx
from mxnet.gluon import nn

npx.set_np()

mx.__version__

'2.0.0'

## Step 2 - Create neural network
Uma camada com 3 entradas e 5 saidas, com função de ativação "RELU"


In [56]:
layer=nn.Dense(5,in_units=3,activation='relu')
layer

Dense(3 -> 5, Activation(relu))

In [57]:
layer.initialize()

#### dez linhas com três parametros

In [58]:
x=np.random.uniform(-1,1,(10,3))
x

array([[ 0.9304558 ,  0.62095165,  0.57590795],
       [-0.23812461,  0.4895475 ,  0.40970135],
       [ 0.8304789 ,  0.83184505, -0.2338106 ],
       [ 0.25073445, -0.80570596,  0.57387054],
       [ 0.06210291,  0.50480986,  0.7998245 ],
       [-0.30788028,  0.8492992 ,  0.11609352],
       [-0.5782545 ,  0.74868107, -0.32136083],
       [-0.17012817, -0.34187388,  0.7680478 ],
       [-0.359024  ,  0.6662005 , -0.94704115],
       [ 0.31440222, -0.69534445, -0.03637332]])

In [59]:
layer(x)

array([[0.        , 0.        , 0.        , 0.00695837, 0.02151532],
       [0.        , 0.0196271 , 0.        , 0.06759939, 0.        ],
       [0.03224901, 0.00728238, 0.        , 0.        , 0.03928804],
       [0.        , 0.        , 0.06608538, 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.06925236, 0.        ],
       [0.01515879, 0.05853312, 0.        , 0.07884857, 0.        ],
       [0.03958181, 0.09199545, 0.        , 0.06745759, 0.        ],
       [0.        , 0.        , 0.04870468, 0.03085256, 0.        ],
       [0.07424875, 0.10676001, 0.        , 0.01475125, 0.01642966],
       [0.        , 0.        , 0.03956246, 0.        , 0.00527947]])

In [60]:
layer.params

{'weight': Parameter (shape=(5, 3), dtype=float32),
 'bias': Parameter (shape=(5,), dtype=float32)}

#### pesos e bias de entrada de cada elemento da camada interna de 5 neuronios.

In [61]:
layer.weight.data()

array([[-0.00296517,  0.02493882, -0.05973333],
       [-0.06011468,  0.05420877, -0.05180714],
       [-0.01905191, -0.06704587,  0.02934997],
       [-0.06633657,  0.06156359,  0.05287929],
       [ 0.03232814,  0.00826898, -0.02378718]])

In [62]:
layer.bias.data()


array([0., 0., 0., 0., 0.])

Chain layers into a neural network using nn.Sequential

Sequential é a ligação de camadas onde a saída de uma camada é a entrada de outra

In [63]:
net = nn.Sequential()

net.add(nn.Dense(5,in_units=3,activation='relu'),nn.Dense(25,activation='relu'),nn.Dense(2))

net

Sequential(
  (0): Dense(3 -> 5, Activation(relu))
  (1): Dense(-1 -> 25, Activation(relu))
  (2): Dense(-1 -> 2, linear)
)

In [64]:
net[1]

Dense(-1 -> 25, Activation(relu))

#### Custom neural network architecture flexibly

In [65]:
class Net(nn.Block):
    def __init__(self):
        super().__init__()
        
    def forward(self,x):
        return x

In [66]:
class MLP(nn.Block):
    def __init__(self):
        super().__init__()
        self.dense1=nn.Dense(5,activation='relu')
        self.dense2=nn.Dense(25,activation='relu')
        self.dense3=nn.Dense(2)
        
    def forward(self,x):
        layer1=self.dense1(x)
        layer2=self.dense2(layer1)
        layer3=self.dense3(layer2)
        return layer3

In [67]:
net=MLP()
net

MLP(
  (dense1): Dense(-1 -> 5, Activation(relu))
  (dense2): Dense(-1 -> 25, Activation(relu))
  (dense3): Dense(-1 -> 2, linear)
)

In [68]:
net.dense1.params

{'weight': Parameter (shape=(5, -1), dtype=float32),
 'bias': Parameter (shape=(5,), dtype=float32)}

#### Creating custom layers using Parameters

In [69]:
from mxnet.gluon import Parameter

weight=Parameter("custom_parameter_weight",shape=(5,-1))
bias=Parameter("custom_parameter_bias",shape=(5,-1))

weight,bias

(Parameter (shape=(5, -1), dtype=<class 'numpy.float32'>),
 Parameter (shape=(5, -1), dtype=<class 'numpy.float32'>))

Custom layer com função linear sem função de ativação. 

w*x+b

In [70]:
class custom_layer(nn.Block):
    def __init__(self,out_units, in_units=0):
        super().__init__()
        
        # mxnet 2.0
        self.weight=Parameter("weight",shape=(in_units,out_units),allow_deferred_init=True)
        self.bias=Parameter("bias",shape=(out_units,),allow_deferred_init=True)
        
        # mxnet 1.8.0
        # self.weight = self.params.get('weight', shape=(in_units, out_units),allow_deferred_init=True)
        # self.bias = self.params.get('bias', shape=(out_units,),allow_deferred_init=True)

        
    def forward(self,x):
        return np.dot(x,self.weight.data())+self.bias.data()

In [71]:
dense=custom_layer(3,in_units=5)
dense.initialize()
dense(np.random.uniform(size=(4,5)))

array([[-0.01971773, -0.05515386, -0.07169655],
       [-0.01348839, -0.05879273, -0.04236437],
       [-0.06800018, -0.07280456, -0.01472434],
       [ 0.00791047, -0.05942319, -0.04943746]])

### [LeNet](http://yann.lecun.com/exdb/lenet/)

In [72]:
class LeNet(nn.Block):
    def __init__(self):
        super().__init__()
        self.conv1=nn.Conv2D(channels=6,kernel_size=3,activation='relu')
        self.pool1=nn.MaxPool2D(pool_size=2,strides=2)
        self.conv2=nn.Conv2D(channels=16,kernel_size=3,activation='relu')
        self.pool2=nn.MaxPool2D(pool_size=2,strides=2)
        self.dense1=nn.Dense(120,activation='relu')
        self.dense2=nn.Dense(84,activation='relu')
        self.dense3=nn.Dense(10)
        
    def forward(self,x):
        x=self.conv1(x)
        x=self.pool1(x)
        x=self.conv2(x)
        x=self.pool2(x)
        x=self.dense1(x)
        x=self.dense2(x)
        x=self.dense3(x)
        return x

lenet=LeNet()        

In [73]:
class LeNet_custom(nn.Block):
    def __init__(self):
        super().__init__()
        self.conv1=nn.Conv2D(channels=6,kernel_size=3,activation='relu')
        self.pool1=nn.MaxPool2D(pool_size=2,strides=2)
        self.conv2=nn.Conv2D(channels=16,kernel_size=3,activation='relu')
        self.pool2=nn.MaxPool2D(pool_size=2,strides=2)
        self.dense1=nn.Dense(120,activation='relu')
        self.dense2=nn.Dense(84,activation='relu')
        self.dense3=custom_layer(10,84)
        
    def forward(self,x):
        x=self.conv1(x)
        x=self.pool1(x)
        x=self.conv2(x)
        x=self.pool2(x)
        x=self.dense1(x)
        x=self.dense2(x)
        x=self.dense3(x)
        return x

lenet_custom=LeNet_custom()        

In [74]:
image_data=np.random.uniform(-1,1,(1,1,28,28))

lenet.initialize()
lenet_custom.initialize()

print("LeNet:")
print(lenet(image_data))

print("Custom LeNet:")
print(lenet_custom(image_data))


LeNet:
[[-6.8408676e-04  2.3123024e-03  3.8441597e-04  7.9229695e-04
   8.2185236e-04 -2.0441706e-03 -1.9353793e-03  1.3393344e-03
  -5.3992699e-04 -9.2347735e-05]]
Custom LeNet:
[[ 4.5696136e-02 -4.4086133e-05  2.1577444e-02  3.3180423e-02
   5.8146108e-02  6.3894823e-02  4.6673581e-02  8.2850773e-03
  -1.7607763e-02  2.0094015e-02]]


In [75]:
lenet.conv1.weight.data().shape, lenet.dense1.bias.data().shape,lenet_custom.conv1.weight.data().shape, lenet_custom.dense1.bias.data().shape


((6, 1, 3, 3), (120,), (6, 1, 3, 3), (120,))

#### Using predefined (pretrained) architectures

[Gluon CV model zoo](https://cv.gluon.ai/model_zoo/index.html)

[Gluon NLP model zoo](https://nlp.gluon.ai/model_zoo/index.html)

In [76]:
from mxnet.gluon import model_zoo

net=model_zoo.vision.resnet50_v2(pretrained=True)
net.hybridize()

dummy_input=np.ones(shape=(1,3,224,224))
output=net(dummy_input)
output.shape


(1, 1000)

### Deciding the paradigm for your network

In [77]:
net_hybrid_seq=nn.HybridSequential()

net_hybrid_seq.add(nn.Dense(5,in_units=3,activation='relu'),
    nn.Dense(25,activation='relu'),nn.Dense(2))

net_hybrid_seq

HybridSequential(
  (0): Dense(3 -> 5, Activation(relu))
  (1): Dense(-1 -> 25, Activation(relu))
  (2): Dense(-1 -> 2, linear)
)

In [78]:
net_hybrid_seq.hybridize()

### Creating custom layers using Parameters(HbridBlocks API)

In [79]:
class CustomLayer(nn.HybridBlock):
    def __init__(self,out_units, in_units=-1):
        super().__init__()
        self.weight=Parameter("weight",shape=(in_units, out_units),allow_deferred_init=True)
        self.bias=Parameter("bias",shape=(out_units,),allow_deferred_init=True)
        
    def forward(self, x):
        print(self.weight.shape,self.bias.shape)
        return np.dot(x,self.weight.data())+self.bias.data()

    def infer_shape(self,x):
        print(self.weight.shape,x.shape)
        self.weight.shape=(x. shape[-1],self.weight.shape[1])


dense=CustomLayer(3)

dense.initialize()
dense(np.random.uniform(size=(4,5)))
    

(-1, 3) (4, 5)
(5, 3) (3,)


array([[-0.11437228,  0.10194876, -0.04716104],
       [-0.11570357,  0.07732303, -0.08061042],
       [-0.15427211, -0.04824062, -0.07051762],
       [-0.16056198,  0.08461182, -0.08503564]])

### Performance

In [80]:
from time import time

def benchmark(net,x):
    y=net(x)
    start=time()
    for i in range(1,1000):
        y=net(x)
    return time()-start


x_bench=np.random.normal(size=(1,521))

net_hybrid_seq=nn.HybridSequential()

net_hybrid_seq.add (nn.Dense(256,activation='relu'),
                nn.Dense(128,activation='relu'),
                nn.Dense(2))

net_hybrid_seq.initialize()

print('Before hybridizing: %.4f sec'%(benchmark(net_hybrid_seq, x_bench)))
net_hybrid_seq.hybridize()
print('After hybridizing: %.4f'%(benchmark(net_hybrid_seq, x_bench)))

Before hybridizing: 0.5018 sec
After hybridizing: 0.1699


In [81]:
from mxnet.gluon import HybridBlock

class MLP_Hybrid(HybridBlock):
    def __init__(self):
        super().__init__()
        self.dense1=nn.Dense(256,activation='relu')
        self.dense2=nn.Dense(123,activation='relu')
        self.dense3=nn.Dense(2)

    def forward(self,x):
        layer1=self.dense1(x)
        layer2=self.dense2(layer1)
        layer3=self.dense3(layer2)
        return layer3

net_hybrid=MLP_Hybrid()
net_hybrid.initialize()

print('Before hybridizing: %.4f sec'%(benchmark(net_hybrid, x_bench)))
net_hybrid.hybridize()
print('After hybridizing: %.4f'%(benchmark(net_hybrid, x_bench)))

Before hybridizing: 0.5161 sec
After hybridizing: 0.2285


#### Saving and Loading your models

In [82]:
file_name='../../../data/crashCourse/models/layer.params'
layer.save_parameters(file_name)

In [83]:
def build_model():
    layer=nn.Dense(5,in_units=3,activation='relu')
    return layer

layer_new=build_model()

In [84]:
layer_new.load_parameters(file_name)

#### Save/load the model weights/parameters and the architectures

In [85]:
net_hybrid.export('../../../data/crashCourse/models/MLP_hybrid')

('../../../data/crashCourse/models/MLP_hybrid-symbol.json',
 '../../../data/crashCourse/models/MLP_hybrid-0000.params')

In [86]:
import warnings

with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    net_loaded=nn.SymbolBlock.imports('../../../data/crashCourse/models/MLP_hybrid-symbol.json',['data'], '../../../data/crashCourse/models/MLP_hybrid-0000.params',device=None)

In [87]:
net_loaded(x_bench)

array([[-0.08079498,  0.10126774]])

#### Visualizing Models

In [88]:
layer.summary(x)

--------------------------------------------------------------------------------
        Layer (type)                                Output Shape         Param #
               Input                                     (10, 3)               0
        Activation-1                                     (10, 5)               0
             Dense-2                                     (10, 5)              20
Parameters in forward computation graph, duplicate included
   Total params: 20
   Trainable params: 20
   Non-trainable params: 0
Shared params in forward computation graph: 0
Unique parameters in model: 20
--------------------------------------------------------------------------------


In [89]:
lenet.summary(image_data)

--------------------------------------------------------------------------------
        Layer (type)                                Output Shape         Param #
               Input                              (1, 1, 28, 28)               0
        Activation-1                              (1, 6, 26, 26)               0
            Conv2D-2                              (1, 6, 26, 26)              60
         MaxPool2D-3                              (1, 6, 13, 13)               0
        Activation-4                             (1, 16, 11, 11)               0
            Conv2D-5                             (1, 16, 11, 11)             880
         MaxPool2D-6                               (1, 16, 5, 5)               0
        Activation-7                                    (1, 120)               0
             Dense-8                                    (1, 120)           48120
        Activation-9                                     (1, 84)               0
            Dense-10        

In [90]:
net_hybrid_summary=MLP_Hybrid()

net_hybrid_summary.initialize()

net_hybrid_summary.summary(x_bench)

net_hybrid_summary.hybridize()

--------------------------------------------------------------------------------
        Layer (type)                                Output Shape         Param #
               Input                                    (1, 521)               0
        Activation-1                                    (1, 256)               0
             Dense-2                                    (1, 256)          133632
        Activation-3                                    (1, 123)               0
             Dense-4                                    (1, 123)           31611
             Dense-5                                      (1, 2)             248
        MLP_Hybrid-6                                      (1, 2)               0
Parameters in forward computation graph, duplicate included
   Total params: 165491
   Trainable params: 165491
   Non-trainable params: 0
Shared params in forward computation graph: 0
Unique parameters in model: 165491
---------------------------------------------------