### Usage of Symbol in MXNet

### basic operators

In [8]:
import os
import sys
sys.path.append('/data/guest_users/mingdai/faster-rcnn')
import mxnet as mx
import numpy as np

In [10]:
# Declare two placeholder symbols; adding them composes a third symbol.
a = mx.sym.Variable('a')
b = mx.sym.Variable('b')
c = a + b
# No name was given for `c`, so one is generated for it automatically.
(a, b, c)

(<Symbol a>, <Symbol b>, <Symbol _plus1>)

Most **NDArray** operators can be applied to **Symbol**, for example:

In [15]:
# Element-wise product of the two symbols.
d = a * b
# Matrix multiplication (dot product) of the two symbols.
e = mx.sym.dot(a, b)
(d, e)
# Add both results and reshape the sum to (1, 4) ...
f = mx.sym.Reshape(d + e, shape=(1, 4))
# ... then broadcast it to (2, 4).
g = mx.sym.broadcast_to(f, shape=(2, 4))
# mx.viz.plot_network(symbol = g)

ImportError: Draw network requires graphviz library

In [18]:
# Stack layers by rebinding `net`:
# data -> fc1 (128 hidden units) -> relu1 -> fc2 (10 hidden units) -> softmax output.
net = mx.sym.Variable('data')
net = mx.sym.FullyConnected(data=net, name='fc1', num_hidden=128)
net = mx.sym.Activation(data=net, name='relu1', act_type='relu')
net = mx.sym.FullyConnected(data=net, name='fc2', num_hidden=10)
net = mx.sym.SoftmaxOutput(data=net, name='out')


##### Modularized Construction for Deep Networks

In [None]:
def ConvFactory(data, num_filter, kernel, stride=(1, 1), pad=(0, 0), name=None, suffix=''):
    """Chain Convolution -> BatchNorm -> ReLU Activation on top of `data`.

    `num_filter`, `kernel`, `stride` and `pad` are passed straight through to
    mx.symbol.Convolution. `name` and `suffix` are formatted into the name of
    each of the three layers.

    Returns the Activation symbol that ends the chain.
    """
    # Both pieces are substituted into every layer-name template below.
    name_parts = (name, suffix)
    convolved = mx.symbol.Convolution(
        data=data,
        num_filter=num_filter,
        kernel=kernel,
        stride=stride,
        pad=pad,
        name='con_%s%s' % name_parts,
    )
    normalized = mx.symbol.BatchNorm(data=convolved, name='bn_%s%s' % name_parts)
    return mx.symbol.Activation(data=normalized, act_type='relu', name='relu%s%s' % name_parts)
# Example: a 7x7, stride-2 ConvFactory block with 64 filters on a placeholder input.
prev = mx.symbol.Variable(name='Previous Output')
conv_comp = ConvFactory(data=prev, num_filter=64, kernel=(7, 7), stride=(2, 2))
# Shape keyed by the placeholder's name; not used further in this cell
# (presumably intended for shape inference or plotting -- TODO confirm).
shape = {"Previous Output": (128, 3, 28, 28)}

Then we can define a function that constructs an Inception module based on **ConvFactory**

In [21]:
def InceptionFactoryA(data, num_1X1, num_3X3red, num_3X3, num_d3X3red, num_d3X3, pool, proj, name):
    """Build the convolution branches of an Inception module (work in progress).

    Three branches are built on top of `data` with ConvFactory:
      * a 1x1 convolution with `num_1X1` filters;
      * a 1x1 reduction with `num_3X3red` filters followed by a padded 3x3
        convolution with `num_3X3` filters;
      * a 1x1 reduction with `num_d3X3red` filters followed by a padded 3x3
        convolution with `num_d3X3` filters.

    `name` prefixes every layer name. `pool` and `proj` are accepted but not
    used yet.

    NOTE: the module is unfinished -- the branches are not combined and the
    function currently returns None.
    """
    # 1X1
    c1X1 = ConvFactory(data=data, num_filter=num_1X1, kernel=(1, 1), name=('%s_1X1' % name))
    # 3X3 reduce + 3X3
    c3X3r = ConvFactory(data=data, num_filter=num_3X3red, kernel=(1, 1), name=('%s_3X3' % name), suffix='_reduce')
    c3X3 = ConvFactory(data=c3X3r, num_filter=num_3X3, kernel=(3, 3), pad=(1, 1), name=('%s_3X3' % name))

    # double 3x3 reduce + double 3x3
    cd3x3r = ConvFactory(data=data, num_filter=num_d3X3red, kernel=(1, 1), name=('%s_double_3x3' % name), suffix='_reduce')
    # Fixed: the call was misspelled `COnvFactory` and the filter count was
    # read from the undefined name `num_d3x3`; both raised NameError.
    cd3x3 = ConvFactory(data=cd3x3r, num_filter=num_d3X3, kernel=(3, 3), pad=(1, 1), name=('%s_double_3x3_1' % name))

    # TODO: not finished -- nothing is returned yet.
    return

#### Shape Inference
For each symbol, we can query its inputs (or arguments) and outputs.

In [23]:
# Names of the symbol's inputs (arguments) and of its outputs.
arg_name = c.list_arguments()
out_name = c.list_outputs()
# Infer shapes from the given input shapes; the third returned value is ignored.
arg_shape, out_shape, _ = c.infer_shape(a=(2, 3), b=(2, 3))

# Pair each name with its inferred shape.
{'input': dict(zip(arg_name, arg_shape)), 'output': dict(zip(out_name, out_shape))}

{'input': {'a': (2L, 3L), 'b': (2L, 3L)},
 'output': {'_plus1_output': (2L, 3L)}}

#### Bind with Data and Evaluate

In [33]:
# Bind the symbol to concrete input arrays on the CPU, producing an executor.
ex = c.bind(ctx=mx.cpu(), args={'a': mx.nd.ones((2, 3)),
                                'b': mx.nd.ones((2, 3))})
# Run the forward computation; results are then available in ex.outputs.
ex.forward()
# Call-style print with a single pre-formatted argument behaves the same on
# Python 2 and Python 3, and matches the print(...) call used for c.tojson().
print('numver of outputs = %d\n the first output =\n%s' % (len(ex.outputs), ex.outputs[0].asnumpy()))

numver of outputs = 1
 the first output =
[[ 2.  2.  2.]
 [ 2.  2.  2.]]


In [37]:
# with GPUs
ex_gpu = c.bind(ctx=mx.gpu(2),args={'a':mx.nd.ones((3,4),mx.gpu(2))*2,
                                    'b':mx.nd.ones((3,4),mx.gpu(2))*3})
ex_gpu.forward()
ex_gpu.outputs[0].asnumpy()

array([[ 5.,  5.,  5.,  5.],
       [ 5.,  5.,  5.,  5.],
       [ 5.,  5.,  5.,  5.]], dtype=float32)

#### Load and Save
We can also use *pickle*, or simply call *save* and *load* directly.  
Instead of a binary format, Symbol uses the JSON format, which is more readable for serialization. The **tojson** method returns the JSON string.

In [29]:
# Show the JSON serialization of the symbol.
print(c.tojson())
# Round-trip the symbol through a file on disk ...
c.save('symbol-c.json')
c2 = mx.symbol.load('symbol-c.json')
# ... and compare the JSON of the original with that of the reloaded copy.
c.tojson() == c2.tojson()

{
  "nodes": [
    {
      "op": "null", 
      "name": "a", 
      "inputs": []
    }, 
    {
      "op": "null", 
      "name": "b", 
      "inputs": []
    }, 
    {
      "op": "elemwise_add", 
      "name": "_plus1", 
      "inputs": [[0, 0, 0], [1, 0, 0]]
    }
  ], 
  "arg_nodes": [0, 1], 
  "node_row_ptr": [0, 1, 2, 3], 
  "heads": [[2, 0, 0]], 
  "attrs": {"mxnet_version": ["int", 904]}
}


True

#### Customized Symbol 
To implement an operator in Python, we only need to define the two computation methods, **forward** and **backward**, plus a few methods for querying its properties, such as *list_arguments* and *infer_shape*.

> we first create a subclass of **mx.operator.CustomOp**  and then define **forward** and **backward**

In [32]:
class Softmax(mx.operator.CustomOp):
    """Softmax implemented as a custom operator.

    forward() writes the row-wise softmax of in_data[0] to out_data[0];
    backward() writes that output, minus one at each row's label index
    (taken from in_data[1]), to in_grad[0].
    """

    def forward(self, is_train, req, in_data, out_data, aux):
        """Compute the softmax of in_data[0] along axis 1 into out_data[0]."""
        x = in_data[0].asnumpy()
        # Each row's maximum is subtracted before exponentiating; it cancels
        # in the normalisation below and keeps the exponentials from overflowing.
        y = np.exp(x - x.max(axis=1).reshape((x.shape[0], 1)))
        # Fixed: `.reshpae` was a typo and raised AttributeError.
        y /= y.sum(axis=1).reshape((x.shape[0], 1))
        self.assign(out_data[0], req[0], mx.nd.array(y))

    def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
        """Write (softmax output - 1 at the label index) into in_grad[0]."""
        # in_data[1] is flattened and used as one integer column index per row.
        # The builtin `int` replaces `np.int`, an alias for it that newer
        # NumPy releases no longer provide.
        l = in_data[1].asnumpy().ravel().astype(int)
        y = out_data[0].asnumpy()
        y[np.arange(l.shape[0]), l] -= 1.0
        self.assign(in_grad[0], req[0], mx.nd.array(y))
        
@mx.operator.register("softmax")
class SoftmaxProp(mx.operator.CustomOpProp):
    """Property class registered under the name "softmax".

    Declares the operator's arguments, infers its shapes and creates the
    Softmax operator instance.
    """

    def __init__(self):
        # softmax is a loss layer so we don't need the gradient input
        # from layers above.
        # Fixed: the class was declared as `SfotmaxProp`, so this super() call
        # referenced an undefined name and raised NameError.
        super(SoftmaxProp, self).__init__(need_top_grad=False)

    def list_arguments(self):
        """Return the names of the operator's inputs."""
        return ['data', 'label']

    def infer_shape(self, in_shape):
        """Return ([data shape, label shape], [output shape], []).

        The label shape is one entry per element of the data's first
        dimension; the output has the same shape as the data.
        """
        data_shape = in_shape[0]
        # Fixed: this was assigned to the misspelled `lable_shape`, leaving
        # `label_shape` undefined in the return statement.
        label_shape = (in_shape[0][0],)
        output_shape = in_shape[0]
        return [data_shape, label_shape], [output_shape], []

    def create_operator(self, ctx, shapes, dtypes):
        """Create the operator instance; the arguments are not used here."""
        return Softmax()


# Backward-compatible alias for the original, misspelled class name.
SfotmaxProp = SoftmaxProp
    