In [37]:
import mxnet as mx
import numpy as np
from mxnet import nd, autograd, gluon
from mxnet.gluon import nn, Block
mx.random.seed(1)

###########################
#  Speficy the context we'll be using
###########################
ctx = mx.cpu()

###########################
#  Load up our dataset
###########################
batch_size = 64
def transform(data, label):
    return data.astype(np.float32)/255, label.astype(np.float32)
train_data = mx.gluon.data.DataLoader(mx.gluon.data.vision.MNIST(train=True, transform=transform),
                                      batch_size, shuffle=True)
test_data = mx.gluon.data.DataLoader(mx.gluon.data.vision.MNIST(train=False, transform=transform),
                                     batch_size, shuffle=False)

In [8]:
#Custom Layer
# Returns a Tensor with a Mean of Zero
class CenteredLayer(Block):
    def __init__(self, **kwargs):
        super(CenteredLayer, self).__init__(**kwargs)

    #This is the only `custom` operation 
    def forward(self, x):
        return x - nd.mean(x)

In [10]:
#The output of the `net` call is a mean centered version
# of [1,2,3,4,5]
net = CenteredLayer()
net(nd.array([1,2,3,4,5]))


[-2. -1.  0.  1.  2.]
<NDArray 5 @cpu(0)>

In [12]:
#Insert the custom Block into an existing net with
# more sophisticated, Blocks
net2 = nn.Sequential()
net2.add(nn.Dense(128))
net2.add(nn.Dense(10))
net2.add(CenteredLayer())

In [13]:
#Need to initialize the parameters of the Dense Blocks
# in net2
net2.collect_params().initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)

In [15]:
for data, _ in train_data:
    data = data.as_in_context(ctx)
    break
output = net2(data[0:1])

print(output)
print(nd.mean(output))

#NOTE: The mean is rather large for rounding/truncation error
# because MXNet uses low precision arithmetic operations. This
# results in significant speedups and 'doesn't affect' most deep
# learning algorithms. 
# - This may be an issue for NN that output probabilities)


[[ 0.01874375 -0.04043868 -0.06232345  0.40486944 -0.58767474  0.84935367
   0.30716246 -0.31174922 -0.31321037 -0.26473284]]
<NDArray 1x10 @cpu(0)>

[  5.96046457e-09]
<NDArray 1 @cpu(0)>


---

### Custom Layers with Parameters

In [17]:
# Toy Example with mxnet.Parameter defined outside of a Block

# Assign a name
# Specify gradient calculation 
# Specify shapes of each parameter
my_param = gluon.Parameter("exciting_parameter_yay"
                           , grad_req='write'
                           , shape=(5,5))
print(my_param)

my_param.initialize(mx.init.Xavier(magnitude=2.24)
                    , ctx=ctx)
print(my_param.data())

Parameter exciting_parameter_yay (shape=(5, 5), dtype=<type 'numpy.float32'>)

[[-0.6098488  -0.28859827  0.28575182  0.66764957 -0.03796715]
 [-0.03556556  0.1872465   0.00136471  0.00830621 -0.36514667]
 [-0.44752467 -0.57045501 -0.11451089  0.34310347  0.50825721]
 [-0.40443578  0.48336524 -0.51013076 -0.11458552 -0.14575809]
 [ 0.22497982 -0.09228575  0.34368902  0.28922212  0.41042238]]
<NDArray 5x5 @cpu(0)>


### Parameter Dictionaries
 - In practice, we’ll rarely instantiate our own ParameterDict. That’s because whenever we call the Block constructor it’s generated automatically. 

In [48]:
#Create a Parameter dictionary
pd = gluon.ParameterDict(prefix = 'block1_')

# Instantiate a new Parameter with pd.get()
pd.get("exciting_parameter_yay"
       , grad_req='write'
       , shape=(5,5))

# Check names of parameters in `pd`
print(pd.keys())

print(pd[pd.keys()[0]])


['block1_exciting_parameter_yay']
Parameter block1_exciting_parameter_yay (shape=(5, 5), dtype=<type 'numpy.float32'>)


### Build a Fully-Connected gluon Layer

In [30]:
def relu(X):
    return nd.maximum(X, 0)

In [31]:
# Define a Block
class MyDense(Block):
    ####################
    # We add arguments to our constructor (__init__)
    # to indicate the number of input units (``in_units``)
    # and output units (``units``)
    ####################
    def __init__(self, units, in_units=0, **kwargs):
        super(MyDense, self).__init__(**kwargs)
        with self.name_scope():
            self.units = units
            self._in_units = in_units
            #################
            # We add the required parameters to the ``Block``'s ParameterDict ,
            # indicating the desired shape
            #################
            self.weight = self.params.get(
                'weight', init=mx.init.Xavier(magnitude=2.24),
                shape=(in_units, units))
            self.bias = self.params.get('bias', shape=(units,))

    #################
    #  Now we just have to write the forward pass.
    #  We could rely upong the FullyConnected primitive in NDArray,
    #  but it's better to get our hands dirty and write it out
    #  so you'll know how to compose arbitrary functions
    #################
    def forward(self, x):
        with x.context:
            linear = nd.dot(x, self.weight.data()) + self.bias.data()
            activation = relu(linear)
            return activation

In [32]:
#instantiate and initialize custom layer
dense = MyDense(20,in_units=10)
dense.collect_params().initialize(ctx=ctx)

In [33]:
# Check Parameters of `dense`
dense.params

mydense1_ (
  Parameter mydense1_weight (shape=(10, 20), dtype=<type 'numpy.float32'>)
  Parameter mydense1_bias (shape=(20,), dtype=<type 'numpy.float32'>)
)

In [34]:
# Run data through custom Block
dense(nd.ones(shape = (2,10)))


[[ 0.          0.          1.17950749  0.          0.          0.33774683
   0.          0.2917698   0.36031649  0.          0.41916567  1.34597456
   0.          0.          1.0746069   0.8672781   0.          0.
   0.37210393  1.5765295 ]
 [ 0.          0.          1.17950749  0.          0.          0.33774683
   0.          0.2917698   0.36031649  0.          0.41916567  1.34597456
   0.          0.          1.0746069   0.8672781   0.          0.
   0.37210393  1.5765295 ]]
<NDArray 2x20 @cpu(0)>

### Building an MLP with Custom Block

In [41]:
net = gluon.nn.Sequential()
with net.name_scope():
    net.add(MyDense(128, in_units=784))
    net.add(MyDense(64, in_units=128))
    net.add(MyDense(10, in_units=64))

### Initialize Parameters

In [42]:
net.collect_params().initialize(ctx = ctx)

### Instantiate a Loss

In [43]:
loss = gluon.loss.SoftmaxCrossEntropyLoss()

### Optimizer

In [44]:
trainer = gluon.Trainer(net.collect_params()
                        ,'sgd'
                        ,{'learning_rate':0.1} 
                       )

### Evaluation Metric

In [45]:
metric = mx.metric.Accuracy()

def evaluate_accuracy(data_iterator, net):
    numerator = 0.
    denominator = 0.

    for i, (data, label) in enumerate(data_iterator):
        with autograd.record():
            data = data.as_in_context(ctx).reshape((-1,784))
            label = label.as_in_context(ctx)
            label_one_hot = nd.one_hot(label, 10)
            output = net(data)

        metric.update([label], [output])
    return metric.get()[1]

### Execute Training

In [46]:
epochs = 2  # Low number for testing, set higher when you run!
moving_loss = 0.

for e in range(epochs):
    for i, (data, label) in enumerate(train_data):
        data = data.as_in_context(ctx).reshape((-1,784))
        label = label.as_in_context(ctx)
        with autograd.record():
            output = net(data)
            cross_entropy = loss(output, label)
            cross_entropy.backward()
        trainer.step(data.shape[0])

    test_accuracy = evaluate_accuracy(test_data, net)
    train_accuracy = evaluate_accuracy(train_data, net)
    print("Epoch %s. Train_acc %s, Test_acc %s" % (e, train_accuracy, test_accuracy))

Epoch 0. Train_acc 0.742, Test_acc 0.7383
Epoch 1. Train_acc 0.749342857143, Test_acc 0.7432625
