In [1]:
import mxnet as mx
from mxnet.gluon import nn
from mxnet.gluon.contrib import nn as nn_contrib
from mxnet import nd
from mxnet import gluon
import numpy as np
# GPU device context (mx.gpu() with no argument selects device id 0).
# NOTE(review): none of the cells visible in this notebook reference `ctx`;
# the networks below are initialized without an explicit context.
ctx = mx.gpu()

In [2]:
# Width multiplier: scales the base channel count used by the network builders.
alpha = 0.25
# Channel count of the first convolution (base width 32 scaled by alpha).
num_filters = int(alpha * 32)

In [3]:
def dp_layer(nfilters, stride, expension_constant):
    """Build a grouped-3x3 + pointwise-1x1 convolution block.

    The block consists of a 3x3 convolution with ``groups=nfilters`` (a
    depthwise convolution when the input also has ``nfilters`` channels)
    followed by a 1x1 convolution. Each convolution is bias-free and is
    followed by batch normalisation and a ReLU.

    Parameters
    ----------
    nfilters : int
        Output channels (and group count) of the 3x3 convolution.
    stride : int
        Stride of the 3x3 convolution.
    expension_constant : int
        Multiplier applied to ``nfilters`` to get the output channels of the
        1x1 convolution.

    Returns
    -------
    nn.HybridSequential
        The assembled (not yet initialized) block.
    """
    def batch_norm():
        # Same batch-norm configuration after both convolutions.
        return nn.BatchNorm(use_global_stats=False, epsilon=1e-05, momentum=0.9, axis=1)

    block = nn.HybridSequential()
    layers = (
        # Spatial filtering: 3x3, one group per output channel.
        nn.Conv2D(nfilters, 3, strides=stride, padding=1, groups=nfilters, use_bias=False),
        batch_norm(),
        nn.Activation('relu'),
        # Channel mixing: 1x1 convolution to nfilters * expension_constant channels.
        nn.Conv2D(nfilters * expension_constant, 1, strides=1, padding=0, use_bias=False),
        batch_norm(),
        nn.Activation('relu'),
    )
    for layer in layers:
        block.add(layer)
    return block

In [4]:
from mxnet import gluon
def s16():
    """Assemble the first sub-network: conv_0 stem plus conv_1 .. conv_3.

    Channel widths are multiples of the module-level ``num_filters``. The
    stage ends with a bias-free, stride-2 grouped 3x3 convolution that has no
    batch norm or activation of its own.

    Returns
    -------
    nn.HybridSequential
        The hybridized (not yet initialized) sub-network.
    """
    width = num_filters
    net = nn.HybridSequential()
    layers = (
        # conv_0: stride-2 3x3 stem convolution + BN + ReLU
        nn.Conv2D(width, 3, strides=2, padding=1, use_bias=False),
        nn.BatchNorm(use_global_stats=False, epsilon=1e-05, momentum=0.9, axis=1),
        nn.Activation('relu'),
        # conv_1
        dp_layer(width, 1, 2),
        # conv_2
        dp_layer(width * 2, 2, 2),
        # conv_3
        dp_layer(width * 4, 1, 1),
        # trailing stride-2 grouped 3x3 convolution (one group per channel)
        nn.Conv2D(width * 4, 3, strides=2, padding=1, groups=width * 4, use_bias=False),
    )
    for layer in layers:
        net.add(layer)
    net.hybridize()
    return net

def s32():
    """Assemble the second sub-network: a BN/ReLU/1x1-conv head plus conv_4.

    The leading batch norm and ReLU are applied to whatever tensor is fed in
    (the original comment marks them as coming "from last layer"). Channel
    widths are multiples of the module-level ``num_filters``. The stage ends
    with a bias-free, stride-2 grouped 3x3 convolution.

    Returns
    -------
    nn.HybridSequential
        The hybridized (not yet initialized) sub-network.
    """
    channels = num_filters * 8
    net = nn.HybridSequential()
    layers = (
        # from last layer: normalise and activate the incoming feature map
        nn.BatchNorm(use_global_stats=False, epsilon=1e-05, momentum=0.9, axis=1),
        nn.Activation('relu'),
        # 1x1 convolution to num_filters*8 channels + BN + ReLU
        nn.Conv2D(channels, 1, strides=1, padding=0, use_bias=False),
        nn.BatchNorm(use_global_stats=False, epsilon=1e-05, momentum=0.9, axis=1),
        nn.Activation('relu'),
        # conv_4
        dp_layer(channels, 1, 1),
        # trailing stride-2 grouped 3x3 convolution (one group per channel)
        nn.Conv2D(channels, 3, strides=2, padding=1, groups=channels, use_bias=False),
    )
    for layer in layers:
        net.add(layer)
    net.hybridize()
    return net

def fc():
    """Assemble the final sub-network: conv_5 .. conv_11 and the classifier.

    Starts with batch norm + ReLU on the incoming tensor (marked "from last
    layer" in the original), then a 1x1 convolution, seven ``dp_layer``
    blocks, global average pooling, flatten and a 1000-unit dense layer.
    Channel widths are multiples of the module-level ``num_filters``.

    Returns
    -------
    nn.HybridSequential
        The hybridized (not yet initialized) sub-network.
    """
    channels = num_filters * 16
    net = nn.HybridSequential()

    # from last layer: normalise and activate the incoming feature map
    net.add(nn.BatchNorm(use_global_stats=False, epsilon=1e-05, momentum=0.9, axis=1))
    net.add(nn.Activation('relu'))
    # 1x1 convolution to num_filters*16 channels + BN + ReLU
    net.add(nn.Conv2D(channels, 1, strides=1, padding=0, use_bias=False))
    net.add(nn.BatchNorm(use_global_stats=False, epsilon=1e-05, momentum=0.9, axis=1))
    net.add(nn.Activation('relu'))

    # conv_5 .. conv_9: five identical stride-1 blocks at constant width
    for _ in range(5):
        net.add(dp_layer(channels, 1, 1))
    # conv_10: stride-2 block that doubles the channel count
    net.add(dp_layer(channels, 2, 2))
    # conv_11
    net.add(dp_layer(num_filters * 32, 1, 1))

    # classifier head
    net.add(nn.GlobalAvgPool2D())
    net.add(nn.Flatten())
    net.add(nn.Dense(1000))

    net.hybridize()
    return net

In [5]:
# Build one instance of each sub-network. The instances get their own names so
# the builder functions s16/s32/fc are not overwritten; rebinding them (as the
# cell previously did) makes the cell fail when it is executed a second time.
s16_net = s16()
s16_net.initialize()
s32_net = s32()
s32_net.initialize()
fc_net = fc()
fc_net.initialize()

# Parameter shapes are inferred lazily on the first forward pass, so each stage
# must see the output of the previous stage. Feeding the raw 3-channel image to
# s32 and fc would pin their first layers to 3 input channels and the saved
# checkpoints would carry the wrong shapes.
x = nd.zeros((2, 3, 256, 256))
s16_out = s16_net(x)
s32_out = s32_net(s16_out)
fc_net(s32_out)

# Persist the (randomly initialized) parameters; the following cells reload
# these files to inspect the parameter names of each sub-network.
s16_net.save_parameters("process/s16.params")
s32_net.save_parameters("process/s32.params")
fc_net.save_parameters("process/fc.params")

In [6]:
# Reload the saved s16 checkpoint; the cell output is its sorted parameter names.
s16_params = mx.nd.load("process/s16.params")
sorted(s16_params)

['0.weight',
 '1.beta',
 '1.gamma',
 '1.running_mean',
 '1.running_var',
 '3.0.weight',
 '3.1.beta',
 '3.1.gamma',
 '3.1.running_mean',
 '3.1.running_var',
 '3.3.weight',
 '3.4.beta',
 '3.4.gamma',
 '3.4.running_mean',
 '3.4.running_var',
 '4.0.weight',
 '4.1.beta',
 '4.1.gamma',
 '4.1.running_mean',
 '4.1.running_var',
 '4.3.weight',
 '4.4.beta',
 '4.4.gamma',
 '4.4.running_mean',
 '4.4.running_var',
 '5.0.weight',
 '5.1.beta',
 '5.1.gamma',
 '5.1.running_mean',
 '5.1.running_var',
 '5.3.weight',
 '5.4.beta',
 '5.4.gamma',
 '5.4.running_mean',
 '5.4.running_var',
 '6.weight']

In [7]:
# Reload the saved s32 checkpoint; the cell output is its sorted parameter names.
s32_params = mx.nd.load("process/s32.params")
sorted(s32_params)

['0.beta',
 '0.gamma',
 '0.running_mean',
 '0.running_var',
 '2.weight',
 '3.beta',
 '3.gamma',
 '3.running_mean',
 '3.running_var',
 '5.0.weight',
 '5.1.beta',
 '5.1.gamma',
 '5.1.running_mean',
 '5.1.running_var',
 '5.3.weight',
 '5.4.beta',
 '5.4.gamma',
 '5.4.running_mean',
 '5.4.running_var',
 '6.weight']

In [8]:
# Reload the saved fc checkpoint; the cell output is its sorted parameter names.
fc_params = mx.nd.load("process/fc.params")
sorted(fc_params)

['0.beta',
 '0.gamma',
 '0.running_mean',
 '0.running_var',
 '10.0.weight',
 '10.1.beta',
 '10.1.gamma',
 '10.1.running_mean',
 '10.1.running_var',
 '10.3.weight',
 '10.4.beta',
 '10.4.gamma',
 '10.4.running_mean',
 '10.4.running_var',
 '11.0.weight',
 '11.1.beta',
 '11.1.gamma',
 '11.1.running_mean',
 '11.1.running_var',
 '11.3.weight',
 '11.4.beta',
 '11.4.gamma',
 '11.4.running_mean',
 '11.4.running_var',
 '14.bias',
 '14.weight',
 '2.weight',
 '3.beta',
 '3.gamma',
 '3.running_mean',
 '3.running_var',
 '5.0.weight',
 '5.1.beta',
 '5.1.gamma',
 '5.1.running_mean',
 '5.1.running_var',
 '5.3.weight',
 '5.4.beta',
 '5.4.gamma',
 '5.4.running_mean',
 '5.4.running_var',
 '6.0.weight',
 '6.1.beta',
 '6.1.gamma',
 '6.1.running_mean',
 '6.1.running_var',
 '6.3.weight',
 '6.4.beta',
 '6.4.gamma',
 '6.4.running_mean',
 '6.4.running_var',
 '7.0.weight',
 '7.1.beta',
 '7.1.gamma',
 '7.1.running_mean',
 '7.1.running_var',
 '7.3.weight',
 '7.4.beta',
 '7.4.gamma',
 '7.4.running_mean',
 '7.4.runni

In [9]:
# Load the combined distilled checkpoint; its keys carry a sub-network prefix
# (e.g. 'fc.'), as the sorted listing in the cell output shows.
mn_dist_params = mx.nd.load("process/mobilenet0.25-distilled.params")
sorted(mn_dist_params)

['fc.0.beta',
 'fc.0.gamma',
 'fc.0.running_mean',
 'fc.0.running_var',
 'fc.10.0.weight',
 'fc.10.1.beta',
 'fc.10.1.gamma',
 'fc.10.1.running_mean',
 'fc.10.1.running_var',
 'fc.10.3.weight',
 'fc.10.4.beta',
 'fc.10.4.gamma',
 'fc.10.4.running_mean',
 'fc.10.4.running_var',
 'fc.11.0.weight',
 'fc.11.1.beta',
 'fc.11.1.gamma',
 'fc.11.1.running_mean',
 'fc.11.1.running_var',
 'fc.11.3.weight',
 'fc.11.4.beta',
 'fc.11.4.gamma',
 'fc.11.4.running_mean',
 'fc.11.4.running_var',
 'fc.14.bias',
 'fc.14.weight',
 'fc.2.weight',
 'fc.3.beta',
 'fc.3.gamma',
 'fc.3.running_mean',
 'fc.3.running_var',
 'fc.5.0.weight',
 'fc.5.1.beta',
 'fc.5.1.gamma',
 'fc.5.1.running_mean',
 'fc.5.1.running_var',
 'fc.5.3.weight',
 'fc.5.4.beta',
 'fc.5.4.gamma',
 'fc.5.4.running_mean',
 'fc.5.4.running_var',
 'fc.6.0.weight',
 'fc.6.1.beta',
 'fc.6.1.gamma',
 'fc.6.1.running_mean',
 'fc.6.1.running_var',
 'fc.6.3.weight',
 'fc.6.4.beta',
 'fc.6.4.gamma',
 'fc.6.4.running_mean',
 'fc.6.4.running_var',
 'fc

In [12]:
# Extract the s16 sub-network's parameters from the combined distilled
# checkpoint by stripping the 's16.' prefix from each key.
#
# The names are generated instead of typed out one by one: the hand-written
# version had a copy-paste slip in block 3 ('3.1.gamma' was read from
# 's16.3.1.beta', '3.1.running_mean' from 's16.3.1.gamma' and
# '3.1.running_var' from 's16.3.1.running_mean'). Here every target key is
# always read from the source key with the same suffix.
_s16_bn_fields = ('beta', 'gamma', 'running_mean', 'running_var')

# Layer 0 is the stem convolution, layer 1 its batch norm.
_s16_names = ['0.weight'] + ['1.' + field for field in _s16_bn_fields]
# Layers 3, 4, 5 are dp_layer blocks: conv (0), BN (1), conv (3), BN (4).
for _block in ('3', '4', '5'):
    _s16_names.append(_block + '.0.weight')
    _s16_names.extend(_block + '.1.' + field for field in _s16_bn_fields)
    _s16_names.append(_block + '.3.weight')
    _s16_names.extend(_block + '.4.' + field for field in _s16_bn_fields)
# Layer 6 is the trailing grouped convolution.
_s16_names.append('6.weight')

s16_dist_params = {name: mn_dist_params['s16.' + name] for name in _s16_names}

In [13]:
# Extract the s32 sub-network's parameters from the combined distilled
# checkpoint: each listed name is looked up under the 's32.' prefix and stored
# without it.
_s32_names = (
    '0.beta', '0.gamma', '0.running_mean', '0.running_var',
    '2.weight',
    '3.beta', '3.gamma', '3.running_mean', '3.running_var',
    '5.0.weight',
    '5.1.beta', '5.1.gamma', '5.1.running_mean', '5.1.running_var',
    '5.3.weight',
    '5.4.beta', '5.4.gamma', '5.4.running_mean', '5.4.running_var',
    '6.weight',
)
s32_dist_params = {name: mn_dist_params['s32.' + name] for name in _s32_names}

In [14]:
# Extract the fc sub-network's parameters from the combined distilled
# checkpoint: each generated name is looked up under the 'fc.' prefix and
# stored without it.
_fc_bn_fields = ('beta', 'gamma', 'running_mean', 'running_var')

# Layer 0: leading batch norm; layer 2: 1x1 convolution; layer 3: its batch norm.
_fc_names = ['0.' + field for field in _fc_bn_fields]
_fc_names.append('2.weight')
_fc_names.extend('3.' + field for field in _fc_bn_fields)
# Layers 5..11 are dp_layer blocks: conv (0), BN (1), conv (3), BN (4).
for _idx in range(5, 12):
    _fc_names.append('%d.0.weight' % _idx)
    _fc_names.extend('%d.1.%s' % (_idx, field) for field in _fc_bn_fields)
    _fc_names.append('%d.3.weight' % _idx)
    _fc_names.extend('%d.4.%s' % (_idx, field) for field in _fc_bn_fields)
# Layer 14: the dense classifier.
_fc_names.extend(['14.bias', '14.weight'])

fc_dist_params = {name: mn_dist_params['fc.' + name] for name in _fc_names}

In [15]:
# Write each per-sub-network parameter dict to its own checkpoint file.
for _path, _params in (
    ("process/mobilenet_0_25_s16_dist.params", s16_dist_params),
    ("process/mobilenet_0_25_s32_dist.params", s32_dist_params),
    ("process/mobilenet_0_25_fc_dist.params", fc_dist_params),
):
    mx.nd.save(_path, _params)