In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import copy
import sys

sys.path.append('./mcunet')

from mcunet.gumbel_module.gumbel_net import GumbelMCUNets

from mcunet.tinynas.nn.modules import MBInvertedConvLayer
from mcunet.tinynas.nn.networks import MobileInvertedResidualBlock
from mcunet.model_zoo import build_model

from mcunet.utils import MyModule, MyNetwork, SEModule, build_activation, get_same_padding, sub_filter_start_end
from mcunet.tinynas.nn.modules import ZeroLayer, set_layer_from_config

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Build the 'mcunet-in4' network with pretrained=True. build_model returns a
# 3-tuple that is unpacked here; `model` is the network used as the reference
# in the cells below (img_size / desc are presumably the expected input
# resolution and a description string — TODO confirm against mcunet.model_zoo).
model, img_size, desc = build_model(net_id='mcunet-in4', pretrained=True)

In [3]:
# Search-space settings handed to GumbelMCUNets.build_from_config together
# with the reference model's own config.
# NOTE: the variable name `gubmel_config` (sic) is kept unchanged because it is
# a notebook-global that other cells may reference.
gubmel_config = dict(
    global_expand_ratio_list=[1, 3, 5, 6],
    global_kernel_size_list=[3, 5, 7],
    gumbel_feature_extract_block=2,
)

# Build the gumbel network from the reference architecture, then load the
# reference model's parameters into it.
net = GumbelMCUNets.build_from_config(model.config, gubmel_config)
net.load_pretrained_mcunet_param(model)

0 {'name': 'MobileInvertedResidualBlock', 'mobile_inverted_conv': {'name': 'MBInvertedConvLayer', 'in_channels': 32, 'out_channels': 16, 'kernel_size': 3, 'stride': 1, 'expand_ratio': 1, 'mid_channels': None, 'act_func': 'relu6', 'use_se': False}, 'shortcut': None}
1 {'name': 'MobileInvertedResidualBlock', 'mobile_inverted_conv': {'name': 'MBInvertedConvLayer', 'in_channels': 16, 'out_channels': 24, 'kernel_size': 7, 'stride': 2, 'expand_ratio': 3, 'mid_channels': 48, 'act_func': 'relu6', 'use_se': False}, 'shortcut': None}
load first_conv.conv.weight params (torch.Size([32, 3, 3, 3]))
load first_conv.bn.weight params (torch.Size([32]))
load first_conv.bn.bias params (torch.Size([32]))
load blocks.0.mobile_inverted_conv.depth_conv.conv.weight params (torch.Size([32, 1, 3, 3]))
load blocks.0.mobile_inverted_conv.depth_conv.bn.weight params (torch.Size([32]))
load blocks.0.mobile_inverted_conv.depth_conv.bn.bias params (torch.Size([32]))
load blocks.0.mobile_inverted_conv.point_linear.co

In [5]:
# Sanity check: after loading the pretrained parameters, the gumbel network's
# `forward_original` path should produce the same output as the reference
# model on the same random batch.
inputs = torch.randn(16, 3, 160, 160)

out = net.forward_original(inputs)
# Call the module itself rather than `.forward` so that any registered hooks
# run as well.
out2 = model(inputs)

# Element-wise difference between the two outputs; all zeros means the two
# networks agree on this batch.
print(out - out2)

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]], grad_fn=<SubBackward0>)


In [22]:
# Gradient smoke test for the gumbel forward path: the grad of
# net.gumbel_fc1.weight is printed before and after one forward/backward pass
# on a random batch.
gumbel_fc1_weight = net.gumbel_fc1.weight
print("before forward grad : ", gumbel_fc1_weight.grad)

out = net(torch.randn(32, 3, 160, 160))

# Reduce the output to a scalar so that backward() can be called on it.
torch.sum(out).backward()

print("after forward grad : \n", gumbel_fc1_weight.grad)

before forward grad :  None
0 idx 0 expand 3 kernel 1
gumbel shape :  torch.Size([32, 3])
1 idx 3 expand 1 kernel 2
gumbel shape :  torch.Size([32, 2])
1 4 3 5 torch.Size([96, 1, 5, 5])
2 idx 5 expand 3 kernel 3
3 idx 11 expand 1 kernel 1
4 idx 11 expand 1 kernel 3
gumbel shape :  torch.Size([32, 3])
1 6 5 7 torch.Size([160, 1, 7, 7])
1 4 3 5 torch.Size([160, 1, 7, 7])
5 idx 14 expand 2 kernel 3
6 idx 19 expand 2 kernel 1
gumbel shape :  torch.Size([32, 2])
7 idx 21 expand 2 kernel 3
gumbel shape :  torch.Size([32, 5])
1 6 5 7 torch.Size([240, 1, 7, 7])
1 4 3 5 torch.Size([240, 1, 7, 7])
8 idx 26 expand 1 kernel 1
9 idx 26 expand 2 kernel 2
gumbel shape :  torch.Size([32, 4])
1 4 3 5 torch.Size([288, 1, 5, 5])
10 idx 30 expand 2 kernel 2
gumbel shape :  torch.Size([32, 4])
1 4 3 5 torch.Size([288, 1, 5, 5])
11 idx 34 expand 1 kernel 3
12 idx 37 expand 2 kernel 3
gumbel shape :  torch.Size([32, 5])
1 6 5 7 torch.Size([576, 1, 7, 7])
1 4 3 5 torch.Size([576, 1, 7, 7])
13 idx 42 expand 2 

tensor([[ 1.0600e-06, -1.1182e-06, -4.1251e-06,  ...,  7.1878e-06,
          1.6874e-06,  2.5609e-06],
        [ 6.7503e-06,  5.0171e-06, -5.8395e-06,  ...,  6.9475e-07,
         -1.2265e-06,  8.7869e-06],
        [-3.8922e-06, -2.3463e-06, -9.2838e-07,  ...,  4.8356e-06,
         -4.1079e-06, -1.0903e-06],
        ...,
        [ 3.7491e-06,  4.1430e-06, -8.1688e-06,  ..., -1.1423e-05,
         -5.2271e-06, -8.3704e-06],
        [ 2.5260e-06,  1.3305e-06,  1.4562e-08,  ...,  9.1271e-06,
         -3.0333e-06,  3.6833e-06],
        [ 2.8772e-07, -1.1172e-06,  2.8125e-06,  ...,  7.8197e-07,
          3.9362e-06, -2.5796e-07]])

In [11]:
# Print the name of every parameter of `net` for which
# has_deep_attr(model, name) is true.
# NOTE(review): `has_deep_attr` is not defined in the visible cells — it
# presumably resolves a dotted attribute path on the module; confirm where it
# comes from so the notebook survives "Restart & Run All".
for param_name, _param in net.named_parameters():
    if not has_deep_attr(model, param_name):
        continue
    print(param_name)

first_conv.conv.weight
first_conv.bn.weight
first_conv.bn.bias
blocks.0.mobile_inverted_conv.depth_conv.conv.weight
blocks.0.mobile_inverted_conv.depth_conv.bn.weight
blocks.0.mobile_inverted_conv.depth_conv.bn.bias
blocks.0.mobile_inverted_conv.point_linear.conv.weight
blocks.0.mobile_inverted_conv.point_linear.bn.weight
blocks.0.mobile_inverted_conv.point_linear.bn.bias
blocks.1.mobile_inverted_conv.inverted_bottleneck.conv.weight
blocks.1.mobile_inverted_conv.inverted_bottleneck.bn.weight
blocks.1.mobile_inverted_conv.inverted_bottleneck.bn.bias
blocks.1.mobile_inverted_conv.depth_conv.conv.weight
blocks.1.mobile_inverted_conv.depth_conv.bn.weight
blocks.1.mobile_inverted_conv.depth_conv.bn.bias
blocks.1.mobile_inverted_conv.point_linear.conv.weight
blocks.1.mobile_inverted_conv.point_linear.bn.weight
blocks.1.mobile_inverted_conv.point_linear.bn.bias
blocks.2.mobile_inverted_conv.inverted_bottleneck.conv.weight
blocks.2.mobile_inverted_conv.inverted_bottleneck.bn.weight
blocks.2.mo

In [12]:
# Display the reference model's module tree (its repr) as the cell output.
model

ProxylessNASNets(
  (first_conv): ConvLayer(
    (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act): ReLU6(inplace=True)
  )
  (blocks): ModuleList(
    (0): MobileInvertedResidualBlock(
      (mobile_inverted_conv): MBInvertedConvLayer(
        (depth_conv): Sequential(
          (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (act): ReLU6(inplace=True)
        )
        (point_linear): Sequential(
          (conv): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
      )
    )
    (1): MobileInvertedResidualBlock(
      (mobile_inverted_conv): MBInvertedConvLayer(
        (inverted_b

In [13]:
# Build a standalone gumbel MBConv layer from the config of an existing
# block's `mobile_inverted_conv`, then display the resulting layer's config.
# NOTE(review): neither `MBGumbelInvertedConvLayer` nor `m` is defined or
# imported in the visible cells. Judging by the recorded config
# (in_channels=16, out_channels=24, kernel_size=7), `m` is presumably one of
# the model's blocks — TODO define both explicitly so this cell runs on a
# fresh kernel.
mbconv_test = MBGumbelInvertedConvLayer.build_from_config(m.mobile_inverted_conv.config)
mbconv_test.config

{'name': 'MBGumbelInvertedConvLayer',
 'in_channels': 16,
 'out_channels': 24,
 'kernel_size': 7,
 'kernel_size_list': [7, 5, 3],
 'stride': 2,
 'expand_ratio': 3,
 'expand_ratio_list': [1, 3],
 'mid_channels': 48,
 'act_func': 'relu6',
 'use_se': False}

In [14]:
# Smoke test: run the gumbel MBConv layer WITHOUT passing a gumbel selection.
# A hard one-hot selection is still sampled and printed here for inspection,
# but it is not fed to the layer in this cell.
inputs = torch.randn(2, 16, 32, 32)

# Toy selector: a linear layer maps a flattened random feature map to 5 logits.
gumbel_inputs = torch.randn(2, 4, 8, 8)
gumbel_inputs.requires_grad = True
gumbel_layer = nn.Linear(4*8*8, 5)
gumbel_output = gumbel_layer(gumbel_inputs.view(2, -1))

# hard=True returns one-hot rows while keeping a differentiable path.
gumbel_index = F.gumbel_softmax(gumbel_output, tau=1, hard=True)
print(gumbel_index)

# Feed the `inputs` tensor created above (previously it was unused and a
# second random tensor was generated), and call the module itself rather than
# `.forward` so that any registered hooks run.
out = mbconv_test(inputs)

tensor([[0., 0., 0., 0., 1.],
        [0., 1., 0., 0., 0.]], grad_fn=<AddBackward0>)


In [15]:
# Gradient test: run the gumbel MBConv layer WITH a sampled hard one-hot
# selection and backpropagate, so the selector's gradient can be inspected.
inputs = torch.randn(2, 16, 32, 32)

# Toy selector: a linear layer maps a flattened random feature map to 5 logits.
gumbel_inputs = torch.randn(2, 4, 8, 8)
gumbel_inputs.requires_grad = True
gumbel_layer = nn.Linear(4*8*8, 5)
gumbel_output = gumbel_layer(gumbel_inputs.view(2, -1))

# hard=True returns one-hot rows while keeping a differentiable path.
gumbel_index = F.gumbel_softmax(gumbel_output, tau=1, hard=True)
print(gumbel_index)

# Feed the `inputs` tensor created above (previously it was unused and a
# second random tensor was generated), and call the module itself rather than
# `.forward` so that any registered hooks run.
out = mbconv_test(inputs, gumbel_index)

# NOTE(review): the recorded output of the next cell shows an all-zero
# gumbel_layer.weight.grad. If the layer ends in a batch norm that uses batch
# statistics, out.sum() is constant w.r.t. everything upstream of it and the
# gradient is zero by construction — consider a non-degenerate loss (e.g.
# out.pow(2).sum()) before concluding anything from this check. TODO confirm.
out.sum().backward()

tensor([[0., 1., 0., 0., 0.],
        [0., 0., 1., 0., 0.]], grad_fn=<AddBackward0>)
gumbel shape :  torch.Size([2, 5])
1 6 5 7 torch.Size([48, 1, 7, 7])
1 4 3 5 torch.Size([48, 1, 7, 7])


In [16]:
# Display the gradient currently stored on the toy selector's weight
# (populated by a preceding backward() call through gumbel_index).
gumbel_layer.weight.grad

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])

In [None]:
# Snapshot the depthwise conv weight of mbconv_test. deepcopy gives an
# independent copy, so later changes to the layer's weight do not affect it.
original_mbconv_test_weight = copy.deepcopy(mbconv_test.depth_conv.conv.weight)
print(original_mbconv_test_weight)

In [None]:
# Print the depthwise conv weight of the source block `m`, for comparison with
# mbconv_test's weight.
# NOTE(review): `m` is not defined in the visible cells — confirm its origin.
print(m.mobile_inverted_conv.depth_conv.conv.weight)

In [None]:
# For every parameter of the source MBConv whose dotted name also exists on
# mbconv_test: print it, hand it to set_deep_attr for that name, and print a
# separator line.
# NOTE(review): `has_deep_attr` / `set_deep_attr` are not defined in the
# visible cells; set_deep_attr presumably assigns the given object at that
# attribute path — confirm whether it shares or copies the parameter.
for param_name, param in m.mobile_inverted_conv.named_parameters():
    if not has_deep_attr(mbconv_test, param_name):
        continue
    print(param_name, param)
    set_deep_attr(mbconv_test, param_name, param)
    print('------------------')

In [None]:
# For every parameter name shared by the source MBConv and mbconv_test, print
# the name followed by the element-wise difference between the two values.
# NOTE(review): `has_deep_attr` / `get_deep_attr` are not defined in the
# visible cells — confirm where they come from.
for param_name, param in m.mobile_inverted_conv.named_parameters():
    if not has_deep_attr(mbconv_test, param_name):
        continue
    print(param_name)
    difference = get_deep_attr(mbconv_test, param_name) - param
    print(difference)

In [None]:
# Run mbconv_test on a random input with the `gumbel` argument given as the
# integer 1.
# NOTE(review): the config recorded for mbconv_test earlier in this notebook
# reports in_channels=16, but this input has 32 channels; and other cells pass
# a one-hot tensor rather than an int as the gumbel argument. This cell has no
# recorded output — confirm it is still expected to run.
mbconv_test.forward(torch.randn(1,32,16,16), gumbel=1)

In [None]:
# 16-channel BatchNorm2d whose parameters and running statistics are sliced in
# the cells below.
bn_layer = nn.BatchNorm2d(num_features=16)

In [None]:
# Random input with 12 channels, i.e. fewer than bn_layer's 16 features.
x = torch.randn((1, 12, 32, 32))

In [None]:
# Apply batch norm functionally, using only the first `feature_dim` entries of
# bn_layer's running statistics and affine parameters so that they match the
# channel count of `x`. `training` is left at F.batch_norm's default, so no
# value is passed for it here.
feature_dim = 12
out = F.batch_norm(
    x,
    bn_layer.running_mean[:feature_dim],
    bn_layer.running_var[:feature_dim],
    weight=bn_layer.weight[:feature_dim],
    bias=bn_layer.bias[:feature_dim],
)

In [None]:
# Reduce `out` to a scalar and backpropagate, so that gradients are
# accumulated on the parameters `out` was computed from.
out.sum().backward()

In [None]:
# Display the gradient stored on the full BatchNorm weight.
# NOTE(review): if `out` came from the sliced F.batch_norm call, only the
# first `feature_dim` entries are expected to be non-zero — confirm.
bn_layer.weight.grad

In [None]:
# Rebuild the pretrained reference model, keep an independent deep copy of it,
# and build a second instance with pretrained=False to compare against.
model, img_size, desc = build_model(net_id='mcunet-in4', pretrained=True)

backup_model = copy.deepcopy(model)
model_copy = build_model(net_id='mcunet-in4', pretrained=False)[0]

# Report, per parameter, the largest absolute difference between the backup
# and the non-pretrained copy. A signed `.sum()` of the differences can cancel
# to ~0 even when the tensors differ, so max |diff| is used instead. Name
# mismatches are reported rather than silently skipped.
with torch.no_grad():
    for (n1, p1), (n2, p2) in zip(backup_model.named_parameters(), model_copy.named_parameters()):
        if n1 != n2:
            print(f"parameter name mismatch: {n1} vs {n2}")
            continue
        print(n1, (p1 - p2).abs().max().item())

In [None]:
# For every parameter of `model` whose dotted name also exists on
# `model_copy`: print the name and hand the parameter to set_deep_attr.
# NOTE(review): `has_deep_attr` / `set_deep_attr` are not defined in the
# visible cells; set_deep_attr presumably assigns the given object at that
# attribute path — confirm whether it shares or copies the parameter.
for param_name, param in model.named_parameters():
    if not has_deep_attr(model_copy, param_name):
        continue
    print(param_name)
    set_deep_attr(model_copy, param_name, param)

In [None]:
# Compare backup_model against model_copy parameter by parameter and report
# the largest absolute difference for each. A signed `.sum()` of the
# differences can cancel to ~0 even when the tensors differ, so max |diff| is
# used instead. Name mismatches are reported rather than silently skipped.
with torch.no_grad():
    for (n1, p1), (n2, p2) in zip(backup_model.named_parameters(), model_copy.named_parameters()):
        if n1 != n2:
            print(f"parameter name mismatch: {n1} vs {n2}")
            continue
        print(n1, (p1 - p2).abs().max().item())