In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import copy
import sys

sys.path.append('./mcunet')

from mcunet.gumbel_module.gumbel_net import GumbelMCUNets
from mcunet.gumbel_module.gumbel_layer import MBGumbelInvertedConvLayer, MobileGumbelInvertedResidualBlock
from mcunet.tinynas.nn.modules import MBInvertedConvLayer
from mcunet.tinynas.nn.networks import MobileInvertedResidualBlock
from mcunet.model_zoo import build_model

from mcunet.utils import MyModule, MyNetwork, SEModule, build_activation, get_same_padding, sub_filter_start_end
from mcunet.tinynas.nn.modules import ZeroLayer, set_layer_from_config

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Build the pretrained reference network and a Gumbel variant configured from it,
# then copy the reference parameters into the Gumbel variant.
ori_model, img_size, desc = build_model(net_id='mcunet-in4', pretrained=True)
gubmel_config = {
    'global_expand_ratio_list': [1, 3, 4, 5, 6],
    'global_kernel_size_list': [3, 5, 7],
    'gumbel_feature_extract_block': 2,
}
gumbel_model = GumbelMCUNets.build_from_config(ori_model.config, gubmel_config)
gumbel_model.load_pretrained_mcunet_param(ori_model)

inputs = torch.randn(16, 3, 160, 160)

out = gumbel_model.forward_original(inputs)
out2 = ori_model(inputs)
# Report the largest absolute deviation: a signed sum would let positive and
# negative errors cancel and could print ~0 even when the outputs disagree.
print((out - out2).abs().max())

0 {'name': 'MobileInvertedResidualBlock', 'mobile_inverted_conv': {'name': 'MBInvertedConvLayer', 'in_channels': 32, 'out_channels': 16, 'kernel_size': 3, 'stride': 1, 'expand_ratio': 1, 'mid_channels': None, 'act_func': 'relu6', 'use_se': False}, 'shortcut': None}
1 {'name': 'MobileInvertedResidualBlock', 'mobile_inverted_conv': {'name': 'MBInvertedConvLayer', 'in_channels': 16, 'out_channels': 24, 'kernel_size': 7, 'stride': 2, 'expand_ratio': 3, 'mid_channels': 48, 'act_func': 'relu6', 'use_se': False}, 'shortcut': None}
load first_conv.conv.weight params (torch.Size([32, 3, 3, 3]))
load first_conv.bn.weight params (torch.Size([32]))
load first_conv.bn.bias params (torch.Size([32]))
load blocks.0.mobile_inverted_conv.depth_conv.conv.weight params (torch.Size([32, 1, 3, 3]))
load blocks.0.mobile_inverted_conv.depth_conv.bn.weight params (torch.Size([32]))
load blocks.0.mobile_inverted_conv.depth_conv.bn.bias params (torch.Size([32]))
load blocks.0.mobile_inverted_conv.point_linear.co

In [3]:
# Call the module itself rather than .forward() so nn.Module.__call__ runs
# (registered hooks are skipped when forward() is invoked directly).
gumbel_model(inputs)

0 idx : 4 1
expand
55
tensor([[0., 1., 0., 0.],
        [0., 0., 0., 1.],
        [0., 0., 1., 0.],
        [0., 0., 1., 0.],
        [0., 0., 1., 0.],
        [1., 0., 0., 0.],
        [0., 0., 1., 0.],
        [0., 1., 0., 0.],
        [0., 1., 0., 0.],
        [1., 0., 0., 0.],
        [0., 1., 0., 0.],
        [0., 1., 0., 0.],
        [0., 0., 0., 1.],
        [1., 0., 0., 0.],
        [0., 1., 0., 0.],
        [0., 0., 1., 0.]], grad_fn=<AddBackward0>)
1 idx : 3 2
expand and kernel
gumbel one hot shape : torch.Size([16, 5])
55
tensor([[0., 1., 0., 1., 0.],
        [0., 0., 1., 1., 0.],
        [1., 0., 0., 0., 1.],
        [0., 0., 1., 1., 0.],
        [1., 0., 0., 1., 0.],
        [1., 0., 0., 1., 0.],
        [0., 0., 1., 0., 1.],
        [0., 1., 0., 1., 0.],
        [0., 1., 0., 0., 1.],
        [1., 0., 0., 0., 1.],
        [0., 0., 1., 1., 0.],
        [0., 0., 1., 1., 0.],
        [0., 0., 1., 0., 1.],
        [0., 0., 1., 0., 1.],
        [0., 0., 1., 0., 1.],
        [0.

tensor([[ 0.1199, -1.1755, -1.7459,  ...,  0.6347,  1.3588,  0.6848],
        [-0.4713,  0.9033, -3.0200,  ...,  0.0858,  1.8171,  1.7585],
        [-3.1973, -2.4502, -0.0440,  ...,  0.4652, -2.4316, -0.7511],
        ...,
        [-2.6966, -1.3019, -3.1374,  ..., -0.3173,  0.4488,  1.4645],
        [-0.5879, -1.2749,  0.2184,  ..., -0.7647,  1.0355,  1.9321],
        [ 0.5431,  0.2174,  0.3034,  ...,  0.3808,  0.5486,  0.8299]],
       grad_fn=<AddmmBackward0>)

In [4]:
# For every Gumbel residual block, show its shortcut module next to the stride
# of its depthwise convolution (the enumerate index was never used).
for block in gumbel_model.blocks:
    if isinstance(block, MobileGumbelInvertedResidualBlock):
        print(block.shortcut, block.mobile_inverted_conv.depth_conv.conv.stride)

IdentityLayer() (1, 1)
IdentityLayer() (1, 1)
None (2, 2)
IdentityLayer() (1, 1)
IdentityLayer() (1, 1)
None (2, 2)
IdentityLayer() (1, 1)
IdentityLayer() (1, 1)
None (1, 1)
IdentityLayer() (1, 1)
IdentityLayer() (1, 1)
None (2, 2)
IdentityLayer() (1, 1)
IdentityLayer() (1, 1)
None (1, 1)


0 idx : 4 1
expand
55
tensor([[0., 0., 1., 0.],
        [0., 0., 1., 0.],
        [0., 1., 0., 0.],
        [0., 0., 1., 0.],
        [0., 0., 1., 0.],
        [0., 1., 0., 0.],
        [0., 1., 0., 0.],
        [0., 0., 0., 1.],
        [0., 1., 0., 0.],
        [0., 0., 0., 1.],
        [0., 0., 1., 0.],
        [0., 0., 0., 1.],
        [0., 0., 1., 0.],
        [0., 0., 1., 0.],
        [0., 1., 0., 0.],
        [0., 0., 0., 1.]], grad_fn=<AddBackward0>)
1 idx : 3 2
expand and kernel
55
tensor([[1., 0., 0., 1., 0.],
        [0., 0., 1., 1., 0.],
        [1., 0., 0., 1., 0.],
        [0., 0., 1., 1., 0.],
        [0., 0., 1., 1., 0.],
        [0., 0., 1., 0., 1.],
        [0., 1., 0., 0., 1.],
        [0., 1., 0., 0., 1.],
        [1., 0., 0., 1., 0.],
        [0., 1., 0., 0., 1.],
        [0., 1., 0., 1., 0.],
        [1., 0., 0., 0., 1.],
        [0., 0., 1., 1., 0.],
        [1., 0., 0., 0., 1.],
        [0., 0., 1., 1., 0.],
        [1., 0., 0., 1., 0.]], grad_fn=<CatBackward0>)

tensor([[ 3.8448,  1.4015,  4.3325,  ...,  0.7505,  0.2776,  4.3317],
        [-7.0356, -4.0002, -2.7757,  ...,  0.3206, -4.3554,  0.7706],
        [-5.3106, -5.9325, -7.7053,  ..., -2.0939,  3.3589,  1.3137],
        ...,
        [ 5.7245,  0.2657,  1.2621,  ...,  0.0100,  4.3869, -0.8825],
        [-1.3125, -2.6280, -1.1142,  ..., -1.6452,  1.0936, -1.1470],
        [ 0.5951,  0.8839,  1.1419,  ..., -3.9329, -1.4844,  1.3321]],
       grad_fn=<AddmmBackward0>)

In [5]:
# One forward pass before switching modes (kept from the original cell).
gumbel_model(inputs)

# Switch to eval mode and check that repeated forward passes on the same
# batch agree with the first one.
gumbel_model.eval()
print(gumbel_model.training)
with torch.no_grad():  # comparison only; no autograd graph is needed
    out_origin = gumbel_model(inputs)
    for i in range(10):
        # A single scalar per run instead of dumping the whole difference tensor.
        print((out_origin - gumbel_model(inputs)).abs().max())


0 idx : 4 1
expand
5
tensor([[0., 0., 1., 0.],
        [1., 0., 0., 0.],
        [0., 1., 0., 0.],
        [0., 0., 0., 1.],
        [0., 0., 1., 0.],
        [0., 0., 1., 0.],
        [0., 1., 0., 0.],
        [0., 0., 1., 0.],
        [0., 0., 0., 1.],
        [0., 0., 0., 1.],
        [0., 0., 1., 0.],
        [0., 0., 1., 0.],
        [0., 0., 1., 0.],
        [0., 0., 0., 1.],
        [1., 0., 0., 0.],
        [0., 1., 0., 0.]], grad_fn=<AddBackward0>)
1 idx : 3 2
expand and kernel
5
tensor([[0., 0., 1., 1., 0.],
        [1., 0., 0., 0., 1.],
        [0., 0., 1., 1., 0.],
        [0., 1., 0., 0., 1.],
        [0., 0., 1., 1., 0.],
        [0., 0., 1., 0., 1.],
        [0., 0., 1., 0., 1.],
        [1., 0., 0., 0., 1.],
        [0., 0., 1., 0., 1.],
        [0., 0., 1., 0., 1.],
        [1., 0., 0., 1., 0.],
        [0., 1., 0., 1., 0.],
        [0., 1., 0., 1., 0.],
        [0., 1., 0., 1., 0.],
        [1., 0., 0., 1., 0.],
        [1., 0., 0., 1., 0.]], grad_fn=<CatBackward0>)
2

In [None]:
class test_gumbel(nn.Module):
    """Toy module that turns the logits of a linear layer into a one-hot choice.

    In training mode the choice is sampled with the hard Gumbel-softmax
    (``hard=True``), so repeated calls on the same input may differ.
    In eval mode the choice is the argmax of the logits, which is deterministic.

    Args:
        in_features: Size of each input vector (default 20).
        num_choices: Number of categories to choose between (default 4).
        tau: Gumbel-softmax temperature used in training mode (default 1).
    """

    def __init__(self, in_features=20, num_choices=4, tau=1):
        super().__init__()
        self.tau = tau
        self.test_layer = nn.Linear(in_features, num_choices)

    def forward(self, x):
        """Return a one-hot tensor with the same shape as the layer's logits."""
        logits = self.test_layer(x)
        if self.training:
            return F.gumbel_softmax(logits, tau=self.tau, hard=True, dim=-1)

        # Deterministic branch: put a 1.0 at the position of the largest logit.
        index = logits.argmax(dim=-1, keepdim=True)
        return torch.zeros_like(logits).scatter_(-1, index, 1.0)
# Use a dedicated name so the image batch stored in `inputs` by the model
# cells is not overwritten with a tensor of a different shape.
toy_inputs = torch.randn(5, 20)
gumbel = test_gumbel()

# Run five forward passes in train mode, then five in eval mode, on the same batch.
for enter_mode in (gumbel.train, gumbel.eval):
    enter_mode()
    for i in range(5):
        out = gumbel(toy_inputs)
        print(f"{i} iter -> ", out)


In [None]:
from torchprofile import profile_macs

# Module.cuda()/.to() move the module in place. Remember where the model lived
# and put it back afterwards so cells that feed it CPU tensors keep working.
# Falls back to the CPU when no GPU is available.
profile_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
home_device = next(gumbel_model.parameters()).device
try:
    total_mac = profile_macs(
        gumbel_model.to(profile_device),
        torch.randn(2, 3, 160, 160, device=profile_device),
    )
finally:
    gumbel_model.to(home_device)
print(total_mac)

In [None]:
from torchprofile.utils.flatten import Flatten
import warnings

# Trace on the GPU when one is available, then move the shared model back to
# its original device (Module.cuda()/.to() act in place on the module).
trace_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
home_device = next(gumbel_model.parameters()).device
try:
    # record=True captures warnings raised during tracing instead of printing them.
    with warnings.catch_warnings(record=True):
        graph, _ = torch.jit._get_trace_graph(
            Flatten(gumbel_model.to(trace_device)),
            torch.randn(2, 3, 160, 160, device=trace_device),
            None,
        )
finally:
    gumbel_model.to(home_device)

In [None]:
# Walk every node of the traced graph and print the debug name, scalar type
# and sizes of each input whose type kind contains "tensor".
variables = {}
for node in graph.nodes():
    for value in node.inputs():
        value_type = value.type()
        if 'tensor' not in value_type.kind().lower():
            continue
        print(value.debugName(), value_type.scalarType(), value_type.sizes())

In [None]:
# Print every traced node whose kind contains "mul_".
mul_nodes = (node for node in graph.nodes() if 'mul_' in node.kind().lower())
for node in mul_nodes:
    print(node)

In [None]:
def _param_size_mb(model):
    """Return the total size of ``model``'s parameters in MiB.

    Uses each parameter's own element size instead of assuming 4 bytes, so the
    figure stays correct for non-float32 parameters.
    """
    n_bytes = sum(p.numel() * p.element_size() for p in model.parameters())
    return n_bytes / 2**20


ori_model_size = _param_size_mb(ori_model)
gumbel_model_size = _param_size_mb(gumbel_model)
print("Ori model size : %.1f MB" % ori_model_size)
print("Gumbel model size : %.1f MB" % gumbel_model_size)

from torchinfo import summary

# Summary of the Gumbel model for a batch of four 160x160 RGB images.
summary(
    gumbel_model,
    input_size=(4, 3, 160, 160),
    col_width=16,
    col_names=['kernel_size', 'output_size', 'num_params', 'mult_adds', 'params_percent'],
    depth=2,
)


In [None]:
# Create the batch on whatever device the model currently lives on; earlier
# cells call gumbel_model.cuda(), which moves the module in place.
model_device = next(gumbel_model.parameters()).device

# An earlier cell left the model in eval() mode. Switch back to training mode
# for the gradient check.
# NOTE(review): assumes GumbelMCUNets only uses the differentiable Gumbel-softmax
# path in training mode, as the toy test_gumbel module does - confirm.
gumbel_model.train()

print("before forward grad : ", gumbel_model.gumbel_fc1.weight.grad)
out = gumbel_model(torch.randn(32, 3, 160, 160, device=model_device))

out.sum().backward()

print("after forward grad : \n", gumbel_model.gumbel_fc1.weight.grad)

In [None]:
# Print the name of every parameter of `net` for which has_deep_attr(model, name) is truthy.
# NOTE(review): `net` and `has_deep_attr` are not defined or imported by any earlier
# cell shown in this notebook, and `model` is only assigned in a later cell, so this
# cell relies on leftover kernel state - TODO define them explicitly or remove the cell.
for n, p in net.named_parameters():
    if has_deep_attr(model, n):
        print(n)

In [None]:
# Display the module repr of `model`.
# NOTE(review): in the visible cells `model` is first assigned further down
# (by a build_model call), so on a fresh kernel this cell would raise NameError - confirm.
model

In [None]:
# Build a standalone MBGumbelInvertedConvLayer from the config of `m.mobile_inverted_conv`
# and display the resulting layer's config.
# NOTE(review): `m` is not assigned in any visible cell; presumably it is one block
# of a model built earlier - TODO define it explicitly.
mbconv_test = MBGumbelInvertedConvLayer.build_from_config(m.mobile_inverted_conv.config)
mbconv_test.config

In [None]:
# Sample a hard (one-hot) Gumbel-softmax vector from a throw-away linear head.
inputs = torch.randn(2, 16, 32, 32)
gumbel_inputs = torch.randn(2, 4, 8, 8, requires_grad=True)
gumbel_layer = nn.Linear(4*8*8, 5)
gumbel_output = gumbel_layer(gumbel_inputs.view(2, -1))
gumbel_index = F.gumbel_softmax(gumbel_output, tau=1, hard=True)
print(gumbel_index)
# Feed the batch created above (it was previously unused while a second random
# tensor of the same shape was drawn). No gumbel argument is passed here.
out = mbconv_test(inputs)

In [None]:
# Sample a hard (one-hot) Gumbel-softmax vector from a throw-away linear head.
inputs = torch.randn(2, 16, 32, 32)
gumbel_inputs = torch.randn(2, 4, 8, 8, requires_grad=True)
gumbel_layer = nn.Linear(4*8*8, 5)
gumbel_output = gumbel_layer(gumbel_inputs.view(2, -1))
gumbel_index = F.gumbel_softmax(gumbel_output, tau=1, hard=True)
print(gumbel_index)
# Feed the batch created above (it was previously unused while a second random
# tensor of the same shape was drawn), together with the sampled selection.
out = mbconv_test(inputs, gumbel_index)
# Backpropagate a scalar so the head's .grad can be inspected afterwards.
out.sum().backward()

In [None]:
# Display the gradient stored on the linear head's weight; it is None if no
# backward pass has reached this parameter.
gumbel_layer.weight.grad

In [None]:
# Keep an independent copy of the depthwise conv weight and print it, so it can
# be compared against the layer's weight later.
original_mbconv_test_weight = copy.deepcopy(mbconv_test.depth_conv.conv.weight)
print(original_mbconv_test_weight)

In [None]:
# Print the depthwise conv weight of `m` (the block whose config mbconv_test was built from).
# NOTE(review): `m` is not assigned in any visible cell - confirm where it comes from.
print(m.mobile_inverted_conv.depth_conv.conv.weight)

In [None]:
# For each parameter of m.mobile_inverted_conv that mbconv_test also has
# (according to has_deep_attr), print it and assign it onto mbconv_test.
for name, param in m.mobile_inverted_conv.named_parameters():
    if not has_deep_attr(mbconv_test, name):
        continue
    print(name, param)
    set_deep_attr(mbconv_test, name, param)
    print('------------------')

In [None]:
# For each shared parameter name, print the element-wise difference between
# mbconv_test's value and the source block's value.
for name, param in m.mobile_inverted_conv.named_parameters():
    if not has_deep_attr(mbconv_test, name):
        continue
    print(name)
    print(get_deep_attr(mbconv_test, name) - param)

In [None]:
# Run the layer on a single random input, passing the integer 1 as `gumbel`.
# NOTE(review): this input has 32 channels, whereas the other mbconv_test calls in
# this notebook feed 16-channel inputs - confirm which block mbconv_test was built from.
mbconv_test.forward(torch.randn(1,32,16,16), gumbel=1)

In [None]:
# 16-channel batch-norm layer; the following cells apply only a slice of its parameters.
bn_layer = nn.BatchNorm2d(16)

In [None]:
# Random input with 12 channels, i.e. fewer than bn_layer's 16.
x = torch.randn(1, 12, 32, 32)

In [None]:
# Apply batch norm using only the first `feature_dim` entries of the layer's
# running statistics and affine parameters.
feature_dim = 12
out = F.batch_norm(
    x,
    running_mean=bn_layer.running_mean[:feature_dim],
    running_var=bn_layer.running_var[:feature_dim],
    weight=bn_layer.weight[:feature_dim],
    bias=bn_layer.bias[:feature_dim],
)

In [None]:
# Reduce the output to a scalar and backpropagate through it.
out.sum().backward()

In [None]:
# Display the gradient on the full 16-entry weight. Only the [:12] slice took part
# in the forward pass, so the remaining entries are expected to be zero.
bn_layer.weight.grad

In [None]:
# Pretrained network, a deep-copied backup of it, and a second network built
# with pretrained=False.
model, img_size, desc = build_model(net_id='mcunet-in4', pretrained=True)

backup_model = copy.deepcopy(model)
model_copy = build_model(net_id='mcunet-in4', pretrained=False)[0]

# Compare parameters pairwise. Use the max absolute difference: a signed sum
# can cancel to ~0 even when two tensors differ.
with torch.no_grad():
    for (n1, p1), (n2, p2) in zip(backup_model.named_parameters(), model_copy.named_parameters()):
        if n1 == n2:
            print((p1 - p2).abs().max())

In [None]:
# For every parameter of `model` that model_copy also has (according to
# has_deep_attr), print its name and assign it onto model_copy.
for name, param in model.named_parameters():
    if not has_deep_attr(model_copy, name):
        continue
    print(name)
    set_deep_attr(model_copy, name, param)

In [None]:
# Re-check the parameters pairwise against the backup. The max absolute
# difference is used because a signed sum can cancel to ~0 even when the
# tensors differ.
with torch.no_grad():
    for (n1, p1), (n2, p2) in zip(backup_model.named_parameters(), model_copy.named_parameters()):
        if n1 == n2:
            print((p1 - p2).abs().max())