In [36]:
import mcunet
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import copy

from mcunet.tinynas.nn.modules import MBInvertedConvLayer
from mcunet.tinynas.nn.networks import MobileInvertedResidualBlock
from mcunet.model_zoo import build_model

from mcunet.utils import MyModule, MyNetwork, SEModule, build_activation, get_same_padding, sub_filter_start_end
from mcunet.tinynas.nn.modules import ZeroLayer, set_layer_from_config



In [37]:
def get_deep_attr(obj, attrs):
    for attr in attrs.split("."):
        obj = getattr(obj, attr)
    return obj

def has_deep_attr(obj, attrs):
    try:
        get_deep_attr(obj, attrs)
        return True
    except AttributeError:
        return False

def set_deep_attr(obj, attrs, value):
    for attr in attrs.split(".")[:-1]:
        obj = getattr(obj, attr)
    setattr(obj, attrs.split(".")[-1], value)
    

In [2]:
model, img_size, desc = build_model(net_id='mcunet-in4', pretrained=True)

In [3]:
count = 0
for n, m in model.named_modules():
    if isinstance(m, MobileInvertedResidualBlock):
        print(n)
        print(m)
        count += 2
        if count > 3: 
            break

blocks.0
MobileInvertedResidualBlock(
  (mobile_inverted_conv): MBInvertedConvLayer(
    (depth_conv): Sequential(
      (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
      (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (act): ReLU6(inplace=True)
    )
    (point_linear): Sequential(
      (conv): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
)
blocks.1
MobileInvertedResidualBlock(
  (mobile_inverted_conv): MBInvertedConvLayer(
    (inverted_bottleneck): Sequential(
      (conv): Conv2d(16, 48, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (act): ReLU6(inplace=True)
    )
    (depth_conv): Sequential(
      (conv): Conv2d(48, 48, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3)

In [4]:
m.config

{'name': 'MobileInvertedResidualBlock',
 'mobile_inverted_conv': {'name': 'MBInvertedConvLayer',
  'in_channels': 16,
  'out_channels': 24,
  'kernel_size': 7,
  'stride': 2,
  'expand_ratio': 3,
  'mid_channels': 48,
  'act_func': 'relu6',
  'use_se': False},
 'shortcut': None}

In [6]:
mm  = m.mobile_inverted_conv

In [7]:
mm.config

{'name': 'MBInvertedConvLayer',
 'in_channels': 16,
 'out_channels': 24,
 'kernel_size': 7,
 'stride': 2,
 'expand_ratio': 3,
 'mid_channels': 48,
 'act_func': 'relu6',
 'use_se': False}

In [8]:
from collections import OrderedDict


class MBGumbelInvertedConvLayer(MyModule):
    global_kernel_size_list = [3,5,7]
    global_expand_ratio_list = [1,3,4,5,6]
    def __init__(self, in_channels, out_channels,
                 kernel_size=3, stride=1, expand_ratio=6, mid_channels=None, act_func='relu6', use_se=False, **kwargs):
        super(MBGumbelInvertedConvLayer, self).__init__()

        self.in_channels = in_channels
        self.out_channels = out_channels

        self.max_kernel_size = kernel_size
        self.kernel_size_list = []
        self.stride = stride
        self.max_expand_ratio = expand_ratio
        self.expand_ratio_list = []
        self.mid_channels = mid_channels
        self.act_func = act_func
        self.use_se = use_se
        
        
        if self.max_kernel_size in self.global_kernel_size_list:
            for kernel in sorted(self.global_kernel_size_list):
                if kernel == self.max_kernel_size:
                    self.kernel_size_list.append(kernel)
                    break
                self.kernel_size_list.append(kernel)
            
            self.kernel_size_list.reverse() # sorted in descending order
        
        else:
            self.kernel_size_list = [self.max_kernel_size]
        
        if self.max_expand_ratio in self.global_expand_ratio_list:        
            for expand in sorted(self.global_expand_ratio_list):
                if expand == self.max_expand_ratio:
                    self.expand_ratio_list.append(expand)
                    break
                self.expand_ratio_list.append(expand)
        
        else:
            self.expand_ratio_list = [self.max_expand_ratio]
        

        if self.mid_channels is None:
            feature_dim = round(self.in_channels * self.max_expand_ratio)
        else:
            feature_dim = self.mid_channels

        if self.max_expand_ratio == 1:
            self.inverted_bottleneck = None
        else:
            self.inverted_bottleneck = nn.Sequential(OrderedDict([
                ('conv', nn.Conv2d(self.in_channels, feature_dim, 1, 1, 0, bias=False)),
                ('bn', nn.BatchNorm2d(feature_dim)),
                ('act', build_activation(self.act_func, inplace=True)),
            ]))

        pad = get_same_padding(self.max_kernel_size)
        depth_conv_modules = [
            ('conv', nn.Conv2d(feature_dim, feature_dim, kernel_size, stride, pad, groups=feature_dim, bias=False)),
            ('bn', nn.BatchNorm2d(feature_dim)),
            ('act', build_activation(self.act_func, inplace=True))
        ]
        if self.use_se:
            depth_conv_modules.append(('se', SEModule(feature_dim)))
        self.depth_conv = nn.Sequential(OrderedDict(depth_conv_modules))

        self.point_linear = nn.Sequential(OrderedDict([
            ('conv', nn.Conv2d(feature_dim, out_channels, 1, 1, 0, bias=False)),
            ('bn', nn.BatchNorm2d(out_channels)),
        ]))

        self.kernel_transform_linear_list = nn.ModuleList()
        
        for i, kernel in enumerate(self.kernel_size_list[1:]):
            kernel_linear = nn.Linear(kernel*kernel, kernel*kernel)
            self.kernel_transform_linear_list.append(kernel_linear)

    def forward(self, x, gumbel=None):
        """
        gumbel: [batch_size, len(self.expand_ratio_list) + len(self.kernel_size_list)]
        """
        if gumbel==None:
            if self.inverted_bottleneck:
                x = self.inverted_bottleneck(x)
            x = self.depth_conv(x)
            x = self.point_linear(x)
            return x
        else:    
            if len(self.expand_ratio_list) == 1: ## 
                if len(self.kernel_size_list) == 1:
                    if self.inverted_bottleneck:
                        x = self.inverted_bottleneck(x)
                    x = self.depth_conv(x)
                    x = self.point_linear(x)
                    return x
                else:
                    assert len(gumbel[0]) == len(self.kernel_size_list), "gumbel size is not match with kernel_size_list"
                    if self.inverted_bottleneck:
                        x = self.inverted_bottleneck(x)
                    
                    depth_weight = self.depth_conv.conv.weight
                    pad = get_same_padding(self.max_kernel_size)
                    kernel_max_out = F.conv2d(x, depth_weight, stride=self.stride, padding=pad, groups=x.size(1))
                    kernel_max_out = self.depth_conv.bn(kernel_max_out)
                    kernel_max_out = self.depth_conv.act(kernel_max_out)
                    kernel_max_out *= gumbel[:, len(self.expand_ratio_list)].unsqueeze(1).unsqueeze(2).unsqueeze(3)
                    for i, active_kernel_size in enumerate(self.kernel_size_list[1:]):
                        start, end = sub_filter_start_end(self.kernel_size_list[i], active_kernel_size)
                        print(start, end, active_kernel_size, self.kernel_size_list[i], depth_weight.shape)
                        kernel_weight = depth_weight[:, :, start:end, start:end].contiguous()
                        kernel_weight = kernel_weight.view(kernel_weight.size(0), kernel_weight.size(1), -1)
                        kernel_weight = self.kernel_transform_linear_list[i](kernel_weight)
                        kernel_weight = kernel_weight.view(kernel_weight.size(0), kernel_weight.size(1), active_kernel_size, active_kernel_size)
                        pad = get_same_padding(active_kernel_size)
                        kernel_out = F.conv2d(x, kernel_weight, stride=self.stride, padding=pad, groups=x.size(1))
                        kernel_out = self.depth_conv.bn(kernel_out)
                        kernel_out = self.depth_conv.act(kernel_out)
                        kernel_out *= gumbel[:, len(self.expand_ratio_list) + i + 1].unsqueeze(1).unsqueeze(2).unsqueeze(3)
                        kernel_max_out += kernel_out
                    x = kernel_max_out
                    if self.use_se:
                        x = self.depth_conv.se(x)
                    # 3. pointwise convolution weights (out_channels)
                    x = self.point_linear(x)
                    return x
            
            elif len(self.kernel_size_list) == 1:
                
                assert len(gumbel[0]) == len(self.expand_ratio_list), "gumbel size is not match with expand_ratio_list"
                
                if self.inverted_bottleneck:
                    # 1. inverted bottleneck weights (max_expand_ratio)
                    expand_weight = self.inverted_bottleneck.conv.weight
                    expand_max_out = F.conv2d(x, expand_weight, stride=1, padding=0)
                    expand_max_out = self.inverted_bottleneck.bn(expand_max_out)
                    expand_max_out = self.inverted_bottleneck.act(expand_max_out)
                    expand_max_out *= gumbel[:, len(self.expand_ratio_list)].unsqueeze(1).unsqueeze(2).unsqueeze(3)
                    for i, expand_ratio in enumerate(self.expand_ratio_list[:-1]):
                        out = F.conv2d(x, expand_weight[:expand_ratio*self.in_channels, :, :, :], stride=1, padding=0)
                        out = F.batch_norm(out, self.inverted_bottleneck.bn.running_mean[:expand_ratio*self.in_channels], self.inverted_bottleneck.bn.running_var[:expand_ratio*self.in_channels], self.inverted_bottleneck.bn.weight[:expand_ratio*self.in_channels], self.inverted_bottleneck.bn.bias[:expand_ratio*self.in_channels], self.inverted_bottleneck.bn.training, self.inverted_bottleneck.bn.momentum, self.inverted_bottleneck.bn.eps)
                        out = self.inverted_bottleneck.act(out)
                        out *= gumbel[:, i].unsqueeze(1).unsqueeze(2).unsqueeze(3)
                        out = F.pad(out, [0, 0, 0, 0, 0, expand_max_out.size(1) - out.size(1)], mode='constant', value=0) # zero pad
                        expand_max_out += out
                    x = expand_max_out
                x = self.depth_conv(x)
                x = self.point_linear(x)
                return x
                
            elif len(gumbel[0]) == len(self.expand_ratio_list) + len(self.kernel_size_list):
                if self.inverted_bottleneck:
                    # 1. inverted bottleneck weights (max_expand_ratio)
                    expand_weight = self.inverted_bottleneck.conv.weight
                    expand_max_out = F.conv2d(x, expand_weight, stride=1, padding=0)
                    expand_max_out = self.inverted_bottleneck.bn(expand_max_out)
                    expand_max_out = self.inverted_bottleneck.act(expand_max_out)
                    expand_max_out *= gumbel[:, len(self.expand_ratio_list)].unsqueeze(1).unsqueeze(2).unsqueeze(3)
                    for i, expand_ratio in enumerate(self.expand_ratio_list[:-1]):
                        out = F.conv2d(x, expand_weight[:expand_ratio*self.in_channels, :, :, :], stride=1, padding=0)
                        out = F.batch_norm(out, self.inverted_bottleneck.bn.running_mean[:expand_ratio*self.in_channels], self.inverted_bottleneck.bn.running_var[:expand_ratio*self.in_channels], self.inverted_bottleneck.bn.weight[:expand_ratio*self.in_channels], self.inverted_bottleneck.bn.bias[:expand_ratio*self.in_channels], self.inverted_bottleneck.bn.training, self.inverted_bottleneck.bn.momentum, self.inverted_bottleneck.bn.eps)
                        out = self.inverted_bottleneck.act(out)
                        out *= gumbel[:, i].unsqueeze(1).unsqueeze(2).unsqueeze(3)
                        out = F.pad(out, [0, 0, 0, 0, 0, expand_max_out.size(1) - out.size(1)], mode='constant', value=0) # zero pad
                        expand_max_out += out
                    x = expand_max_out
                # 2. depthwise convolution weights (max_kernel_size)
                depth_weight = self.depth_conv.conv.weight
                pad = get_same_padding(self.max_kernel_size)
                kernel_max_out = F.conv2d(x, depth_weight, stride=self.stride, padding=pad, groups=x.size(1))
                kernel_max_out = self.depth_conv.bn(kernel_max_out)
                kernel_max_out = self.depth_conv.act(kernel_max_out)
                kernel_max_out *= gumbel[:, len(self.expand_ratio_list)].unsqueeze(1).unsqueeze(2).unsqueeze(3)
                for i, active_kernel_size in enumerate(self.kernel_size_list[1:]):
                    start, end = sub_filter_start_end(self.kernel_size_list[i], active_kernel_size)
                    print(start, end, active_kernel_size, self.kernel_size_list[i], depth_weight.shape)
                    kernel_weight = depth_weight[:, :, start:end, start:end].contiguous()
                    kernel_weight = kernel_weight.view(kernel_weight.size(0), kernel_weight.size(1), -1)
                    kernel_weight = self.kernel_transform_linear_list[i](kernel_weight)
                    kernel_weight = kernel_weight.view(kernel_weight.size(0), kernel_weight.size(1), active_kernel_size, active_kernel_size)
                    pad = get_same_padding(active_kernel_size)
                    kernel_out = F.conv2d(x, kernel_weight, stride=self.stride, padding=pad, groups=x.size(1))
                    kernel_out = self.depth_conv.bn(kernel_out)
                    kernel_out = self.depth_conv.act(kernel_out)
                    kernel_out *= gumbel[:, len(self.expand_ratio_list) + i + 1].unsqueeze(1).unsqueeze(2).unsqueeze(3)
                    kernel_max_out += kernel_out
                x = kernel_max_out
                if self.use_se:
                    x = self.depth_conv.se(x)
                # 3. pointwise convolution weights (out_channels)
                x = self.point_linear(x)
                return x
            else:
                assert False, "gumbel size is not match with expand_ratio_list and kernel_size_list"
            
    
    @property
    def module_str(self):
        if self.mid_channels is None:
            expand_ratio = self.max_expand_ratio
        else:
            expand_ratio = self.mid_channels // self.in_channels
        layer_str = '%dx%d_GumbelMBConv%d_%s' % (self.max_kernel_size, self.max_kernel_size, expand_ratio, self.act_func.upper())
        if self.use_se:
            layer_str = 'SE_' + layer_str
        layer_str += '_O%d' % self.out_channels
        return layer_str

    @property
    def config(self):
        return {
            'name': MBGumbelInvertedConvLayer.__name__,
            'in_channels': self.in_channels,
            'out_channels': self.out_channels,
            'kernel_size': self.max_kernel_size,
            'kernel_size_list': self.kernel_size_list,
            'stride': self.stride,
            'expand_ratio': self.max_expand_ratio,
            'expand_ratio_list': self.expand_ratio_list,
            'mid_channels': self.mid_channels,
            'act_func': self.act_func,
            'use_se': self.use_se,
        }

    @staticmethod
    def build_from_config(config):
        return MBGumbelInvertedConvLayer(**config)
    
    #@staticmethod
    #def build_from_module(module: MBInvertedConvLayer):
    #    mbgumbel = MBGumbelInvertedConvLayer.build_from_config(module.config)
    #    for n, m in module.named_parameters():
            


In [29]:

class MobileGumbelInvertedResidualBlock(MyModule):

    def __init__(self, mobile_inverted_conv, shortcut):
        super(MobileGumbelInvertedResidualBlock, self).__init__()

        self.mobile_inverted_conv = mobile_inverted_conv
        self.shortcut = shortcut

    def forward(self, x, gumbel_idx=None):
        if self.mobile_inverted_conv is None or isinstance(self.mobile_inverted_conv, ZeroLayer):
            res = x
        elif self.shortcut is None or isinstance(self.shortcut, ZeroLayer) and gumbel_idx == None:
            res = self.mobile_inverted_conv(x)
        elif self.shortcut is None or isinstance(self.shortcut, ZeroLayer) and gumbel_idx != None:
            res = self.mobile_inverted_conv(x, gumbel_idx)
        elif self.shortcut is not None and gumbel_idx == None:
            res = self.mobile_inverted_conv(x) + self.shortcut(x)
        else:
            res = self.mobile_inverted_conv(x, gumbel_idx) + self.shortcut(x)
        return res

    @property
    def module_str(self):
        return '(%s, %s)' % (
            self.mobile_inverted_conv.module_str if self.mobile_inverted_conv is not None else None,
            self.shortcut.module_str if self.shortcut is not None else None
        )

    @property
    def config(self):
        return {
            'name': MobileGumbelInvertedResidualBlock.__name__,
            'mobile_inverted_conv': self.mobile_inverted_conv.config if self.mobile_inverted_conv is not None else None,
            'shortcut': self.shortcut.config if self.shortcut is not None else None,
        }

    @staticmethod
    def build_from_config(config):
        mobile_inverted_conv = MBGumbelInvertedConvLayer.build_from_config(config['mobile_inverted_conv'])
        shortcut = set_layer_from_config(config['shortcut'])
        return MobileGumbelInvertedResidualBlock(mobile_inverted_conv, shortcut)

    @staticmethod
    def build_from_module(module):
        if isinstance(module, MobileGumbelInvertedResidualBlock):
            print("build from gumbel module")
            return module
        elif isinstance(module, MobileInvertedResidualBlock):
            print("build from normal MobileInvertedResidualBlock module")
            mobile_inverted_conv = module.mobile_inverted_conv
            shortcut = module.shortcut
            return MobileGumbelInvertedResidualBlock(module.mobile_inverted_conv, module.shortcut)

In [39]:
class GumbelMCUNets(MyNetwork):
    def __init__(self, first_conv, blocks, feature_mix_layer, classifier, gumbel_feature_extract_block):
        super(GumbelMCUNets, self).__init__()
        
        self.first_conv = first_conv
        self.blocks = nn.ModuleList(blocks)
        self.feature_mix_layer = feature_mix_layer
        self.classifier = classifier
        self.gumbel_feature_extract_block = gumbel_feature_extract_block
        
        self.gumbel_index = 0
        for i, block in enumerate(self.blocks):
            print(i, block.config, type(block))
            if i < self.gumbel_feature_extract_block:
                continue
            if len(block.mobile_inverted_conv.expand_ratio_list) > 1:
                self.gumbel_index += len(block.mobile_inverted_conv.expand_ratio_list)
                
            if len(block.mobile_inverted_conv.kernel_size_list) > 1:
                self.gumbel_index += len(block.mobile_inverted_conv.kernel_size_list)
        
        
        self.gumbel_input_channel = blocks[gumbel_feature_extract_block].mobile_inverted_conv.out_channels
        
        self.avgpool_policy = nn.AdaptiveAvgPool2d((8, 8))
        self.gumbel_features_flatten = nn.Flatten()
        self.gumbel_fc1 = nn.Linear(self.gumbel_input_channel*8*8, 256)
        self.dropout = nn.Dropout(0.2)
        self.gumbel_fc2 = nn.Linear(256, self.gumbel_index)
        
    def forward(self, x):
        return x
    
    @property
    def module_str(self):
        _str = self.first_conv.module_str + '\n'
        for block in self.blocks:
            _str += block.module_str + '\n'
        _str += self.feature_mix_layer.module_str + '\n'
        _str += self.classifier.module_str
        return _str
        
    @property
    def config(self):
        return {
            'name': GumbelMCUNets.__name__,
            'bn': self.get_bn_param(),
            'first_conv': self.first_conv.config,
            'blocks': [
                block.config for block in self.blocks
            ],
            'feature_mix_layer': None if self.feature_mix_layer is None else self.feature_mix_layer.config,
            'classifier': self.classifier.config,
        }
    
    
    @staticmethod
    def build_from_config(net_config, gumbel_config):
        MBGumbelInvertedConvLayer.global_expand_ratio_list = gumbel_config['global_expand_ratio_list']
        MBGumbelInvertedConvLayer.global_kernel_size_list = gumbel_config['global_kernel_size_list']
        gumbel_feature_extract_block = gumbel_config['gumbel_feature_extract_block']
        
        first_conv = set_layer_from_config(net_config['first_conv'])
        feature_mix_layer = set_layer_from_config(net_config['feature_mix_layer'])
        classifier = set_layer_from_config(net_config['classifier'])
        
        blocks = []
        
        for i, block_config in enumerate(net_config['blocks']):
            if i < gumbel_feature_extract_block:
                print(i, block_config)
                blocks.append(MobileInvertedResidualBlock.build_from_config(block_config))
            else:
                blocks.append(MobileGumbelInvertedResidualBlock.build_from_config(block_config))
        
        net = GumbelMCUNets(first_conv, blocks, feature_mix_layer, classifier, gumbel_feature_extract_block)
        
        if 'bn' in net_config:
            net.set_bn_param(**net_config['bn'])
        else:
            net.set_bn_param(momentum=0.1, eps=1e-3)
        
        return net
    
    def load_pretrained_mcunet_param(self, mcunet):
        
        for n, p in self.named_parameters():
            if has_deep_attr(mcunet, n):
                print("load {} params ({})".format(n, p.shape))
                set_deep_attr(self, n, get_deep_attr(mcunet, n))
        

In [40]:
gubmel_config = {'global_expand_ratio_list':[1,3,5,6], 'global_kernel_size_list':[3,5,7], 'gumbel_feature_extract_block':2}
net = GumbelMCUNets.build_from_config(model.config, gubmel_config)

0 {'name': 'MobileInvertedResidualBlock', 'mobile_inverted_conv': {'name': 'MBInvertedConvLayer', 'in_channels': 32, 'out_channels': 16, 'kernel_size': 3, 'stride': 1, 'expand_ratio': 1, 'mid_channels': None, 'act_func': 'relu6', 'use_se': False}, 'shortcut': None}
1 {'name': 'MobileInvertedResidualBlock', 'mobile_inverted_conv': {'name': 'MBInvertedConvLayer', 'in_channels': 16, 'out_channels': 24, 'kernel_size': 7, 'stride': 2, 'expand_ratio': 3, 'mid_channels': 48, 'act_func': 'relu6', 'use_se': False}, 'shortcut': None}
0 {'name': 'MobileInvertedResidualBlock', 'mobile_inverted_conv': {'name': 'MBInvertedConvLayer', 'in_channels': 32, 'out_channels': 16, 'kernel_size': 3, 'stride': 1, 'expand_ratio': 1, 'mid_channels': None, 'act_func': 'relu6', 'use_se': False}, 'shortcut': None} <class 'mcunet.tinynas.nn.networks.proxyless_nets.MobileInvertedResidualBlock'>
1 {'name': 'MobileInvertedResidualBlock', 'mobile_inverted_conv': {'name': 'MBInvertedConvLayer', 'in_channels': 16, 'out_ch

In [42]:
net.load_pretrained_mcunet_param(model)

load first_conv.conv.weight params (torch.Size([32, 3, 3, 3]))
load first_conv.bn.weight params (torch.Size([32]))
load first_conv.bn.bias params (torch.Size([32]))
load blocks.0.mobile_inverted_conv.depth_conv.conv.weight params (torch.Size([32, 1, 3, 3]))
load blocks.0.mobile_inverted_conv.depth_conv.bn.weight params (torch.Size([32]))
load blocks.0.mobile_inverted_conv.depth_conv.bn.bias params (torch.Size([32]))
load blocks.0.mobile_inverted_conv.point_linear.conv.weight params (torch.Size([16, 32, 1, 1]))
load blocks.0.mobile_inverted_conv.point_linear.bn.weight params (torch.Size([16]))
load blocks.0.mobile_inverted_conv.point_linear.bn.bias params (torch.Size([16]))
load blocks.1.mobile_inverted_conv.inverted_bottleneck.conv.weight params (torch.Size([48, 16, 1, 1]))
load blocks.1.mobile_inverted_conv.inverted_bottleneck.bn.weight params (torch.Size([48]))
load blocks.1.mobile_inverted_conv.inverted_bottleneck.bn.bias params (torch.Size([48]))
load blocks.1.mobile_inverted_conv.

In [38]:
for n, p in net.named_parameters():
    if has_deep_attr(model, n):
        print(n)

first_conv.conv.weight
first_conv.bn.weight
first_conv.bn.bias
blocks.0.mobile_inverted_conv.depth_conv.conv.weight
blocks.0.mobile_inverted_conv.depth_conv.bn.weight
blocks.0.mobile_inverted_conv.depth_conv.bn.bias
blocks.0.mobile_inverted_conv.point_linear.conv.weight
blocks.0.mobile_inverted_conv.point_linear.bn.weight
blocks.0.mobile_inverted_conv.point_linear.bn.bias
blocks.1.mobile_inverted_conv.inverted_bottleneck.conv.weight
blocks.1.mobile_inverted_conv.inverted_bottleneck.bn.weight
blocks.1.mobile_inverted_conv.inverted_bottleneck.bn.bias
blocks.1.mobile_inverted_conv.depth_conv.conv.weight
blocks.1.mobile_inverted_conv.depth_conv.bn.weight
blocks.1.mobile_inverted_conv.depth_conv.bn.bias
blocks.1.mobile_inverted_conv.point_linear.conv.weight
blocks.1.mobile_inverted_conv.point_linear.bn.weight
blocks.1.mobile_inverted_conv.point_linear.bn.bias
blocks.2.mobile_inverted_conv.inverted_bottleneck.conv.weight
blocks.2.mobile_inverted_conv.inverted_bottleneck.bn.weight
blocks.2.mo

In [None]:
model

In [None]:
mbconv_test = MBGumbelInvertedConvLayer.build_from_config(m.mobile_inverted_conv.config)
mbconv_test.config

In [None]:
inputs = torch.randn(2, 16, 32, 32)
gumbel_inputs = torch.randn(2, 4, 8, 8)
gumbel_inputs.requires_grad = True
gumbel_layer = nn.Linear(4*8*8, 5)
gumbel_output = gumbel_layer(gumbel_inputs.view(2, -1))
gumbel_index = F.gumbel_softmax(gumbel_output, tau=1, hard=True)
print(gumbel_index)
out = mbconv_test.forward(torch.randn(2, 16, 32, 32))

In [None]:
inputs = torch.randn(2, 16, 32, 32)
gumbel_inputs = torch.randn(2, 4, 8, 8)
gumbel_inputs.requires_grad = True
gumbel_layer = nn.Linear(4*8*8, 5)
gumbel_output = gumbel_layer(gumbel_inputs.view(2, -1))
gumbel_index = F.gumbel_softmax(gumbel_output, tau=1, hard=True)
print(gumbel_index)
out = mbconv_test.forward(torch.randn(2, 16, 32, 32), gumbel_index)
out.sum().backward()

In [None]:
gumbel_layer.weight.grad

In [None]:
mbconv_test(torch.randn(1, 32, 32, 32), gumbel=[1, 0, 0, 1, 0, 0, 0])

In [None]:
original_mbconv_test_weight = copy.deepcopy(mbconv_test.depth_conv.conv.weight)
print(original_mbconv_test_weight)

In [None]:
print(m.mobile_inverted_conv.depth_conv.conv.weight)

In [None]:
for n, p in m.mobile_inverted_conv.named_parameters():
    if has_deep_attr(mbconv_test, n):
        print(n, p)
        set_deep_attr(mbconv_test, n, p)
        print('------------------')

In [None]:
for n, p in m.mobile_inverted_conv.named_parameters():
    if has_deep_attr(mbconv_test, n):
        print(n)
        print(get_deep_attr(mbconv_test, n) - p)

In [None]:
mbconv_test.forward(torch.randn(1,32,16,16), gumbel=1)

In [None]:
bn_layer = nn.BatchNorm2d(16)

In [None]:
x = torch.randn(1, 12, 32, 32)

In [None]:
feature_dim = 12
out = F.batch_norm(x, bn_layer.running_mean[:feature_dim], bn_layer.running_var[:feature_dim], bn_layer.weight[:feature_dim], bn_layer.bias[:feature_dim])

In [None]:
out.sum().backward()

In [None]:
bn_layer.weight.grad

In [None]:
model, img_size, desc = build_model(net_id='mcunet-in4', pretrained=True)

backup_model = copy.deepcopy(model)
model_copy = build_model(net_id='mcunet-in4', pretrained=False)[0]

for (n1, p1), (n2, p2) in zip(backup_model.named_parameters(), model_copy.named_parameters()):
    if n1 == n2:
        print((p1 - p2).sum())

In [None]:
for n, p in model.named_parameters():
    if has_deep_attr(model_copy, n):
        print(n)
        set_deep_attr(model_copy, n, p)

In [None]:
for (n1, p1), (n2, p2) in zip(backup_model.named_parameters(), model_copy.named_parameters()):
    if n1 == n2:
        print((p1-p2).sum())