In [35]:
# Get a standard pretrained ResNet-18 from torchvision
import torchvision
import torch 
import torch.nn as nn
def resnet_model():
    """Build torchvision's ResNet-18 constructed with ``pretrained=True``.

    Returns:
        The object returned by ``torchvision.models.resnet18(pretrained=True)``.
    """
    return torchvision.models.resnet18(pretrained=True)

In [36]:
# Instantiate the network built by resnet_model(); the cells below read `model`.
model = resnet_model()

In [38]:
# Confirm that the first layer of the network is a 2-D convolution and show it.
if isinstance(model.conv1, torch.nn.Conv2d):
    print('Conv1 is a convolutional layer', model.conv1, sep='\n')

Conv1 is a convolutional layer
Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)


In [39]:
from functools import reduce
from typing import Union

import torch
from torch import nn


def get_module_by_name(module: Union[torch.Tensor, nn.Module],
                       access_string: str):
    """Retrieve an object nested inside ``module`` by its dotted access string.

    Every dot-separated component of ``access_string`` is resolved with
    ``getattr`` on the result of the previous component, starting at
    ``module``. Numeric components such as the ``0`` in ``'layer1.0.conv1'``
    are looked up as attribute names too, which is how entries of a
    Sequential are reached.

    Args:
        module: Object to start the lookup from.
        access_string: Dotted path, e.g. ``'layer1.0.relu'``. An empty string
            (the name ``named_modules()`` reports for the root module) returns
            ``module`` itself.

    Returns:
        The object found at the end of the path.

    Raises:
        AttributeError: If a component of the path does not exist.
    """
    # ''.split('.') yields [''], and getattr(module, '') raises, so the root
    # has to be handled explicitly.
    if not access_string:
        return module
    names = access_string.split(sep='.')
    return reduce(getattr, names, module)


# Small usage demo for get_module_by_name.
# NOTE(review): when this cell is executed in a notebook kernel, `__name__`
# is presumably '__main__', so the body runs and rebinds the global `model`
# to a resnet34 built with default arguments, replacing whatever `model`
# held before. Cells executed afterwards read this resnet34 — confirm that
# this is intended.
if __name__ == '__main__':
    from torchvision.models import resnet34
    
    model = resnet34()
    get_module_by_name(model, 'layer1.0.relu')

In [61]:
# Look up the sub-module stored under the dotted name 'layer1.0.conv1'.
x = get_module_by_name(model, 'layer1.0.conv1')

In [64]:
# For a Conv2d, print its hyper-parameters on a single space-separated line.
if isinstance(x, torch.nn.Conv2d):
    print('Conv1 is a convolutional layer')
    print(*(getattr(x, field) for field in (
        'in_channels', 'kernel_size', 'out_channels', 'stride', 'padding',
        'dilation', 'groups', 'bias', 'padding_mode',
    )))


Conv1 is a convolutional layer
64 (3, 3) 64 (1, 1) (1, 1) (1, 1) 1 None zeros


In [59]:
# Print the traversal index and dotted name of every module yielded by
# named_modules().
for ix, (module_name, _module) in enumerate(model.named_modules()):
    print(ix, module_name)

0 
1 conv1
2 bn1
3 relu
4 maxpool
5 layer1
6 layer1.0
7 layer1.0.conv1
8 layer1.0.bn1
9 layer1.0.relu
10 layer1.0.conv2
11 layer1.0.bn2
12 layer1.1
13 layer1.1.conv1
14 layer1.1.bn1
15 layer1.1.relu
16 layer1.1.conv2
17 layer1.1.bn2
18 layer1.2
19 layer1.2.conv1
20 layer1.2.bn1
21 layer1.2.relu
22 layer1.2.conv2
23 layer1.2.bn2
24 layer2
25 layer2.0
26 layer2.0.conv1
27 layer2.0.bn1
28 layer2.0.relu
29 layer2.0.conv2
30 layer2.0.bn2
31 layer2.0.downsample
32 layer2.0.downsample.0
33 layer2.0.downsample.1
34 layer2.1
35 layer2.1.conv1
36 layer2.1.bn1
37 layer2.1.relu
38 layer2.1.conv2
39 layer2.1.bn2
40 layer2.2
41 layer2.2.conv1
42 layer2.2.bn1
43 layer2.2.relu
44 layer2.2.conv2
45 layer2.2.bn2
46 layer2.3
47 layer2.3.conv1
48 layer2.3.bn1
49 layer2.3.relu
50 layer2.3.conv2
51 layer2.3.bn2
52 layer3
53 layer3.0
54 layer3.0.conv1
55 layer3.0.bn1
56 layer3.0.relu
57 layer3.0.conv2
58 layer3.0.bn2
59 layer3.0.downsample
60 layer3.0.downsample.0
61 layer3.0.downsample.1
62 layer3.1
63 laye

In [29]:
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [18]:
def hook(m, i, o):
    """Forward hook that prints the class name of the module that just ran.

    Args:
        m: Module the hook is attached to.
        i: Inputs passed to the module's forward call (unused).
        o: Output of the module's forward call (unused).

    Returns:
        None.
    """
    print(m._get_name())


# Keep the handles returned by register_forward_hook: without them the hooks
# cannot be detached again, and every re-run of this cell would stack another
# print per module. Detach with `for h in hook_handles: h.remove()`.
hook_handles = [mo.register_forward_hook(hook) for mo in model.modules()]

In [28]:
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [12]:
# Print the dotted name of every parameter yielded by named_parameters().
for name, _param in model.named_parameters():
    print(name)

conv1.weight
bn1.weight
bn1.bias
layer1.0.conv1.weight
layer1.0.bn1.weight
layer1.0.bn1.bias
layer1.0.conv2.weight
layer1.0.bn2.weight
layer1.0.bn2.bias
layer1.1.conv1.weight
layer1.1.bn1.weight
layer1.1.bn1.bias
layer1.1.conv2.weight
layer1.1.bn2.weight
layer1.1.bn2.bias
layer2.0.conv1.weight
layer2.0.bn1.weight
layer2.0.bn1.bias
layer2.0.conv2.weight
layer2.0.bn2.weight
layer2.0.bn2.bias
layer2.0.downsample.0.weight
layer2.0.downsample.1.weight
layer2.0.downsample.1.bias
layer2.1.conv1.weight
layer2.1.bn1.weight
layer2.1.bn1.bias
layer2.1.conv2.weight
layer2.1.bn2.weight
layer2.1.bn2.bias
layer3.0.conv1.weight
layer3.0.bn1.weight
layer3.0.bn1.bias
layer3.0.conv2.weight
layer3.0.bn2.weight
layer3.0.bn2.bias
layer3.0.downsample.0.weight
layer3.0.downsample.1.weight
layer3.0.downsample.1.bias
layer3.1.conv1.weight
layer3.1.bn1.weight
layer3.1.bn1.bias
layer3.1.conv2.weight
layer3.1.bn2.weight
layer3.1.bn2.bias
layer4.0.conv1.weight
layer4.0.bn1.weight
layer4.0.bn1.bias
layer4.0.conv2.we

In [9]:
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [5]:
# Iterate model parameters

conv1.weight torch.Size([64, 3, 7, 7])
<class 'str'> <class 'torch.nn.parameter.Parameter'>
bn1.weight torch.Size([64])
<class 'str'> <class 'torch.nn.parameter.Parameter'>
bn1.bias torch.Size([64])
<class 'str'> <class 'torch.nn.parameter.Parameter'>
layer1.0.conv1.weight torch.Size([64, 64, 3, 3])
<class 'str'> <class 'torch.nn.parameter.Parameter'>
layer1.0.bn1.weight torch.Size([64])
<class 'str'> <class 'torch.nn.parameter.Parameter'>
layer1.0.bn1.bias torch.Size([64])
<class 'str'> <class 'torch.nn.parameter.Parameter'>
layer1.0.conv2.weight torch.Size([64, 64, 3, 3])
<class 'str'> <class 'torch.nn.parameter.Parameter'>
layer1.0.bn2.weight torch.Size([64])
<class 'str'> <class 'torch.nn.parameter.Parameter'>
layer1.0.bn2.bias torch.Size([64])
<class 'str'> <class 'torch.nn.parameter.Parameter'>
layer1.1.conv1.weight torch.Size([64, 64, 3, 3])
<class 'str'> <class 'torch.nn.parameter.Parameter'>
layer1.1.bn1.weight torch.Size([64])
<class 'str'> <class 'torch.nn.parameter.Paramete

In [3]:
print(model)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [None]:
# Once for All: Train One Network and Specialize it for Efficient Deployment
# Han Cai, Chuang Gan, Tianzhe Wang, Zhekai Zhang, Song Han
# International Conference on Learning Representations (ICLR), 2020.

import torch
import torch.nn as nn

from collections import OrderedDict
from ofa.utils import get_same_padding, min_divisible_value, SEModule, ShuffleLayer
from ofa.utils import MyNetwork, MyModule
from ofa.utils import build_activation, make_divisible

__all__ = [
	'set_layer_from_config',
	'ConvLayer', 'IdentityLayer', 'LinearLayer', 'MultiHeadLinearLayer', 'ZeroLayer', 'MBConvLayer',
	'ResidualBlock', 'ResNetBottleneckBlock',
]


def set_layer_from_config(layer_config):
	"""Instantiate a layer from its configuration dictionary.

	The ``'name'`` entry selects the layer class; the remaining entries are
	handed to that class's ``build_from_config``.

	Args:
		layer_config: Mapping with a ``'name'`` key plus the arguments of the
			selected layer, or ``None``. The mapping is not modified.

	Returns:
		The layer built by ``build_from_config``, or ``None`` when
		``layer_config`` is ``None``.

	Raises:
		KeyError: If ``'name'`` is missing or names an unknown layer type.
	"""
	if layer_config is None:
		return None

	name2layer = {
		ConvLayer.__name__: ConvLayer,
		IdentityLayer.__name__: IdentityLayer,
		LinearLayer.__name__: LinearLayer,
		MultiHeadLinearLayer.__name__: MultiHeadLinearLayer,
		ZeroLayer.__name__: ZeroLayer,
		MBConvLayer.__name__: MBConvLayer,
		# additional name that maps to the same class (presumably a legacy alias)
		'MBInvertedConvLayer': MBConvLayer,
		##########################################################
		ResidualBlock.__name__: ResidualBlock,
		ResNetBottleneckBlock.__name__: ResNetBottleneckBlock,
	}

	# Pop 'name' from a shallow copy: removing it from the caller's dict would
	# make the same config raise KeyError the next time it is used to build a
	# layer.
	layer_config = dict(layer_config)
	layer_name = layer_config.pop('name')
	layer = name2layer[layer_name]
	return layer.build_from_config(layer_config)


class My2DLayer(MyModule):
	"""Base class for layers assembled from 'bn', 'act' and 'weight' operations.

	The order of the operations is given by ``ops_order``, a string whose
	'_'-separated parts are operation names. Child modules are registered in
	that order and ``forward`` applies them in registration order. Subclasses
	supply the weight operation(s) through ``weight_op``.
	"""

	def __init__(self, in_channels, out_channels,
	             use_bn=True, act_func='relu', dropout_rate=0, ops_order='weight_bn_act'):
		"""Store the settings and register the child modules.

		Args:
			in_channels: Channel count used for the batch norm when 'bn' comes
				before 'weight' in ``ops_order``.
			out_channels: Channel count used for the batch norm otherwise.
			use_bn: Whether a ``nn.BatchNorm2d`` is created.
			act_func: Passed to ``build_activation``.
			dropout_rate: A ``nn.Dropout2d`` is created when this is > 0.
			ops_order: '_'-separated operation names, e.g. 'weight_bn_act'.

		Raises:
			ValueError: From ``bn_before_weight`` when ``use_bn`` is set and
				``ops_order`` contains neither 'bn' nor 'weight'.
			KeyError: When ``ops_order`` contains a name that is not a key of
				the local ``modules`` dict ('bn', 'act', 'dropout', 'weight').
		"""
		super(My2DLayer, self).__init__()
		self.in_channels = in_channels
		self.out_channels = out_channels

		self.use_bn = use_bn
		self.act_func = act_func
		self.dropout_rate = dropout_rate
		self.ops_order = ops_order

		""" modules """
		modules = {}
		# batch norm
		if self.use_bn:
			if self.bn_before_weight:
				modules['bn'] = nn.BatchNorm2d(in_channels)
			else:
				modules['bn'] = nn.BatchNorm2d(out_channels)
		else:
			modules['bn'] = None
		# activation
		# NOTE(review): the second argument is presumably the `inplace` flag of
		# build_activation — confirm against ofa.utils.
		modules['act'] = build_activation(self.act_func, self.ops_list[0] != 'act' and self.use_bn)
		# dropout
		if self.dropout_rate > 0:
			modules['dropout'] = nn.Dropout2d(self.dropout_rate, inplace=True)
		else:
			modules['dropout'] = None
		# weight
		# weight_op() returns either None or a mapping of name -> module
		modules['weight'] = self.weight_op()

		# add modules
		for op in self.ops_list:
			if modules[op] is None:
				continue
			elif op == 'weight':
				# dropout before weight operation
				# (so dropout is only registered when weight_op() returned modules)
				if modules['dropout'] is not None:
					self.add_module('dropout', modules['dropout'])
				for key in modules['weight']:
					self.add_module(key, modules['weight'][key])
			else:
				self.add_module(op, modules[op])

	@property
	def ops_list(self):
		"""Operation names obtained by splitting ``ops_order`` on '_'."""
		return self.ops_order.split('_')

	@property
	def bn_before_weight(self):
		"""True if 'bn' occurs before 'weight' in ``ops_list``, False if after.

		Raises:
			ValueError: If ``ops_list`` contains neither 'bn' nor 'weight'.
		"""
		for op in self.ops_list:
			if op == 'bn':
				return True
			elif op == 'weight':
				return False
		raise ValueError('Invalid ops_order: %s' % self.ops_order)

	def weight_op(self):
		"""Return the weight modules as a name -> module mapping, or None.

		Must be overridden by subclasses; called from ``__init__``.
		"""
		raise NotImplementedError

	""" Methods defined in MyModule """

	def forward(self, x):
		"""Apply every registered child module to ``x`` in registration order."""
		# similar to nn.Sequential
		for module in self._modules.values():
			x = module(x)
		return x

	@property
	def module_str(self):
		"""Short textual description of the layer; overridden by subclasses."""
		raise NotImplementedError

	@property
	def config(self):
		"""Dictionary holding the constructor arguments of this base class."""
		return {
			'in_channels': self.in_channels,
			'out_channels': self.out_channels,
			'use_bn': self.use_bn,
			'act_func': self.act_func,
			'dropout_rate': self.dropout_rate,
			'ops_order': self.ops_order,
		}

	@staticmethod
	def build_from_config(config):
		"""Build a layer from a config dictionary; overridden by subclasses."""
		raise NotImplementedError


class ConvLayer(My2DLayer):
	"""2-D convolution combined with the optional bn/act/dropout of My2DLayer.

	The weight operation is a ``nn.Conv2d``, optionally followed by a
	``ShuffleLayer``; an ``SEModule`` is appended when ``use_se`` is set.
	"""

	def __init__(self, in_channels, out_channels,
	             kernel_size=3, stride=1, dilation=1, groups=1, bias=False, has_shuffle=False, use_se=False,
	             use_bn=True, act_func='relu', dropout_rate=0, ops_order='weight_bn_act'):
		"""Store the convolution settings and build the layer.

		Args:
			in_channels: Input channels of the convolution.
			out_channels: Output channels of the convolution.
			kernel_size: Kernel size, an int or a pair.
			stride: Convolution stride.
			dilation: Convolution dilation; also scales the padding.
			groups: Requested group count (passed through
				``min_divisible_value`` together with ``in_channels``).
			bias: Whether the convolution has a bias term.
			has_shuffle: Add a ``ShuffleLayer`` after the convolution when
				``groups > 1``.
			use_se: Append an ``SEModule`` after all other modules.
			use_bn, act_func, dropout_rate, ops_order: See ``My2DLayer``.
		"""
		# default normal 3x3_Conv with bn and relu
		# These attributes are assigned before the parent constructor runs
		# because My2DLayer.__init__ calls weight_op(), which reads them.
		self.kernel_size = kernel_size
		self.stride = stride
		self.dilation = dilation
		self.groups = groups
		self.bias = bias
		self.has_shuffle = has_shuffle
		self.use_se = use_se

		super(ConvLayer, self).__init__(in_channels, out_channels, use_bn, act_func, dropout_rate, ops_order)
		if self.use_se:
			self.add_module('se', SEModule(self.out_channels))

	def weight_op(self):
		"""Build the convolution (and optional shuffle) as an ordered mapping.

		Returns:
			OrderedDict with key 'conv' and, when ``has_shuffle`` is set and
			``groups > 1``, key 'shuffle'.
		"""
		padding = get_same_padding(self.kernel_size)
		if isinstance(padding, int):
			padding *= self.dilation
		else:
			# Build a new pair rather than assigning into `padding`: item
			# assignment raises TypeError when the helper returns a tuple.
			padding = (padding[0] * self.dilation, padding[1] * self.dilation)

		weight_dict = OrderedDict({
			'conv': nn.Conv2d(
				self.in_channels, self.out_channels, kernel_size=self.kernel_size, stride=self.stride, padding=padding,
				dilation=self.dilation, groups=min_divisible_value(self.in_channels, self.groups), bias=self.bias
			)
		})
		if self.has_shuffle and self.groups > 1:
			weight_dict['shuffle'] = ShuffleLayer(self.groups)

		return weight_dict

	@property
	def module_str(self):
		"""Short description such as '3x3_Conv_O16_RELU_BN'."""
		if isinstance(self.kernel_size, int):
			kernel_size = (self.kernel_size, self.kernel_size)
		else:
			kernel_size = self.kernel_size
		if self.groups == 1:
			if self.dilation > 1:
				conv_str = '%dx%d_DilatedConv' % (kernel_size[0], kernel_size[1])
			else:
				conv_str = '%dx%d_Conv' % (kernel_size[0], kernel_size[1])
		else:
			if self.dilation > 1:
				conv_str = '%dx%d_DilatedGroupConv' % (kernel_size[0], kernel_size[1])
			else:
				conv_str = '%dx%d_GroupConv' % (kernel_size[0], kernel_size[1])
		conv_str += '_O%d' % self.out_channels
		if self.use_se:
			conv_str = 'SE_' + conv_str
		# NOTE(review): raises AttributeError when act_func is None.
		conv_str += '_' + self.act_func.upper()
		if self.use_bn:
			if isinstance(self.bn, nn.GroupNorm):
				conv_str += '_GN%d' % self.bn.num_groups
			elif isinstance(self.bn, nn.BatchNorm2d):
				conv_str += '_BN'
		return conv_str

	@property
	def config(self):
		"""Constructor arguments of this layer plus its 'name'."""
		return {
			'name': ConvLayer.__name__,
			'kernel_size': self.kernel_size,
			'stride': self.stride,
			'dilation': self.dilation,
			'groups': self.groups,
			'bias': self.bias,
			'has_shuffle': self.has_shuffle,
			'use_se': self.use_se,
			**super(ConvLayer, self).config
		}

	@staticmethod
	def build_from_config(config):
		"""Build a ConvLayer from keyword arguments held in ``config``."""
		return ConvLayer(**config)


class IdentityLayer(My2DLayer):
	"""My2DLayer variant that contributes no weight operation."""

	def __init__(self, in_channels, out_channels,
	             use_bn=False, act_func=None, dropout_rate=0, ops_order='weight_bn_act'):
		"""Forward all settings unchanged to the My2DLayer constructor."""
		super().__init__(
			in_channels, out_channels,
			use_bn=use_bn, act_func=act_func, dropout_rate=dropout_rate, ops_order=ops_order,
		)

	def weight_op(self):
		"""There is no weight module for this layer."""
		return None

	@property
	def module_str(self):
		"""Fixed description of the layer."""
		return 'Identity'

	@property
	def config(self):
		"""The layer 'name' followed by the base-class configuration."""
		cfg = {'name': IdentityLayer.__name__}
		cfg.update(super().config)
		return cfg

	@staticmethod
	def build_from_config(config):
		"""Build an IdentityLayer from keyword arguments held in ``config``."""
		return IdentityLayer(**config)


class LinearLayer(MyModule):
	"""Fully-connected layer with optional batch norm, activation and dropout.

	Child modules are registered following ``ops_order`` (a '_'-separated list
	of 'bn', 'act' and 'weight'); ``forward`` applies them in registration
	order.
	"""

	def __init__(self, in_features, out_features, bias=True,
	             use_bn=False, act_func=None, dropout_rate=0, ops_order='weight_bn_act'):
		"""Store the settings and register the child modules.

		Args:
			in_features: Input size of the ``nn.Linear``; also the batch norm
				size when 'bn' precedes 'weight' in ``ops_order``.
			out_features: Output size of the ``nn.Linear``; the batch norm size
				otherwise.
			bias: Passed to ``nn.Linear``.
			use_bn: Whether a ``nn.BatchNorm1d`` is created.
			act_func: Passed to ``build_activation``.
			dropout_rate: A ``nn.Dropout`` is created when this is > 0.
			ops_order: '_'-separated operation names, e.g. 'weight_bn_act'.

		Raises:
			ValueError: From ``bn_before_weight`` when ``use_bn`` is set and
				``ops_order`` contains neither 'bn' nor 'weight'.
			KeyError: When ``ops_order`` contains a name that is not a key of
				the local ``modules`` dict ('bn', 'act', 'dropout', 'weight').
		"""
		super(LinearLayer, self).__init__()

		self.in_features = in_features
		self.out_features = out_features
		self.bias = bias

		self.use_bn = use_bn
		self.act_func = act_func
		self.dropout_rate = dropout_rate
		self.ops_order = ops_order

		""" modules """
		modules = {}
		# batch norm
		if self.use_bn:
			if self.bn_before_weight:
				modules['bn'] = nn.BatchNorm1d(in_features)
			else:
				modules['bn'] = nn.BatchNorm1d(out_features)
		else:
			modules['bn'] = None
		# activation
		# NOTE(review): My2DLayer additionally requires `self.use_bn` in this
		# second argument — confirm the difference is intended.
		modules['act'] = build_activation(self.act_func, self.ops_list[0] != 'act')
		# dropout
		if self.dropout_rate > 0:
			modules['dropout'] = nn.Dropout(self.dropout_rate, inplace=True)
		else:
			modules['dropout'] = None
		# linear
		modules['weight'] = {'linear': nn.Linear(self.in_features, self.out_features, self.bias)}

		# add modules
		for op in self.ops_list:
			if modules[op] is None:
				continue
			elif op == 'weight':
				# dropout, when present, is registered right before the linear op
				if modules['dropout'] is not None:
					self.add_module('dropout', modules['dropout'])
				for key in modules['weight']:
					self.add_module(key, modules['weight'][key])
			else:
				self.add_module(op, modules[op])

	@property
	def ops_list(self):
		"""Operation names obtained by splitting ``ops_order`` on '_'."""
		return self.ops_order.split('_')

	@property
	def bn_before_weight(self):
		"""True if 'bn' occurs before 'weight' in ``ops_list``, False if after.

		Raises:
			ValueError: If ``ops_list`` contains neither 'bn' nor 'weight'.
		"""
		for op in self.ops_list:
			if op == 'bn':
				return True
			elif op == 'weight':
				return False
		raise ValueError('Invalid ops_order: %s' % self.ops_order)

	def forward(self, x):
		"""Apply every registered child module to ``x`` in registration order."""
		for module in self._modules.values():
			x = module(x)
		return x

	@property
	def module_str(self):
		"""Description of the form '<in_features>x<out_features>_Linear'."""
		return '%dx%d_Linear' % (self.in_features, self.out_features)

	@property
	def config(self):
		"""Constructor arguments of this layer plus its 'name'."""
		return {
			'name': LinearLayer.__name__,
			'in_features': self.in_features,
			'out_features': self.out_features,
			'bias': self.bias,
			'use_bn': self.use_bn,
			'act_func': self.act_func,
			'dropout_rate': self.dropout_rate,
			'ops_order': self.ops_order,
		}

	@staticmethod
	def build_from_config(config):
		"""Build a LinearLayer from keyword arguments held in ``config``."""
		return LinearLayer(**config)


class MultiHeadLinearLayer(MyModule):
	"""Several independent ``nn.Linear`` heads applied to the same input.

	The outputs of the heads are stacked along a new dimension 1.
	"""

	def __init__(self, in_features, out_features, num_heads=1, bias=True, dropout_rate=0):
		"""Create the optional dropout and ``num_heads`` linear heads.

		Args:
			in_features: Input size of every head.
			out_features: Output size of every head.
			num_heads: Number of ``nn.Linear`` heads.
			bias: Passed to every ``nn.Linear``.
			dropout_rate: An in-place ``nn.Dropout`` is applied to the input
				when this is > 0.
		"""
		super(MultiHeadLinearLayer, self).__init__()
		self.in_features = in_features
		self.out_features = out_features
		self.num_heads = num_heads

		self.bias = bias
		self.dropout_rate = dropout_rate

		if self.dropout_rate > 0:
			self.dropout = nn.Dropout(self.dropout_rate, inplace=True)
		else:
			self.dropout = None

		self.layers = nn.ModuleList()
		for _ in range(num_heads):
			self.layers.append(nn.Linear(in_features, out_features, self.bias))

	def forward(self, inputs):
		"""Run every head on ``inputs`` and stack the results on dimension 1."""
		if self.dropout is not None:
			inputs = self.dropout(inputs)

		# Call the heads instead of their .forward so that hooks registered on
		# them are executed as well.
		outputs = [layer(inputs) for layer in self.layers]

		return torch.stack(outputs, dim=1)

	@property
	def module_str(self):
		"""Same text as ``repr(self)``."""
		return self.__repr__()

	@property
	def config(self):
		"""Constructor arguments of this layer plus its 'name'."""
		return {
			'name': MultiHeadLinearLayer.__name__,
			'in_features': self.in_features,
			'out_features': self.out_features,
			'num_heads': self.num_heads,
			'bias': self.bias,
			'dropout_rate': self.dropout_rate,
		}

	@staticmethod
	def build_from_config(config):
		"""Build a MultiHeadLinearLayer from keyword arguments in ``config``."""
		return MultiHeadLinearLayer(**config)

	def __repr__(self):
		return 'MultiHeadLinear(in_features=%d, out_features=%d, num_heads=%d, bias=%s, dropout_rate=%s)' % (
			self.in_features, self.out_features, self.num_heads, self.bias, self.dropout_rate
		)


class ZeroLayer(MyModule):
	"""Marker layer without modules; calling ``forward`` always raises."""

	def __init__(self):
		super().__init__()

	def forward(self, x):
		"""Always raises ``ValueError``; this layer cannot be evaluated."""
		raise ValueError()

	@property
	def module_str(self):
		"""Fixed description of the layer."""
		return 'Zero'

	@property
	def config(self):
		"""Configuration holding only the layer 'name'."""
		return dict(name=ZeroLayer.__name__)

	@staticmethod
	def build_from_config(config):
		"""Return a new ZeroLayer; the content of ``config`` is not used."""
		return ZeroLayer()


class MBConvLayer(MyModule):
	"""Three-stage convolution block: 1x1 expansion, k x k conv, 1x1 projection.

	The expansion stage is omitted when ``expand_ratio == 1``. The middle
	stage uses as many groups as channels unless ``groups`` is given.
	"""

	def __init__(self, in_channels, out_channels,
	             kernel_size=3, stride=1, expand_ratio=6, mid_channels=None, act_func='relu6', use_se=False,
	             groups=None):
		"""Store the settings and build the three stages.

		Args:
			in_channels: Channels of the block input.
			out_channels: Channels of the block output.
			kernel_size: Kernel size of the middle convolution.
			stride: Stride of the middle convolution.
			expand_ratio: Multiplier giving the middle width when
				``mid_channels`` is None; 1 disables the expansion stage.
			mid_channels: Explicit middle width, overriding ``expand_ratio``.
			act_func: Passed to ``build_activation``.
			use_se: Append an ``SEModule`` to the middle stage.
			groups: Requested group count of the middle convolution, or None.
		"""
		super().__init__()

		self.in_channels = in_channels
		self.out_channels = out_channels

		self.kernel_size = kernel_size
		self.stride = stride
		self.expand_ratio = expand_ratio
		self.mid_channels = mid_channels
		self.act_func = act_func
		self.use_se = use_se
		self.groups = groups

		# width of the middle stage
		feature_dim = (
			self.mid_channels if self.mid_channels is not None
			else round(self.in_channels * self.expand_ratio)
		)

		# stage 1: 1x1 expansion (skipped for expand_ratio == 1)
		if self.expand_ratio != 1:
			expand_ops = OrderedDict()
			expand_ops['conv'] = nn.Conv2d(self.in_channels, feature_dim, 1, 1, 0, bias=False)
			expand_ops['bn'] = nn.BatchNorm2d(feature_dim)
			expand_ops['act'] = build_activation(self.act_func, inplace=True)
			self.inverted_bottleneck = nn.Sequential(expand_ops)
		else:
			self.inverted_bottleneck = None

		# stage 2: k x k convolution
		pad = get_same_padding(self.kernel_size)
		depth_groups = feature_dim if self.groups is None else min_divisible_value(feature_dim, self.groups)
		depth_ops = OrderedDict()
		depth_ops['conv'] = nn.Conv2d(
			feature_dim, feature_dim, kernel_size, stride, pad, groups=depth_groups, bias=False
		)
		depth_ops['bn'] = nn.BatchNorm2d(feature_dim)
		depth_ops['act'] = build_activation(self.act_func, inplace=True)
		if self.use_se:
			depth_ops['se'] = SEModule(feature_dim)
		self.depth_conv = nn.Sequential(depth_ops)

		# stage 3: 1x1 projection without activation
		project_ops = OrderedDict()
		project_ops['conv'] = nn.Conv2d(feature_dim, out_channels, 1, 1, 0, bias=False)
		project_ops['bn'] = nn.BatchNorm2d(out_channels)
		self.point_linear = nn.Sequential(project_ops)

	def forward(self, x):
		"""Run the expansion (if present), middle and projection stages."""
		if self.inverted_bottleneck:
			x = self.inverted_bottleneck(x)
		return self.point_linear(self.depth_conv(x))

	@property
	def module_str(self):
		"""Short description such as '3x3_MBConv6_RELU6_O24_BN'."""
		ratio = self.expand_ratio if self.mid_channels is None else self.mid_channels // self.in_channels
		core = '%dx%d_MBConv%d_%s' % (self.kernel_size, self.kernel_size, ratio, self.act_func.upper())
		prefix = 'SE_' if self.use_se else ''

		suffix = '_O%d' % self.out_channels
		if self.groups is not None:
			suffix += '_G%d' % self.groups
		norm = self.point_linear.bn
		if isinstance(norm, nn.GroupNorm):
			suffix += '_GN%d' % norm.num_groups
		elif isinstance(norm, nn.BatchNorm2d):
			suffix += '_BN'

		return prefix + core + suffix

	@property
	def config(self):
		"""Constructor arguments of this layer plus its 'name'."""
		fields = (
			'in_channels', 'out_channels', 'kernel_size', 'stride', 'expand_ratio',
			'mid_channels', 'act_func', 'use_se', 'groups',
		)
		cfg = {'name': MBConvLayer.__name__}
		for field in fields:
			cfg[field] = getattr(self, field)
		return cfg

	@staticmethod
	def build_from_config(config):
		"""Build an MBConvLayer from keyword arguments held in ``config``."""
		return MBConvLayer(**config)


class ResidualBlock(MyModule):
	"""Pair of a main branch (``conv``) and an optional ``shortcut`` branch."""

	def __init__(self, conv, shortcut):
		"""Store the two branches; either of them may be None."""
		super().__init__()

		self.conv = conv
		self.shortcut = shortcut

	def forward(self, x):
		"""Combine the branches.

		Returns ``x`` unchanged when ``conv`` is None or a ZeroLayer, only
		``conv(x)`` when ``shortcut`` is None or a ZeroLayer, and otherwise
		``conv(x) + shortcut(x)``.
		"""
		if self.conv is None or isinstance(self.conv, ZeroLayer):
			return x
		if self.shortcut is None or isinstance(self.shortcut, ZeroLayer):
			return self.conv(x)
		return self.conv(x) + self.shortcut(x)

	@property
	def module_str(self):
		"""'(conv, shortcut)' built from the branches' own descriptions."""
		conv_str = None if self.conv is None else self.conv.module_str
		shortcut_str = None if self.shortcut is None else self.shortcut.module_str
		return '(%s, %s)' % (conv_str, shortcut_str)

	@property
	def config(self):
		"""The block 'name' and the configs of both branches (None if absent)."""
		conv_cfg = None if self.conv is None else self.conv.config
		shortcut_cfg = None if self.shortcut is None else self.shortcut.config
		return {
			'name': ResidualBlock.__name__,
			'conv': conv_cfg,
			'shortcut': shortcut_cfg,
		}

	@staticmethod
	def build_from_config(config):
		"""Build both branches with ``set_layer_from_config``.

		The main branch is read from key 'conv' when present, otherwise from
		key 'mobile_inverted_conv'.
		"""
		conv_key = 'conv' if 'conv' in config else 'mobile_inverted_conv'
		conv = set_layer_from_config(config[conv_key])
		shortcut = set_layer_from_config(config['shortcut'])
		return ResidualBlock(conv, shortcut)

	@property
	def mobile_inverted_conv(self):
		"""Alternative name for the ``conv`` branch."""
		return self.conv


class ResNetBottleneckBlock(MyModule):
	"""Bottleneck block: 1x1 conv, k x k conv, 1x1 conv plus a shortcut branch.

	The shortcut is an ``IdentityLayer`` when the stride is 1 and the channel
	counts match; otherwise it is built according to ``downsample_mode``.
	"""

	def __init__(self, in_channels, out_channels,
	             kernel_size=3, stride=1, expand_ratio=0.25, mid_channels=None, act_func='relu', groups=1,
	             downsample_mode='avgpool_conv'):
		"""Store the settings and build the three convolutions and the shortcut.

		Args:
			in_channels: Channels of the block input.
			out_channels: Channels of the block output.
			kernel_size: Kernel size of the middle convolution.
			stride: Stride of the middle convolution and of the shortcut.
			expand_ratio: Multiplier of ``out_channels`` giving the middle
				width when ``mid_channels`` is None.
			mid_channels: Explicit middle width; after construction the
				attribute holds the value returned by ``make_divisible``.
			act_func: Passed to ``build_activation``.
			groups: Group count of the middle convolution.
			downsample_mode: 'conv' or 'avgpool_conv'.

		Raises:
			NotImplementedError: For any other ``downsample_mode`` when a
				non-identity shortcut is required.
		"""
		super().__init__()

		self.in_channels = in_channels
		self.out_channels = out_channels

		self.kernel_size = kernel_size
		self.stride = stride
		self.expand_ratio = expand_ratio
		self.mid_channels = mid_channels
		self.act_func = act_func
		self.groups = groups

		self.downsample_mode = downsample_mode

		# middle width, passed through make_divisible
		requested_dim = (
			self.mid_channels if self.mid_channels is not None
			else round(self.out_channels * self.expand_ratio)
		)
		feature_dim = make_divisible(requested_dim, MyNetwork.CHANNEL_DIVISIBLE)
		self.mid_channels = feature_dim

		# build modules
		reduce_ops = OrderedDict()
		reduce_ops['conv'] = nn.Conv2d(self.in_channels, feature_dim, 1, 1, 0, bias=False)
		reduce_ops['bn'] = nn.BatchNorm2d(feature_dim)
		reduce_ops['act'] = build_activation(self.act_func, inplace=True)
		self.conv1 = nn.Sequential(reduce_ops)

		pad = get_same_padding(self.kernel_size)
		middle_ops = OrderedDict()
		middle_ops['conv'] = nn.Conv2d(feature_dim, feature_dim, kernel_size, stride, pad, groups=groups, bias=False)
		middle_ops['bn'] = nn.BatchNorm2d(feature_dim)
		middle_ops['act'] = build_activation(self.act_func, inplace=True)
		self.conv2 = nn.Sequential(middle_ops)

		expand_ops = OrderedDict()
		expand_ops['conv'] = nn.Conv2d(feature_dim, self.out_channels, 1, 1, 0, bias=False)
		expand_ops['bn'] = nn.BatchNorm2d(self.out_channels)
		self.conv3 = nn.Sequential(expand_ops)

		# shortcut branch
		if stride == 1 and in_channels == out_channels:
			self.downsample = IdentityLayer(in_channels, out_channels)
		else:
			if self.downsample_mode == 'conv':
				shortcut_ops = [
					('conv', nn.Conv2d(in_channels, out_channels, 1, stride, 0, bias=False)),
				]
			elif self.downsample_mode == 'avgpool_conv':
				shortcut_ops = [
					('avg_pool', nn.AvgPool2d(kernel_size=stride, stride=stride, padding=0, ceil_mode=True)),
					('conv', nn.Conv2d(in_channels, out_channels, 1, 1, 0, bias=False)),
				]
			else:
				raise NotImplementedError
			shortcut_ops.append(('bn', nn.BatchNorm2d(out_channels)))
			self.downsample = nn.Sequential(OrderedDict(shortcut_ops))

		self.final_act = build_activation(self.act_func, inplace=True)

	def forward(self, x):
		"""Add the shortcut to the three-convolution branch, then activate."""
		residual = self.downsample(x)
		x = self.conv3(self.conv2(self.conv1(x)))
		return self.final_act(x + residual)

	@property
	def module_str(self):
		"""'(main, shortcut)' description of the block."""
		main_str = '%dx%d_BottleneckConv_%d->%d->%d_S%d_G%d' % (
			self.kernel_size, self.kernel_size, self.in_channels, self.mid_channels, self.out_channels,
			self.stride, self.groups
		)
		if isinstance(self.downsample, IdentityLayer):
			shortcut_str = 'Identity'
		else:
			shortcut_str = self.downsample_mode
		return '(%s, %s)' % (main_str, shortcut_str)

	@property
	def config(self):
		"""Constructor arguments of this block plus its 'name'."""
		fields = (
			'in_channels', 'out_channels', 'kernel_size', 'stride', 'expand_ratio',
			'mid_channels', 'act_func', 'groups', 'downsample_mode',
		)
		cfg = {'name': ResNetBottleneckBlock.__name__}
		for field in fields:
			cfg[field] = getattr(self, field)
		return cfg

	@staticmethod
	def build_from_config(config):
		"""Build a ResNetBottleneckBlock from keyword arguments in ``config``."""
		return ResNetBottleneckBlock(**config)

In [25]:
base_resolution = 224

In [None]:
# One tuple per layer; the tuple layout depends on the block type in slot 0:
#   ResidualBlock: block, input_size, in_channels, out_channels, expand_list, ks_list, stride, act, se
#   ConvLayer:     block, input_size, in_channels, out_channels, kernel_size, stride, act
#   LinearLayer:   block, input_size, in_channels, out_channels, kernel_size, stride
# The entries are read positionally, so their order must not change.

configurations = [
			(ConvLayer, base_resolution, 3, 16, 3, 2, 'relu'),
			(ResidualBlock, base_resolution // 2, 16, 16, [1], [3, 5, 7], 1, 'relu', False),
			(ResidualBlock, base_resolution // 2, 16, 24, [3, 4, 6], [3, 5, 7], 2, 'relu', False),
			(ResidualBlock, base_resolution // 4, 24, 24, [3, 4, 6], [3, 5, 7], 1, 'relu', False),
			(ResidualBlock, base_resolution // 4, 24, 24, [3, 4, 6], [3, 5, 7], 1, 'relu', False),
			(ResidualBlock, base_resolution // 4, 24, 24, [3, 4, 6], [3, 5, 7], 1, 'relu', False),
			(ResidualBlock, base_resolution // 4, 24, 40, [3, 4, 6], [3, 5, 7], 2, 'relu', True),
			(ResidualBlock, base_resolution // 8, 40, 40, [3, 4, 6], [3, 5, 7], 1, 'relu', True),
			(ResidualBlock, base_resolution // 8, 40, 40, [3, 4, 6], [3, 5, 7], 1, 'relu', True),
			(ResidualBlock, base_resolution // 8, 40, 40, [3, 4, 6], [3, 5, 7], 1, 'relu', True),
			(ResidualBlock, base_resolution // 8, 40, 80, [3, 4, 6], [3, 5, 7], 2, 'h_swish', False),
			(ResidualBlock, base_resolution // 16, 80, 80, [3, 4, 6], [3, 5, 7], 1, 'h_swish', False),
			(ResidualBlock, base_resolution // 16, 80, 80, [3, 4, 6], [3, 5, 7], 1, 'h_swish', False),
			(ResidualBlock, base_resolution // 16, 80, 80, [3, 4, 6], [3, 5, 7], 1, 'h_swish', False),
			(ResidualBlock, base_resolution // 16, 80, 112, [3, 4, 6], [3, 5, 7], 1, 'h_swish', True),
			(ResidualBlock, base_resolution // 16, 112, 112, [3, 4, 6], [3, 5, 7], 1, 'h_swish', True),
			(ResidualBlock, base_resolution // 16, 112, 112, [3, 4, 6], [3, 5, 7], 1, 'h_swish', True),
			(ResidualBlock, base_resolution // 16, 112, 112, [3, 4, 6], [3, 5, 7], 1, 'h_swish', True),
			(ResidualBlock, base_resolution // 16, 112, 160, [3, 4, 6], [3, 5, 7], 2, 'h_swish', True),
			(ResidualBlock, base_resolution // 32, 160, 160, [3, 4, 6], [3, 5, 7], 1, 'h_swish', True),
			(ResidualBlock, base_resolution // 32, 160, 160, [3, 4, 6], [3, 5, 7], 1, 'h_swish', True),
			(ResidualBlock, base_resolution // 32, 160, 160, [3, 4, 6], [3, 5, 7], 1, 'h_swish', True),
			(ConvLayer, base_resolution // 32, 160, 960, 1, 1, 'h_swish'),
			(ConvLayer, 1, 960, 1280, 1, 1, 'h_swish'),
			(LinearLayer, 1, 1280, 1000, 1, 1),
		]

In [None]:
# block, 

In [None]:
{
    'Conv2D': [],
    'BatchNorm2d': [],
    'ReLU': [],
    'MaxPool2d': [],

}

In [24]:
from ofa.utils import *

ImportError: cannot import name 'queue' from 'torch._six' (/home/shikhar.srivastava/miniconda3/envs/pytorch_19/lib/python3.7/site-packages/torch/_six.py)

In [None]:
# Walk over `configurations` and, per block type, assemble the config dict
# for that layer; ConvLayer entries are additionally built and measured.
# NOTE(review): this cell reads `self`, `batch_size` and `efficiency_dict`,
# none of which is defined in a visible cell — it looks like a method body
# pasted at top level and will raise NameError if run as is.
for layer_idx in range(len(configurations)):
	config = configurations[layer_idx]
	op_type = config[0]
	if op_type == ResidualBlock:
		_, input_size, in_channels, out_channels, expand_list, ks_list, stride, act, se = config
		in_channels = int(round(in_channels * self.multiplier))
		out_channels = int(round(out_channels * self.multiplier))
		# NOTE(review): `kernel_size` is not unpacked in this branch (only
		# `ks_list` is); it holds whatever an earlier iteration assigned.
		# `template_config` is not used further in the visible code.
		template_config = {
			'name': ResidualBlock.__name__,
			'mobile_inverted_conv': {
				'name': MBConvLayer.__name__,
				'in_channels': in_channels,
				'out_channels': out_channels,
				'kernel_size': kernel_size,
				'stride': stride,
				'expand_ratio': 0,
				# 'mid_channels': None,
				'act_func': act,
				'use_se': se,
			},
			# identity shortcut only when the block keeps shape and stride
			'shortcut': {
				'name': IdentityLayer.__name__,
				'in_channels': in_channels,
				'out_channels': out_channels,
			} if (in_channels == out_channels and stride == 1) else None
		}

	elif op_type == ConvLayer:
		_, input_size, in_channels, out_channels, kernel_size, stride, activation = config
		in_channels = int(round(in_channels * self.multiplier))
		out_channels = int(round(out_channels * self.multiplier))
		build_config = {
			# 'name': ConvLayer.__name__,
			'in_channels': in_channels,
			'out_channels': out_channels,
			'kernel_size': kernel_size,
			'stride': stride,
			'dilation': 1,
			'groups': 1,
			'bias': False,
			'use_bn': True,
			'has_shuffle': False,
			'act_func': activation,
		}
		layer = ConvLayer.build_from_config(build_config)
		input_shape = (batch_size, in_channels, input_size, input_size)

		# NOTE(review): `measure_result` stays unset for any other pred_type.
		if self.pred_type == 'flops':
			measure_result = self.measure_single_layer_flops(layer, input_shape) / batch_size
		elif self.pred_type == 'latency':
			measure_result = self.measure_single_layer_latency(layer, input_shape)

		efficiency_dict['other_blocks'][layer_idx] = measure_result

	elif op_type == LinearLayer:
		_, input_size, in_channels, out_channels, kernel_size, stride = config
		in_channels = int(round(in_channels * self.multiplier))
		out_channels = int(round(out_channels * self.multiplier))
		# NOTE(review): `build_config` is assembled here but not used in the
		# visible code; the cell appears to stop mid-way.
		build_config = {
			# 'name': LinearLayer.__name__,
			'in_features': in_channels,
			'out_features': out_channels
		}

In [1]:
import torch

# Load the object serialized in the .pt file at `path`.
# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code; the path suggests a Dropbox download — only load it if the source is
# trusted.
# NOTE(review): the path is absolute and user-specific; consider a
# configurable data directory.
#path = '/l/users/shikhar.srivastava/data/ofa/www.dropbox.com/s/1qbsgjqanxgw2ji/p_m_train.pt'
path ='/l/users/shikhar.srivastava/data/ofa/www.dropbox.com/s/gb0xe86zulsjs99/p_mod_zoo.pt'
files = torch.load(path)

In [2]:
files.keys()

dict_keys(['dataset', 'acc', 'n_params', 'topol', 'f_emb'])

In [10]:
files['topol'][0]

[5,
 5,
 3,
 7,
 3,
 3,
 5,
 5,
 7,
 5,
 3,
 5,
 3,
 5,
 7,
 3,
 7,
 3,
 7,
 3,
 4,
 3,
 4,
 3,
 4,
 4,
 3,
 3,
 3,
 6,
 4,
 3,
 6,
 3,
 6,
 4,
 4,
 6,
 6,
 6,
 4,
 3,
 2,
 3,
 2]

In [18]:
files['f_emb']

[tensor([0.6876, -0.0000, -0.0000,  ..., -0.0000, -0.0000, -0.0000]),
 tensor([ 0.2272, -0.3029, -0.1857,  ..., -0.1262, -0.0000, -0.3728]),
 tensor([-0.2580, -0.0000, -0.0000,  ..., -0.0000, -0.0000,  1.3191]),
 tensor([73.9549, -0.0000, -0.0000,  ..., -0.0000, -0.0000, -0.0000]),
 tensor([7.1190, -0.0000, -0.0000,  ..., -0.0000, -0.0000, -0.0000]),
 tensor([-0.0000, -0.0000, -0.0000,  ..., -0.0000, 1.5584, -0.0000]),
 tensor([ 3.9480, -0.0000, -0.0000,  ..., -0.0000, -0.0000, -0.0483]),
 tensor([15.1548, -0.0000, -0.0000,  ..., -0.0000, -0.0000, -0.0000]),
 tensor([-0., -0., -0.,  ..., -0., -0., -0.]),
 tensor([-0.1450, -0.0000, -0.0000,  ..., -0.0000, -0.0000, -0.0000]),
 tensor([12.9656, -0.0000, -0.0000,  ..., -0.0000, -0.0000, -0.0000]),
 tensor([-0.3112,  1.1543, -0.0000,  ..., -0.0000, -0.3218, -0.0000]),
 tensor([ 1.0323, -0.0000, -0.3701,  ..., -0.0000, -0.0000, -0.1281]),
 tensor([-0.0000, -0.0000, -0.0000,  ..., -0.0000, -0.0000, -0.3743]),
 tensor([ -0.0000, 369.4255,  -0.

In [7]:
import pandas as pd

In [12]:
pd.DataFrame(files['topol'])

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,35,36,37,38,39,40,41,42,43,44
0,5,5,3,7,3,3,5,5,7,5,...,4,4,6,6,6,4,3,2,3,2
1,5,3,5,3,7,7,5,5,7,5,...,3,6,6,6,6,2,2,3,4,2
2,3,3,7,7,5,5,7,3,5,3,...,6,6,3,6,6,4,2,2,3,2
3,7,7,7,7,3,5,7,3,3,7,...,3,6,6,4,4,3,2,4,4,2
4,7,5,3,7,5,5,3,7,3,5,...,6,4,4,6,4,3,3,2,2,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13995,3,5,5,5,3,5,3,7,5,7,...,4,6,6,6,6,2,3,2,2,2
13996,3,5,3,5,5,7,5,3,7,7,...,6,3,3,3,6,2,2,3,4,3
13997,7,3,5,5,7,5,5,5,7,5,...,6,6,4,6,4,3,4,2,4,4
13998,7,7,7,5,3,5,7,7,5,3,...,6,6,6,6,4,4,2,2,4,3
