In [1]:
%load_ext autoreload
%autoreload 2

In [1]:
import torch

from torchinfo import summary
from tabulate import tabulate
import torchvision.models as models
from pprint import pprint

In [2]:
from mobilenet import MobileNet
from mobilenetv2 import MobileNetV2
from resnet import *
from densenet import DenseNet3
from pre_resnet import *
from lenet import LeNet
from vgg import VGG
from wrn import *
from pytorch_modelsize import *
from densenet2 import *
from resnet_cifar10 import resnet20, resnet32, resnet56, resnet110
from resnet_cifar10_sequential import resnet20 as resnet20_seq, resnet32 as resnet32_seq
from resnet164 import resnet_164, resnet_110
from react_resnet import Network as PreActResNet
# The only concern is the MobileNet comparison; however:
# 1. We don't need to compare against them: they don't report a quantization bit width, and we don't even know whether they quantize their features.
# 2. Their results don't seem trustworthy.
# 3. Their paper is only an arXiv preprint.
# 4. We just need to redo our experiment with MobileNet; then we will definitely beat them.

# 91.44

In [26]:
# List the torchvision model entries whose name starts with "mob".
# The original probe (`models.Mob`) was a partial attribute name and raised
# AttributeError, which stops a Restart & Run All; filtering dir() never raises.
sorted(attr for attr in dir(models) if attr.lower().startswith('mob'))

In [3]:
# Per-network statistics keyed by a short label; get_mem() later reads the
# raw counts stored under each entry's 'stat' key.
results = {}

# (label, instantiated model) pairs to profile. Every model is constructed
# up front, before any of them is summarised.
networks = [
    ('lenet', LeNet()),
    ('mobilenetv2', MobileNetV2()),
    ('mobilenet', MobileNet()),
    ('resnet20', resnet20()),
    ('resnet32', resnet32()),
    ('resnet56', resnet56()),
    ('resnet110', resnet110()),
    ('resnet20_seq', resnet20_seq()),
    ('resnet32_seq', resnet32_seq()),
    ('densenet100', DenseNet3(100, 10, reduction=1.0)),
    ('densenet76', DenseNet3(76, 10, reduction=1.0)),
    ('densenet40', DenseNet3(40, 12, reduction=1.0)),
    ('densenet121', DenseNet3(121, 10, reduction=1.0)),
    ('pre_resnet110', PreActResNet(110)),
    ('pre_resnet164', PreActResNet(164)),
    ('vgg11', VGG('VGG11')),
    ('vgg13', VGG('VGG13')),
    ('vgg16', VGG('VGG16')),
    ('vgg19', VGG('VGG19')),
    ('wrn-22-8', Wide_ResNet(22, 8, 0, 10)),
]

for name, net in networks:
    # torchinfo summary for an input of size (1, 3, 32, 32).
    s = summary(net, (1, 3, 32, 32))
    # Indexed below as macs[1] (numeric value) and macs[0] (unit suffix).
    macs = s.to_readable(s.total_mult_adds)

    entry = {}
    # NOTE(review): float_to_megabytes presumably converts a float32 count to
    # megabytes, so the "M" suffix may not mean "millions" — confirm.
    entry["# Params"] = f'{s.float_to_megabytes(s.total_params):.02f}M'
    entry["# Features"] = f'{s.float_to_megabytes(s.total_output):.02f}M'
    # NOTE(review): the column is labelled FLOPS but is filled from total_mult_adds.
    entry["FLOPS"] = f'{macs[1]:.02f}{macs[0]}'
    # Unformatted counts consumed by get_mem().
    entry['stat'] = {'w': s.total_params, 'f': s.total_output}
    results[name] = entry

# One table row per network, in insertion order.
columns = ["Network", "# Params", "# Features", "FLOPS"]
rows = []
for label, formatted in results.items():
    rows.append([label] + [formatted[col] for col in columns[1:]])

s = tabulate(rows, headers=columns)
print("\n\n")
print(s)

| Wide-Resnet 22x8



Network        # Params    # Features    FLOPS
-------------  ----------  ------------  -------
lenet          0.25M       0.05M         0.66M
mobilenetv2    9.19M       27.60M        91.19M
mobilenet      12.87M      6.59M         46.38M
resnet20       1.09M       3.21M         40.81M
resnet32       1.87M       5.05M         69.13M
resnet56       3.42M       8.72M         125.75M
resnet110      6.92M       16.97M        253.16M
resnet20_seq   1.09M       3.21M         40.81M
resnet32_seq   1.87M       5.05M         69.13M
densenet100    4.84M       53.34M        376.87M
densenet76     2.98M       35.32M        239.64M
densenet40     1.00M       14.36M        88.00M
densenet121    6.53M       68.99M        498.75M
pre_resnet110  6.92M       16.88M        253.16M
pre_resnet164  10.42M      25.13M        380.56M
vgg11          36.92M      2.42M         152.93M
vgg13          37.66M      4.00M         228.52M
vgg16          58.91M      4.42M         313.49M
vgg19    

In [5]:
def get_mem(net, wbits, fbits, wsp, fsp):
    """Scale a network's recorded weight and feature counts by bit width and kept fraction.

    Reads the raw counts stored under ``results[net]['stat']`` (module-level
    ``results`` dict) and multiplies each by its bit width and by one minus
    the corresponding sparsity argument.

    Args:
        net: Key into ``results``.
        wbits: Multiplier applied to the 'w' count (bits per weight).
        fbits: Multiplier applied to the 'f' count (bits per feature value).
        wsp: Fraction of the 'w' count to discount; the count is scaled by ``1 - wsp``.
        fsp: Fraction of the 'f' count to discount; the count is scaled by ``1 - fsp``.

    Returns:
        Tuple ``(weight_term, feature_term)``.

    Raises:
        KeyError: If ``net`` has no entry in ``results``.
    """
    counts = results[net]['stat']
    weight_term = counts['w'] * wbits * (1 - wsp)
    feature_term = counts['f'] * fbits * (1 - fsp)
    return weight_term, feature_term

def nice_p(*args):
    """Print one ``&``-separated table row with two decimals per field.

    Args:
        *args: ``args[0]`` is a ``(param, feat)`` pair, each printed divided
            by 1e6; ``args[1]`` is a density value, printed multiplied by
            1e6. Any further positional arguments are ignored.
    """
    param, feat = args[0]
    pd = args[1]
    row = f'{param/ 1e6:0.02f} & {feat/ 1e6:0.02f} & {pd * 1e6:0.02f}'
    print(row)

In [6]:
# Hand-entered score for this row; nice_p receives it divided by the sum of both get_mem terms.
score = 67.52
v = get_mem('lenet', wbits=32, fbits=32, wsp=0, fsp=0)
nice_p(v, score / sum(v))

1.98 & 0.42 & 28.12


In [13]:
#(Choi et al., 2016)
# Hand-entered score for this row; nice_p receives it divided by the sum of both get_mem terms.
score = 92.64
v = get_mem('resnet32', wbits=8, fbits=16, wsp=0.778, fsp=0)
nice_p(v, score / sum(v))

0.83 & 20.19 & 4.41


In [14]:
# (Achterhold et al., 2018)

# Hand-entered score for this row; nice_p receives it divided by the sum of both get_mem terms.
score = 91.17
v = get_mem('densenet76', wbits=2, fbits=16, wsp=0.54, fsp=0)
nice_p(v, score / sum(v))

0.68 & 141.26 & 0.64


In [15]:
  # (Liu et al., 2018) 1
    
# Hand-entered score for this row; nice_p receives it divided by the sum of both get_mem terms.
score = 93.34
v = get_mem('vgg19', wbits=16, fbits=16, wsp=0.95, fsp=0)
nice_p(v, score / sum(v))

16.03 & 19.40 & 2.63


In [16]:
  # (Liu et al., 2018) 2
    
# Hand-entered score for this row; nice_p receives it divided by the sum of both get_mem terms.
score = 92.53
v = get_mem('pre_resnet110', wbits=16, fbits=16, wsp=0.95, fsp=0)
nice_p(v, score / sum(v))

1.38 & 67.50 & 1.34


In [17]:
  # (Liu et al., 2018) 3
    
# Hand-entered score for this row; nice_p receives it divided by the sum of both get_mem terms.
score = 94.19
v = get_mem('densenet100', wbits=16, fbits=16, wsp=0.95, fsp=0)
nice_p(v, score / sum(v))

0.97 & 213.37 & 0.44


In [18]:
  # (Liu et al., 2018) 4

# Hand-entered score for this row; nice_p receives it divided by the sum of both get_mem terms.
score = 93.6
v = get_mem('vgg16', wbits=16, fbits=16, wsp=0.7, fsp=0.7)
nice_p(v, score / sum(v))

70.70 & 5.31 & 1.23


In [19]:
  # (Liu et al., 2018) 5

# Hand-entered score for this row; nice_p receives it divided by the sum of both get_mem terms.
score = 94.23
v = get_mem('pre_resnet164', wbits=16, fbits=16, wsp=0.6, fsp=0.6)
nice_p(v, score / sum(v))

16.67 & 40.21 & 1.66


In [20]:
  # (Liu et al., 2018) 6

# Hand-entered score for this row; nice_p receives it divided by the sum of both get_mem terms.
score = 93.87
v = get_mem('densenet40', wbits=16, fbits=16, wsp=0.6, fsp=0.6)
nice_p(v, score / sum(v))

1.60 & 22.97 & 3.82


In [21]:
# n43_zhao2019variational 1

# Hand-entered score for this row; nice_p receives it divided by the sum of both get_mem terms.
score = 93.16
v = get_mem('densenet40', wbits=16, fbits=16, wsp=0.6, fsp=0.6)
nice_p(v, score / sum(v))

1.60 & 22.97 & 3.79


In [22]:
# n43_zhao2019variational 2

# (Yang et al., 2020)
# NOTE(review): the cell header cites n43_zhao2019variational while this line cited
# Yang et al., 2020 — confirm which source this row belongs to.
# Hand-entered score for this row; nice_p receives it divided by the sum of both get_mem terms.
score = 91.66
v = get_mem('resnet20', wbits=16, fbits=16, wsp=0.38, fsp=0.38)
nice_p(v, score / sum(v))

2.70 & 7.96 & 8.59


In [23]:
# n43_zhao2019variational 3

# (Yang et al., 2020)
# NOTE(review): the cell header cites n43_zhao2019variational while this line cited
# Yang et al., 2020 — confirm which source this row belongs to.
# Hand-entered score for this row; nice_p receives it divided by the sum of both get_mem terms.
score = 92.26
v = get_mem('resnet56', wbits=16, fbits=16, wsp=0.45, fsp=0.45)
nice_p(v, score / sum(v))

7.53 & 19.18 & 3.45


In [24]:
# n43_zhao2019variational 4

# (Yang et al., 2020)
# NOTE(review): the cell header cites n43_zhao2019variational while this line cited
# Yang et al., 2020 — confirm which source this row belongs to.
# Hand-entered score for this row; nice_p receives it divided by the sum of both get_mem terms.
score = 92.96
v = get_mem('resnet110', wbits=16, fbits=16, wsp=0.63, fsp=0.63)
nice_p(v, score / sum(v))

10.25 & 25.12 & 2.63


In [25]:
# (Xiao & Wang, 2019)
# Hand-entered score for this row; nice_p receives it divided by the sum of both get_mem terms.
score = 91.5
v = get_mem('vgg16', wbits=16, fbits=16, wsp=0.788, fsp=0.788)
nice_p(v, score / sum(v))

49.96 & 3.75 & 1.70


In [26]:
#(Dettmers & Zettlemoyer, 2019)  1

# Hand-entered score for this row; nice_p receives it divided by the sum of both get_mem terms.
score = 93
v = get_mem('vgg16', wbits=16, fbits=16, wsp=0.95, fsp=0)
nice_p(v, score / sum(v))

11.78 & 17.70 & 3.15


In [27]:
#(Dettmers & Zettlemoyer, 2019)  2
# Hand-entered score for this row; nice_p receives it divided by the sum of both get_mem terms.
score = 95.07
v = get_mem('wrn-22-8', wbits=16, fbits=16, wsp=0.95, fsp=0)
nice_p(v, score / sum(v))

13.73 & 115.34 & 0.74


In [28]:
# (Yang et al., 2020)
# Hand-entered score for this row; nice_p receives it divided by the sum of both get_mem terms.
score = 91.15
v = get_mem('resnet20', wbits=1.9, fbits=16, wsp=0.54, fsp=0)
nice_p(v, score / sum(v))

0.24 & 12.85 & 6.97


In [36]:
# (van Baalen et al., 2020)
# Hand-entered score for this row; nice_p receives it divided by the sum of both get_mem terms.
score = 93.23
v = get_mem('vgg11', wbits=4.75, fbits=5.4, wsp=0, fsp=0)
nice_p(v, score / sum(v))

43.85 & 3.27 & 1.98


In [37]:
# (Paupamah et al., 2020a)
# Hand-entered score for this row; nice_p receives it divided by the sum of both get_mem terms.
score = 90.59
v = get_mem('mobilenet', wbits=8, fbits=8, wsp=0, fsp=0)
nice_p(v, score / sum(v))

25.74 & 13.17 & 2.33


In [29]:
#(Choi et al., 2020)
# Hand-entered score for this row; nice_p receives it divided by the sum of both get_mem terms.
score = 92.57
v = get_mem('resnet32', wbits=8, fbits=16, wsp=0.875, fsp=0)
nice_p(v, score / sum(v))

0.47 & 20.19 & 4.48


------------

In [39]:
# ours 1
# Hand-entered score for this row; nice_p receives it divided by the sum of both get_mem terms.
score = 90.16
v = get_mem('resnet32', wbits=8, fbits=8, wsp=0.5, fsp=0.5)
nice_p(v, score / sum(v))

1.87 & 5.05 & 13.04


In [40]:
# ours 2
# Hand-entered score for this row; nice_p receives it divided by the sum of both get_mem terms.
score = 83.7
v = get_mem('resnet32', wbits=8, fbits=8, wsp=0.75, fsp=0.75)
nice_p(v, score / sum(v))

0.93 & 2.52 & 24.21


In [41]:
# ours 3
# Hand-entered score for this row; nice_p receives it divided by the sum of both get_mem terms.
score = 87.3
v = get_mem('resnet32', wbits=4, fbits=4, wsp=0.5, fsp=0.5)
nice_p(v, score / sum(v))

0.93 & 2.52 & 25.25


In [42]:
# ours 4
# Hand-entered score for this row; nice_p receives it divided by the sum of both get_mem terms.
score = 79.2
v = get_mem('resnet32', wbits=4, fbits=4, wsp=0.75, fsp=0.75)
nice_p(v, score / sum(v))

0.47 & 1.26 & 45.82


In [9]:
# ours 5
# Hand-entered score for this row; nice_p receives it divided by the sum of both get_mem terms.
score = 91.77
v = get_mem('resnet32', wbits=8, fbits=8, wsp=0.4, fsp=0.4)
nice_p(v, score / sum(v))

2.24 & 6.06 & 11.06


In [10]:
# Hand-entered score for this row; nice_p receives it divided by the sum of both get_mem terms.
score = 92.53
v = get_mem('resnet32', wbits=8, fbits=8, wsp=0.25, fsp=0.25)
nice_p(v, score / sum(v))

2.80 & 7.57 & 8.92


In [12]:
# v = get_mem('resnet32', 8, 8, 0, 0) #/ 1e6 
    
# nice_p(v, 92.53/sum(v))

# Performance Density for CIFAR-10

In [55]:
 # Both get_mem terms for MobileNetV2; the cell displays score / their sum, scaled by 1e6.
 mem = get_mem('mobilenetv2', wbits=32, fbits=32, wsp=0, fsp=0)
 92.6 / sum(mem) * 1e6

0.31465248121762546

In [56]:
# Both get_mem terms for MobileNetV2; the cell displays score / their sum, scaled by 1e6.
mem = get_mem('mobilenetv2', wbits=32, fbits=32, wsp=0, fsp=0.5)
91.87 / sum(mem) * 1e6

0.4995725438279609

In [57]:
# Both get_mem terms for MobileNetV2; the cell displays score / their sum, scaled by 1e6.
mem = get_mem('mobilenetv2', wbits=8, fbits=8, wsp=0, fsp=0.5)
91.44 / sum(mem) * 1e6

1.9889371245293894

In [58]:
 # Both get_mem terms for MobileNetV2; the cell displays score / their sum, scaled by 1e6.
 mem = get_mem('mobilenetv2', wbits=8, fbits=8, wsp=0.5, fsp=0.5)
 86.84 / sum(mem) * 1e6

2.3606411636232045

In [59]:
# Both get_mem terms for MobileNetV2; the cell displays score / their sum, scaled by 1e6.
mem = get_mem('mobilenetv2', wbits=8, fbits=8, wsp=0.5, fsp=0)
85.94 / sum(mem) * 1e6

1.3347716686217639