In [1]:
import torch
import torch.nn as nn
import numpy as np

In [2]:
# Two small fully-connected models with the same input (2) and output (3)
# widths but different layouts:
#   widenet - a single hidden layer of 4 units
#   deepnet - two hidden layers of 2 units each
# widenet is built first so the layers are initialised in the same order.
widenet = nn.Sequential(nn.Linear(2, 4), nn.Linear(4, 3))
deepnet = nn.Sequential(nn.Linear(2, 2), nn.Linear(2, 2), nn.Linear(2, 3))

# Show both architectures, separated by one blank line.
print(widenet, deepnet, sep='\n\n')

Sequential(
  (0): Linear(in_features=2, out_features=4, bias=True)
  (1): Linear(in_features=4, out_features=3, bias=True)
)

Sequential(
  (0): Linear(in_features=2, out_features=2, bias=True)
  (1): Linear(in_features=2, out_features=2, bias=True)
  (2): Linear(in_features=2, out_features=3, bias=True)
)


In [3]:
# named_parameters() yields (name, tensor) pairs; print each pair as a tuple,
# followed by a blank line.
for layerName, layerTensor in deepnet.named_parameters():
    print((layerName, layerTensor), end='\n\n')

('0.weight', Parameter containing:
tensor([[ 0.2695,  0.5613],
        [-0.0740,  0.6155]], requires_grad=True))

('0.bias', Parameter containing:
tensor([ 0.2429, -0.0915], requires_grad=True))

('1.weight', Parameter containing:
tensor([[ 0.6339, -0.0241],
        [-0.1968,  0.3195]], requires_grad=True))

('1.bias', Parameter containing:
tensor([-0.4535, -0.6919], requires_grad=True))

('2.weight', Parameter containing:
tensor([[-0.5542,  0.0681],
        [-0.3084, -0.2567],
        [ 0.5060, -0.6863]], requires_grad=True))

('2.bias', Parameter containing:
tensor([ 0.2320,  0.6932, -0.1243], requires_grad=True))



In [4]:
# Count the units ("nodes") in each network.
# Every unit of a Linear layer owns exactly one bias term, so summing the
# lengths of all bias vectors gives the number of units (the input features
# are not counted).

numNodeInWide = 0
for paramName, paramVect in widenet.named_parameters():

    if 'bias' in paramName:
        numNodeInWide += len(paramVect)

# Same count for the deep network. This loop must iterate over deepnet;
# iterating over widenet here would simply repeat the count above.
numNodeInDeep = 0
for paramName, paramVect in deepnet.named_parameters():

    if 'bias' in paramName:
        numNodeInDeep += len(paramVect)

print('There are %s nodes in the wide network.' %numNodeInWide)
print('There are %s nodes in the deep network.' %numNodeInDeep)

There are 7 nodes in the wide network.
There are 7 nodes in the deep network.


In [5]:
# parameters() yields only the tensors (no names); print each one followed by
# a blank line.
for layerTensor in deepnet.parameters():
    print(layerTensor, end='\n\n')

Parameter containing:
tensor([[ 0.2695,  0.5613],
        [-0.0740,  0.6155]], requires_grad=True)

Parameter containing:
tensor([ 0.2429, -0.0915], requires_grad=True)

Parameter containing:
tensor([[ 0.6339, -0.0241],
        [-0.1968,  0.3195]], requires_grad=True)

Parameter containing:
tensor([-0.4535, -0.6919], requires_grad=True)

Parameter containing:
tensor([[-0.5542,  0.0681],
        [-0.3084, -0.2567],
        [ 0.5060, -0.6863]], requires_grad=True)

Parameter containing:
tensor([ 0.2320,  0.6932, -0.1243], requires_grad=True)



In [6]:
# Tally the trainable parameters of widenet, one tensor at a time

nparams = 0

for tensor in widenet.parameters():

    # tensors that do not require gradients are not counted
    if not tensor.requires_grad:
        continue

    count = tensor.numel()
    print('This piece has %s parameters.' % count)
    nparams += count

print('\n\nTotal of %s parameters' % nparams)

This piece has 8 parameters.
This piece has 4 parameters.
This piece has 12 parameters.
This piece has 3 parameters.


Total of 27 parameters


In [7]:
# Same trainable-parameter tally, written as a single sum per model.
nparams = sum(weights.numel() for weights in widenet.parameters() if weights.requires_grad)
print('Widenet has %s parameter.' % nparams)

nparams = sum(weights.numel() for weights in deepnet.parameters() if weights.requires_grad)
print('Deepnet has %s parameter.' % nparams)

Widenet has 27 parameter.
Deepnet has 21 parameter.


In [11]:
import torch
import torch.nn as nn
from torch.autograd import Variable

from collections import OrderedDict
import numpy as np


def summary(model, input_size, batch_size=-1, device=torch.device('cuda:0'), dtypes=None):
    """Print a layer-by-layer summary of ``model`` and return its parameter counts.

    All arguments are forwarded unchanged to ``summary_string``; this wrapper
    only prints the text that function builds.

    Returns:
        The ``(total_params, trainable_params)`` pair produced by
        ``summary_string``.
    """
    report, counts = summary_string(model, input_size, batch_size, device, dtypes)
    print(report)
    return counts


def summary_string(model, input_size, batch_size=-1, device=torch.device('cuda:0'), dtypes=None):
    """Build a text summary of ``model`` by tracing one forward pass.

    A forward hook is attached to every sub-module except ``nn.Sequential``
    and ``nn.ModuleList`` containers. The model is then called once on random
    input, and each hook records the layer's input/output shape and the size
    of its ``weight`` / ``bias`` attributes. The hooks are always removed
    again, even when the forward pass raises.

    Args:
        model: the ``nn.Module`` to inspect.
        input_size: shape of one sample WITHOUT the batch dimension, as a
            tuple, or a list of such shapes when the model takes several
            positional inputs.
        batch_size: value written into the batch position of the reported
            shapes and used for the size estimates (default ``-1``).
        device: device the random input is moved to. The default is
            ``cuda:0``; pass ``torch.device('cpu')`` to trace on the CPU.
        dtypes: one tensor type per input; defaults to ``torch.FloatTensor``
            for every input.

    Returns:
        ``(summary_str, (total_params, trainable_params))``. The counts are
        accumulated from ``torch.prod`` results, so they are 0-dim tensors
        whenever at least one layer has a ``weight`` or ``bias``.

    Raises:
        Whatever the model's forward pass raises on the random input.
    """
    # multiple inputs to the network: normalise a single shape to a list first,
    # so that everything below sees "one entry per input"
    if isinstance(input_size, tuple):
        input_size = [input_size]

    # default dtype list is built AFTER normalisation: one dtype per input,
    # not one per dimension of a single shape
    if dtypes is None:
        dtypes = [torch.FloatTensor] * len(input_size)

    summary_str = ''

    # create properties
    layer_info = OrderedDict()  # "<ClassName>-<idx>" -> recorded facts
    hooks = []

    def register_hook(module):
        def hook(module, inputs, output):
            class_name = type(module).__name__
            module_idx = len(layer_info)

            m_key = "%s-%i" % (class_name, module_idx + 1)
            info = OrderedDict()
            layer_info[m_key] = info
            info["input_shape"] = list(inputs[0].size())
            info["input_shape"][0] = batch_size
            if isinstance(output, (list, tuple)):
                # NOTE: this branch reports -1 (not batch_size) as batch dim
                info["output_shape"] = [
                    [-1] + list(o.size())[1:] for o in output
                ]
            else:
                info["output_shape"] = list(output.size())
                info["output_shape"][0] = batch_size

            params = 0
            if hasattr(module, "weight") and hasattr(module.weight, "size"):
                params += torch.prod(torch.LongTensor(list(module.weight.size())))
                info["trainable"] = module.weight.requires_grad
            if hasattr(module, "bias") and hasattr(module.bias, "size"):
                params += torch.prod(torch.LongTensor(list(module.bias.size())))
            info["nb_params"] = params

        # containers only forward to their children; do not list them
        if not isinstance(module, (nn.Sequential, nn.ModuleList)):
            hooks.append(module.register_forward_hook(hook))

    # batch_size of 2 for batchnorm
    x = [torch.rand(2, *in_size).type(dtype).to(device=device)
         for in_size, dtype in zip(input_size, dtypes)]

    try:
        # register hook
        model.apply(register_hook)

        # make a forward pass
        model(*x)
    finally:
        # remove these hooks, even if the forward pass failed, so the
        # caller's model is never left with stale hooks attached
        for h in hooks:
            h.remove()

    summary_str += "----------------------------------------------------------------" + "\n"
    line_new = "{:>20}  {:>25} {:>15}".format(
        "Layer (type)", "Output Shape", "Param #")
    summary_str += line_new + "\n"
    summary_str += "================================================================" + "\n"
    total_params = 0
    total_output = 0
    trainable_params = 0
    for layer, info in layer_info.items():
        # input_shape, output_shape, trainable, nb_params
        line_new = "{:>20}  {:>25} {:>15}".format(
            layer,
            str(info["output_shape"]),
            "{0:,}".format(info["nb_params"]),
        )
        total_params += info["nb_params"]

        total_output += np.prod(info["output_shape"])
        # "trainable" is only recorded for layers that have a weight
        if info.get("trainable"):
            trainable_params += info["nb_params"]
        summary_str += line_new + "\n"

    # assume 4 bytes/number (float on cuda).
    # Elements per sample = SUM over inputs of each input's element count
    # (multiplying all dimensions of all inputs together would over-count).
    input_elements = sum(np.prod(in_size) for in_size in input_size)
    total_input_size = abs(input_elements * batch_size * 4. / (1024 ** 2.))
    total_output_size = abs(2. * total_output * 4. /
                            (1024 ** 2.))  # x2 for gradients
    total_params_size = abs(total_params * 4. / (1024 ** 2.))
    total_size = total_params_size + total_output_size + total_input_size

    summary_str += "================================================================" + "\n"
    summary_str += "Total params: {0:,}".format(total_params) + "\n"
    summary_str += "Trainable params: {0:,}".format(trainable_params) + "\n"
    summary_str += "Non-trainable params: {0:,}".format(total_params -
                                                        trainable_params) + "\n"
    summary_str += "----------------------------------------------------------------" + "\n"
    summary_str += "Input size (MB): %0.2f" % total_input_size + "\n"
    summary_str += "Forward/backward pass size (MB): %0.2f" % total_output_size + "\n"
    summary_str += "Params size (MB): %0.2f" % total_params_size + "\n"
    summary_str += "Estimated Total Size (MB): %0.2f" % total_size + "\n"
    summary_str += "----------------------------------------------------------------" + "\n"
    return summary_str, (total_params, trainable_params)

In [13]:
# Summarise widenet for one input of per-sample shape (1, 2). The device is
# set to CPU explicitly because summary() defaults to cuda:0. The returned
# (total_params, trainable_params) pair is the cell's last expression, so it
# is displayed below the printed table.
summary(widenet, (1, 2), device=torch.device('cpu'))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                 [-1, 1, 4]              12
            Linear-2                 [-1, 1, 3]              15
Total params: 27
Trainable params: 27
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00
----------------------------------------------------------------



(tensor(27), tensor(27))