In [1]:
import numpy as np
import torch
import torch.nn as nn

In [8]:

# wide network: the shallower of the two models (one hidden layer of 4 units).
# Each (n_in, n_out) pair below becomes one nn.Linear, created in list order.
widenet = nn.Sequential(
    *(nn.Linear(n_in, n_out) for n_in, n_out in [(2, 4), (4, 3)])
)

# deep network: same 2 inputs and 3 outputs, but with one additional layer
# (hidden layers of 4 and 2 units).
deepnet = nn.Sequential(
    *(nn.Linear(n_in, n_out) for n_in, n_out in [(2, 4), (4, 2), (2, 3)])
)

# show both architectures, separated by a line holding a single space
print(widenet, ' ', deepnet, sep='\n')

Sequential(
  (0): Linear(in_features=2, out_features=4, bias=True)
  (1): Linear(in_features=4, out_features=3, bias=True)
)
 
Sequential(
  (0): Linear(in_features=2, out_features=4, bias=True)
  (1): Linear(in_features=4, out_features=2, bias=True)
  (2): Linear(in_features=2, out_features=3, bias=True)
)


In [9]:
# inspect every (name, tensor) pair registered on the deep network;
# each pair is printed as a tuple, followed by a line holding a single space
for named_param in deepnet.named_parameters():
    print(named_param, " ", sep="\n")

('0.weight', Parameter containing:
tensor([[ 0.5606, -0.5118],
        [-0.1968,  0.6088],
        [-0.3864,  0.5486],
        [-0.1962,  0.0984]], requires_grad=True))
 
('0.bias', Parameter containing:
tensor([-0.2963,  0.0128,  0.1860, -0.7004], requires_grad=True))
 
('1.weight', Parameter containing:
tensor([[-0.4420,  0.2857, -0.4000, -0.1130],
        [ 0.3113, -0.4394,  0.4108, -0.1991]], requires_grad=True))
 
('1.bias', Parameter containing:
tensor([0.3584, 0.2066], requires_grad=True))
 
('2.weight', Parameter containing:
tensor([[-0.0228,  0.4942],
        [ 0.5134, -0.4814],
        [-0.6418, -0.1662]], requires_grad=True))
 
('2.bias', Parameter containing:
tensor([-0.5471, -0.0544, -0.0927], requires_grad=True))
 


In [10]:
# count the number of nodes ( = the number of biases, because each node has 1 bias)

def count_nodes(net):
    """Return the number of nodes (units) in `net`, counted via its bias terms.

    Each node of a bias-enabled layer owns exactly one bias value, so the node
    count is the total number of elements across all bias parameters.

    Parameters
    ----------
    net : torch.nn.Module
        The model to inspect.

    Returns
    -------
    int
        Total number of bias elements; 0 if the model has no bias parameters
        (e.g. every layer was built with ``bias=False``).
    """
    total = 0
    for name, param in net.named_parameters():
        # Names are dotted paths such as '0.bias'. Only the last component is the
        # parameter's own attribute name, so test that part alone; otherwise the
        # weights of a submodule whose *name* contains 'bias' would be counted too.
        leaf_name = name.rsplit('.', 1)[-1]
        if 'bias' in leaf_name:
            total += param.numel()
    return total


numNodesInWide = count_nodes(widenet)
numNodesInDeep = count_nodes(deepnet)

print('number of nodes in wide network = %s' %numNodesInWide)
print('number of nodes in deep network = %s' %numNodesInDeep)


number of nodes in wide network = 7
number of nodes in deep network = 9


In [12]:
# printing just the parameter tensors, without their names
# (parameters() yields the same tensors as named_parameters(), minus the name)
for tensor in widenet.parameters():
    print(tensor)

Parameter containing:
tensor([[-0.6430,  0.0252],
        [-0.3161,  0.0916],
        [-0.1681,  0.6442],
        [-0.2973,  0.2794]], requires_grad=True)
Parameter containing:
tensor([-0.2417,  0.6168,  0.4603, -0.0376], requires_grad=True)
Parameter containing:
tensor([[-0.0518,  0.3019, -0.4385, -0.0741],
        [ 0.1513, -0.0060,  0.1908, -0.3846],
        [ 0.0278, -0.1474, -0.2976,  0.1015]], requires_grad=True)
Parameter containing:
tensor([-0.2698, -0.3144,  0.3840], requires_grad=True)


In [13]:
# number of trainable parameters.
# A parameter counts as trainable when requires_grad is True: autograd then
# computes a gradient for it during backpropagation, so it can be updated.

numTrainParams = 0

for param in widenet.parameters():
    if not param.requires_grad:
        continue  # frozen parameter: not counted
    numTrainParams += param.numel()  # numel() = number of elements in the tensor

print(numTrainParams)

27


In [15]:
# another way: the same count written as a single expression.
# The built-in sum() is used instead of np.sum() so the result is always an exact
# Python int. np.sum() over a Python list returns a NumPy scalar, and for an empty
# list (a model with no trainable parameters) it returns the float 0.0.
numTrainParams = sum(p.numel() for p in widenet.parameters() if p.requires_grad)
print(numTrainParams)

27


In [41]:
from pytorch_model_summary import summary

# summary() is given an example input to pass through the model; here a
# (3, 2) tensor of zeros, whose last dimension matches the first layer's in_features=2.
example_input = torch.zeros((3, 2))

# show_hierarchical=True also appends the nested module tree with per-layer parameter counts
summary_text = summary(widenet, example_input, show_hierarchical=True)
print(summary_text)


-----------------------------------------------------------------------
      Layer (type)        Output Shape         Param #     Tr. Param #
          Linear-1              [3, 4]              12              12
          Linear-2              [3, 3]              15              15
Total params: 27
Trainable params: 27
Non-trainable params: 0
-----------------------------------------------------------------------



Sequential(
  (0): Linear(in_features=2, out_features=4, bias=True), 12 params
  (1): Linear(in_features=4, out_features=3, bias=True), 15 params
), 27 params



