# Building a Network with PyTorch

A key feature in PyTorch is the *module*, which encapsulates computational units such as layers and networks.

A module is defined as a subclass of `nn.Module`, which has an `.__init__(self)` method (of course) and a `.forward(self, x)` method:

In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# A layer

class MyLinear(nn.Module):
    """Hand-written affine layer that computes ``input @ weight + bias``.

    ``weight`` has shape ``(in_features, out_features)`` and ``bias`` has
    shape ``(out_features,)``. Both are sampled with ``torch.randn`` and
    wrapped in ``nn.Parameter`` so the module exposes them as parameters.
    """

    def __init__(self, in_features, out_features):
        super().__init__()
        # Sample the weight before the bias so the random generator is
        # consumed in a fixed order.
        weight_init = torch.randn(in_features, out_features)
        bias_init = torch.randn(out_features)
        self.weight = nn.Parameter(weight_init)
        self.bias = nn.Parameter(bias_init)

    def forward(self, input):
        """Return the affine transform of ``input``."""
        projected = torch.matmul(input, self.weight)
        return projected + self.bias
    
# A network

class Net(nn.Module):
    """Small network: ``MyLinear(4, 3)`` -> ReLU -> ``nn.Linear(3, 1)``."""

    def __init__(self):
        super().__init__()
        # Modules assigned as attributes become named submodules.
        self.l0 = MyLinear(4, 3)   # layer defined in this file
        self.l1 = nn.Linear(3, 1)  # layer shipped with PyTorch

    def forward(self, x):
        """Run ``x`` through both layers with a ReLU in between."""
        hidden = F.relu(self.l0(x))
        return self.l1(hidden)

- In `.__init__`, you initialize the module and define the trainable modules/parameters
  
  - The immediate submodules can be iterated over with the generators `.children()` or `.named_children()`, and all submodules (including nested ones) can be iterated over recursively with `.modules()` or `.named_modules()`

  - All parameters can be shown by `.parameters()` or `.named_parameters()`
  
- In `.forward()`, you define the computational flow of the module

In [7]:
myNet = Net()

# Each entry pairs the heading to print with the iterator it introduces:
# direct submodules, all submodules with their names, bare parameters,
# and parameters with their names.
sections = [
    ('\nChildren:\n', myNet.children()),
    ('\nNamed_modules:\n', myNet.named_modules()),
    ('\nParameters:\n', myNet.parameters()),
    ('\nNamed_parameters:\n', myNet.named_parameters()),
]

for heading, entries in sections:
    print(heading)
    for entry in entries:
        print(entry)


Children:

MyLinear()
Linear(in_features=3, out_features=1, bias=True)

Named_modules:

('', Net(
  (l0): MyLinear()
  (l1): Linear(in_features=3, out_features=1, bias=True)
))
('l0', MyLinear())
('l1', Linear(in_features=3, out_features=1, bias=True))

Parameters:

Parameter containing:
tensor([[ 0.1162, -2.2683,  0.5358],
        [-0.2739, -0.7976, -0.3329],
        [ 0.1297, -1.6810, -0.6916],
        [ 0.6894,  0.7266,  0.3310]], requires_grad=True)
Parameter containing:
tensor([ 0.2122,  0.3660, -0.0027], requires_grad=True)
Parameter containing:
tensor([[ 0.4613,  0.2316, -0.2788]], requires_grad=True)
Parameter containing:
tensor([0.0131], requires_grad=True)

Named_parameters:

('l0.weight', Parameter containing:
tensor([[ 0.1162, -2.2683,  0.5358],
        [-0.2739, -0.7976, -0.3329],
        [ 0.1297, -1.6810, -0.6916],
        [ 0.6894,  0.7266,  0.3310]], requires_grad=True))
('l0.bias', Parameter containing:
tensor([ 0.2122,  0.3660, -0.0027], requires_grad=True))
('l1.we

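You can hold a variable number of submodules, or select among them at run time, with the containers `nn.ModuleList` and `nn.ModuleDict`; the modules stored in them show up in `.named_modules()` and `.named_parameters()`: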

In [8]:
class DynamicNet(nn.Module):
    """``num_layers`` stacked ``MyLinear(4, 4)`` layers, one selectable
    activation, then a final ``MyLinear(4, 1)``.
    """

    def __init__(self, num_layers):
        super().__init__()

        # Variable-length stack of layers, held in a ModuleList.
        stack = nn.ModuleList()
        for _ in range(num_layers):
            stack.append(MyLinear(4, 4))
        self.linears = stack

        # Activations addressable by name from forward().
        choices = nn.ModuleDict()
        choices['relu'] = nn.ReLU()
        choices['lrelu'] = nn.LeakyReLU()
        self.activations = choices

        self.final = MyLinear(4, 1)

    def forward(self, x, act):
        """Apply every linear layer, then activation ``act``, then the final layer.

        ``act`` is a key of ``self.activations`` (``'relu'`` or ``'lrelu'``).
        """
        hidden = x
        for layer in self.linears:
            hidden = layer(hidden)
        activation = self.activations[act]
        return self.final(activation(hidden))

# Build a three-layer instance and push one random sample through it.
dynamic_net = DynamicNet(num_layers=3)
sample_input = torch.randn(4)
output = dynamic_net(sample_input, act='relu')

# Heading / iterator pairs: the container modules and their contents
# appear under dotted names such as 'linears.0'.
sections = [
    ('\nNamed_modules:\n', dynamic_net.named_modules()),
    ('\nNamed_parameters:\n', dynamic_net.named_parameters()),
]

for heading, entries in sections:
    print(heading)
    for entry in entries:
        print(entry)


Named_modules:

('', DynamicNet(
  (linears): ModuleList(
    (0): MyLinear()
    (1): MyLinear()
    (2): MyLinear()
  )
  (activations): ModuleDict(
    (relu): ReLU()
    (lrelu): LeakyReLU(negative_slope=0.01)
  )
  (final): MyLinear()
))
('linears', ModuleList(
  (0): MyLinear()
  (1): MyLinear()
  (2): MyLinear()
))
('linears.0', MyLinear())
('linears.1', MyLinear())
('linears.2', MyLinear())
('activations', ModuleDict(
  (relu): ReLU()
  (lrelu): LeakyReLU(negative_slope=0.01)
))
('activations.relu', ReLU())
('activations.lrelu', LeakyReLU(negative_slope=0.01))
('final', MyLinear())

Named_parameters:

('linears.0.weight', Parameter containing:
tensor([[ 0.1599,  0.4401, -0.8715,  1.4173],
        [ 0.3116, -0.0307,  0.2817,  0.8797],
        [-0.2651, -0.1729, -0.5786,  0.3769],
        [-0.8076,  1.2252, -1.8202,  0.3763]], requires_grad=True))
('linears.0.bias', Parameter containing:
tensor([-1.0593,  0.0390, -1.2481,  2.0366], requires_grad=True))
('linears.1.weight', Param

To train a network:

- Register the parameters into an optimizer

- Repeat the following:

  - Feed one sample into the network

  - Calculate the loss

  - Clear the current gradients, then backpropagate the loss
  
  - Take an optimizer step, which updates the parameters

In [9]:
# Create the network (from previous section) and register its parameters
# with an SGD optimizer.
net = Net()
optimizer = torch.optim.SGD(
    net.parameters(), lr=1e-4, weight_decay=1e-2, momentum=0.9)

# Run a sample training loop that "teaches" the network
# to output the constant zero function
for _ in range(10000):
    # One random 4-feature sample per step. Named `sample_input` rather than
    # `input` so the builtin input() is not shadowed for the rest of the
    # session.
    sample_input = torch.randn(4)
    output = net(sample_input)

    # |output| is smallest when the network outputs zero. The output holds a
    # single element, so backward() can be called without arguments.
    loss = torch.abs(output)

    # Gradients accumulate across backward() calls, so clear them first.
    net.zero_grad()
    loss.backward()

    # Apply the SGD update to the registered parameters.
    optimizer.step()