In [1]:
import pandas as pd
import torch
from torch import nn
from torch.nn import functional as F

## 5.1 Layers and Blocks

In [2]:
# A two-layer perceptron assembled from stock layers:
# 20 input features -> 256 hidden units (ReLU) -> 10 outputs.
net = nn.Sequential(
    nn.Linear(20, 256),
    nn.ReLU(),
    nn.Linear(256, 10),
)
# Mini-batch of 2 random examples with 20 features each.
X = torch.rand(2, 20)
print(X)
net(X)

tensor([[0.9883, 0.0499, 0.0821, 0.6818, 0.9496, 0.0721, 0.6382, 0.2691, 0.9100,
         0.5898, 0.5419, 0.7953, 0.2514, 0.1650, 0.3465, 0.0815, 0.8157, 0.4499,
         0.8600, 0.5660],
        [0.3080, 0.5981, 0.0886, 0.2571, 0.1834, 0.6113, 0.6273, 0.3141, 0.5799,
         0.3765, 0.0710, 0.7928, 0.0979, 0.5826, 0.1557, 0.4331, 0.6096, 0.9529,
         0.7887, 0.4821]])


tensor([[ 0.1634, -0.0182,  0.2317,  0.1389, -0.1142, -0.0756,  0.1030,  0.1450,
          0.2843, -0.2025],
        [ 0.0634, -0.0206,  0.2404,  0.1266, -0.0476, -0.1267,  0.0138,  0.1316,
          0.2594, -0.1947]], grad_fn=<AddmmBackward>)

In [3]:
class MLP(nn.Module):
    """Multilayer perceptron with one hidden layer, written as a custom block.

    The layers are created once in ``__init__`` and wired together in
    ``forward``. Calling an instance (``mlp(X)``) goes through
    ``nn.Module.__call__``, which dispatches to ``forward``.
    """

    def __init__(self):
        super().__init__()
        # 20 input features -> 256 hidden units -> 10 outputs.
        self.hidden = nn.Linear(20, 256)
        self.out = nn.Linear(256, 10)

    def forward(self, X):
        """Return ``out(relu(hidden(X)))`` for an input whose last dimension is 20."""
        hidden_activations = F.relu(self.hidden(X))
        return self.out(hidden_activations)

In [4]:
# Instantiate the custom block and apply it to the mini-batch X;
# calling the module runs its forward method.
mlp = MLP()
mlp(X)

tensor([[-0.0485, -0.1471,  0.3074, -0.2178,  0.2423, -0.0308, -0.0273,  0.0771,
          0.1722, -0.1497],
        [-0.0592, -0.1202,  0.2476, -0.2877,  0.0412,  0.0560, -0.0835,  0.1472,
          0.0921, -0.1167]], grad_fn=<AddmmBackward>)

In [5]:
class Mysequential(nn.Module):
    """Minimal re-implementation of ``nn.Sequential``.

    Args:
        *args: Sub-modules to chain. They are applied in the order given,
            the output of each one being fed to the next.
    """

    def __init__(self, *args):
        super().__init__()
        for idx, block in enumerate(args):
            # Register every block under its position as a *string* name.
            # nn.Module builds dotted names such as '0.weight' from these
            # keys, so a non-string key breaks state_dict()/parameters().
            # Positional names also keep a module that is passed twice as
            # two separate steps instead of collapsing into one entry.
            self.add_module(str(idx), block)

    def forward(self, X):
        """Pass ``X`` through every registered block in insertion order."""
        # Iterate `_modules` directly: `children()` skips repeated module
        # instances, which would drop a deliberately shared layer.
        for block in self._modules.values():
            X = block(X)
        return X

In [6]:
# Same architecture as the built-in nn.Sequential example, but chained by
# the hand-written container.
myseq = Mysequential(
    nn.Linear(20, 256),
    nn.ReLU(),
    nn.Linear(256, 10),
)
myseq(X)

tensor([[ 0.0476,  0.1536,  0.1632,  0.3385, -0.0834, -0.0804, -0.2574,  0.2139,
         -0.1390,  0.2277],
        [-0.0166,  0.1269,  0.1481,  0.3419,  0.0086, -0.0574, -0.1453,  0.1242,
          0.0099,  0.3133]], grad_fn=<AddmmBackward>)

In [7]:
class FixedHiddenMLP(nn.Module):
    """Block combining a trainable layer, a fixed random projection and control flow.

    ``forward`` applies the same ``nn.Linear(20, 20)`` twice (parameter
    sharing) around a constant random matrix, repeatedly halves the result
    until its L1 norm is at most 1, and returns the sum as a scalar tensor.
    """

    def __init__(self):
        super().__init__()
        # Constant random weights that are never trained. Registering them as
        # a non-persistent buffer keeps them out of parameters() and
        # state_dict(), exactly like a plain attribute, but lets them follow
        # the module through .to() / .double() / .cuda(). A plain tensor
        # attribute would be left behind and make `torch.mm` fail on a
        # dtype or device mismatch.
        self.register_buffer('rand_weight', torch.rand((20, 20)), persistent=False)
        self.linear = nn.Linear(20, 20)

    def forward(self, X):
        """Return a 0-dim tensor computed from ``X`` (last dimension 20)."""
        X = self.linear(X)
        # Use the constant weights together with `mm` and `relu`.
        X = F.relu(torch.mm(X, self.rand_weight) + 1)
        # Reuse the fully-connected layer: equivalent to two layers that
        # share one set of parameters.
        X = self.linear(X)
        # Data-dependent control flow: halve until the L1 norm is <= 1.
        while X.abs().sum() > 1:
            X /= 2
        return X.sum()
class NestMLP(nn.Module):
    """Block that nests an ``nn.Sequential`` stack in front of a linear layer."""

    def __init__(self):
        super().__init__()
        # Inner stack: 20 -> 64 -> 32, with a ReLU after each linear layer.
        inner_layers = [
            nn.Linear(20, 64), nn.ReLU(),
            nn.Linear(64, 32), nn.ReLU(),
        ]
        self.net = nn.Sequential(*inner_layers)
        # Final projection: 32 -> 16.
        self.linear = nn.Linear(32, 16)

    def forward(self, X):
        """Run ``X`` through the nested stack, then the final linear layer."""
        features = self.net(X)
        return self.linear(features)

# Custom blocks and built-in layers can be mixed freely inside a Sequential.
chimera = nn.Sequential(
    NestMLP(),
    nn.Linear(16, 20),
    FixedHiddenMLP(),
)
chimera(X)

tensor(-0.1855, grad_fn=<SumBackward0>)

## 5.2 Parameter Management

In [8]:
# Mini-batch of 2 random examples with 4 features each.
X = torch.rand(2, 4)
print(X)
# Small MLP used for the parameter-access examples: 4 -> 8 (ReLU) -> 1.
net = nn.Sequential(
    nn.Linear(4, 8),
    nn.ReLU(),
    nn.Linear(8, 1),
)
net(X)

tensor([[0.8248, 0.2350, 0.0836, 0.9077],
        [0.4899, 0.3617, 0.5442, 0.3765]])


tensor([[-0.1381],
        [-0.1642]], grad_fn=<AddmmBackward>)

In [9]:
# State of the output layer (index 2), then of the ReLU (index 1), one per
# line.
print(net[2].state_dict(), net[1].state_dict(), sep="\n")

OrderedDict([('weight', tensor([[-0.3511,  0.2567,  0.2352,  0.1210, -0.0555,  0.2756, -0.1982,  0.2760]])), ('bias', tensor([-0.0047]))])
OrderedDict()


In [10]:
# Three views of the output layer's bias, one per line: its Python type,
# the Parameter itself, and the underlying tensor.
print(
    type(net[2].bias),
    net[2].bias,
    net[2].bias.data,
    sep="\n",
)

<class 'torch.nn.parameter.Parameter'>
Parameter containing:
tensor([-0.0047], requires_grad=True)
tensor([-0.0047])


In [11]:
# No backward pass has been run on `net` so far, so the gradient is unset.
# Test identity with `is None`: `== None` would be routed through the
# tensor's overloaded `__eq__` once a gradient exists.
net[2].weight.grad is None

True

In [12]:
# (name, shape) pairs for the first layer only ...
print(*((label, tensor.shape) for label, tensor in net[0].named_parameters()))
# ... and for the whole network, where names carry the layer index prefix.
print(*((label, tensor.shape) for label, tensor in net.named_parameters()))

('weight', torch.Size([8, 4])) ('bias', torch.Size([8]))
('0.weight', torch.Size([8, 4])) ('0.bias', torch.Size([8])) ('2.weight', torch.Size([1, 8])) ('2.bias', torch.Size([1]))


In [13]:
# Parameters can also be looked up by qualified name in the network-level
# state dict: '2.bias' is the bias of the layer at index 2.
net.state_dict()['2.bias'].data

tensor([-0.0047])

In [None]:
def block1():
    