# Building Models with PyTorch

torch.nn.Module and torch.nn.Parameter

In [3]:
import torch

class TinyModel(torch.nn.Module):
    
    def __init__(self):
        super(TinyModel, self).__init__()
        
        self.linear1 = torch.nn.Linear(100, 200)
        self.activation = torch.nn.ReLU()
        self.linear2 = torch.nn.Linear(200, 10)
        self.softmax = torch.nn.Softmax()
        
        
    def forward(self, x):
        x = self.linear1(x)
        x = self.activation(x)
        x = self.linear2(x)
        x = self.softmax(x)
        return x
    
tinymodel = TinyModel()

print('The Model: ')
print(tinymodel)

print('\n\nJust one layer: ')
print(tinymodel.linear2)

print('\n\nModel parameters: ')
for param in  tinymodel.parameters():
    print(param)
    
print('\n\nLayer params: ')
for param in tinymodel.linear2.parameters():
    print(param)

The Model: 
TinyModel(
  (linear1): Linear(in_features=100, out_features=200, bias=True)
  (activation): ReLU()
  (linear2): Linear(in_features=200, out_features=10, bias=True)
  (softmax): Softmax(dim=None)
)


Just one layer: 
Linear(in_features=200, out_features=10, bias=True)


Model parameters: 
Parameter containing:
tensor([[-6.8152e-02, -4.9489e-02, -8.2287e-02,  ...,  2.8960e-02,
          1.4670e-02,  8.8196e-02],
        [-2.8234e-02, -6.3169e-05, -3.1043e-02,  ..., -9.3712e-02,
          5.0704e-02,  6.6511e-02],
        [-6.9824e-02,  8.2345e-02,  3.6071e-02,  ..., -3.8940e-02,
         -7.9457e-02,  6.8927e-02],
        ...,
        [ 4.5794e-02,  1.1396e-02,  4.6661e-02,  ...,  3.1534e-02,
         -4.7502e-02,  6.9681e-02],
        [-2.2402e-02, -6.4405e-02, -1.8062e-02,  ..., -7.8268e-02,
          9.5082e-02,  6.2663e-03],
        [-4.8274e-02,  1.5618e-02, -9.9394e-02,  ...,  5.3213e-02,
          8.4867e-02, -3.6768e-04]], requires_grad=True)
Parameter containing:
te

Common Layer Types

Most basic type of neural network is linear or fully connected layer.

In [4]:
lin = torch.nn.Linear(3, 2)
x = torch.rand(1, 3)
print('Input: ')
print(x)

print('\n\nWeight and Bias parameters:')
for param in lin.parameters():
    print(param)
    
y = lin(x)
print('\n\nOutput: ')
print(y)

Input: 
tensor([[0.8771, 0.8899, 0.2731]])


Weight and Bias parameters:
Parameter containing:
tensor([[ 0.2607, -0.5673, -0.0860],
        [ 0.4644, -0.3599,  0.1655]], requires_grad=True)
Parameter containing:
tensor([-0.0286, -0.3336], requires_grad=True)


Output: 
tensor([[-0.3283, -0.2014]], grad_fn=<AddmmBackward0>)


If we multiply 'x' by the linear layer's weights, and add the biases, we will get the output 'y'

Convolutional Layers(CNN)

In [5]:
import torch.functional as F

class LeNet(torch.nn.Module):
    
    def __init__(self):
        super(LeNet, self).__init__()
        # 1 input image channel (black n white), 6 output channels, 5x5 square convolution
        # kernel
        self.conv1 = torch.nn.Conv2d(1, 6, 5)  # (no of input channels, no of output features, window/kernel size)
        # output tensor to con1 gives us 6x28x28. 6 features with 28x28 height n width of map
        self.conv2 = torch.nn.Conv2d(6, 16, 3)
        # an affine operation: y = Wx + b
        self.fc1 = torch.nn.Linear(16 * 6 * 6, 120) # 6 * 6 from image resolution
        self.fc2 = torch.nn.Linear(120, 84)
        self.fc3 = torch.nn.Linear(84, 10)
        
    def forward(self, x):
        # Max pooling over a (2, 2) window
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        # If the size is square, we can only specify a single number
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = F.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
    
    def num_flat_features(self, x):
        size = x.size()[1:] # all dimensions except batch domension
        num_features = 1
        for s in size:
            num_features *= s
            
        return num_features
        

Recurrent Layers (RNN)

RNN are used for sequential data, from time series to the DNA nucleotides. An RNN does this by maintaining a hidden state that acts as a srt of memory for what is has been seen in hte sequence so far

In [6]:
class LSTMTagger(torch.nn.Module):
    
    def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size):
        super(LSTMTagger, self).__init__()
        self.hidden_dim = hidden_dim
        
        self.word_embeddings = torch.nn.Embedding(vocab_size, embedding_dim)
        
        # LSTM takes work embeddings as inputs and outputs hidden states with dimensionality hidden_dim
        self.lstm = torch.nn.LSTM(embedding_dim, hidden_dim)
        
        # the linear layer that maps from hidden state space to tag space
        self.hidden2tag = torch.nn.Linear(hidden_dim, target_size)
        
    def forward(self, sentence):
        embeds = self.word_embeddings(sentence)
        lstm_out, _ = self.lstm(embeds.view(len(sentence), 1, -1))
        tag_space = self.hidden2tag(lstm_out.view(len(sentence), -1))
        tag_scores = F.log_softmax(tag_space, dim=1)
        return tag_scores
    
    

Constructor has four arguments:
    1. vocab_size : no of words in input vocabulary. Each word is one-hot vector
    2. tagset_size : no of tags in the output set
    3. embedding_dim : size of embedding space for the vocabulary
    4. hidden_dim : the size of the LSTM's memory

Data Manipulation Layers

Max pooling reduce a tensor by combining cells, an dassinging the maximumvalue of the input cells to the output cell. This works as a layer that perform important function, but don't participate in larning process