# PyTorch Models

In [None]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

Get Device for Training

In [None]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Using {} device'.format(device))

Using cpu device


Define the Class

We define our neural network by subclassing nn.Module, and initialize the neural network layers in __init__. 
Every nn.Module subclass implements the operations on input data in the forward method.

In [None]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.model = nn.Sequential(
            nn.Flatten(),
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
            nn.ReLU()
        )

    def forward(self, x):
        out = self.model(x)
        return out

We create an instance of NeuralNetwork, and move it to the device, and print its structure.

In [None]:
model = NeuralNetwork().to(device)
print(model)

NeuralNetwork(
  (model): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=784, out_features=512, bias=True)
    (2): ReLU()
    (3): Linear(in_features=512, out_features=512, bias=True)
    (4): ReLU()
    (5): Linear(in_features=512, out_features=10, bias=True)
    (6): ReLU()
  )
)


Calling the model on the input returns a 10-dimensional tensor with raw predicted values for each class. We get the prediction probabilities by passing it through an instance of the nn.Softmax module.

In [None]:
X = torch.rand(1, 28, 28, device=device)
logits = model(X)
pred_prob = nn.Softmax(dim=1)(logits)
y_pred = pred_prob.argmax(1)
print(f"Predicted class: {y_pred}")

Predicted class: tensor([9])


Model Layers

In [None]:
input_image = torch.rand(3,28,28)
print(input_image.size())

torch.Size([3, 28, 28])


nn.Flatten

In [None]:
flatten = nn.Flatten()
flat_image = flatten(input_image)
print(flat_image.size())

torch.Size([3, 784])


nn.Linear

In [21]:
layer1 = nn.Linear(in_features=28*28, out_features=20)
hidden1 = layer1(flat_image)

print(hidden1.size())
print(layer1.weight)

torch.Size([3, 20])
Parameter containing:
tensor([[ 0.0057, -0.0096, -0.0044,  ...,  0.0207, -0.0173,  0.0329],
        [ 0.0263, -0.0323,  0.0053,  ..., -0.0314,  0.0171,  0.0270],
        [-0.0004, -0.0221,  0.0190,  ...,  0.0120,  0.0343, -0.0274],
        ...,
        [-0.0332, -0.0055, -0.0231,  ..., -0.0352,  0.0320,  0.0060],
        [ 0.0163,  0.0067, -0.0313,  ..., -0.0129, -0.0212, -0.0256],
        [-0.0205,  0.0064,  0.0340,  ..., -0.0138,  0.0106, -0.0008]],
       requires_grad=True)


Linear Initialization

In [22]:
nn.init.xavier_uniform_(layer1.weight)

Parameter containing:
tensor([[ 6.8406e-02,  7.8624e-02, -3.9066e-03,  ...,  2.4580e-02,
          5.0176e-02, -6.7120e-02],
        [ 1.0522e-02,  5.4147e-05, -3.2445e-02,  ..., -3.2423e-02,
          1.3482e-02, -1.3048e-02],
        [-7.5344e-02, -5.0828e-02, -7.6298e-02,  ...,  3.0200e-02,
         -6.2669e-02, -6.7936e-02],
        ...,
        [-8.4119e-02, -1.9566e-03,  7.7000e-02,  ..., -4.7244e-02,
         -7.0805e-03, -7.4807e-03],
        [ 3.1301e-02,  2.3326e-02, -6.7337e-02,  ...,  6.8971e-03,
          4.6559e-02,  8.0729e-02],
        [-6.0988e-02,  3.2871e-02,  7.6366e-02,  ...,  8.0971e-02,
          1.7221e-02,  4.5636e-02]], requires_grad=True)

nn.ReLU

In [None]:
print(f"Before ReLU: {hidden1}\n\n")
hidden1 = nn.ReLU()(hidden1)
print(f"After ReLU: {hidden1}")

Before ReLU: tensor([[-0.6997, -0.9624, -0.6161, -0.1603,  0.0869,  0.5333,  0.1063, -0.0837,
          0.0817,  0.0978,  0.1040, -0.4412, -0.0218,  0.4277,  0.5699,  0.0344,
         -0.3595,  0.0093,  0.2188, -0.2606],
        [-0.6647, -0.9542, -0.1727, -0.1723,  0.1687,  0.3513,  0.1463, -0.0413,
          0.0237, -0.1532, -0.1458, -0.7049, -0.0215,  0.0649,  0.3023,  0.0640,
         -0.0970,  0.0933,  0.4213,  0.0547],
        [-0.4028, -0.4902, -0.5802, -0.0746, -0.2058,  0.5985, -0.3594, -0.2759,
         -0.0299,  0.1895,  0.2191, -0.3780,  0.2269, -0.0784,  0.5282,  0.1934,
         -0.2905, -0.3756,  0.3082, -0.0226]], grad_fn=<AddmmBackward>)


After ReLU: tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0869, 0.5333, 0.1063, 0.0000, 0.0817,
         0.0978, 0.1040, 0.0000, 0.0000, 0.4277, 0.5699, 0.0344, 0.0000, 0.0093,
         0.2188, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.1687, 0.3513, 0.1463, 0.0000, 0.0237,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0649, 0.302

nn.Sequential

In [23]:
seq_modules = nn.Sequential(
    flatten,
    layer1,
    nn.ReLU(),
    nn.Linear(20, 10)
)

input_image = torch.rand(3,28,28)
print(input_image)

logits = seq_modules(input_image)
print(logits)

tensor([[[0.0135, 0.0152, 0.2819,  ..., 0.1375, 0.3881, 0.0883],
         [0.6229, 0.0647, 0.1520,  ..., 0.5410, 0.0336, 0.2606],
         [0.9148, 0.5572, 0.9916,  ..., 0.1814, 0.8677, 0.5344],
         ...,
         [0.9080, 0.0680, 0.3945,  ..., 0.1684, 0.2955, 0.0381],
         [0.4827, 0.2420, 0.4573,  ..., 0.1136, 0.4132, 0.5936],
         [0.8703, 0.3715, 0.4160,  ..., 0.4397, 0.8300, 0.2034]],

        [[0.3092, 0.6768, 0.6481,  ..., 0.3904, 0.4691, 0.2271],
         [0.4452, 0.6845, 0.6677,  ..., 0.7008, 0.9842, 0.8984],
         [0.1827, 0.2682, 0.1178,  ..., 0.8572, 0.7105, 0.8653],
         ...,
         [0.5357, 0.6848, 0.9471,  ..., 0.1511, 0.5416, 0.1522],
         [0.8901, 0.9187, 0.8521,  ..., 0.6686, 0.5996, 0.1544],
         [0.1877, 0.8754, 0.8909,  ..., 0.4464, 0.9585, 0.9919]],

        [[0.4783, 0.6017, 0.3565,  ..., 0.8839, 0.4042, 0.3392],
         [0.2071, 0.2087, 0.9456,  ..., 0.1241, 0.6113, 0.5017],
         [0.1826, 0.5815, 0.8130,  ..., 0.3252, 0.1493, 0.

nn.Softmax

In [None]:
softmax = nn.Softmax(dim=1)
pred_prob = softmax(logits)

print(pred_prob)
print(pred_prob.sum(dim=1))

tensor([[0.1155, 0.1211, 0.1018, 0.0842, 0.1406, 0.0654, 0.0929, 0.0943, 0.1048,
         0.0793],
        [0.0981, 0.1086, 0.1020, 0.0825, 0.1542, 0.0698, 0.0958, 0.1096, 0.1137,
         0.0657],
        [0.0940, 0.1057, 0.1074, 0.0829, 0.1460, 0.0744, 0.0976, 0.1116, 0.1126,
         0.0677]], grad_fn=<SoftmaxBackward>)
tensor([1.0000, 1.0000, 1.0000], grad_fn=<SumBackward1>)


Model Parameters

In [None]:
print("Model structure: ", model, "\n\n")

for name, param in model.named_parameters():
    print(f"Layer: {name} | Size: {param.size()} | Values : {param[:2]} \n")

Model structure:  NeuralNetwork(
  (model): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=784, out_features=512, bias=True)
    (2): ReLU()
    (3): Linear(in_features=512, out_features=512, bias=True)
    (4): ReLU()
    (5): Linear(in_features=512, out_features=10, bias=True)
    (6): ReLU()
  )
) 


Layer: model.1.weight | Size: torch.Size([512, 784]) | Values : tensor([[-0.0156, -0.0256,  0.0296,  ..., -0.0193, -0.0341,  0.0056],
        [ 0.0328,  0.0334,  0.0167,  ...,  0.0046, -0.0063,  0.0347]],
       grad_fn=<SliceBackward>) 

Layer: model.1.bias | Size: torch.Size([512]) | Values : tensor([-0.0340,  0.0351], grad_fn=<SliceBackward>) 

Layer: model.3.weight | Size: torch.Size([512, 512]) | Values : tensor([[-0.0344,  0.0136, -0.0120,  ..., -0.0302, -0.0264, -0.0325],
        [ 0.0397,  0.0277,  0.0132,  ..., -0.0196,  0.0170,  0.0262]],
       grad_fn=<SliceBackward>) 

Layer: model.3.bias | Size: torch.Size([512]) | Values : tensor([-0.031