In [1]:
import torch

In [3]:
class NeuralNetwork(torch.nn.Module):
    """Multilayer perceptron: a stack of ``Linear -> ReLU`` hidden blocks
    followed by a final ``Linear`` output layer.

    The forward pass returns the raw outputs of the last layer (logits); no
    activation such as softmax is applied to them here.

    Args:
        num_inputs: Number of input features per example.
        num_outputs: Number of output nodes (e.g. number of classes).
        hidden_sizes: Widths of the hidden layers, in order. The default
            ``(30, 20)`` gives two hidden layers with 30 and 20 nodes. An
            empty sequence produces a single ``Linear`` layer with no
            hidden layers.
    """

    # parameterize num of inputs & outputs (and hidden widths) to reuse same code
    # for diff datasets with diff num of features and classes
    def __init__(self, num_inputs, num_outputs, hidden_sizes=(30, 20)):
        super().__init__()

        modules = []
        in_features = num_inputs
        for width in hidden_sizes:
            # Linear layer takes num of input and output nodes as args
            modules.append(torch.nn.Linear(in_features, width))
            # Nonlinear activation functions are placed between hidden layers
            modules.append(torch.nn.ReLU())
            # The num of output nodes of this layer is the num of inputs of the next one
            in_features = width

        # output layer (no activation afterwards, so it emits logits)
        modules.append(torch.nn.Linear(in_features, num_outputs))

        # Sequential numbers its children 0, 1, 2, ... in the order given, so
        # individual layers stay reachable as self.layers[i]
        self.layers = torch.nn.Sequential(*modules)

    def forward(self, x):
        """Run ``x`` through all layers and return the logits."""
        # the outputs of the last layer are called logits
        logits = self.layers(x)
        return logits

In [4]:
# build a network that takes 50 input features and produces 3 output nodes
model = NeuralNetwork(num_inputs=50, num_outputs=3)

In [5]:
# printing a Module lists its submodules (by index inside the Sequential container)
# together with each layer's configuration
print(model)

NeuralNetwork(
  (layers): Sequential(
    (0): Linear(in_features=50, out_features=30, bias=True)
    (1): ReLU()
    (2): Linear(in_features=30, out_features=20, bias=True)
    (3): ReLU()
    (4): Linear(in_features=20, out_features=3, bias=True)
  )
)


In [6]:
# count the elements of every parameter tensor that has requires_grad set,
# i.e. the parameters that are trainable
num_params = sum(
    param.numel()
    for param in model.parameters()
    if param.requires_grad
)
print("Total number of trainable model parameters: ", num_params)

Total number of trainable model parameters:  2213


In [7]:
# access weight parameter matrix from first Linear layer
# (index 0 of the Sequential container; requires_grad=True in the output marks it as trainable)
print(model.layers[0].weight)

Parameter containing:
tensor([[-0.0392, -0.0775, -0.0300,  ...,  0.1131,  0.1076, -0.0769],
        [-0.0138,  0.0322, -0.0596,  ...,  0.0396, -0.0206,  0.1047],
        [-0.1298,  0.0083, -0.0049,  ..., -0.0868,  0.0101,  0.0912],
        ...,
        [ 0.0066,  0.0942, -0.0337,  ..., -0.0089,  0.1274, -0.0954],
        [-0.0613, -0.1061,  0.0324,  ..., -0.1378,  0.0567, -0.1304],
        [ 0.0922, -0.1069,  0.0259,  ...,  0.0429, -0.1194, -0.1237]],
       requires_grad=True)


In [None]:
# shape of weight parameter matrix from first Linear layer:
# rows = output nodes of the layer (30), columns = input features (50)
print(model.layers[0].weight.shape)

torch.Size([30, 50])


In [9]:
# access bias parameter vector from first Linear layer (one bias value per output node)
print(model.layers[0].bias)

Parameter containing:
tensor([-0.0327,  0.0788,  0.0992,  0.1292, -0.1204,  0.0609,  0.1022, -0.0856,
        -0.1064,  0.0502, -0.0154, -0.1026, -0.1351, -0.0687,  0.0022,  0.1350,
        -0.0332, -0.0419, -0.0697, -0.0032,  0.1289,  0.1124, -0.0852, -0.0603,
         0.0642,  0.0202,  0.0840,  0.0076, -0.0215,  0.1272],
       requires_grad=True)


In [None]:
# shape of bias parameter vector from first Linear layer
# (1-D; its length equals the layer's number of output nodes)
print(model.layers[0].bias.shape)

torch.Size([30])
