# Computing the gradients of the loss with respect to trainable variables

In [2]:
import torch

In [4]:
# Trainable scalar parameters; requires_grad=True makes autograd track them.
w = torch.tensor(1.0, requires_grad=True)
b = torch.tensor(0.5, requires_grad=True)

# One training example: input x and target y.
x, y = torch.tensor([1.4]), torch.tensor([2.1])

# Forward pass: linear prediction followed by the sum of squared errors.
z = x * w + b
loss = ((y - z) ** 2).sum()

# Backward pass: populates w.grad and b.grad with dL/dw and dL/db.
loss.backward()

print('dL/dw : ', w.grad)
print('dL/db : ', b.grad)

dL/dw :  tensor(-0.5600)
dL/db :  tensor(-0.4000)


# Models based on nn.Sequential

In [5]:
import torch.nn as nn

Define a sequential model with two linear layers (`nn.Linear`) and two ReLU activation functions (`nn.ReLU`). The first layer takes 4 inputs and produces 16 outputs, while the second layer takes those 16 outputs and produces 32.

In [6]:
# Two fully connected layers, each followed by a ReLU: 4 -> 16 -> 32 features.
model = nn.Sequential(
    nn.Linear(in_features=4, out_features=16, bias=True),
    nn.ReLU(),
    nn.Linear(in_features=16, out_features=32, bias=True),
    nn.ReLU(),
)

# Bare expression so the notebook renders the module summary.
model

Sequential(
  (0): Linear(in_features=4, out_features=16, bias=True)
  (1): ReLU()
  (2): Linear(in_features=16, out_features=32, bias=True)
  (3): ReLU()
)

Configure the first fully connected layer by specifying the initialization scheme for its weights (here, Xavier uniform initialization). Then, for the second fully connected layer, compute an L1 penalty term from its weight matrix.

In [7]:
# Coefficient that scales the L1 penalty.
l1_weight = 0.01

# Re-initialize the first linear layer's weights in place (Xavier uniform).
nn.init.xavier_uniform_(model[0].weight)

# L1 penalty for the second linear layer: scaled sum of absolute weights.
l1_penalty = model[2].weight.abs().sum() * l1_weight