In [17]:
import torch
import torch.nn as nn

# Two one-element leaf tensors that both track gradients; the flag is
# switched on in two different ways (in-place method vs. constructor kwarg).
x = torch.rand(1).requires_grad_()
y = torch.rand(1, requires_grad=True)
# Non-leaf result whose graph reaches back to both x and y.
loss = y - x

In [18]:
# loss = y - x, so d(loss)/dx = -1 and d(loss)/dy = +1.
# The all-ones seed gradient passed here is what a bare backward() call
# would create implicitly for a one-element tensor.
loss.backward(torch.ones_like(loss))
print(x.grad, y.grad)

tensor([-1.]) tensor([1.])


In [19]:
# Toy single-sample linear model: 4 inputs -> 3 outputs, all-zero target.
x = torch.ones(4)
y = torch.zeros(3)
# Only the parameters track gradients; the data tensors x and y do not.
w = torch.rand(4, 3).requires_grad_()
b = torch.rand(3).requires_grad_()
# Forward pass: (4,) @ (4, 3) + (3,) -> (3,)
z = x @ w + b
print(w, b, z)

tensor([[0.6824, 0.2366, 0.8142],
        [0.6178, 0.1740, 0.1260],
        [0.2713, 0.0818, 0.8764],
        [0.4297, 0.1040, 0.6553]], requires_grad=True) tensor([0.4725, 0.5997, 0.6947], requires_grad=True) tensor([2.4737, 1.1961, 3.1666], grad_fn=<AddBackward0>)


In [20]:
import torch.nn.functional as F

# Mean squared error between the prediction z and the target y.
loss = F.mse_loss(input=z, target=y)
# Populate w.grad and b.grad with d(loss)/dw and d(loss)/db.
loss.backward()
print(loss, w.grad, b.grad)

tensor(5.8592, grad_fn=<MseLossBackward0>) tensor([[1.6492, 0.7974, 2.1111],
        [1.6492, 0.7974, 2.1111],
        [1.6492, 0.7974, 2.1111],
        [1.6492, 0.7974, 2.1111]]) tensor([1.6492, 0.7974, 2.1111])


In [21]:
threshold = 0.1
learning_rate = 0.1
max_iterations = 1000  # safety cap: stop even if the loss never reaches the threshold
iteration_num = 0

# Hand-written gradient descent on w and b, repeated until the MSE loss is
# no longer above `threshold`. On entry, w.grad / b.grad must already hold
# the gradients of the current `loss` (a backward() call has been made).
while loss > threshold and iteration_num < max_iterations:
    iteration_num += 1
    # Report the loss / prediction that the upcoming update is based on.
    print(iteration_num, loss, z, y)

    # Update the parameters in place with autograd recording switched off, so
    # w and b stay leaf tensors and the step itself is not added to the graph.
    with torch.no_grad():
        w -= learning_rate * w.grad
        b -= learning_rate * b.grad
    # backward() accumulates into .grad, so drop the gradients just consumed.
    w.grad = None
    b.grad = None

    # Fresh forward and backward pass with the updated parameters.
    z = torch.matmul(x, w) + b
    loss = F.mse_loss(z, y)
    loss.backward()

# Final state, i.e. the evaluation that ended the loop.
print(iteration_num + 1, loss, z, y)

1 tensor(5.8592, grad_fn=<MseLossBackward0>) tensor([2.4737, 1.1961, 3.1666], grad_fn=<AddBackward0>) tensor([0., 0., 0.])
2 tensor(2.6041, grad_fn=<MseLossBackward0>) tensor([1.6492, 0.7974, 2.1111], grad_fn=<AddBackward0>) tensor([0., 0., 0.])
3 tensor(1.1574, grad_fn=<MseLossBackward0>) tensor([1.0994, 0.5316, 1.4074], grad_fn=<AddBackward0>) tensor([0., 0., 0.])
4 tensor(0.5144, grad_fn=<MseLossBackward0>) tensor([0.7330, 0.3544, 0.9383], grad_fn=<AddBackward0>) tensor([0., 0., 0.])
5 tensor(0.2286, grad_fn=<MseLossBackward0>) tensor([0.4886, 0.2363, 0.6255], grad_fn=<AddBackward0>) tensor([0., 0., 0.])
6 tensor(0.1016, grad_fn=<MseLossBackward0>) tensor([0.3258, 0.1575, 0.4170], grad_fn=<AddBackward0>) tensor([0., 0., 0.])
7 tensor(0.0452, grad_fn=<MseLossBackward0>) tensor([0.2172, 0.1050, 0.2780], grad_fn=<AddBackward0>) tensor([0., 0., 0.])


In [22]:
w = torch.tensor(4.0, requires_grad=True)

# Each backward() call adds d(2w)/dw = 2 to w.grad instead of overwriting it,
# so the printed gradient grows by 2 on every pass.
for step in range(3):
    z = 2 * w
    z.backward()
    print(w.grad)

tensor(2.)
tensor(4.)
tensor(6.)


In [26]:
class Model(nn.Module):
    """A single fully connected layer followed by an element-wise sigmoid."""

    def __init__(self, input_dim, ouput_dim):
        """Create the layers.

        Args:
            input_dim: number of input features of the linear layer.
            ouput_dim: number of output features of the linear layer
                (the parameter name is kept as spelled so keyword callers
                keep working).
        """
        super().__init__()
        self.linear = nn.Linear(input_dim, ouput_dim)
        self.activation = nn.Sigmoid()

    def forward(self, x):
        """Return sigmoid(linear(x))."""
        pre_activation = self.linear(x)
        return self.activation(pre_activation)

In [27]:
# One training pair: an all-ones input of size 4 and an all-zeros target of size 3.
x = torch.ones(4)
y = torch.zeros(3)
# Model sized to match the pair above (4 features in, 3 out).
model = Model(input_dim=4, ouput_dim=3)
# Mean squared error; "mean" is the default reduction, spelled out here.
loss_function = nn.MSELoss(reduction="mean")

In [30]:
learning_rate = 0.01
nb_epochs = 1000
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Train on the single (x, y) pair for exactly nb_epochs optimizer steps.
# (The previous range(nb_epochs + 1) performed one update more than the
# configured number of epochs.)
for epoch in range(nb_epochs):
    y_pred = model(x)
    loss = loss_function(y_pred, y)
    # Gradients accumulate across backward() calls, so clear them first.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
# Note: `loss` now holds the value computed just before the final step.

In [31]:
# Last recorded training loss, then every model parameter, one item per line.
print(loss, *model.parameters(), sep="\n")

tensor(0.0204, grad_fn=<MseLossBackward0>)
Parameter containing:
tensor([[-0.7898, -0.3002, -0.5082, -0.1300],
        [-0.3001, -0.3746, -0.5472, -0.2440],
        [-0.0799, -0.0414, -0.7197, -0.3134]], requires_grad=True)
Parameter containing:
tensor([ 0.0252, -0.4637, -0.6081], requires_grad=True)
