In [1]:
import torch
import numpy as np

#### Building a Perceptron Manually

In [3]:
# Hypothesis Function --> h = w ^ T * x
x = torch.tensor([1., 4., -2.])
y = torch.tensor([1.])
w = torch.tensor([1., 1., 1.], requires_grad = True)    # computing the derivative with respect to w --> set requires_grad to True
h = torch.matmul(w, x)                                  # computing the dot product (output) --> Step 1: Forward Pass
print(f'Forward Pass, h = {h}')

J = (h - y) ** 2                                        # Squared Loss
J.backward()
print(f'Backward Pass, Derivatives = {w.grad}')         # Computing the derivatives --> Step 2: Backward Pass
w = w - 0.5 * w.grad                                    # Step 3 --> Update the weights of w by old vals of w so that we are better able to estimate y
                                                        # alpha is 0.5 here
print(f'Weights Update, w = {w}')

Forward Pass, h = 3.0
Backward Pass, Derivatives = tensor([ 4., 16., -8.])
Weights Update, w = tensor([-1., -7.,  5.], grad_fn=<SubBackward0>)


#### Building a Perceptron Using PyTorch

In [13]:
from torch import nn
from torch import optim                                 # used to compute the derivative

In [5]:
x = torch.tensor([[4., -2.]])                           # one sample (4, -2 are the inputs)
x.shape

torch.Size([1, 2])

In [10]:
y = torch.tensor([[1.]])
h = nn.Linear(2, 1, bias = True)                        # h = wx + b
                                                        # input dimension = 2 (2 features), output dimension = 1
h.weight = nn.Parameter(torch.tensor([[1., 1.]]))
h.bias = nn.Parameter(torch.tensor([[1.]]))

In [11]:
h.bias

Parameter containing:
tensor([[1.]], requires_grad=True)

In [14]:
optimizer = optim.SGD(h.parameters(), lr = 0.5)           # need to optimize all the parameters of the model h
cost =  nn.MSELoss()                                      # specify the costs (manually, done through J, here done through mean square loss error)

# Now do all 3 steps

# One Iteration of the Backpropagation Algorithm
optimizer.zero_grad()                                     # at every iteration, this needs to be there (reseting)
out = h(x)
print(f'Forward Pass, h = {h}')
loss = cost(out, y)
loss.backward()
print(f'Backward Pass, Derivatives = {h.bias.grad} {h.weight.grad}')
optimizer.step()
print(f'Weights Update, w = {h.bias.detach().numpy()} {h.weight.detach().numpy()}')     # detaching because it lives on the GPU, to use it, bring it to your CPU     
                                                                                        # no need to attach again if you want to use it

Forward Pass, h = Linear(in_features=2, out_features=1, bias=True)
Backward Pass, Derivatives = tensor([[4.]]) tensor([[16., -8.]])
Weights Update, w = [[-1.]] [[-7.  5.]]
