## Backprop demo on a single unit linear MLP
This is a demonstration of how backpropagation works on a single-unit MLP.

In [96]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

A single neuron/unit MLP with linear activation

In [97]:
class Net(nn.Module):
    """Smallest possible MLP: a single linear unit with one input and one output.

    No non-linear activation is applied; the forward pass is just the
    affine map implemented by ``fc1``.
    """

    def __init__(self):
        super().__init__()
        # One fully connected layer: 1 input feature -> 1 output feature.
        self.fc1 = nn.Linear(in_features=1, out_features=1)

    def forward(self, x):
        """Return the output of the linear unit ``fc1`` applied to ``x``."""
        return self.fc1(x)

Build a network and train it to model the function $y = 2x + 1$.

Perform supervised learning using the following dataset:


| Step |  x   |  y  |
| :-- | ---: | ---:|
|  0   | 0.   |  1. |
|  1   | 1.   |  3. |

At each step, perform backprop and print the gradients.

In [107]:
net = Net()

# For ease of following the backprop arithmetic by hand, start from
# weight=1.0 and bias=0.0 instead of the random default initialisation.
net.fc1.bias = torch.nn.Parameter(torch.tensor([0.]))
net.fc1.weight = torch.nn.Parameter(torch.tensor([[1.]]))

# Use MSE Loss
criterion = nn.MSELoss()
# Use SGD optimizer with learning rate of 0.1.
# It must be created AFTER the parameters above were replaced, otherwise it
# would keep updating the discarded, randomly initialised parameters.
optimizer = optim.SGD(net.parameters(), lr=0.1)

# One (x, y) training sample per step, taken from y = 2*x + 1.
samples = [(0., 1.), (1., 3.)]


def print_parameters(step):
    """Print the step banner followed by the gradients and values of ``net.fc1``.

    Gradients are ``None`` until the first backward pass has run.
    """
    banner = "---------------------{}-----------------------".format(step)
    # Every banner except the very first is preceded by a blank line.
    print(banner if step == 0 else "\n" + banner)
    print(f"{step}. Bias grad: ", net.fc1.bias.grad)
    print(f"{step}. Weights grad: ", net.fc1.weight.grad)
    print(f"{step}. Bias: ", net.fc1.bias[0])
    print(f"{step}. Weights: ", net.fc1.weight[0])


for step, (x_value, y_value) in enumerate(samples):
    print_parameters(step)

    x = torch.tensor([x_value])
    print(f"{step}. Input: ", x)
    # unsqueeze() is out-of-place: the result must be assigned, otherwise the
    # batch dimension is never added and the output shape (1,) does not match
    # the target shape (1, 1), which MSELoss only tolerates via broadcasting.
    x = x.unsqueeze(0)
    output = net(x)
    print(f"{step}. Predicted Output: ", output)
    target = torch.tensor([y_value]).view(1, -1)

    loss = criterion(output, target)
    print(f"{step}. Loss: ", loss)

    # Clear gradients left over from the previous step
    optimizer.zero_grad()
    # Perform backprop; each graph is used once, so retain_graph is not needed
    loss.backward()
    # Update weight and bias
    optimizer.step()

# Final gradients and parameters after the last update
print_parameters(len(samples))

---------------------0-----------------------
0. Bias grad:  None
0. Weights grad:  None
0. Bias:  tensor(0., grad_fn=<SelectBackward>)
0. Weights:  tensor([1.], grad_fn=<SelectBackward>)
0. Input:  tensor([0.])
0. Predicted Output:  tensor([0.], grad_fn=<AddBackward0>)
0. Loss:  tensor(1., grad_fn=<MseLossBackward>)

---------------------1-----------------------
1. Bias grad:  tensor([-2.])
1. Weights grad:  tensor([[0.]])
1. Bias:  tensor(0.2000, grad_fn=<SelectBackward>)
1. Weights:  tensor([1.], grad_fn=<SelectBackward>)
1. Input:  tensor([1.])
1. Predicted Output:  tensor([1.2000], grad_fn=<AddBackward0>)
1. Loss:  tensor(3.2400, grad_fn=<MseLossBackward>)

---------------------2-----------------------
2. Bias grad:  tensor([-3.6000])
2. Weights grad:  tensor([[-3.6000]])
2. Bias:  tensor(0.5600, grad_fn=<SelectBackward>)
2. Weights:  tensor([1.3600], grad_fn=<SelectBackward>)
