PyTorch Model Design Steps
* Design the model (input size, output size, forward pass) ==> construct the loss and optimizer ==> run the training loop

In [1]:
import torch
import torch.nn as nn 

In [12]:
# Toy regression problem: learn the mapping f(x) = 2 * x.
# X and y are kept 2-D, one row per sample and one column per feature,
# so that X.shape provides both the sample count and the feature count.
X = torch.tensor([1, 2, 3, 4], dtype=torch.float32).reshape(-1, 1)
y = torch.tensor([2, 4, 6, 8], dtype=torch.float32).reshape(-1, 1)
# Single unseen input used to probe the model before and after training.
test_sample = torch.tensor([5], dtype=torch.float32)
n_samples = X.shape[0]
n_features = X.shape[1]

In [13]:
# The linear layer consumes one value per input feature.
input_size = n_features
# Take the output width from the targets rather than from the inputs: both are
# 1 in this example, but they are independent quantities in general.
output_size = y.shape[1]
model = nn.Linear(input_size, output_size)
# Baseline prediction from the freshly initialised (untrained) model.
# no_grad: this is inference only, so no autograd graph needs to be recorded.
with torch.no_grad():
    print (f'Prediction before training: f() = {model(test_sample).item() :.3f}')

Prediction before training: f() = 1.008


Computing the gradients manually:
Note that the formula for the mean squared error is:
$ J = \frac{1}{N} \sum_{i=1}^{N} (w x_i - y_i)^2 $
The gradient with respect to $w$ is therefore given by:
$ \frac{dJ}{dw} = \frac{1}{N} \sum_{i=1}^{N} 2 x_i (w x_i - y_i) $ (note: $x_i$ and $y_i$ are constants)

In [14]:
#implementing the training
lr = 0.01
n_iters = 100
loss = nn.MSELoss() # PyTorch's mean-squared-error criterion
optimizer = torch.optim.SGD(model.parameters(), lr=lr) # SGD over all model parameters
#training loop
for epoch in range(n_iters):
    # Forward pass: predictions for every training sample.
    y_prediction = model(X)
    # nn.MSELoss is called as (input, target): prediction first, ground truth
    # second. MSE is symmetric, so the value is unchanged, but this order is
    # required as soon as the criterion is swapped for an asymmetric one.
    l = loss(y_prediction, y)
    # Backward pass: autograd populates .grad on each parameter (dl/dw, dl/db).
    l.backward()
    # Apply one SGD update using the gradients just computed.
    optimizer.step()
    # Clear the gradients; backward() accumulates into .grad otherwise.
    optimizer.zero_grad()
    if epoch%10 == 0:
        # parameters() yields the weight matrix first, then the bias.
        [w, b] = model.parameters()
        # w is reported after this epoch's update; the loss is from before it.
        print(f'epoch {epoch+1}: w={w[0][0].item():.3f}, loss={l.item():.8f}')
# Inference only, so skip recording an autograd graph.
with torch.no_grad():
    print (f'Prediction after training: f() = {model(test_sample).item() :.3f}')

epoch 1: w=0.596, loss=26.38856125
epoch 11: w=1.772, loss=0.68274188
epoch 21: w=1.962, loss=0.01767081
epoch 31: w=1.992, loss=0.00046340
epoch 41: w=1.997, loss=0.00001784
epoch 51: w=1.998, loss=0.00000597
epoch 61: w=1.998, loss=0.00000535
epoch 71: w=1.998, loss=0.00000503
epoch 81: w=1.998, loss=0.00000473
epoch 91: w=1.998, loss=0.00000446
Prediction after training: f() = 9.996


In [17]:
#custom linear regression model
class LinearRegression(nn.Module):
    """Linear regression expressed as a module wrapping a single nn.Linear.

    Args:
        input_dim: number of input features handed to the linear layer.
        output_dim: number of output features the linear layer produces.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        # The model's only layer, registered under the attribute name `lin`.
        linear_layer = nn.Linear(in_features=input_dim, out_features=output_dim)
        self.lin = linear_layer

    def forward(self, x):
        """Return the linear layer's output for `x`."""
        prediction = self.lin(x)
        return prediction
# Swap in the custom module, built with the same layer sizes as before.
model = LinearRegression(input_dim=input_size, output_dim=output_size)

In [18]:
#implementing the training
lr = 0.01
n_iters = 100
loss = nn.MSELoss() # PyTorch's mean-squared-error criterion
optimizer = torch.optim.SGD(model.parameters(), lr=lr) # SGD over all model parameters
#training loop
for epoch in range(n_iters):
    # Forward pass: predictions for every training sample.
    y_prediction = model(X)
    # nn.MSELoss is called as (input, target): prediction first, ground truth
    # second. MSE is symmetric, so the value is unchanged, but this order is
    # required as soon as the criterion is swapped for an asymmetric one.
    l = loss(y_prediction, y)
    # Backward pass: autograd populates .grad on each parameter (dl/dw, dl/db).
    l.backward()
    # Apply one SGD update using the gradients just computed.
    optimizer.step()
    # Clear the gradients; backward() accumulates into .grad otherwise.
    optimizer.zero_grad()
    if epoch%10 == 0:
        # parameters() yields the weight matrix first, then the bias.
        [w, b] = model.parameters()
        # w is reported after this epoch's update; the loss is from before it.
        print(f'epoch {epoch+1}: w={w[0][0].item():.3f}, loss={l.item():.8f}')
# Inference only, so skip recording an autograd graph.
with torch.no_grad():
    print (f'Prediction after training: f() = {model(test_sample).item() :.3f}')

epoch 1: w=0.366, loss=22.45846939
epoch 11: w=1.458, loss=0.73970139
epoch 21: w=1.641, loss=0.16855136
epoch 31: w=1.679, loss=0.14507772
epoch 41: w=1.693, loss=0.13627990
epoch 51: w=1.703, loss=0.12833849
epoch 61: w=1.711, loss=0.12086832
epoch 71: w=1.720, loss=0.11383314
epoch 81: w=1.728, loss=0.10720748
epoch 91: w=1.736, loss=0.10096740
Prediction after training: f() = 9.471
