In [None]:
%matplotlib inline
from matplotlib import pyplot as plt

import torch
from torch import nn
from torch.nn import Parameter
from torch.autograd import Variable

## Optimization

In [None]:
# Seed the global RNG so the noise drawn below (and any later random
# initialisation) is identical on every Restart & Run All.
torch.manual_seed(0)

# Design matrix: column 0 is a constant 1 (it multiplies the intercept weight),
# column 1 is the actual input feature.
x = torch.tensor([[1.0, 1.4],
                  [1.0, 3.1],
                  [1.0, 3.6],
                  [1.0, 4.2],
                  [1.0, 6.1],
                  [1.0, 8.0]])
# Targets start out equal to the feature column, i.e. the noise-free relation
# is y = 0 * 1 + 1 * feature.
y = torch.tensor([1.4, 3.1, 3.6, 4.2, 6.1, 8.0])

# Perturb the targets in place with Gaussian noise scaled by 0.1, so the fit
# cannot be exact.
noise = torch.randn(y.size())
y.add_(0.1 * noise)  # let's add noise to make it more complicated :)

In [None]:
# Trainable weights: a random 2x1 column tracked by autograd. Row 0 multiplies
# the constant column of x, row 1 multiplies the feature column.
weights = torch.randn(2, 1, requires_grad=True)
# Hand-set reference solution (intercept 0, slope 1); it is never trained, so
# it does not need gradients.
weights2 = torch.tensor([[0.], [1.]])

In [None]:
# Randomly initialised weights, as they are at this point in the notebook
# (no update step has been applied yet when the cells are run in order).
weights

In [None]:
# Training target: the learned weights should end up close to these
# (intercept 0, slope 1).
weights2

## Calculating functions each time

In [None]:
loss1 = []  # loss value recorded at every step

for i in range(10000):

    # Forward pass: (6, 2) x (2, 1) -> (6, 1); transposed to a (1, 6) row so
    # that it broadcasts against the 1-D targets `y`.
    net_output = x.mm(weights).t()
    loss = torch.mean((net_output - y)**2)  # mean squared error
    loss.backward()  # fills weights.grad

    # Manual gradient-descent step with learning rate 0.01. It runs under
    # no_grad so autograd does not record the update itself; this replaces
    # the discouraged `.data` access.
    with torch.no_grad():
        weights -= 0.01 * weights.grad
        # gradients are being added cumulatively, so they need zeroing after each update!
        weights.grad.zero_()

    loss1.append(loss.item())

In [None]:
# Predictions of the hand-set weights2 (intercept 0, slope 1): the product
# picks out the feature column of x, returned as a single row.
x.mm(weights2).t()

In [None]:
# Weights after the manual gradient-descent loop has updated them in place.
weights

In [None]:
# Report the loss recorded at the last step, then draw the full loss curve.
final_loss = loss1[-1]
print("Minimal loss: {:.3f}".format(final_loss))
steps = range(len(loss1))
plt.plot(steps, loss1)

## Module

It is very useful to define a network as a class.
Use `Parameter` instead of `Variable` for the weights. A `Parameter`:

* has gradient computation enabled by default
* is registered as a trainable parameter of the module, so the optimizer can find it via `model.parameters()`

In [None]:
class Model(nn.Module):
    """Bias-free linear model that stores its weights as an explicit ``Parameter``."""

    def __init__(self):
        super(Model, self).__init__()
        # Wrapping the tensor in Parameter registers it on the module, so it is
        # returned by model.parameters(). It starts as a 2x1 column of zeros.
        initial = torch.zeros(2, 1)
        self.weights = Parameter(initial)

    def forward(self, x):
        """Multiply ``x`` by the weight column and flatten the result to 1-D."""
        return torch.mm(x, self.weights).view(-1)

In [None]:
# Build the model together with its MSE loss and a plain SGD optimizer.
model = Model()
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.0001)

In [None]:
# Trainable tensors registered on the model, shown before the training loop runs.
list(model.parameters())

In [None]:
import torch.nn.functional as F

In [None]:
loss2 = []  # loss history for the Parameter-based model
for step in range(1000):
    optimizer.zero_grad()  # drop gradients left over from the previous step

    outputs = model(x)
    loss = criterion(outputs, y)
    loss.backward()
    loss2.append(loss.item())

    optimizer.step()  # apply the SGD update to the model's parameters

In [None]:
# Report the last loss of *this* training run (loss2, not the earlier loss1)
# and plot its curve.
print("Minimal loss: {:.3f}".format(loss2[-1]))
plt.plot(range(len(loss2)), loss2)

In [None]:
# The same parameter list again, now after the optimizer has updated it.
list(model.parameters())

## Module - version with nn.Linear 

Ready-made layers (together with their parameters) can be used directly from the `nn` library. This is slightly simpler than writing all the expressions by hand.


In [None]:
class Model(torch.nn.Module):
    """Bias-free linear model built from a ready-made ``nn.Linear`` layer."""

    def __init__(self):
        super(Model, self).__init__()
        # Two input features, one output, no bias term.
        self.fc = nn.Linear(in_features=2, out_features=1, bias=False)

    def forward(self, x):
        """Run ``x`` through the linear layer and flatten the result to 1-D."""
        return self.fc(x).view(-1)

In [None]:
# Fresh instance of the nn.Linear-based model with its loss and optimizer;
# this run uses a learning rate of 0.04.
model = Model()
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=4e-2)

In [None]:
# Display the module's repr, which lists its registered sub-layers.
model

In [None]:
# Parameters of the nn.Linear-based model, shown before the training loop runs.
list(model.parameters())

In [None]:
loss3 = []  # loss history for the nn.Linear-based model
for step in range(1000):
    optimizer.zero_grad()  # drop gradients left over from the previous step

    outputs = model(x)
    loss = criterion(outputs, y)
    loss.backward()
    loss3.append(loss.item())

    optimizer.step()  # apply the SGD update to the model's parameters

In [None]:
# Report the loss recorded at the last step, then draw the full loss curve.
final_loss = loss3[-1]
print("Minimal loss: {:.3f}".format(final_loss))
steps = range(len(loss3))
plt.plot(steps, loss3)

In [None]:
# Model predictions from the forward pass of the last loop iteration
# (computed just before the final optimizer step).
outputs

## Sequential

For some simple models, we can just compose layers using `nn.Sequential`.
Sometimes it's convenient, but often only for sub-parts of the network.

In [None]:
# A single linear layer (2 inputs -> 1 output, bias left at its default)
# wrapped in a Sequential container, plus its loss and optimizer.
model = nn.Sequential(nn.Linear(in_features=2, out_features=1))
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.0001)

In [None]:
# Display the Sequential container and the layer it holds.
model

In [None]:
loss4 = []  # loss history for the Sequential model
for step in range(1000):
    optimizer.zero_grad()  # drop gradients left over from the previous step

    outputs = model(x)
    # The linear layer returns one column per sample, so the 1-D targets get a
    # matching trailing dimension. Shapes need this kind of care now and then;
    # squeeze/unsqueeze are the usual PyTorch tools for it.
    loss = criterion(outputs, y.unsqueeze(dim=-1))
    loss.backward()
    loss4.append(loss.item())

    optimizer.step()  # apply the SGD update to the model's parameters

In [None]:
# Report the loss recorded at the last step, then draw the full loss curve.
final_loss = loss4[-1]
print("Minimal loss: {:.3f}".format(final_loss))
steps = range(len(loss4))
plt.plot(steps, loss4)