In [1]:
%matplotlib inline
import numpy as np
import torch
import torch.optim as optim
import torch.nn as nn
torch.set_printoptions(edgeitems=2)

# t_c are temperatures in Celsius
t_c = torch.tensor([0.5, 14.0, 15.0, 28.0, 11.0, 8.0, 3.0, -4.0, 6.0, 13.0, 21.0])
# t_u are the unknown units
t_u = torch.tensor([35.7, 55.9, 58.2, 81.9, 56.3, 48.9, 33.9, 21.8, 48.4, 60.4, 68.4])
# Add the extra dimension at axis 1 so every sample becomes a row holding one
# feature, i.e. shape (n_samples, 1).
# unsqueeze is called directly on the existing tensors: wrapping a tensor in
# torch.tensor(...) again only makes a copy and triggers PyTorch's
# "To copy construct from a tensor ..." UserWarning.
t_c = t_c.unsqueeze(1)
t_u = t_u.unsqueeze(1)

# separate training and validation data (20% of the samples, rounded down,
# are held out for validation)
n_samples = t_u.shape[0]
n_val = int(0.2 * n_samples)
# Explicit split point instead of negative slicing: with n_val == 0 the
# slices [:-0] / [-0:] would yield an empty training set and put every
# sample into validation.
n_train = n_samples - n_val

# Random permutation of the sample indices; no seed is set here, so the
# split differs from run to run.
shuffled_indices = torch.randperm(n_samples)

train_indices = shuffled_indices[:n_train]
val_indices = shuffled_indices[n_train:]

train_t_u = t_u[train_indices]
train_t_c = t_c[train_indices]

val_t_u = t_u[val_indices]
val_t_c = t_c[val_indices]

class SubclassModel(nn.Module):
    """Small regression network: Linear(1 -> 13), tanh, Linear(13 -> 1)."""

    def __init__(self):
        super().__init__()

        # Hidden layer first, output layer second: the creation order fixes
        # the order in which the random initial weights are drawn.
        self.hidden_linear = nn.Linear(in_features=1, out_features=13)
        self.output_linear = nn.Linear(in_features=13, out_features=1)

    def forward(self, input):
        """Apply hidden layer, tanh activation and output layer to ``input``.

        ``input`` must have a trailing dimension of size 1 (the hidden
        layer's in_features); the result has a trailing dimension of size 1.
        """
        squashed = self.hidden_linear(input).tanh()
        return self.output_linear(squashed)
    
def training_loop(n_epochs, optimizer, model, loss_fn, train_t_u, val_t_u, train_t_c, val_t_c):
    """Train ``model`` for ``n_epochs`` full-batch steps, printing progress.

    Each epoch computes the training loss, evaluates the validation loss
    without gradient tracking, then back-propagates the training loss and
    lets ``optimizer`` update the parameters. Both losses are printed for
    the first three epochs and for every 500th epoch. Returns None.
    """
    report = 'Epoch {}, Training loss {}, Validation loss {}'

    for step in range(n_epochs):
        epoch = step + 1

        # Forward pass on the training split.
        train_pred = model(train_t_u)
        train_loss = loss_fn(train_pred, train_t_c)

        # Validation is evaluated with autograd switched off, so it never
        # contributes to the gradients; the assert guards that invariant.
        with torch.no_grad():
            val_pred = model(val_t_u)
            val_loss = loss_fn(val_pred, val_t_c)
            assert not val_loss.requires_grad

        # Clear stale gradients, back-propagate, then update the parameters.
        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()

        # Progress report: first three epochs, then every 500th.
        is_early = epoch <= 3
        is_milestone = epoch % 500 == 0
        if is_early or is_milestone:
            print(report.format(epoch, float(train_loss), float(val_loss)))

  del sys.path[0]
  


In [2]:
# Fresh, randomly initialised network plus an Adam optimiser over its parameters.
subclass_model = SubclassModel()
optimizer = optim.Adam(subclass_model.parameters(), lr=1e-2)

# Fit on the training split for 5000 epochs with mean-squared-error loss;
# the loop prints the training and validation loss as it goes.
training_loop(
    n_epochs=5000,
    optimizer=optimizer,
    model=subclass_model,
    loss_fn=nn.MSELoss(),
    train_t_u=train_t_u,
    val_t_u=val_t_u,
    train_t_c=train_t_c,
    val_t_c=val_t_c,
)

Epoch 1, Training loss 159.493896484375, Validation loss 301.6928405761719
Epoch 2, Training loss 157.00473022460938, Validation loss 296.9797668457031
Epoch 3, Training loss 154.5560760498047, Validation loss 292.30792236328125
Epoch 500, Training loss 9.453768730163574, Validation loss 4.634730815887451
Epoch 1000, Training loss 2.915877342224121, Validation loss 6.790077209472656
Epoch 1500, Training loss 1.636523962020874, Validation loss 10.700006484985352
Epoch 2000, Training loss 1.47880220413208, Validation loss 18.23255157470703
Epoch 2500, Training loss 1.4053064584732056, Validation loss 12.723899841308594
Epoch 3000, Training loss 1.3663643598556519, Validation loss 8.851823806762695
Epoch 3500, Training loss 1.3228051662445068, Validation loss 7.098437786102295
Epoch 4000, Training loss 1.2599670886993408, Validation loss 6.0246262550354
Epoch 4500, Training loss 1.2378054857254028, Validation loss 6.077049732208252
Epoch 5000, Training loss 1.2277344465255737, Validation 

In [3]:
# Predictions of the trained model on the validation inputs. The call is made
# outside torch.no_grad(), so the displayed tensor carries a grad_fn.
subclass_model(val_t_u)

tensor([[16.1833],
        [22.5342]], grad_fn=<AddmmBackward>)

In [4]:
# Validation targets (Celsius), shown for comparison with the model's predictions.
val_t_c

tensor([[13.],
        [21.]])

In [5]:
# Weights of the hidden layer nn.Linear(1, 13): one row per hidden unit.
subclass_model.hidden_linear.weight

Parameter containing:
tensor([[-0.5497],
        [-0.7100],
        [-0.0995],
        [ 0.8792],
        [ 0.8614],
        [-0.8993],
        [ 0.4741],
        [-0.7448],
        [-0.8313],
        [-0.1044],
        [ 0.9547],
        [-0.8931],
        [-0.1994]], requires_grad=True)

In [6]:
# Biases of the hidden layer: one value per hidden unit (13 in total).
subclass_model.hidden_linear.bias

Parameter containing:
tensor([ 0.3794, -0.1218,  6.6280,  0.8143,  0.5835, -0.6513, -0.6846, -0.8382,
         0.5822,  5.3046,  0.9968,  0.6348,  4.2853], requires_grad=True)

In [7]:
# Gradient currently stored on the hidden-layer weights. training_loop calls
# backward() every epoch and nothing visible here clears .grad afterwards, so
# this is presumably the gradient from the final training epoch.
subclass_model.hidden_linear.weight.grad

tensor([[0.0000],
        [0.0000],
        [0.0298],
        [0.0000],
        [0.0000],
        [0.0000],
        [0.0000],
        [0.0000],
        [0.0000],
        [0.0531],
        [0.0000],
        [0.0000],
        [0.0161]])

In [8]:
# Weights of the output layer nn.Linear(13, 1): a single row with one
# coefficient per hidden unit.
subclass_model.output_linear.weight

Parameter containing:
tensor([[-0.7466, -0.9554, -7.0336,  1.0472,  0.5834, -0.8309,  0.7443, -0.7942,
         -0.8668, -6.6206,  0.8292, -0.9431, -5.7018]], requires_grad=True)