### 5.7 Exercise
Redefine the model to be `w2 * t_u ** 2 + w1 * t_u + b`.
1. What parts of the training loop, and so on, need to change to accommodate this redefinition?
2. What parts are agnostic to swapping out the model?
3. Is the resulting loss higher or lower after training?
4. Is the actual result better or worse?

In [11]:
%matplotlib inline
import numpy as np
import torch
torch.set_printoptions(edgeitems=2, linewidth=75)

In [12]:
# Eleven paired samples: t_u holds the model inputs and t_c the targets
# that the loss is measured against.
t_c = torch.tensor([
    0.5, 14.0, 15.0, 28.0, 11.0, 8.0,
    3.0, -4.0, 6.0, 13.0, 21.0,
])
t_u = torch.tensor([
    35.7, 55.9, 58.2, 81.9, 56.3, 48.9,
    33.9, 21.8, 48.4, 60.4, 68.4,
])

# Inputs scaled down by a factor of ten.
t_un = t_u * 0.1

In [13]:
def model(t_u, w1, w2, b):
    """Evaluate the quadratic ``w2 * t_u ** 2 + w1 * t_u + b``.

    ``w1`` is the linear coefficient, ``w2`` the quadratic coefficient and
    ``b`` the constant offset. Note the argument order: the linear weight
    comes before the quadratic one.
    """
    quadratic_term = w2 * t_u ** 2
    linear_term = w1 * t_u
    return quadratic_term + linear_term + b

In [14]:
def loss_fn(t_p, t_c):
    """Return the mean of the element-wise squared difference ``t_p - t_c``."""
    residuals = t_p - t_c
    return (residuals ** 2).mean()

In [15]:
import torch.optim as optim

# Display the names exposed by the optim module.
dir(optim)

['ASGD',
 'Adadelta',
 'Adagrad',
 'Adam',
 'AdamW',
 'Adamax',
 'LBFGS',
 'Optimizer',
 'RMSprop',
 'Rprop',
 'SGD',
 'SparseAdam',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 'lr_scheduler']

In [16]:
# Step size first, then the three trainable values (unpacked into the model
# as w1, w2, b), then a plain SGD optimizer that updates them.
learning_rate = 1e-5
params = torch.tensor([1.0, 1.0, 0.0], requires_grad=True)
optimizer = optim.SGD([params], lr=learning_rate)

In [17]:
# One manual gradient step, evaluated on the raw (unscaled) inputs t_u.
t_p = model(t_u, *params)
loss = loss_fn(t_p, t_c)
# NOTE(review): there is no optimizer.zero_grad() before backward() here,
# unlike the cells that follow. Presumably harmless on freshly created
# params, but re-running this cell on its own would likely accumulate
# gradients -- confirm this is intended.
loss.backward()

optimizer.step()

# Bare expression so the notebook displays the parameters after the step.
params

tensor([-2.6181e+00, -2.3049e+02, -5.9642e-02], requires_grad=True)

In [18]:
# Start again from fresh parameters and a fresh optimizer, this time feeding
# the scaled inputs t_un to the model.
params = torch.tensor([1.0, 1.0, 0.0], requires_grad=True)
learning_rate = 1e-5
optimizer = optim.SGD([params], lr=learning_rate)

t_p = model(t_un, *params)
loss = loss_fn(t_p, t_c)

optimizer.zero_grad() # zero the gradients before the backward pass
loss.backward()
optimizer.step()

# Bare expression so the notebook displays the parameters after one step.
params

tensor([ 9.9720e-01,  9.8260e-01, -4.8176e-04], requires_grad=True)

In [21]:
def training_loop(n_epochs, optimizer, params, t_u, t_c):
    """Run ``n_epochs`` optimizer steps and return ``params``.

    Every epoch evaluates ``model`` on ``t_u`` with ``params`` unpacked as
    its remaining arguments, scores the result against ``t_c`` with
    ``loss_fn``, and then calls ``zero_grad()``, ``backward()`` and
    ``step()`` in that order. The loss is printed on every 5000th epoch.
    """
    for step in range(n_epochs):
        epoch = step + 1  # epochs are counted from 1 in the report

        prediction = model(t_u, *params)
        current_loss = loss_fn(prediction, t_c)

        optimizer.zero_grad()
        current_loss.backward()
        optimizer.step()

        if epoch % 5000 != 0:
            continue
        print('Epoch %d, Loss %f' % (epoch, float(current_loss)))

    return params

In [22]:
# Fresh parameters and optimizer, then a 50000-epoch run on the scaled
# inputs. The call is the cell's last expression, so the notebook shows the
# returned params.
learning_rate = 1e-5
params = torch.tensor([1.0, 1.0, 0.0], requires_grad=True)
optimizer = optim.SGD([params], lr=learning_rate)

training_loop(
    n_epochs=50000,
    optimizer=optimizer,
    params=params,
    t_u=t_un,  # scaled inputs, not the raw t_u
    t_c=t_c,
)

Epoch 5000, Loss 10.704610
Epoch 10000, Loss 8.639528
Epoch 15000, Loss 7.169393
Epoch 20000, Loss 6.122478
Epoch 25000, Loss 5.376626
Epoch 30000, Loss 4.844937
Epoch 35000, Loss 4.465601
Epoch 40000, Loss 4.194641
Epoch 45000, Loss 4.000774
Epoch 50000, Loss 3.861749


tensor([-0.8879,  0.5570, -0.8753], requires_grad=True)

In [23]:
# Hold out 20% of the samples (rounded down) for validation and train on
# the rest.
n_samples = t_u.shape[0]
n_val = int(0.2 * n_samples)
n_train = n_samples - n_val

# A dedicated, seeded generator makes the shuffle -- and therefore the
# split -- identical on every Restart & Run All, without touching the
# global RNG state.
split_generator = torch.Generator().manual_seed(0)
shuffled_indices = torch.randperm(n_samples, generator=split_generator)

# Slice by n_train rather than by -n_val: when n_val is 0 the negative
# slices would produce an empty training set ([:-0]) and put every sample
# into validation ([-0:]).
train_indices = shuffled_indices[:n_train]
val_indices = shuffled_indices[n_train:]

train_indices, val_indices  # displayed as the cell output

(tensor([ 7, 10,  5,  8,  1,  9,  3,  2,  4]), tensor([6, 0]))

In [24]:
# Pick the training and validation rows using the shuffled index tensors.
train_t_u, train_t_c = t_u[train_indices], t_c[train_indices]
val_t_u, val_t_c = t_u[val_indices], t_c[val_indices]

# Scaled-by-0.1 versions of both input subsets.
train_t_un, val_t_un = train_t_u * 0.1, val_t_u * 0.1

In [25]:
def training_loop(n_epochs, optimizer, params, train_t_u, val_t_u,
                  train_t_c, val_t_c):
    """Fit ``params`` on the training split while reporting validation loss.

    NOTE: this rebinds the name ``training_loop`` that an earlier cell
    defined with a five-argument signature; once this cell has run, only
    this seven-argument form is callable.

    Args:
        n_epochs: number of optimizer steps to run.
        optimizer: object providing ``zero_grad()`` and ``step()``.
        params: iterable unpacked into ``model`` after the input tensor.
        train_t_u, train_t_c: training inputs and targets.
        val_t_u, val_t_c: validation inputs and targets.

    Returns:
        The same ``params`` object that was passed in.
    """
    for epoch in range(1, n_epochs + 1):
        train_t_p = model(train_t_u, *params)
        train_loss = loss_fn(train_t_p, train_t_c)

        # The validation loss is only reported, never back-propagated, so
        # evaluate it without recording an autograd graph.
        with torch.no_grad():
            val_t_p = model(val_t_u, *params)
            val_loss = loss_fn(val_t_p, val_t_c)

        optimizer.zero_grad()
        train_loss.backward()  # only the training loss drives the update
        optimizer.step()

        # Report the first three epochs and then every 5000th one.
        if epoch <= 3 or epoch % 5000 == 0:
            print(f"Epoch {epoch}, Training loss {train_loss.item():.4f},"
                  f" Validation loss {val_loss.item():.4f}")

    return params

In [27]:
# Fresh parameters and optimizer, then a 300000-epoch run that trains on
# the scaled training inputs and reports loss on the scaled validation
# inputs. The call is the cell's last expression, so the notebook shows the
# returned params.
learning_rate = 1e-5
params = torch.tensor([1.0, 1.0, 0.0], requires_grad=True)
optimizer = optim.SGD([params], lr=learning_rate)

training_loop(
    n_epochs=300000,
    optimizer=optimizer,
    params=params,
    train_t_u=train_t_un,  # scaled training inputs
    val_t_u=val_t_un,  # scaled validation inputs
    train_t_c=train_t_c,
    val_t_c=val_t_c,
)

Epoch 1, Training loss 782.4936, Validation loss 195.6477
Epoch 2, Training loss 739.9783, Validation loss 188.4566
Epoch 3, Training loss 699.8079, Validation loss 181.5967
Epoch 5000, Training loss 9.7319, Validation loss 18.2807
Epoch 10000, Training loss 8.2674, Validation loss 14.7246
Epoch 15000, Training loss 7.1632, Validation loss 12.0005
Epoch 20000, Training loss 6.3302, Validation loss 9.9092
Epoch 25000, Training loss 5.7016, Validation loss 8.2998
Epoch 30000, Training loss 5.2268, Validation loss 7.0579
Epoch 35000, Training loss 4.8678, Validation loss 6.0968
Epoch 40000, Training loss 4.5961, Validation loss 5.3507
Epoch 45000, Training loss 4.3901, Validation loss 4.7695
Epoch 50000, Training loss 4.2336, Validation loss 4.3150
Epoch 55000, Training loss 4.1144, Validation loss 3.9583
Epoch 60000, Training loss 4.0232, Validation loss 3.6770
Epoch 65000, Training loss 3.9531, Validation loss 3.4543
Epoch 70000, Training loss 3.8990, Validation loss 3.2772
Epoch 75000,

tensor([-0.9431,  0.5893, -1.7604], requires_grad=True)