In [1]:
%matplotlib inline
import numpy as np
import torch
import torch.optim as optim
# When a tensor repr is summarized, show only 2 items at each end of a dimension.
torch.set_printoptions(edgeitems=2)

In [2]:
# Paired measurements, 11 samples each. t_c holds the targets the model is
# fitted against and t_u the raw model inputs (presumably Celsius readings and
# readings in an unknown unit, going by the names — confirm).
t_c = torch.tensor(
    [0.5, 14.0, 15.0, 28.0, 11.0, 8.0, 3.0, -4.0, 6.0, 13.0, 21.0]
)
t_u = torch.tensor(
    [35.7, 55.9, 58.2, 81.9, 56.3, 48.9, 33.9, 21.8, 48.4, 60.4, 68.4]
)

# Inputs rescaled by a factor of 0.1.
t_un = t_u * 0.1

In [3]:
def model(t_u, w, b):
    """Linear model: scale the input ``t_u`` by ``w`` and shift it by ``b``."""
    scaled = w * t_u
    return scaled + b

In [4]:
def loss_fn(t_p, t_c):
    """Mean squared error between predictions ``t_p`` and targets ``t_c``."""
    residuals = t_p - t_c
    return (residuals ** 2).mean()

In [5]:
# Initial [w, b] = [1.0, 0.0], in the order model() receives them via *params.
# requires_grad=True asks autograd to track operations on this tensor.
params = torch.tensor([1.0, 0.0], requires_grad=True)

In [6]:
# No backward pass has run yet, so no gradient is stored on the tensor.
params.grad is None

True

In [7]:
# Forward pass on the raw inputs, then backpropagate so that params.grad holds
# the derivative of the loss with respect to each entry of params.
loss = loss_fn(model(t_u, *params), t_c)
loss.backward()

In [8]:
# Gradient of the loss with respect to [w, b], filled in by backward().
params.grad

tensor([4517.2969,   82.6000])

In [9]:
# backward() adds into .grad instead of overwriting it, so clear it in place
# before the next backward pass. The guard covers the case where no gradient
# has been computed yet.
if params.grad is not None:
    params.grad.zero_()

In [10]:
# zero_() only clears the values; the .grad tensor itself still exists.
params.grad is None

False

In [11]:
def training_loop(n_epochs, learning_rate, params,
                 t_u, t_c):
    """Fit ``params`` with hand-written gradient descent.

    Args:
        n_epochs: number of update steps to run.
        learning_rate: factor applied to the gradient in each update.
        params: tensor unpacked into ``model`` as ``(w, b)``. It is updated in
            place and must track gradients so that ``backward()`` fills in
            ``params.grad``.
        t_u: model inputs.
        t_c: targets the predictions are compared with.

    Returns:
        The same ``params`` tensor after the last update.
    """
    for step in range(n_epochs):
        epoch = step + 1  # report epochs 1-based

        # Clear the gradient left over from the previous iteration, if any.
        if params.grad is not None:
            params.grad.zero_()

        loss = loss_fn(model(t_u, *params), t_c)
        loss.backward()

        # The update itself must not be recorded by autograd.
        with torch.no_grad():
            params -= learning_rate * params.grad

        if epoch % 500 == 0:
            print('Epoch %d, Loss %f' % (epoch, float(loss)))

    return params

In [12]:
# Train on t_un (the inputs rescaled by 0.1), starting from a fresh parameter
# tensor. The returned parameters are displayed as the cell output.
training_loop(
    n_epochs=5000,
    learning_rate=1e-2,
    params = torch.tensor([1.0, 0.0], requires_grad=True),
    t_u = t_un,
    t_c = t_c
)

Epoch 500, Loss 7.860115
Epoch 1000, Loss 3.828538
Epoch 1500, Loss 3.092191
Epoch 2000, Loss 2.957698
Epoch 2500, Loss 2.933134
Epoch 3000, Loss 2.928648
Epoch 3500, Loss 2.927830
Epoch 4000, Loss 2.927679
Epoch 4500, Loss 2.927652
Epoch 5000, Loss 2.927647


tensor([  5.3671, -17.3012], requires_grad=True)

In [13]:
# List every name exposed by torch.optim, which includes the optimizer classes.
dir(optim)

['ASGD',
 'Adadelta',
 'Adagrad',
 'Adam',
 'AdamW',
 'Adamax',
 'LBFGS',
 'NAdam',
 'Optimizer',
 'RAdam',
 'RMSprop',
 'Rprop',
 'SGD',
 'SparseAdam',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 '_functional',
 '_multi_tensor',
 'lr_scheduler',
 'swa_utils']

In [14]:
# Fresh parameters, registered with an SGD optimizer that will update them.
params = torch.tensor([1.0, 0.0], requires_grad=True)
learning_rate = 1e-5
optimizer = optim.SGD([params], lr=learning_rate)

In [15]:
# One optimization step on the raw (unscaled) inputs.
t_p = model(t_u,*params)
loss = loss_fn(t_p, t_c)
loss.backward()

# Apply the SGD update from params.grad. Nothing in this cell zeroes the
# gradient, so re-running the cell would accumulate into the existing .grad.
optimizer.step()

In [16]:
# The previous cell never cleared the gradients. Start again from fresh
# parameters and call zero_grad() ahead of backward() this time.
params = torch.tensor([1.0, 0.0], requires_grad=True)
learning_rate = 1e-2
optimizer = optim.SGD([params], lr=learning_rate)

# Clear first, then run forward -> backward -> update on the rescaled inputs.
optimizer.zero_grad()
t_p = model(t_un, *params)
loss = loss_fn(t_p, t_c)
loss.backward()
optimizer.step()

params

tensor([1.7761, 0.1064], requires_grad=True)

In [17]:
def training_loop(n_epochs, optimizer, params,
                 t_u, t_c):
    """Fit ``params`` by letting ``optimizer`` perform the updates.

    Args:
        n_epochs: number of update steps to run.
        optimizer: object providing ``zero_grad()`` and ``step()``; it is
            expected to have been constructed over ``params``.
        params: tensor unpacked into ``model`` as ``(w, b)``.
        t_u: model inputs.
        t_c: targets the predictions are compared with.

    Returns:
        The ``params`` tensor that was passed in.
    """
    epoch = 1
    while epoch <= n_epochs:
        loss = loss_fn(model(t_u, *params), t_c)

        # Reset the old gradient, compute the new one, then apply the update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch % 500 == 0:
            print('Epoch %d, Loss %f' % (epoch, float(loss)))
        epoch += 1

    return params

In [18]:
# Same data, learning rate and starting point as the hand-written loop above,
# but the update is now delegated to optim.SGD.
params = torch.tensor([1.0, 0.0], requires_grad=True)
learning_rate = 1e-2
optimizer = optim.SGD([params], lr=learning_rate)

training_loop(
    n_epochs=5000,
    optimizer=optimizer,
    params=params,
    t_u=t_un,
    t_c=t_c
)

Epoch 500, Loss 7.860115
Epoch 1000, Loss 3.828538
Epoch 1500, Loss 3.092191
Epoch 2000, Loss 2.957698
Epoch 2500, Loss 2.933134
Epoch 3000, Loss 2.928648
Epoch 3500, Loss 2.927830
Epoch 4000, Loss 2.927679
Epoch 4500, Loss 2.927652
Epoch 5000, Loss 2.927647


tensor([  5.3671, -17.3012], requires_grad=True)

In [19]:
# Swap SGD for Adam. Note that this run trains on the raw t_u rather than the
# rescaled t_un, and uses a larger learning rate and fewer epochs.
params = torch.tensor([1.0, 0.0], requires_grad=True)
learning_rate = 1e-1
optimizer = optim.Adam([params], lr=learning_rate)

training_loop(
    n_epochs=2000,
    optimizer=optimizer,
    params=params,
    t_u = t_u,
    t_c = t_c
)

Epoch 500, Loss 7.612900
Epoch 1000, Loss 3.086700
Epoch 1500, Loss 2.928579
Epoch 2000, Loss 2.927644


tensor([  0.5367, -17.3021], requires_grad=True)

In [20]:
# Hold out 20% of the samples (rounded down) for validation.
n_samples = t_u.shape[0]
n_val = int(0.2 * n_samples)

# Random permutation of the sample indices. No seed is set in the visible
# cells, so the split differs from run to run.
shuffled_indices = torch.randperm(n_samples)

# Slice by an explicit count from the front. The negative forms [:-n_val] and
# [-n_val:] misbehave when n_val is 0: the training set comes out empty and
# every sample lands in validation. For n_val > 0 the result is unchanged.
train_indices = shuffled_indices[:n_samples - n_val]
val_indices = shuffled_indices[n_samples - n_val:]

train_indices, val_indices

(tensor([ 9,  2,  1,  6,  4,  3,  7, 10,  0]), tensor([5, 8]))

In [21]:
# Training split: raw inputs, inputs rescaled by 0.1, and targets.
train_t_u = t_u[train_indices]
train_t_un = train_t_u * 0.1
train_t_c = t_c[train_indices]

# Validation split, prepared the same way.
val_t_u = t_u[val_indices]
val_t_un = val_t_u * 0.1
val_t_c = t_c[val_indices]

In [22]:
def training_loop(n_epochs, optimizer, params,
                 train_t_u, val_t_u, train_t_c, val_t_c):
    """Train on the training split and report the validation loss alongside.

    Args:
        n_epochs: number of update steps to run.
        optimizer: object providing ``zero_grad()`` and ``step()``; it is
            expected to have been constructed over ``params``.
        params: tensor unpacked into ``model`` as ``(w, b)``.
        train_t_u: training inputs.
        val_t_u: validation inputs.
        train_t_c: training targets.
        val_t_c: validation targets.

    Returns:
        The ``params`` tensor that was passed in.
    """
    for epoch in range(1, n_epochs+1):
        train_t_p = model(train_t_u, *params)
        train_loss = loss_fn(train_t_p, train_t_c)

        # Computed for reporting only: backward() is never called on it, so it
        # does not contribute to the parameter update.
        val_t_p = model(val_t_u, *params)
        val_loss = loss_fn(val_t_p, val_t_c)

        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()

        # Log the first three epochs, then every 500th.
        if epoch <= 3 or epoch % 500 == 0:
            # {epoch} interpolates the counter; the previous "(epoch)" printed
            # the literal text instead of the epoch number.
            print(f'Epoch {epoch}, Training Loss {train_loss.item():.4f}',
                 f'Validation loss {val_loss.item():.4f}')
    return params

In [23]:
# Train on the rescaled training split while monitoring the rescaled
# validation split. Note the starting point here is [0.0, 1.0], not the
# [1.0, 0.0] used in the earlier runs.
params = torch.tensor([0.0, 1.0], requires_grad=True)
learning_rate = 1e-2
optimizer = optim.SGD([params], lr=learning_rate)

training_loop(
    n_epochs=3000,
    optimizer=optimizer,
    params=params,
    train_t_u = train_t_un,
    val_t_u = val_t_un,
    train_t_c = train_t_c,
    val_t_c=val_t_c)



Epoch (epoch), Training Loss 196.3611 Validation loss 37.0000
Epoch (epoch), Training Loss 57.2173 Validation loss 2.0759
Epoch (epoch), Training Loss 38.2301 Validation loss 14.3776
Epoch (epoch), Training Loss 7.0996 Validation loss 11.9447
Epoch (epoch), Training Loss 3.1487 Validation loss 7.7541
Epoch (epoch), Training Loss 2.6022 Validation loss 6.4424
Epoch (epoch), Training Loss 2.5266 Validation loss 5.9887
Epoch (epoch), Training Loss 2.5162 Validation loss 5.8247
Epoch (epoch), Training Loss 2.5147 Validation loss 5.7643


tensor([  5.3084, -16.5866], requires_grad=True)

In [24]:
def training_loop(n_epochs, optimizer, params,
                 train_t_u, val_t_u, train_t_c, val_t_c):
    """Train on the training split; evaluate validation without autograd.

    Args:
        n_epochs: number of update steps to run.
        optimizer: object providing ``zero_grad()`` and ``step()``; it is
            expected to have been constructed over ``params``.
        params: tensor unpacked into ``model`` as ``(w, b)``.
        train_t_u: training inputs.
        val_t_u: validation inputs.
        train_t_c: training targets.
        val_t_c: validation targets.

    Returns:
        The ``params`` tensor that was passed in.

    Raises:
        AssertionError: if the validation loss unexpectedly tracks gradients.
    """
    for epoch in range(1, n_epochs+1):
        train_t_p = model(train_t_u, *params)
        train_loss = loss_fn(train_t_p, train_t_c)

        # The validation loss is only reported, never backpropagated, so it is
        # computed with gradient tracking switched off.
        with torch.no_grad():
            val_t_p = model(val_t_u, *params)
            val_loss = loss_fn(val_t_p, val_t_c)
            assert not val_loss.requires_grad

        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()

        # Log the first three epochs, then every 500th.
        if epoch <= 3 or epoch % 500 == 0:
            # {epoch} interpolates the counter; the previous "(epoch)" printed
            # the literal text instead of the epoch number.
            print(f'Epoch {epoch}, Training Loss {train_loss.item():.4f}',
                 f'Validation loss {val_loss.item():.4f}')
    return params

In [25]:
# Same configuration as the previous run; only training_loop differs, in that
# the validation loss is now computed under torch.no_grad().
params = torch.tensor([0.0, 1.0], requires_grad=True)
learning_rate = 1e-2
optimizer = optim.SGD([params], lr=learning_rate)

training_loop(
    n_epochs=3000,
    optimizer=optimizer,
    params=params,
    train_t_u = train_t_un,
    val_t_u = val_t_un,
    train_t_c = train_t_c,
    val_t_c=val_t_c)




Epoch (epoch), Training Loss 196.3611 Validation loss 37.0000
Epoch (epoch), Training Loss 57.2173 Validation loss 2.0759
Epoch (epoch), Training Loss 38.2301 Validation loss 14.3776
Epoch (epoch), Training Loss 7.0996 Validation loss 11.9447
Epoch (epoch), Training Loss 3.1487 Validation loss 7.7541
Epoch (epoch), Training Loss 2.6022 Validation loss 6.4424
Epoch (epoch), Training Loss 2.5266 Validation loss 5.9887
Epoch (epoch), Training Loss 2.5162 Validation loss 5.8247
Epoch (epoch), Training Loss 2.5147 Validation loss 5.7643


tensor([  5.3084, -16.5866], requires_grad=True)

In [26]:
def calc_forward(t_u, t_c, is_train):
    ...