In [1]:
%matplotlib inline
from argparse import Namespace
import numpy as np
import torch
from dask import delayed
#from dask.distributed import Client
# Keep tensor reprs short: show only 2 items at each end of a summarized dimension.
torch.set_printoptions(edgeitems=2)

In [2]:
#client = Client(asynchronous=True)

In [3]:
# Two paired 11-sample measurement vectors (presumably t_c holds Celsius
# readings and t_u the same readings in an unknown unit — TODO confirm).
t_c = torch.tensor([
    0.5, 14.0, 15.0, 28.0, 11.0, 8.0, 3.0, -4.0, 6.0, 13.0, 21.0,
])
t_u = torch.tensor([
    35.7, 55.9, 58.2, 81.9, 56.3, 48.9, 33.9, 21.8, 48.4, 60.4, 68.4,
])

# Copy of t_u scaled down by a factor of ten.
t_un = t_u * 0.1

In [4]:
def model(t_u, w, b):
    """Linear model: scale the input ``t_u`` by ``w`` and shift it by ``b``."""
    scaled = w * t_u
    return scaled + b

In [5]:
def loss_fn(t_p, t_c):
    """Mean squared error between predictions ``t_p`` and targets ``t_c``."""
    residuals = t_p - t_c
    return (residuals ** 2).mean()

In [6]:
# Trainable [w, b] pair starting at w=1, b=0; autograd tracks operations on it.
params = torch.tensor([1.0, 0.0]).requires_grad_()

In [7]:
# No backward pass has run on this tensor yet, so it has no stored gradient.
params.grad is None

True

In [8]:
# Forward pass on the raw t_u inputs, then backpropagate the scalar loss so
# that params.grad is populated.
loss = loss_fn(model(t_u, *params), t_c)
loss.backward()

# Display the gradient of the loss with respect to [w, b].
params.grad

tensor([4517.2969,   82.6000])

In [9]:
# Reset the stored gradient in place (only if one exists) before any further
# backward pass writes into it.
if params.grad is not None:
    params.grad.zero_()

In [10]:
def training_loop(n_epochs, learning_rate, model, loss_fn, params, t_u, t_c):
    """Run plain gradient descent on ``params`` with hand-written updates.

    Args:
        n_epochs: number of update steps to perform.
        learning_rate: step size multiplying the gradient.
        model: callable ``model(t_u, *params)`` producing predictions.
        loss_fn: callable ``loss_fn(t_p, t_c)`` returning a scalar loss.
        params: tensor with ``requires_grad=True``; updated in place.
        t_u: model inputs.
        t_c: target values.

    Returns:
        The same ``params`` tensor after the final update.
    """
    last_epoch = n_epochs + 1
    for epoch in range(1, last_epoch):
        # Clear whatever gradient the previous iteration left behind.
        stale_grad = params.grad
        if stale_grad is not None:
            stale_grad.zero_()

        loss = loss_fn(model(t_u, *params), t_c)
        loss.backward()

        # The update itself must not be recorded by autograd.
        with torch.no_grad():
            params.sub_(learning_rate * params.grad)

        should_report = epoch % 500 == 0
        if should_report:
            print("Epoch %d, Loss %f" % (epoch, loss.item()))
    return params

In [11]:
# Collect every argument of training_loop in one Namespace; its field names
# match the function's parameter names, so it can be unpacked directly.
params = Namespace(
    n_epochs=5000,
    learning_rate=1e-2,
    model=model,
    loss_fn=loss_fn,
    params=torch.tensor([1.0, 0.0], requires_grad=True),
    t_u=t_un,
    t_c=t_c,
)

training_loop(**vars(params))

Epoch 500, Loss 7.860115
Epoch 1000, Loss 3.828538
Epoch 1500, Loss 3.092191
Epoch 2000, Loss 2.957698
Epoch 2500, Loss 2.933134
Epoch 3000, Loss 2.928648
Epoch 3500, Loss 2.927830
Epoch 4000, Loss 2.927679
Epoch 4500, Loss 2.927652
Epoch 5000, Loss 2.927647


tensor([  5.3671, -17.3012], requires_grad=True)

In [12]:
#await (result1 := client.submit(lambda x: x(**vars(params)),
                 #  training_loop))
#result1

In [13]:
import torch.optim as optim

# List every name exposed by the torch.optim namespace.
dir(optim)

['ASGD',
 'Adadelta',
 'Adagrad',
 'Adam',
 'AdamW',
 'Adamax',
 'LBFGS',
 'NAdam',
 'Optimizer',
 'RAdam',
 'RMSprop',
 'Rprop',
 'SGD',
 'SparseAdam',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 '_functional',
 '_multi_tensor',
 'lr_scheduler',
 'swa_utils']

In [14]:
# Bundle the trainable tensor with its step size, then hand both to SGD.
params = Namespace(
    params=torch.tensor([1.0, 0.0]).requires_grad_(),
    learning_rate=1e-5,
)

optimizer = optim.SGD(params=[params.params], lr=params.learning_rate)

In [15]:
# One forward/backward pass on the raw (unscaled) t_u inputs.
t_p = model(t_u, *params.params)
loss = loss_fn(t_p, t_c)
loss.backward()

# Let the optimizer apply a single update to the tensor it was given.
optimizer.step()

# Display the updated parameters.
params.params

tensor([ 9.5483e-01, -8.2600e-04], requires_grad=True)

In [16]:
# Start over with fresh parameters and a larger step size, this time on the
# rescaled inputs t_un.
learning_rate = 1e-2
params = torch.tensor([1.0, 0.0]).requires_grad_()
optimizer = optim.SGD(params=[params], lr=learning_rate)

# Forward pass.
t_p = model(t_un, *params)
loss = loss_fn(t_p=t_p, t_c=t_c)

# Clear old gradients, backpropagate, then apply one update.
optimizer.zero_grad()
loss.backward()
optimizer.step()

params

tensor([1.7761, 0.1064], requires_grad=True)

In [17]:
def training_loop(n_epochs, optimizer, model, loss_fn, params, t_u, t_c):
    """Train ``params`` by delegating every update to ``optimizer``.

    Args:
        n_epochs: number of optimizer steps to perform.
        optimizer: optimizer object exposing ``zero_grad()`` and ``step()``;
            it is expected to have been constructed over ``params``.
        model: callable ``model(t_u, *params)`` producing predictions.
        loss_fn: callable ``loss_fn(t_p, t_c)`` returning a scalar loss.
        params: tensor unpacked into the model's trailing arguments.
        t_u: model inputs.
        t_c: target values.

    Returns:
        The ``params`` object that was passed in.
    """
    for epoch in range(1, n_epochs + 1):
        t_p = model(t_u, *params)
        loss = loss_fn(t_p, t_c)

        # Clear old gradients before backpropagating the new loss.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report the loss every 500 epochs.
        if epoch % 500 == 0:
            print('Epoch %d, Loss %f' % (epoch, float(loss)))

    return params

In [18]:
# Fresh parameters trained with SGD on the rescaled inputs.
learning_rate = 1e-2
params = torch.tensor([1.0, 0.0]).requires_grad_()
optimizer = optim.SGD([params], lr=learning_rate)

# Field names mirror training_loop's parameters so the Namespace can be
# unpacked straight into the call.
training_params = Namespace(
    n_epochs=5000,
    optimizer=optimizer,
    model=model,
    loss_fn=loss_fn,
    params=params,
    t_u=t_un,
    t_c=t_c,
)

training_loop(**vars(training_params))

Epoch 500, Loss 7.860115
Epoch 1000, Loss 3.828538
Epoch 1500, Loss 3.092191
Epoch 2000, Loss 2.957698
Epoch 2500, Loss 2.933134
Epoch 3000, Loss 2.928648
Epoch 3500, Loss 2.927830
Epoch 4000, Loss 2.927679
Epoch 4500, Loss 2.927652
Epoch 5000, Loss 2.927647


tensor([  5.3671, -17.3012], requires_grad=True)

In [19]:
# Same experiment with Adam, a larger learning rate, and the raw
# (unscaled) t_u inputs.
learning_rate = 1e-1
params = torch.tensor([1.0, 0.0]).requires_grad_()
optimizer = optim.Adam([params], lr=learning_rate)

training_params = Namespace(
    n_epochs=2000,
    optimizer=optimizer,
    model=model,
    loss_fn=loss_fn,
    params=params,
    t_u=t_u,
    t_c=t_c,
)

# The Namespace fields line up with training_loop's parameter names.
training_loop(**vars(training_params))

Epoch 500, Loss 7.612900
Epoch 1000, Loss 3.086700
Epoch 1500, Loss 2.928579
Epoch 2000, Loss 2.927644


tensor([  0.5367, -17.3021], requires_grad=True)

In [20]:
# Rebuild the Adam configuration from scratch: new parameters, new optimizer.
learning_rate = 1e-1
params = torch.tensor([1.0, 0.0]).requires_grad_()
optimizer = optim.Adam(params=[params], lr=learning_rate)

# Everything training_loop needs, keyed by its parameter names.
training_params = Namespace(
    n_epochs=2000,
    optimizer=optimizer,
    model=model,
    loss_fn=loss_fn,
    params=params,
    t_u=t_u,
    t_c=t_c,
)



In [21]:
# Unpack the prepared Namespace; its fields match training_loop's parameters.
training_loop(**vars(training_params))

Epoch 500, Loss 7.612900
Epoch 1000, Loss 3.086700
Epoch 1500, Loss 2.928579
Epoch 2000, Loss 2.927644


tensor([  0.5367, -17.3021], requires_grad=True)

In [22]:
# Wrap the training call in a dask `delayed` object, then execute it with
# compute().
# NOTE(review): training_params.params is the same tensor that the previous
# cell already trained in place, so this run resumes from those values rather
# than from [1.0, 0.0] — the recorded loss is already ~2.9276 at epoch 500.
a = delayed(training_loop)(**vars(training_params))
a.compute()
#a.compute()

Epoch 500, Loss 2.927645
Epoch 1000, Loss 2.927646
Epoch 1500, Loss 2.927645
Epoch 2000, Loss 2.927646


tensor([  0.5368, -17.3048], requires_grad=True)

In [53]:
# Hold out roughly 20% of the samples for validation, chosen by a random
# permutation of the sample indices.
n_samples = delayed(t_u.shape)[0]
n_samples1 = n_samples.compute()

n_val = delayed(int(0.2 * n_samples1))
n_val1 = n_val.compute()

# Compute the permutation from the delayed object defined on the line above.
shuffled_indices0 = delayed(torch.randperm(n_samples1))
shuffled_indices1 = shuffled_indices0.compute()

# Slice from the front with an explicit split point: a negative slice such as
# [:-n_val1] would yield an empty training set whenever n_val1 is 0.
train_indices = shuffled_indices1[:n_samples1 - n_val1]
val_indices = shuffled_indices1[n_samples1 - n_val1:]

train_indices, val_indices

(tensor([ 0,  7,  5, 10,  2,  3,  6,  4,  1]), tensor([9, 8]))

In [55]:
# Index inputs and targets with the same index tensors so that each
# (t_u, t_c) pair stays aligned within its split.
train_t_u, val_t_u = t_u[train_indices], t_u[val_indices]
train_t_c, val_t_c = t_c[train_indices], t_c[val_indices]

# Copies of both input splits scaled down by a factor of ten.
train_t_un, val_t_un = train_t_u * 0.1, val_t_u * 0.1

In [80]:
#train_t_c
def training_loop(n_epochs, optimizer, params, train_t_u, val_t_u,
                  train_t_c, val_t_c):
    """Train ``params`` on the training split and report validation loss.

    Predictions and losses come from the notebook-level ``model`` and
    ``loss_fn`` functions.

    Args:
        n_epochs: number of optimizer steps to perform.
        optimizer: optimizer object exposing ``zero_grad()`` and ``step()``;
            it is expected to have been constructed over ``params``.
        params: tensor unpacked into the model's trailing arguments.
        train_t_u: training inputs.
        val_t_u: validation inputs.
        train_t_c: training targets.
        val_t_c: validation targets.

    Returns:
        The ``params`` object that was passed in.
    """
    for epoch in range(1, n_epochs + 1):
        train_t_p = model(train_t_u, *params)
        train_loss = loss_fn(train_t_p, train_t_c)

        # The validation loss is only reported, never backpropagated, so
        # skip building an autograd graph for it.
        with torch.no_grad():
            val_t_p = model(val_t_u, *params)
            val_loss = loss_fn(val_t_p, val_t_c)

        optimizer.zero_grad()
        train_loss.backward()  # only the training loss drives the update
        optimizer.step()

        # Report the first three epochs, then every 500th.
        if epoch <= 3 or epoch % 500 == 0:
            print(f"Epoch {epoch}, Training loss {train_loss.item():.4f},"
                  f" Validation loss {val_loss.item():.4f}")

    return params

In [82]:
# Fresh parameters and SGD optimizer for the train/validation experiment.
learning_rate = 1e-2
params = torch.tensor([1.0, 0.0]).requires_grad_()
optimizer = optim.SGD(params=[params], lr=learning_rate)

# Train on the rescaled inputs of each split.
training_loop(
    n_epochs=3000,
    optimizer=optimizer,
    params=params,
    train_t_u=train_t_un,
    train_t_c=train_t_c,
    val_t_u=val_t_un,
    val_t_c=val_t_c,
)

Epoch 1, Training loss 92.6912, Validation loss 24.8936
Epoch 2, Training loss 42.1972, Validation loss 6.1380
Epoch 3, Training loss 34.1627, Validation loss 11.0368
Epoch 500, Training loss 6.1071, Validation loss 11.2445
Epoch 1000, Training loss 2.5403, Validation loss 9.5169
Epoch 1500, Training loss 2.0684, Validation loss 8.9927
Epoch 2000, Training loss 2.0060, Validation loss 8.8157
Epoch 2500, Training loss 1.9977, Validation loss 8.7532
Epoch 3000, Training loss 1.9966, Validation loss 8.7307


tensor([  5.4040, -16.9541], requires_grad=True)