In [1]:
import torch
import torch.optim as optim
import numpy as np

In [56]:
def model(t_u: torch.Tensor, w: torch.Tensor, b) -> torch.Tensor:
    """Evaluate the linear model: weighted sum of the features plus a bias.

    Args:
        t_u: Inputs. A 1-D tensor is treated as one scalar feature per
            sample; a 2-D tensor is treated as ``(samples, features)``.
        w: Weight(s). Multiplied element-wise with ``t_u`` (1-D case) or with
            every row of ``t_u`` (2-D case), using normal broadcasting.
        b: Bias added to every prediction.

    Returns:
        A tensor with one prediction per sample (``t_u.shape[0]`` entries).
        The result keeps the promoted dtype of the inputs instead of being
        forced into a default-dtype buffer, so float64 inputs give float64
        predictions.
    """
    if t_u.dim() == 1:
        return t_u * w + b
    # Row-wise dot product done in one vectorized call: multiply every row by
    # the weights and sum across the feature axis. This replaces a Python
    # loop that wrote into a pre-allocated torch.zeros() buffer.
    return (t_u * w).sum(dim=1) + b

def dmodel_dw(t_u:torch, w:torch, b):
    """Derivative of the linear model output with respect to the weight ``w``.

    For ``t_u * w + b`` that derivative is the input itself, so ``t_u`` is
    handed back unchanged (the same object, not a copy). ``w`` and ``b`` are
    accepted only to keep the signature parallel with the model; they are
    not read.
    """
    derivative = t_u
    return derivative

def dmodel_db(t_u:torch, w:torch, b):
    """Derivative of the linear model output with respect to the bias ``b``.

    The bias enters ``t_u * w + b`` additively, so the derivative is the
    constant ``1.0`` regardless of the arguments (none of them is read).
    """
    unit_slope = 1.0
    return unit_slope
    
def loss_fn(t_p:torch, t_c: torch):
    """Half mean-squared error between predictions and targets.

    Computes ``(t_p - t_c) ** 2 / 2`` element-wise and returns the mean of
    those values.
    """
    residual = t_p - t_c
    half_squared = residual ** 2 / 2.0
    return half_squared.mean()

def dloss_fn(t_p: torch.Tensor, t_c: torch.Tensor) -> torch.Tensor:
    """Derivative of the half mean-squared-error loss w.r.t. each prediction.

    For ``loss = mean((t_p - t_c) ** 2 / 2)`` taken over ``N = t_p.size(0)``
    samples, the derivative with respect to prediction ``i`` is
    ``(t_p[i] - t_c[i]) / N``.

    Args:
        t_p: Predictions.
        t_c: Targets, broadcastable against ``t_p``.

    Returns:
        A tensor shaped like ``t_p - t_c`` holding the per-prediction
        derivatives.
    """
    # The sign is positive: the loss grows when a prediction moves further
    # above its target. A leading minus here would describe the descent
    # direction, not the derivative.
    return (t_p - t_c) / t_p.size(0)

def grad_fn(t_u:torch, t_c:torch, t_p:torch, w:torch, b):
    """Chain-rule gradient terms of the loss with respect to ``w`` and ``b``.

    Multiplies the result of ``dloss_fn(t_p, t_c)`` by ``dmodel_dw`` and by
    ``dmodel_db`` and stacks the two products, weight term first and bias
    term second.

    NOTE(review): the products are stacked as computed; nothing here sums
    over the samples, so the result is not reduced to one value per
    parameter. Confirm whether callers expect a reduced gradient.
    """
    upstream = dloss_fn(t_p, t_c)
    terms = [
        upstream * dmodel_dw(t_u, w, b),
        upstream * dmodel_db(t_u, w, b),
    ]
    return torch.stack(terms)


In [3]:
# data: t_c holds the targets ("answer") and t_u the matching inputs;
# entry i of one list belongs with entry i of the other.
t_c = [
    0.5, 14.0, 15.0, 28.0, 11.0, 8.0,
    3.0, -4.0, 6.0, 13.0, 21.0,
]  # answer
t_u = [
    35.7, 55.9, 58.2, 81.9, 56.3, 48.9,
    33.9, 21.8, 48.4, 60.4, 68.4,
]  # input

In [4]:
def add_squared_row(lst) -> np.ndarray:
    """Pair every value of ``lst`` with its square.

    Stacks ``lst`` with itself and transposes, so that for a flat sequence
    each value gets its own row ``[value, value]``; the second column is
    then squared, giving ``[value, value ** 2]``. Despite the function name,
    the squares end up as a column of the returned array.
    """
    doubled = np.array([lst, lst])
    features = doubled.transpose()
    features[:, 1] = features[:, 1] ** 2
    return features

# Same target/input values as the data cell above, re-declared here.
t_c = [
    0.5, 14.0, 15.0, 28.0, 11.0, 8.0,
    3.0, -4.0, 6.0, 13.0, 21.0,
]  # answer
t_u = [
    35.7, 55.9, 58.2, 81.9, 56.3, 48.9,
    33.9, 21.8, 48.4, 60.4, 68.4,
]  # input


In [5]:
def shuffle_and_devide_set(t_c: torch.Tensor, t_u: torch.Tensor, ratio, generator=None):
    """Randomly split paired samples into training and validation subsets.

    Args:
        t_c: Targets, indexed along dim 0 in step with ``t_u``.
        t_u: Inputs; ``t_u.shape[0]`` is taken as the number of samples.
        ratio: Fraction of the samples, within ``[0, 1]``, held out for
            validation. The validation count is ``int(ratio * n_samples)``,
            i.e. rounded toward zero.
        generator: Optional ``torch.Generator`` forwarded to
            ``torch.randperm`` so the shuffle can be made reproducible.
            ``None`` (the default) uses the global random state.

    Returns:
        The tuple ``(train_t_u, train_t_c, val_t_u, val_t_c)``.

    Raises:
        ValueError: If ``ratio`` is outside ``[0, 1]``.
    """
    if not 0 <= ratio <= 1:
        raise ValueError(f"ratio must be within [0, 1], got {ratio!r}")

    n_samples = t_u.shape[0]
    n_val = int(ratio * n_samples)
    # Split with an explicit training count. Negative slicing
    # ([:-n_val] / [-n_val:]) goes wrong when n_val == 0: [:-0] is empty and
    # [-0:] is the whole permutation, which would hand every sample to the
    # validation set and none to training.
    n_train = n_samples - n_val

    shuffled_indices = torch.randperm(n_samples, generator=generator)
    train_indices = shuffled_indices[:n_train]
    val_indices = shuffled_indices[n_train:]

    train_t_u = t_u[train_indices]
    train_t_c = t_c[train_indices]

    val_t_u = t_u[val_indices]
    val_t_c = t_c[val_indices]

    return train_t_u, train_t_c, val_t_u, val_t_c

In [27]:
def training_loop(n_epochs, optimizer, params, train_t_u, val_t_u, train_t_c, val_t_c, log_every=500):
    """Fit ``params`` by gradient descent, reporting train/validation loss.

    Each epoch runs a forward pass on the training data, evaluates the
    validation loss without tracking gradients, and then takes one optimizer
    step on the training loss.

    Args:
        n_epochs: Number of epochs; epochs are numbered from 1.
        optimizer: Optimizer whose ``zero_grad()`` and ``step()`` are called
            once per epoch.
        params: Parameter tensor; ``params[0]`` is passed to ``model`` as
            the bias and ``params[1:]`` as the weight(s).
        train_t_u: Training inputs.
        val_t_u: Validation inputs. Note the argument order: both input
            tensors come before both target tensors.
        train_t_c: Training targets.
        val_t_c: Validation targets.
        log_every: Print the losses on every ``log_every``-th epoch, in
            addition to the first three epochs. A falsy value (``0`` or
            ``None``) turns the periodic report off. Defaults to 500.

    Returns:
        The ``params`` object that was passed in.
    """
    for epoch in range(1, n_epochs + 1):
        train_t_p = model(train_t_u, params[1:], params[0])
        train_loss = loss_fn(train_t_p, train_t_c)

        # Validation is for monitoring only, so it is computed under
        # no_grad and must not contribute to the backward pass.
        with torch.no_grad():
            val_t_p = model(val_t_u, params[1:], params[0])
            val_loss = loss_fn(val_t_p, val_t_c)
            assert not val_loss.requires_grad

        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()

        if epoch <= 3 or (log_every and epoch % log_every == 0):
            print(f"Epoch {epoch}, Training loss {train_loss.item():.4f}, Validation loss {val_loss.item():.4f}")
    return params

In [61]:
# Quadratic fit: scale the inputs by 0.1, then expand them with
# add_squared_row before splitting into training and validation sets.
t_c_ = torch.tensor(t_c)
t_u_ = np.array(t_u)*0.1
t_u_ = torch.tensor(add_squared_row(t_u_))
train_t_u, train_t_c, val_t_u, val_t_c = shuffle_and_devide_set(
    t_c=t_c_,
    t_u=t_u_,
    ratio=0.2,
)

# Three parameters: training_loop reads params[0] as the bias and
# params[1:] as the weights.
params = torch.tensor([0.0, 1.0, 1.0], dtype=float, requires_grad=True)
lr = 1e-3
optimizer = optim.SGD([params], lr=lr)
params = training_loop(
    n_epochs=10000,
    optimizer=optimizer,
    params=params,
    train_t_u=train_t_u,
    val_t_u=val_t_u,
    train_t_c=train_t_c,
    val_t_c=val_t_c,
)

Epoch 1, Training loss 372.4724, Validation loss 182.3080
Epoch 2, Training loss 38.2776, Validation loss 28.7074
Epoch 3, Training loss 8.3362, Validation loss 14.2582
Epoch 500, Training loss 1.9655, Validation loss 7.2597
Epoch 1000, Training loss 1.3604, Validation loss 5.5682
Epoch 1500, Training loss 1.2545, Validation loss 4.9850
Epoch 2000, Training loss 1.2354, Validation loss 4.7571
Epoch 2500, Training loss 1.2315, Validation loss 4.6588
Epoch 3000, Training loss 1.2302, Validation loss 4.6112
Epoch 3500, Training loss 1.2294, Validation loss 4.5842
Epoch 4000, Training loss 1.2287, Validation loss 4.5657
Epoch 4500, Training loss 1.2280, Validation loss 4.5508
Epoch 5000, Training loss 1.2273, Validation loss 4.5373
Epoch 5500, Training loss 1.2266, Validation loss 4.5246
Epoch 6000, Training loss 1.2259, Validation loss 4.5121
Epoch 6500, Training loss 1.2252, Validation loss 4.4998
Epoch 7000, Training loss 1.2245, Validation loss 4.4876
Epoch 7500, Training loss 1.2238, 

In [59]:
# Linear fit: same inputs scaled by 0.1, used directly as a single feature.
t_c_ = torch.tensor(t_c)
t_u_ = torch.tensor(np.array(t_u)*0.1)
train_t_u, train_t_c, val_t_u, val_t_c = shuffle_and_devide_set(
    t_c=t_c_,
    t_u=t_u_,
    ratio=0.2,
)

# Two parameters: training_loop reads params[0] as the bias and
# params[1:] as the weight.
params = torch.tensor([0.0, 1.0], dtype=float, requires_grad=True)
lr = 1e-2
optimizer = optim.SGD([params], lr=lr)
params = training_loop(
    n_epochs=10000,
    optimizer=optimizer,
    params=params,
    train_t_u=train_t_u,
    val_t_u=val_t_u,
    train_t_c=train_t_c,
    val_t_c=val_t_c,
)

Epoch 1, Training loss 45.8830, Validation loss 14.5284
Epoch 2, Training loss 31.3057, Validation loss 5.1708
Epoch 3, Training loss 24.1730, Validation loss 1.7999
Epoch 500, Training loss 7.2598, Validation loss 2.7805
Epoch 1000, Training loss 3.5439, Validation loss 2.3056
Epoch 1500, Training loss 2.1953, Validation loss 2.1202
Epoch 2000, Training loss 1.7058, Validation loss 2.0450
Epoch 2500, Training loss 1.5282, Validation loss 2.0130
Epoch 3000, Training loss 1.4637, Validation loss 1.9985
Epoch 3500, Training loss 1.4403, Validation loss 1.9915
Epoch 4000, Training loss 1.4318, Validation loss 1.9880
Epoch 4500, Training loss 1.4287, Validation loss 1.9861
Epoch 5000, Training loss 1.4276, Validation loss 1.9850
Epoch 5500, Training loss 1.4272, Validation loss 1.9844
Epoch 6000, Training loss 1.4270, Validation loss 1.9840
Epoch 6500, Training loss 1.4270, Validation loss 1.9838
Epoch 7000, Training loss 1.4270, Validation loss 1.9837
Epoch 7500, Training loss 1.4270, Val