In [21]:
import torch
import torch.optim as optim
import numpy as np

In [22]:
def model(t_u: torch.Tensor, w: torch.Tensor, b) -> torch.Tensor:
    """Evaluate the linear model ``t_u . w + b``.

    Args:
        t_u: Inputs. A 1-D tensor is treated element-wise (``t_u * w + b``).
            A tensor with two or more dimensions is treated as one sample per
            leading index; every remaining element of a sample is multiplied
            by ``w`` (with broadcasting) and summed.
        w: Weight(s); must broadcast against ``t_u``.
        b: Bias, a Python scalar or a tensor.

    Returns:
        For 1-D input, a tensor shaped like ``t_u * w``. Otherwise a 1-D
        tensor with ``t_u.shape[0]`` entries.
    """
    if t_u.dim() == 1:
        return t_u * w + b
    # Vectorised per-sample weighted sum. Unlike filling a torch.zeros()
    # buffer row by row, this keeps the dtype and device of the operands
    # (no silent float64 -> float32 truncation) and avoids a Python loop.
    weighted = (t_u * w).flatten(start_dim=1)
    return weighted.sum(dim=1) + b

def dmodel_dw(t_u: torch.Tensor, w: torch.Tensor, b):
    """Return the partial derivative of ``t_u * w + b`` with respect to ``w``.

    For a model that is linear in ``w`` this is the input itself, so ``t_u``
    is handed back unchanged (same object, no copy). ``w`` and ``b`` are
    accepted only so the signature parallels the model's.
    """
    derivative = t_u
    return derivative

def dmodel_db(t_u: torch.Tensor, w: torch.Tensor, b):
    """Return the partial derivative of ``t_u * w + b`` with respect to ``b``.

    The bias enters the model additively, so the derivative is the constant
    ``1.0`` (a plain Python float) regardless of the arguments.
    """
    unit_slope = 1.0
    return unit_slope
    
def loss_fn(t_p: torch.Tensor, t_c: torch.Tensor):
    """Halved mean squared error between predictions and targets.

    Args:
        t_p: Predicted values.
        t_c: Target values; must broadcast against ``t_p``.

    Returns:
        A 0-dim tensor: the mean over all elements of ``(t_p - t_c)**2 / 2``.
    """
    residual = t_p - t_c
    half_squared = residual.pow(2) / 2.0
    return half_squared.mean()

def dloss_fn(t_p: torch.Tensor, t_c: torch.Tensor):
    """Derivative of the halved mean-squared-error loss w.r.t. the predictions.

    For ``L = mean((t_p - t_c)**2 / 2)`` over ``N = t_p.size(0)`` samples,
    ``dL/dt_p = (t_p - t_c) / N``.

    Args:
        t_p: Predicted values; the divisor is taken from its first dimension.
        t_c: Target values.

    Returns:
        A tensor shaped like ``t_p - t_c`` holding the per-sample derivative.
    """
    # The sign is positive: d/dt_p of (t_p - t_c)**2 / 2 is (t_p - t_c).
    # A leading minus here would turn a descent step into an ascent step.
    return (t_p - t_c) / t_p.size(0)

def grad_fn(t_u: torch.Tensor, t_c: torch.Tensor, t_p: torch.Tensor, w: torch.Tensor, b):
    """Chain-rule terms of the loss with respect to ``w`` and ``b``.

    Multiplies ``dloss_fn(t_p, t_c)`` by ``dmodel_dw`` and by ``dmodel_db``
    and stacks the two products, ``w`` terms first and ``b`` terms second.

    NOTE(review): the products are stacked as-is and are not summed over the
    samples, so for 1-D inputs the result has one column per sample rather
    than one value per parameter. Callers that need the parameter gradient
    presumably have to reduce over the last axis themselves -- confirm this
    is intended.
    """
    upstream = dloss_fn(t_p, t_c)
    w_terms = upstream * dmodel_dw(t_u, w, b)
    b_terms = upstream * dmodel_db(t_u, w, b)
    return torch.stack((w_terms, b_terms))


In [23]:
def add_squared_row(lst) -> np.ndarray:
    """Pair every value with its square.

    Args:
        lst: A flat sequence (or 1-D array) of numbers.

    Returns:
        An array with one row per input value: column 0 holds the value and
        column 1 holds the value squared.
    """
    # Duplicate the input, then transpose so each value becomes a row.
    features = np.array((lst, lst)).T
    squared_column = features[:, 1] ** 2
    features[:, 1] = squared_column
    return features

# Target values the model is fitted against (the "answers").
t_c = [
    0.5, 14.0, 15.0, 28.0, 11.0, 8.0,
    3.0, -4.0, 6.0, 13.0, 21.0,
]
# Raw input values; t_u[i] is the measurement that goes with t_c[i].
t_u = [
    35.7, 55.9, 58.2, 81.9, 56.3, 48.9,
    33.9, 21.8, 48.4, 60.4, 68.4,
]


In [24]:
def shuffle_and_devide_set(t_c: torch.Tensor, t_u: torch.Tensor, ratio, generator=None):
    """Randomly split paired samples into training and validation subsets.

    The (misspelled) name is kept so existing callers continue to work.

    Args:
        t_c: Targets, indexed along dimension 0.
        t_u: Inputs, indexed along dimension 0; its first dimension gives the
            number of samples.
        ratio: Fraction of samples held out for validation, in ``[0, 1]``.
            The validation count is ``int(ratio * n_samples)`` (rounded down).
        generator: Optional ``torch.Generator`` forwarded to
            ``torch.randperm`` so the shuffle can be made reproducible.
            ``None`` uses the global RNG.

    Returns:
        ``(train_t_u, train_t_c, val_t_u, val_t_c)``. Note that inputs come
        before targets here even though the arguments are ``(t_c, t_u)``.

    Raises:
        ValueError: If ``ratio`` is outside ``[0, 1]``.
    """
    if not 0 <= ratio <= 1:
        raise ValueError(f"ratio must be within [0, 1], got {ratio!r}")

    n_samples = t_u.shape[0]
    n_val = int(ratio * n_samples)
    # Split by an explicit train count. Slicing with [:-n_val] / [-n_val:]
    # breaks when n_val == 0: [:-0] is empty and [-0:] is everything, which
    # would leave no training data at all.
    n_train = n_samples - n_val

    shuffled_indices = torch.randperm(n_samples, generator=generator)
    train_indices = shuffled_indices[:n_train]
    val_indices = shuffled_indices[n_train:]

    train_t_u = t_u[train_indices]
    train_t_c = t_c[train_indices]
    val_t_u = t_u[val_indices]
    val_t_c = t_c[val_indices]

    return train_t_u, train_t_c, val_t_u, val_t_c

In [31]:
def training_loop(n_epochs, optimizer, params, train_t_u, val_t_u, train_t_c, val_t_c, print_enabled: bool = True):
    """Fit ``params`` with the given optimizer and track validation loss.

    ``params[0]`` is used as the bias and ``params[1:]`` as the weight(s)
    passed to ``model``.

    Args:
        n_epochs: Number of optimisation steps; epochs are numbered from 1.
        optimizer: Optimizer whose ``zero_grad()`` / ``step()`` are called
            once per epoch.
        params: Parameter tensor, read on every epoch and returned at the end.
        train_t_u: Training inputs.
        val_t_u: Validation inputs.
        train_t_c: Training targets.
        val_t_c: Validation targets.
        print_enabled: When true, log both losses for the first three epochs
            and for every 500th epoch.

    Returns:
        ``params``, the same object that was passed in.
    """
    for epoch in range(1, n_epochs + 1):
        # Training forward pass: autograd records this one.
        train_loss = loss_fn(model(train_t_u, params[1:], params[0]), train_t_c)

        # Validation forward pass: evaluated without building a graph.
        with torch.no_grad():
            val_loss = loss_fn(model(val_t_u, params[1:], params[0]), val_t_c)
            assert not val_loss.requires_grad

        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()

        should_report = print_enabled and (epoch <= 3 or epoch % 500 == 0)
        if should_report:
            print(f"Epoch {epoch:5d}, Training loss {train_loss.item():8.4f}, Validation loss {val_loss.item():8.4f}")
    return params

In [35]:
# Experiment: fit using both the scaled input and its square as features.
# Scale the raw inputs by 0.1 before building the features.
t_u_ = np.array(t_u)*0.1
t_c_ = torch.tensor(t_c)
# t_u_ is rebound here from a NumPy array to a tensor with two columns per
# sample: [value, value**2].
t_u_ = torch.tensor(add_squared_row(t_u_))
# Hold out 20% of the samples for validation. No seed is set in this cell,
# so the split (and therefore the fitted values) can vary between runs.
train_t_u, train_t_c, val_t_u, val_t_c = shuffle_and_devide_set(t_c_, t_u_, 0.2)
# Layout expected by training_loop: params[0] is the bias, params[1:] holds
# one weight per feature column.
params = torch.tensor([0.0,1.0,1.0], dtype=float, requires_grad=True)
lr = 1e-3
optimizer = optim.SGD([params],lr)
# The trailing False disables the per-epoch progress printing.
params = training_loop(20000,optimizer, params, train_t_u, val_t_u,train_t_c,  val_t_c, False )
print(params)

tensor([-0.9588, -0.6620,  0.5285], dtype=torch.float64, requires_grad=True)


In [34]:
# Experiment: fit using only the scaled input as a single feature.
# Scale the raw inputs by 0.1 before converting them to a tensor.
t_u_ = np.array(t_u)*0.1
t_c_ = torch.tensor(t_c)
# t_u_ is rebound here from a NumPy array to a 1-D tensor.
t_u_ = torch.tensor(t_u_)
# Hold out 20% of the samples for validation. No seed is set in this cell,
# so the split (and therefore the fitted values) can vary between runs.
train_t_u, train_t_c, val_t_u, val_t_c = shuffle_and_devide_set(t_c_, t_u_, 0.2)
# Layout expected by training_loop: params[0] is the bias, params[1:] is the
# single weight.
params = torch.tensor([0.0, 1.0], dtype=float, requires_grad=True)
lr = 1e-3
optimizer = optim.SGD([params],lr)
# The trailing False disables the per-epoch progress printing.
params = training_loop(20000,optimizer, params, train_t_u, val_t_u,train_t_c,  val_t_c, False )
print(params)

tensor([-15.0652,   4.9888], dtype=torch.float64, requires_grad=True)
