In [11]:
import numpy as np
import torch
torch.set_printoptions(edgeitems=2, linewidth=75)
import torch.optim as optim

In [12]:
# Paired measurements: t_c holds the target values and t_u the raw inputs
# that the model is fitted on (11 samples each).
_t_c_values = [0.5, 14.0, 15.0, 28.0, 11.0, 8.0, 3.0, -4.0, 6.0, 13.0, 21.0]
_t_u_values = [35.7, 55.9, 58.2, 81.9, 56.3, 48.9, 33.9, 21.8, 48.4, 60.4, 68.4]

t_c = torch.tensor(_t_c_values)
t_u = torch.tensor(_t_u_values)

# Rescaled copy of the inputs (factor 0.1) used by the training cells that
# pass t_un instead of t_u.
t_un = 0.1 * t_u


In [18]:
def model(t_u,w,b):
    """Linear model: scale the input ``t_u`` by weight ``w`` and add bias ``b``.

    Returns ``w * t_u + b``.
    """
    scaled = w * t_u
    return scaled + b

def loss_fn(t_p,t_c):
    """Mean squared error between predictions ``t_p`` and targets ``t_c``.

    Returns the mean of the element-wise squared differences.
    """
    return (t_p - t_c).pow(2).mean()

In [19]:
# Exploratory: list the names exposed by torch.optim (optimizer classes
# such as SGD and Adam, plus the lr_scheduler submodule).
dir(optim)

['ASGD',
 'Adadelta',
 'Adagrad',
 'Adam',
 'AdamW',
 'Adamax',
 'LBFGS',
 'Optimizer',
 'RMSprop',
 'Rprop',
 'SGD',
 'SparseAdam',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 'lr_scheduler']

In [20]:
# Initial guess for the two model parameters (w = 1, b = 0); requires_grad
# makes autograd track them so the optimizer can update them.
learning_rate = 1e-5
params = torch.tensor([1.0, 0.0], requires_grad=True)

# Plain stochastic gradient descent over the single parameter tensor.
optimizer = optim.SGD([params], lr=learning_rate)

In [21]:
# One manual optimization step: forward pass, loss, backward pass, update.
t_p = model(t_u, *params)
loss = loss_fn(t_p, t_c)

# backward() accumulates into params.grad instead of overwriting it, so clear
# any gradient left over from a previous execution of this cell first.
# On a fresh kernel this is a no-op and the displayed result is unchanged.
optimizer.zero_grad()
loss.backward()

optimizer.step()
params

tensor([ 9.5483e-01, -8.2600e-04], requires_grad=True)

In [23]:
# loop
def training_loop(n_epochs,optimizer,params,t_u,t_c):
    """Optimize ``params`` for ``n_epochs`` iterations and return them.

    Every epoch evaluates ``model`` on ``t_u``, measures ``loss_fn`` against
    ``t_c``, backpropagates, and applies one ``optimizer`` step. The loss
    printed every 500 epochs is the one computed before that epoch's update.

    Note: a later cell in this notebook defines another ``training_loop``
    with a different signature, which replaces this one once executed.
    """
    for epoch in range(1, n_epochs + 1):
        # Gradients accumulate across backward() calls, so reset them first.
        optimizer.zero_grad()

        loss = loss_fn(model(t_u, *params), t_c)
        loss.backward()
        optimizer.step()

        if epoch % 500 != 0:
            continue
        print('Epoch %d,Loss %f' % (epoch,float(loss)))

    return params

In [25]:
# Restart from (w = 1, b = 0) and train with SGD at learning rate 1e-2 on the
# rescaled inputs t_un.
learning_rate = 1e-2
params = torch.tensor([1.0, 0.0], requires_grad=True)
optimizer = optim.SGD([params], lr=learning_rate)

training_loop(
    5000,
    optimizer,
    params,
    t_un,
    t_c,
)

Epoch 500,Loss 7.860118
Epoch 1000,Loss 3.828538
Epoch 1500,Loss 3.092191
Epoch 2000,Loss 2.957697
Epoch 2500,Loss 2.933134
Epoch 3000,Loss 2.928648
Epoch 3500,Loss 2.927830
Epoch 4000,Loss 2.927680
Epoch 4500,Loss 2.927651
Epoch 5000,Loss 2.927648


tensor([  5.3671, -17.3012], requires_grad=True)

In [28]:
# Use the Adam optimizer.
# Note that this run is fed the raw inputs t_u rather than the rescaled t_un.

learning_rate = 1e-1
params = torch.tensor([1.0, 0.0], requires_grad=True)
optimizer = optim.Adam([params], lr=learning_rate)

training_loop(
    2000,
    optimizer,
    params,
    t_u,
    t_c,
)

Epoch 500,Loss 7.612903
Epoch 1000,Loss 3.086700
Epoch 1500,Loss 2.928578
Epoch 2000,Loss 2.927646


tensor([  0.5367, -17.3021], requires_grad=True)

#### 评估损失（训练集）
规则：
1. 如果训练损失没有下降，可能是因为模型对于数据来说过于简单，另一个可能就是我们的数据没有包含有效信息
2. 如果训练损失持续下降而验证损失不降反升（两者趋势发散），说明模型过拟合了

In [36]:
# Split the samples into a training subset and a validation subset.

n_samples = t_u.shape[0]
n_val = int(0.2 * n_samples)   # hold out 20% of the samples, rounded down
n_train = n_samples - n_val

# Random permutation of the sample indices.
# NOTE: unseeded, so the split differs between runs unless
# torch.manual_seed(...) is called beforehand.
shuffled_indices = torch.randperm(n_samples)

# Slice on a non-negative boundary. Negative slicing (`[:-n_val]` /
# `[-n_val:]`) would yield an empty training set and an all-samples
# validation set whenever n_val is 0, because -0 == 0.
train_indices = shuffled_indices[:n_train]   # first n_train shuffled indices
val_indices = shuffled_indices[n_train:]     # remaining n_val indices

train_indices, val_indices

(tensor([ 7,  8,  4,  2, 10,  3,  6,  1,  5]), tensor([0, 9]))

In [44]:
# Gather the samples selected by the training / validation index tensors.

train_t_u, train_t_c = t_u[train_indices], t_c[train_indices]
val_t_u, val_t_c = t_u[val_indices], t_c[val_indices]

# Rescaled inputs for each subset (factor 0.1).
train_t_un, val_t_un = 0.1 * train_t_u, 0.1 * val_t_u

In [54]:
def training_loop(n_epochs,optimizer,params,train_t_u,val_t_u,train_t_c,val_t_c):
    """Train ``params`` on the training split while monitoring a validation split.

    Each epoch computes the training loss, computes the validation loss with
    autograd disabled, then backpropagates the training loss only and applies
    one ``optimizer`` step. Both losses (as measured before that epoch's
    update) are printed for the first three epochs and every 500th epoch.

    Returns the ``params`` object that was passed in.

    Note: this reuses the name of the earlier 5-argument ``training_loop``
    in this notebook and replaces it once this cell is executed.
    """
    for epoch in range(1, n_epochs + 1):
        train_loss = loss_fn(model(train_t_u, *params), train_t_c)

        # Validation is evaluation only: under no_grad the result is not
        # tracked by autograd, so it cannot contribute to the gradients.
        with torch.no_grad():
            val_loss = loss_fn(model(val_t_u, *params), val_t_c)
        assert not val_loss.requires_grad

        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()

        should_report = epoch <= 3 or epoch % 500 == 0
        if should_report:
            print(f"Epoch {epoch}, Training loss {train_loss.item():.4f},"
                  f" Validation loss {val_loss.item():.4f}")

    return params

In [55]:
# Restart from (w = 1, b = 0) and train with Adam on the rescaled training
# inputs, reporting the validation loss alongside the training loss.
learning_rate = 1e-1
params = torch.tensor([1.0, 0.0], requires_grad=True)
optimizer = optim.Adam([params], lr=learning_rate)

training_loop(
    3000,
    optimizer,
    params,
    train_t_un,
    val_t_un,
    train_t_c,
    val_t_c,
)

Epoch 1, Training loss 91.7935, Validation loss 28.9332
Epoch 2, Training loss 82.2039, Validation loss 25.7886
Epoch 3, Training loss 73.4814, Validation loss 23.3551
Epoch 500, Training loss 2.7724, Validation loss 5.1029
Epoch 1000, Training loss 2.7248, Validation loss 4.6795
Epoch 1500, Training loss 2.7248, Validation loss 4.6792
Epoch 2000, Training loss 2.7248, Validation loss 4.6792
Epoch 2500, Training loss 2.7248, Validation loss 4.6792
Epoch 3000, Training loss 2.7248, Validation loss 4.6792


tensor([  5.3414, -16.7803], requires_grad=True)