In [2]:
import torch

## 自动求导

In [3]:
# Leaf tensor of shape (3, 4) drawn from a standard normal; autograd tracks it.
x = torch.randn((3, 4), requires_grad=True)
x

tensor([[-1.5104, -0.5024,  0.6039,  0.7769],
        [-0.7227,  0.4577, -0.6372,  1.5187],
        [-0.3497, -0.3751,  2.0976,  1.6070]], requires_grad=True)

In [4]:
# Second tracked leaf tensor of shape (3, 4).
b = torch.randn((3, 4), requires_grad=True)

In [5]:
# y is the scalar sum over every element of x + b.
t = torch.add(x, b)
y = torch.sum(t)

In [6]:
# Backpropagate from the scalar y to populate .grad on the tensors that require gradients.
y.backward()

In [7]:
# Gradient of y = (x + b).sum() with respect to b: one for every element.
b.grad

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [8]:
# requires_grad was never set on y explicitly; it is True because y is computed
# from tensors that require gradients.
x.requires_grad,b.requires_grad,y.requires_grad

(True, True, True)

## 反向传播计算 

In [9]:
# Build z = w * x + b. Only w and b are tracked by autograd; x is a plain input.
x = torch.rand(1)
b = torch.rand(1, requires_grad=True)
w = torch.rand(1, requires_grad=True)
y = torch.mul(w, x)
z = torch.add(y, b)

In [10]:
# retain_graph=True keeps the graph so backward() can be called again; repeated
# calls add to w.grad and b.grad unless the gradients are zeroed first.
z.backward(retain_graph=True)

In [11]:
# dz/dw for z = w * x + b, i.e. the value of x.
w.grad

tensor([0.8784])

In [12]:
# dz/db for z = w * x + b, which is 1.
b.grad

tensor([1.])

## 线性回归模型

### 不加激活函数的全连接层

In [13]:
import numpy as np

In [14]:
# Inputs 0..10 as a float32 column vector (one sample per row).
x_values = list(range(11))
x_train = np.asarray(x_values, dtype=np.float32).reshape(-1, 1)
x_train.shape

(11, 1)

In [15]:
# Targets follow y = 2x + 1, stored as a float32 column vector.
y_values = [2 * value + 1 for value in x_values]
y_train = np.asarray(y_values, dtype=np.float32).reshape(-1, 1)
y_train.shape

(11, 1)

In [16]:
import torch
import torch.nn as nn

In [17]:
class LinearRegressionModel(nn.Module):
    """Linear regression expressed as one fully connected layer with no activation.

    Args:
        input_dim: Number of input features per sample.
        output_dim: Number of output values per sample.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        # The only layer: an affine map from input_dim features to output_dim outputs.
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        """Forward pass: apply the linear layer to x and return the result."""
        return self.linear(x)

In [18]:
# One input feature and one output value per sample.
input_dim, output_dim = 1, 1

model = LinearRegressionModel(input_dim=input_dim, output_dim=output_dim)

In [19]:
# Display the module structure (its layers and their sizes).
model

LinearRegressionModel(
  (linear): Linear(in_features=1, out_features=1, bias=True)
)

### 参数与损失函数 

In [20]:
# Training hyperparameters.
epochs = 1000
learning_rate = 0.01

# Loss function: mean squared error.
criterion = nn.MSELoss()
# Optimisation algorithm: SGD over all of the model's parameters.
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

### 训练模型

In [21]:
# Convert the numpy training data to tensors once; the conversion does not
# depend on the epoch, so there is no need to repeat it inside the loop.
inputs = torch.from_numpy(x_train)
labels = torch.from_numpy(y_train)

# Count epochs from 1 so the progress report reads 50, 100, ..., epochs
# without mutating the loop variable.
for epoch in range(1, epochs + 1):
    # Gradients accumulate across backward() calls, so reset them every iteration.
    optimizer.zero_grad()

    # Forward pass.
    outputs = model(inputs)

    # Compute the loss between predictions and targets.
    loss = criterion(outputs, labels)

    # Backward pass: compute gradients of the loss.
    loss.backward()

    # Update the weight parameters.
    optimizer.step()

    # Report progress every 50 epochs.
    if epoch % 50 == 0:
        print('epoch {}, loss {}'.format(epoch, loss.item()))

epoch 50, loss 0.13502870500087738
epoch 100, loss 0.07701527327299118
epoch 150, loss 0.043926727026700974
epoch 200, loss 0.025054140016436577
epoch 250, loss 0.014290008693933487
epoch 300, loss 0.008150437846779823
epoch 350, loss 0.004648687317967415
epoch 400, loss 0.002651460003107786
epoch 450, loss 0.0015122797340154648
epoch 500, loss 0.0008625488844700158
epoch 550, loss 0.0004919555503875017
epoch 600, loss 0.00028059404576197267
epoch 650, loss 0.00016004606732167304
epoch 700, loss 9.128185774898157e-05
epoch 750, loss 5.205920751905069e-05
epoch 800, loss 2.9695725970668718e-05
epoch 850, loss 1.693694321147632e-05
epoch 900, loss 9.661706826591399e-06
epoch 950, loss 5.510627943294821e-06
epoch 1000, loss 3.142324658256257e-06


In [22]:
# Inference only: run the forward pass with gradient tracking disabled, rather
# than enabling it on the input and stripping it off again via the legacy `.data`.
with torch.no_grad():
    predicted = model(torch.from_numpy(x_train)).numpy()
predicted

array([[ 0.9967023],
       [ 2.9971771],
       [ 4.997652 ],
       [ 6.998127 ],
       [ 8.998602 ],
       [10.999077 ],
       [12.999552 ],
       [15.000027 ],
       [17.000502 ],
       [19.000977 ],
       [21.001451 ]], dtype=float32)

### 模型保存与读取

In [23]:
# Persist the model's state dict to 'model.pkl'.
torch.save(model.state_dict(), 'model.pkl')

In [24]:
# Load the state dict from 'model.pkl' back into the existing model.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source (consider weights_only=True where the installed PyTorch supports it).
model.load_state_dict(torch.load('model.pkl'))

<All keys matched successfully>

### 使用GPU训练

In [25]:
import torch
import torch.nn as nn
import numpy as np


class LinearRegressionModel(nn.Module):
    """Single fully connected layer with no activation, used as a linear regressor.

    Args:
        input_dim: Number of input features per sample.
        output_dim: Number of output values per sample.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        # Affine map from input_dim features to output_dim outputs.
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        """Apply the linear layer to x and return the result."""
        return self.linear(x)

input_dim = 1
output_dim = 1

model = LinearRegressionModel(input_dim, output_dim)

# Use the first CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)  # move the model's parameters to the chosen device

criterion = nn.MSELoss()
learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Move the training data to the device once, up front, instead of converting
# and transferring the same arrays again on every epoch.
inputs = torch.from_numpy(x_train).to(device)
labels = torch.from_numpy(y_train).to(device)

epochs = 1000
# Count epochs from 1 so the progress report reads 50, 100, ..., epochs
# without mutating the loop variable.
for epoch in range(1, epochs + 1):
    # Gradients accumulate across backward() calls, so reset them every iteration.
    optimizer.zero_grad()

    # Forward pass and loss.
    outputs = model(inputs)
    loss = criterion(outputs, labels)

    # Backward pass, then parameter update.
    loss.backward()
    optimizer.step()

    # Report progress every 50 epochs.
    if epoch % 50 == 0:
        print('epoch {}, loss {}'.format(epoch, loss.item()))

epoch 50, loss 0.4468052089214325
epoch 100, loss 0.25484079122543335
epoch 150, loss 0.14535175263881683
epoch 200, loss 0.08290304988622665
epoch 250, loss 0.04728482663631439
epoch 300, loss 0.026969486847519875
epoch 350, loss 0.015382410027086735
epoch 400, loss 0.008773569948971272
epoch 450, loss 0.005004085134714842
epoch 500, loss 0.0028541460633277893
epoch 550, loss 0.001627901685424149
epoch 600, loss 0.0009285155101679265
epoch 650, loss 0.000529574987012893
epoch 700, loss 0.000302047876175493
epoch 750, loss 0.0001722822489682585
epoch 800, loss 9.826547466218472e-05
epoch 850, loss 5.604909165413119e-05
epoch 900, loss 3.1966392270987853e-05
epoch 950, loss 1.8232703951071016e-05
epoch 1000, loss 1.0399753591627814e-05
