In [7]:
import torch
import numpy as np
import torch.nn as nn

In [3]:
num_inputs = 2
num_examples = 1000
true_w = [2, -3.4]
true_b = 4.2
features = torch.tensor(np.random.normal(0, 1, (num_examples, num_inputs)), dtype=torch.float)
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b
labels += torch.tensor(np.random.normal(0, 0.01, size=labels.size()), dtype=torch.float)

In [4]:
import torch.utils.data as Data

batch = 10

dataset = Data.TensorDataset(features, labels)

data_iter = Data.DataLoader(dataset, batch_size=batch, shuffle=True)

In [5]:
for X, y in data_iter:
    print(X, y)
    break

tensor([[ 0.8030,  1.5968],
        [ 0.8364, -0.4698],
        [ 2.0730,  1.7439],
        [ 0.5571,  0.1515],
        [-0.5259, -1.1967],
        [-1.1063,  0.7440],
        [-1.9027, -0.1122],
        [-0.5266,  0.2075],
        [ 0.5122,  1.6053],
        [ 1.3023,  0.1512]]) tensor([ 0.3782,  7.4686,  2.4213,  4.7982,  7.2094, -0.5560,  0.7937,  2.4311,
        -0.2319,  6.3022])


In [10]:
class LinearNet(nn.Module):
    def __init__(self, num_feature):
        """
        :param num_feature:  the number of example's feature
        """
        super().__init__()
        self.linear = nn.Linear(num_feature, 1)

    def forward(self, x):
        y_hat = self.linear(x)
        return y_hat

net = LinearNet(num_inputs)
print(net)

LinearNet(
  (linear): Linear(in_features=2, out_features=1, bias=True)
)


In [14]:
net = nn.Sequential(
    LinearNet(num_inputs)
)

for param in net.parameters():
    print(param)

Parameter containing:
tensor([[-0.0192,  0.2857]], requires_grad=True)
Parameter containing:
tensor([0.3573], requires_grad=True)


In [26]:
from torch.nn import init

init.normal_(net[0].linear.weight, mean=0, std=0.01)
init.constant_(net[0].linear.bias, val=0)  # 也可以直接修改bias的data: net[0].bias.data.fill_(0)

Parameter containing:
tensor([0.], requires_grad=True)

In [17]:
for param in net.parameters():
    print(param)

Parameter containing:
tensor([[-0.0177,  0.0241]], requires_grad=True)
Parameter containing:
tensor([0.], requires_grad=True)


In [18]:
loss = nn.MSELoss()

In [20]:
import torch.optim as optim

optimizer = optim.SGD(net.parameters(), lr=0.03)
print(optimizer)

SGD (
Parameter Group 0
    dampening: 0
    lr: 0.03
    momentum: 0
    nesterov: False
    weight_decay: 0
)


In [21]:
# 调整学习率
for param_group in optimizer.param_groups:
    param_group['lr'] *= 0.1 # 学习率为之前的0.1倍

In [22]:
print(optimizer)

SGD (
Parameter Group 0
    dampening: 0
    lr: 0.003
    momentum: 0
    nesterov: False
    weight_decay: 0
)


In [27]:
num_epoch = 30

for epoch in range(num_epoch):

    for X, y in data_iter:
        output = net(X)
        l = loss(y, output.view(y.size()))
        optimizer.zero_grad()
        l.backward()
        optimizer.step()

    print("epoch {:d}, loss: {:7f}".format(epoch+1, l.item()))

epoch 1, loss: 11.649608
epoch 2, loss: 2.967695
epoch 3, loss: 1.056254
epoch 4, loss: 0.304587
epoch 5, loss: 0.044725
epoch 6, loss: 0.019371
epoch 7, loss: 0.002882
epoch 8, loss: 0.002456
epoch 9, loss: 0.000464
epoch 10, loss: 0.000201
epoch 11, loss: 0.000102
epoch 12, loss: 0.000134
epoch 13, loss: 0.000122
epoch 14, loss: 0.000156
epoch 15, loss: 0.000112
epoch 16, loss: 0.000086
epoch 17, loss: 0.000106
epoch 18, loss: 0.000100
epoch 19, loss: 0.000138
epoch 20, loss: 0.000121
epoch 21, loss: 0.000080
epoch 22, loss: 0.000074
epoch 23, loss: 0.000037
epoch 24, loss: 0.000081
epoch 25, loss: 0.000156
epoch 26, loss: 0.000042
epoch 27, loss: 0.000057
epoch 28, loss: 0.000056
epoch 29, loss: 0.000053
epoch 30, loss: 0.000029


In [30]:
# check the value of param

dense = net[0]
print(true_w, dense.linear.weight)  # weight and bias belong to class attribute 'linear'
print(true_b, dense.linear.bias)

[2, -3.4] Parameter containing:
tensor([[ 1.9999, -3.3995]], requires_grad=True)
4.2 Parameter containing:
tensor([4.1997], requires_grad=True)
