In [121]:
import numpy as np
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset

## PyTorch基础: 模型参数设置和输入、输出形状

在PyTorch的`torch.nn`中，模型类的参数设置的都是输入、输出的`特征`维度；而输入、输出的样本数量（批量大小）则直接由数据决定，不需要在模型中进行参数设置。

# Dataset格式

In [98]:
# The usual pairing is TensorDataset + DataLoader: TensorDataset wraps the raw
# tensors into a Dataset, and DataLoader then serves it as an iterable of batches.
x = torch.randn(32, 4)  # features: 32 samples, 4 features each
y = torch.randn(32, 1)  # targets: one value per sample

# Wrap the tensors in a Dataset that torch can index sample by sample.
torch_dataset = TensorDataset(x, y)

# Hand the Dataset to a DataLoader that yields shuffled mini-batches.
data_iter = DataLoader(
    torch_dataset,
    batch_size=3,    # samples per mini-batch
    shuffle=True,    # reshuffle the samples (usually a good idea)
    num_workers=2,   # number of worker subprocesses used for loading
)

In [20]:
# Check how data_iter works: show the first mini-batch and count the batches.
# The loop targets are named X_batch / y_batch so that iterating does not
# overwrite the full-dataset tensor `y` defined in the previous cell.
count = 0
for X_batch, y_batch in data_iter:
    if count == 0:
        print("feature:", X_batch, "\n", "target:", y_batch)
    count += 1
print(f'data_iter中总共有{count}个batch的数据。')

feature: tensor([[-1.0117, -0.8266, -0.6284, -2.5816],
        [-0.0178, -0.9856, -1.4027, -0.3659],
        [ 0.5260,  0.1485,  0.3743,  0.1258]]) 
 target: tensor([0.2598, 1.8055, 1.7201])
data_iter中总共有11个batch的数据。


# Linear Regression

## 首先使用类的格式写一个单层线性神经网络

In [191]:
class LinearModel(nn.Module):
    """Single-layer linear network (one ``nn.Linear`` layer).

    Args:
        in_features: number of input features per sample (default 4).
        out_features: number of output values per sample (default 1).

    The batch size is not a constructor argument; it is taken from whatever
    input is passed to ``forward``.
    """

    def __init__(self, in_features=4, out_features=1):
        super().__init__()
        self.layer = nn.Linear(in_features=in_features, out_features=out_features)
        # Starting point: weights drawn from a normal with mean 0 and std 0.01,
        # bias set to zero.
        nn.init.normal_(self.layer.weight, mean=0.0, std=0.01)
        nn.init.zeros_(self.layer.bias)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the result."""
        return self.layer(x)

In [107]:
# Same setup as the earlier example: freshly drawn random tensors with the same shapes.
x = torch.randn(32, 4)  # features: 32 samples, 4 features each
y = torch.randn(32, 1)  # targets: one value per sample
# Wrap the tensors in a Dataset first, then serve it through a DataLoader.
torch_dataset = TensorDataset(x, y)
data_iter = DataLoader(
    torch_dataset,
    batch_size=3,    # samples per mini-batch
    shuffle=True,    # reshuffle the samples (usually a good idea)
    num_workers=2,   # number of worker subprocesses used for loading
)

In [72]:
lm = LinearModel()
# Here the 32 x 4 input x is multiplied by the layer's 4-input / 1-output weights, so the shapes are compatible.
print(f'输出形状: {lm(x).shape}')

输出形状: torch.Size([32, 1])


## 我们还可以使用nn.Sequential简洁实现

In [73]:
# Build the same single-layer linear network with nn.Sequential.
net = nn.Sequential(nn.Linear(4, 1))

# Initialise the layer in place, outside of autograd tracking:
# weights from a normal with mean 0 and std 0.01, bias filled with 0.
with torch.no_grad():
    net[0].weight.normal_(0, 0.01)
    net[0].bias.fill_(0)
print(f'输出形状: {net(x).shape}')

输出形状: torch.Size([32, 1])


In [74]:
# Print every parameter tensor of the network to inspect its current values.
for param in net.parameters():
    print(param)

Parameter containing:
tensor([[ 0.0113,  0.0072, -0.0092, -0.0127]], requires_grad=True)
Parameter containing:
tensor([0.], requires_grad=True)


## 损失函数和优化器

In [111]:
# Loss function: mean squared error.
# Optimizer: SGD over the parameters of lm, with learning rate 0.03.
loss = nn.MSELoss()
optim = torch.optim.SGD(lm.parameters(), lr=0.03)

In [112]:
# Forward pass on the full data set, then display the loss of those predictions.
y_hat = lm(x)
loss(y_hat, y)

tensor(1.3066, grad_fn=<MseLossBackward0>)

In [113]:
# Check how the loss is computed: MSE is the mean of the squared errors.
# Tensor ops replace the builtin sum()/len(), so the check averages over every
# element (also correct for multi-column targets), and torch.isclose() replaces
# the comparison of rounded floats, which can disagree when the two values
# fall on opposite sides of a rounding boundary.
manual_mse = ((y - y_hat) ** 2).mean()
print(manual_mse.item())
print(torch.isclose(manual_mse, loss(y_hat, y)).item())

1.3066014051437378
True


## 反向传播训练

In [115]:
# Before any backward pass has been run, the gradient of each parameter is empty (None).
# NOTE(review): the recorded output shows populated gradients, so backward() had
# presumably already been run in that kernel session when the output was captured.
for param in lm.parameters():
    print(param, param.grad)

Parameter containing:
tensor([[-0.0095,  0.0007,  0.0019, -0.0010]], requires_grad=True) tensor([[-19.5573, -32.4925,   1.5564, -22.6102]])
Parameter containing:
tensor([0.], requires_grad=True) tensor([1.1328])


In [116]:
# Training: for every mini-batch compute the loss, back-propagate, and take one optimizer step.
num_epochs = 3  # three full passes over the data
for epoch in range(num_epochs):
    for X_train, y_train in data_iter:
        batch_loss = loss(lm(X_train), y_train)
        # zero_grad() clears the gradients held on the model parameters (not the
        # parameters themselves); without it, backward() would add the new
        # gradients onto the ones left over from the previous step.
        optim.zero_grad()
        batch_loss.backward()
        # One gradient-descent update using the gradients just computed.
        optim.step()
    # End-of-epoch report on the full data set. No gradients are needed for
    # reporting, so the forward pass runs without building an autograd graph.
    with torch.no_grad():
        epoch_loss = loss(lm(x), y)
    print(f'epoch {epoch + 1}, loss {epoch_loss:f}')

epoch 1, loss 1.289197
epoch 2, loss 1.278411
epoch 3, loss 1.361941


# Logistic Regression

## 生成模拟数据

In [180]:
# Generate synthetic data following a logit model: logit(P(y = 1)) = X w + b.
# True parameters
w = torch.tensor([2, -3.4])
b = 1.2
num_examples = 30

# Simulate the features, then the success probability implied by the true parameters.
X = torch.normal(0, 1, (num_examples, len(w)))
# torch.sigmoid(z) equals exp(z) / (1 + exp(z)) but does not turn into
# inf / inf = nan when exp(z) overflows for a large z.
prob = torch.sigmoid(torch.matmul(X, w) + b)

# prob holds one probability per example; draw each label y in {0, 1}
# as a single Bernoulli trial with that probability.
y = np.random.binomial(np.ones(num_examples, dtype="int"), prob)
y = torch.tensor(y).reshape((-1, 1))

# Build the mini-batch loader.
torch_dataset = TensorDataset(X, y)
data_iter = DataLoader(
    dataset=torch_dataset,      # torch TensorDataset format
    batch_size=3,               # batch size
    shuffle=True,               # reshuffle the samples (usually a good idea)
    num_workers=2)              # number of worker subprocesses used for loading

In [181]:
# Display the shapes: X is (num_examples, len(w)), y is a column vector, prob is a flat vector.
X.shape, y.shape, prob.shape

(torch.Size([30, 2]), torch.Size([30, 1]), torch.Size([30]))

## Logistic 损失函数

In [189]:
# logistic loss function, i.e., the negative log-likelihood
def logit_loss(y_hat, y):
    """Summed logistic loss (negative log-likelihood) for binary labels.

    Args:
        y_hat: tensor of logits, i.e. the linear predictor before any sigmoid.
        y: tensor of 0/1 labels with as many elements as ``y_hat``; it is
            reshaped to the shape of ``y_hat`` before use.

    Returns:
        Scalar tensor equal to ``sum(-y * y_hat + log(1 + exp(y_hat)))``.
    """
    # See the Logistic Regression notes for the derivation of this formula.
    # softplus(z) = log(1 + exp(z)), evaluated without overflow: the naive form
    # becomes log(inf) = inf as soon as exp(z) overflows for a large logit.
    return (-y.reshape(y_hat.shape) * y_hat + nn.functional.softplus(y_hat)).sum()


## 训练Logistic Regression

In [190]:
# Training procedure for the logistic regression model.
loss = logit_loss
lm = nn.Sequential(nn.Linear(2, 1))

# Tuning parameters. `lr` is now the value actually handed to the optimizer;
# it was previously assigned 0.1 but never used, while SGD was hard-coded to
# 0.03. The effective step size (0.03) is kept.
lr = 0.03
num_epochs = 10
optim = torch.optim.SGD(lm.parameters(), lr=lr)

# Initialisation of the parameters: small random weights, bias filled with 1.
lm[0].weight.data.normal_(0, 0.01)
lm[0].bias.data.fill_(1)

# Training loop.
for epoch in range(num_epochs):
    for X_train, y_train in data_iter:
        y_train = y_train.to(torch.float)
        batch_loss = loss(lm(X_train), y_train)
        # zero_grad() clears the gradients held on the model parameters (not the
        # parameters themselves); without it, backward() would add the new
        # gradients onto the ones left over from the previous step.
        optim.zero_grad()
        batch_loss.backward()
        # One gradient-descent update using the gradients just computed.
        optim.step()
    # End-of-epoch report on the full data set. No gradients are needed for
    # reporting, so the forward pass runs without building an autograd graph.
    with torch.no_grad():
        epoch_loss = loss(lm(X), y)
    print(f'epoch {epoch + 1}, loss {epoch_loss:f}')

epoch 1, loss 17.202641
epoch 2, loss 14.917555
epoch 3, loss 13.360222
epoch 4, loss 12.256972
epoch 5, loss 11.437306
epoch 6, loss 10.835018
epoch 7, loss 10.369900
epoch 8, loss 9.999341
epoch 9, loss 9.704663
epoch 10, loss 9.456801


In [5]:
# Build a 3 x 3 grid of zeros; the comprehension creates a separate inner list for every row.
mat = [[0] * 3 for _ in range(3)]
mat

[[0, 0, 0], [0, 0, 0], [0, 0, 0]]

In [6]:
# Set the element at row 0, column 1.
mat[0][1] = 1

In [7]:
# Display the grid again to see the effect of the assignment.
mat

[[0, 1, 0], [0, 0, 0], [0, 0, 0]]