# 1 环境配置

# 2 基本数据处理与计算操作

## 2.1 创建Tensor

In [None]:
import torch

In [None]:
# Allocate a 2x3 tensor without initializing it; the printed values are arbitrary.
x = torch.empty(2, 3)
print(x)

In [None]:
# 2x3 tensor of samples drawn uniformly from [0, 1).
x = torch.rand(2, 3)
print(x)

In [None]:
# 2x3 tensor of zeros stored as 64-bit integers (torch.long).
x = torch.zeros(2, 3, dtype=torch.long)
print(x)

In [None]:
# Build a tensor directly from nested Python lists.
x = torch.tensor([[5.5, 3], [2.2, 5]])
print(x)

In [None]:
# new_ones builds a 2x3 tensor of ones from the existing x; the dtype is overridden to float64.
x = x.new_ones(2, 3, dtype=torch.float64)
print(x)

# randn_like keeps the shape of x, samples from a standard normal, and overrides the dtype to float32.
x = torch.randn_like(x, dtype=torch.float)
print(x)

In [None]:
# size() and the shape attribute report the same torch.Size.
print(x.size())
print(x.shape)

## 2.2 Tensor的相关操作

In [None]:
x = torch.rand(2, 3)
y = torch.rand(2, 3)
# Three ways to add: the + operator, torch.add, and the in-place add_.
print(x + y)

print(torch.add(x, y))

# The trailing underscore marks an in-place operation: y itself is overwritten with y + x.
y.add_(x)
print(y)

In [None]:
# Indexing returns a view that shares storage with x, so the in-place
# update of y is also visible through x[0, :].
y = x[0, :]
y += 1
print(y)
print(x[0, :])

In [None]:
# view reshapes without copying; -1 lets PyTorch infer that dimension (3 here).
y = x.view(6)
z = x.view(-1, 2)
print(x.size(), y.size(), z.size())

In [None]:
# y was obtained with x.view(6), so it shares storage with x and reflects this in-place change.
x += 1
print(x)
print(y)

In [None]:
# clone() copies the data first, so x_cp is unaffected by the later in-place change to x.
x_cp = x.clone().view(6)
x -= 1
print(x)
print(x_cp)

## 2.3 广播机制

In [None]:
# Shapes (1, 2) and (3, 1) are broadcast to (3, 2) before the element-wise addition.
x = torch.arange(1, 3).view(1, 2)
print(x)
y = torch.arange(1, 4).view(3, 1)
print(y)
print(x + y)

## 2.4 Tensor和Numpy相互转换

In [None]:
# Tensor.numpy() returns an array that shares memory with the (CPU) tensor:
# an in-place change on either side shows up on the other.
a = torch.ones(3)
b = a.numpy()
print(a, b)

a += 1
print(a, b)
b += 1
print(a, b)

In [None]:
import numpy as np

# torch.from_numpy() also shares memory with the source array, in both directions.
a = np.ones(3)
b = torch.from_numpy(a)
print(a, b)

a += 1
print(a, b)
b += 1
print(a, b)

In [None]:
# torch.tensor() copies its input, so c does not follow later changes to a.
c = torch.tensor(a)
a += 1
print(a, c)

## 2.5 Tensor on GPU

In [None]:
# The body only runs when a CUDA device is available.
if torch.cuda.is_available():
    devc = torch.device("cuda")
    y = torch.ones_like(x, device=devc)  # created directly on the GPU
    x = x.to(devc)  # copy x to the GPU
    z = x + y
    print(z)
    print(z.to("cpu", torch.double))  # .to() can change device and dtype in one call

## 2.6 自动求梯度

In [None]:
# requires_grad=True asks autograd to record operations applied to x.
x = torch.ones(2, 2, requires_grad=True)
print(x)
print(x.grad_fn)  # None: x was created directly, not produced by an operation

In [None]:
y = x + 2
print(y)
print(y.grad_fn)  # y is the result of an addition, so it carries a grad_fn

In [None]:
# z = 3 * y^2 element-wise; out is the scalar mean of z.
z = y * y * 3
out = z.mean()
print(z)
print(out)

In [None]:
a = torch.randn(2, 2)  # requires_grad is False by default
a = (a * 3) / (a - 1)
print(a.requires_grad)
a.requires_grad_(True)  # switch the flag on in place
print(a.requires_grad)
b = (a * a).sum()
print(b.grad_fn)

In [None]:
# out = mean(3 * (x + 2)^2), so d(out)/dx = 1.5 * (x + 2), which is 4.5 for x = 1.
x = torch.ones(2, 2, requires_grad=True)
y = x + 2
z = y * y * 3
out = z.mean()
out.backward()
print(x.grad)

In [None]:
# Gradients accumulate: this backward call adds 1 to the values already in x.grad.
out2 = x.sum()
out2.backward()
print(x.grad)

out3 = x.sum()
x.grad.data.zero_()  # reset first so that only the gradient of out3 remains
out3.backward()
print(x.grad)

In [None]:
# y = 2x, reshaped to 2x2; z is therefore not a scalar.
x = torch.tensor([1.0, 2.0, 3.0, 4.0], requires_grad=True)
y = 2 * x
z = y.view(2, 2)
print(z)

In [None]:
# z is not a scalar, so backward() needs a tensor v of the same shape;
# x.grad then holds the gradient of sum(z * v) with respect to x.
v = torch.tensor([[1.0, 0.1], [0.01, 0.001]], dtype=torch.float)
z.backward(v)
print(x.grad)

# 3 线性回归实现

## 3.1 手动实现线性回归

### 导入所需库

In [None]:
import torch
from IPython import display
from matplotlib import pyplot as plt
import numpy as np
import random

### 生成数据

In [None]:
num_inputs = 2  # number of features per example
num_examples = 1000
true_w = [2, -3.4]  # ground-truth weights
true_b = 4.2  # ground-truth bias
# Features are drawn from a standard normal distribution.
features = torch.tensor(
    np.random.normal(0, 1, (num_examples, num_inputs)), dtype=torch.float
)
# Noise-free labels from the ground-truth linear model.
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b
# Add Gaussian noise with standard deviation 0.01.
labels += torch.tensor(np.random.normal(0, 0.01, size=labels.size()), dtype=torch.float)

In [None]:
def use_svg_display():
    """Ask IPython to render matplotlib figures as SVG."""
    # NOTE(review): IPython.display.set_matplotlib_formats is deprecated in
    # recent IPython releases -- confirm it exists in the target environment.
    display.set_matplotlib_formats("svg")


def set_figsize(figsize=(3.5, 2.5)):
    """Switch to SVG rendering and set matplotlib's default figure size.

    Args:
        figsize: Value stored in ``plt.rcParams["figure.figsize"]``.
    """
    use_svg_display()
    plt.rcParams["figure.figsize"] = figsize


set_figsize()
# Scatter the second feature against the labels; the third positional argument (1) is the marker size.
plt.scatter(features[:, 1].numpy(), labels.numpy(), 1)

### 读取数据

In [None]:
num_inputs = 2


# Mini-batch data iterator
def data_iter(batch_size, features, labels):
    """Yield ``(features, labels)`` mini-batches in a freshly shuffled order.

    The example indices are shuffled with ``random.shuffle`` on every call.
    The final batch is shorter when the number of examples is not a multiple
    of ``batch_size``.
    """
    total = len(features)
    order = list(range(total))
    random.shuffle(order)
    for start in range(0, total, batch_size):
        stop = min(start + batch_size, total)
        picked = torch.LongTensor(order[start:stop])
        yield features.index_select(0, picked), labels.index_select(0, picked)

### 构建模型

In [None]:
# Weights start as Gaussian noise (std 0.01) with shape (num_inputs, 1); the bias starts at zero.
w = torch.tensor(np.random.normal(0, 0.01, (num_inputs, 1)), dtype=torch.float32)
b = torch.zeros(1, dtype=torch.float32)

In [None]:
# Enable gradient tracking on both parameters (in place).
w.requires_grad_(True)
b.requires_grad_(True)

### 前向函数

In [None]:
def linreg(X, w, b):
    """Linear model: the matrix product of ``X`` and ``w`` plus the bias ``b``."""
    projection = torch.mm(X, w)
    return projection + b

### 损失函数

In [None]:
def squared_loss(y_hat, y):
    """Element-wise halved squared error.

    ``y`` is reshaped to the shape of ``y_hat`` before subtracting; the result
    is not reduced.
    """
    target = y.view(y_hat.size())
    residual = y_hat - target
    return residual**2 / 2

### 优化器

In [None]:
# Mini-batch stochastic gradient descent (SGD)
def sgd(params, lr, batch_size):
    """Update every parameter in place by ``lr * grad / batch_size``.

    The update goes through ``.data`` so autograd does not record it.
    Gradients are left untouched; the caller is responsible for zeroing them.
    """
    for p in params:
        step = lr * p.grad / batch_size
        p.data -= step

### 模型训练

In [None]:
lr = 0.03  # learning rate
num_epochs = 128
batch_size = 64
net = linreg
loss = squared_loss
for epoch in range(num_epochs):
    # One pass over the shuffled data, one mini-batch at a time.
    for X, y in data_iter(batch_size, features, labels):
        l = loss(net(X, w, b), y).sum()  # loss summed over the mini-batch
        l.backward()
        sgd([w, b], lr, batch_size)  # sgd divides the step by batch_size
        # Gradients accumulate across backward calls, so clear them after each update.
        w.grad.data.zero_()
        b.grad.data.zero_()
    # Mean loss over the full dataset after this epoch.
    train_1 = loss(net(features, w, b), labels).mean().item()
    print("epoch %d, loss %f" % (epoch + 1, train_1))

In [None]:
# Compare the ground-truth parameters with the learned w and b.
print(true_w, "\n", w)
print(true_b, "\n", b)

## 3.2 利用torch.nn实现线性回归

In [None]:
import torch
import torch.nn as nn

### 读取数据

In [None]:
lr = 0.03
import torch.utils.data as data

batch_size = 64
# Pair every feature row with its label.
dataset = data.TensorDataset(features, labels)

# NOTE: this rebinds data_iter from the generator function defined earlier to a DataLoader.
data_iter = data.DataLoader(
    dataset=dataset,
    batch_size=batch_size,
    shuffle=True,  # shuffle the data
    num_workers=0,  # number of loader workers; use 0 on Windows
)

### 构建模型(nn.Module)

In [None]:
class LinearNet(nn.Module):
    """A single fully connected layer mapping ``n_features`` inputs to one output."""

    def __init__(self, n_features):
        super().__init__()
        self.linear = nn.Linear(in_features=n_features, out_features=1)

    def forward(self, x):
        """Forward pass: apply the linear layer to ``x``."""
        return self.linear(x)


net = LinearNet(num_inputs)

### 构建模型(其他方法)

In [None]:
# Build the model with nn.Sequential
# Option 1
net = nn.Sequential(
    nn.Linear(num_inputs, 1)
    # more layers can be added here
)

# Option 2
# net = nn.Sequential()
# net.add_module('linear',nn.Linear(num_inputs,1))
# net.add_module(...)   # more layers can be added here

# Option 3
# from collections import OrderedDict
# net = nn.Sequential(OrderedDict([
#     ('linear',nn.Linear(num_inputs,1))
#     # more layers can be added here
# ]))

### 模型参数初始化

In [None]:
from torch.nn import init

# Draw every weight from a normal distribution with mean 0 and standard deviation 0.01.
init.normal_(net[0].weight, mean=0, std=0.01)
init.constant_(
    net[0].bias, val=0
)  # the bias can also be set through its data: net[0].bias.data.fill_(0)
# The bias is initialised to zero.

### 损失函数和优化器

In [None]:
# Mean squared error loss (the mean is taken, so the result is a scalar and .backward() can be called directly)
loss = nn.MSELoss()

In [None]:
import torch.optim as optim

# Stochastic gradient descent with a learning rate of 0.03
optimizer = optim.SGD(net.parameters(), lr=0.03)

# Different sub-networks can be given different learning rates
# optimizer = optim.SGD(
#     [
#         {"params": net.subnet1.parameters(), "lr": 0.01},
#         {"params": net.subnet2.parameters(), "lr": 0.1},
#         {"params": net.subnet3.parameters(), "lr": 0.05},
#         {"params": net.subnet4.parameters(), "lr": 0.03},
#         {"params": net.subnet5.parameters(), "lr": 0.02},
#     ],
#     lr=0.03,
# )

### 模型训练

In [None]:
num_epochs = 128
for epoch in range(1, num_epochs + 1):
    for X, y in data_iter:
        output = net(X)
        # Reshape y into a column before comparing it with the model output.
        l = loss(output, y.view(-1, 1))
        optimizer.zero_grad()  # clear gradients left over from the previous step
        l.backward()
        optimizer.step()
    # l is the loss of the last mini-batch of this epoch.
    print(f"Epoch {epoch}, loss: {l.item()}")

In [None]:
# Compare the ground truth with the parameters learned by the nn model.
# The module-level w and b belong to the hand-written model of section 3.1 and
# are not updated by this training run, so read the values from the network's
# first (and only) layer instead.
print(true_w, "\n", net[0].weight)
print(true_b, "\n", net[0].bias)

## 3.3 常用损失函数

In [None]:
# Each assignment below rebinds ``loss``; only the last one stays in effect.

# Mean absolute error (MAE)
loss = nn.L1Loss()

# Mean squared error (MSE)
loss = nn.MSELoss()

# Binary cross-entropy loss
loss = nn.BCELoss()

# Binary cross-entropy loss with a built-in sigmoid layer
loss = nn.BCEWithLogitsLoss()

# Cross-entropy loss
loss = nn.CrossEntropyLoss()  # applies softmax internally

## 3.4 模型预测及评价

In [None]:
def accuracy(y_hat, y):
    """Return the fraction of rows whose arg-max along dim 1 equals the label in ``y``."""
    predicted = y_hat.argmax(dim=1)
    matches = (predicted == y).float()
    return matches.mean().item()

In [None]:
# Accuracy of the model ``net`` over the batches yielded by ``data_iter``
def evaluate_accuracy(data_iter, net):
    """Return the share of examples whose arg-max prediction equals the label.

    Correct predictions are counted per batch and divided by the total number
    of labels seen.
    """
    correct = 0.0
    total = 0
    for X, y in data_iter:
        predicted = net(X).argmax(dim=1)
        correct += (predicted == y).float().sum().item()
        total += y.shape[0]
    return correct / total

# 4 实验

## 4.1 pytorch基本操作考察

### 1

In [None]:
import torch

M = torch.randint(1, 10, (1, 3))
N = torch.randint(1, 10, (2, 1))
print(M)
print(N)
# Three equivalent ways to subtract; shapes (1, 3) and (2, 1) broadcast to (2, 3).
# Option 1: the - operator
R = M - N
print(R)
# Option 2: torch.sub
R = torch.sub(M, N)
print(R)
# Option 3: the Tensor.sub method
R = M.sub(N)
print(R)

### 2

In [None]:
import torch

# P is 3x2 and Q is 4x2, both drawn from a normal distribution with mean 0 and std 0.01.
P = torch.normal(mean=0, std=0.01, size=(3, 2))
Q = torch.normal(mean=0, std=0.01, size=(4, 2))
print(P)
print(Q)
QT = Q.t()  # or use Q.transpose(0, 1)
print(QT)
result = torch.mm(P, QT)  # or P @ QT; the product is 3x4
print(result)

### 3

In [None]:
import torch

x = torch.tensor(1, dtype=float, requires_grad=True)
y1 = x**2
# y2 is computed with gradient tracking disabled, so autograd treats it as a constant.
with torch.no_grad():
    y2 = x**3
y3 = y1 + y2
print(y3)
y3.backward()
print(x.grad)  # only y1 contributes: d(x^2)/dx = 2 at x = 1

print(y1.grad_fn)
print(y2.grad_fn)  # None, because y2 was created under no_grad
print(y3.grad_fn)

## 4.2 动手实现logistic回归

### 4.2.1 数据生成

In [None]:
import torch as tc
import matplotlib.pyplot as plt

# Use CUDA when it is available, otherwise fall back to the CPU.
devc = tc.device("cuda" if tc.cuda.is_available() else "cpu")

# Two clusters of 50 two-dimensional points with unit standard deviation:
# one centred at (2, 2) labelled 0 and one centred at (-2, -2) labelled 1.
n_data = tc.ones(50, 2)
x1 = tc.normal(2 * n_data, 1)
y1 = tc.zeros(50)
x2 = tc.normal(-2 * n_data, 1)
y2 = tc.ones(50)

# Stack both clusters and move them to the selected device.
x = tc.cat((x1, x2), 0).float().to(devc)
y = tc.cat((y1, y2), 0).float().to(devc)

# Copy back to the CPU for plotting; points are coloured by label.
plt.scatter(
    x.detach().cpu().numpy()[:, 0],
    x.detach().cpu().numpy()[:, 1],
    c=y.detach().cpu().numpy(),
    s=100,
    lw=0,
    cmap="RdYlGn",
)
plt.show()

### 4.2.2 手动实现

In [None]:
import torch as tc
import torch.nn as nn

#### 4.2.2.1 数据迭代器

In [None]:
def data_random_iter(batch_size, features, labels):
    """Yield shuffled ``(features, labels)`` mini-batches.

    Args:
        batch_size: Maximum number of examples per batch; the last batch is
            shorter when the example count is not a multiple of it.
        features: Tensor whose first dimension indexes the examples.
        labels: Tensor with one entry per example, on the same device as
            ``features``.

    Yields:
        Tuples ``(feature_batch, label_batch)`` selected with the same indices.
    """
    num_examples = features.shape[0]
    # Build the permutation on the device that holds the data instead of a
    # module-level device variable: index_select needs the index tensor on the
    # same device as its input, and the function no longer depends on a global.
    indices = tc.randperm(num_examples, device=features.device)
    for start in range(0, num_examples, batch_size):
        # Slicing clamps at the end of the tensor, so no explicit min() is needed.
        batch_idx = indices[start : start + batch_size]
        yield features.index_select(0, batch_idx), labels.index_select(0, batch_idx)


data_iter = data_random_iter

#### 4.2.2.2 模型

In [None]:
# Weights: one row per input feature, drawn from a normal distribution with std 0.01; bias: a 1x1 zero tensor.
# Both are created on devc with gradient tracking enabled.
w = tc.normal(0, 0.01, size=(x.shape[1], 1), device=devc, requires_grad=True)
b = tc.zeros(1, 1, requires_grad=True, device=devc)


def linear_regression(X, w, b):
    """Compute ``b + X @ w`` and drop the trailing dimension when its size is 1."""
    affine = tc.addmm(b, X, w)
    return affine.squeeze(-1)


net = linear_regression

#### 4.2.2.3 损失函数

In [None]:
# Binary cross-entropy that takes raw logits (the sigmoid is applied inside the loss).
loss_BCEwithLogists = nn.BCEWithLogitsLoss()

# TODO: implement the loss fully by hand

loss = loss_BCEwithLogists

#### 4.2.2.4 优化器

In [None]:
def optim_sgd(params, lr):
    """Apply one gradient-descent step to every parameter, then clear its gradient.

    The update goes through ``.data`` so autograd does not record it.
    """
    for p in params:
        # If the loss were a sum instead of a mean, the step would also have
        # to be divided by the batch size.
        update = lr * p.grad
        p.data -= update
        p.grad.zero_()


optimizer = optim_sgd

#### 4.2.2.5 准确度函数

In [None]:
def accuracy_binary_classification(x, y, w, b, net):
    """Return the fraction of examples classified correctly.

    Args:
        x: Input features passed to ``net``.
        y: Ground-truth labels (0 or 1), one per example.
        w: Weight tensor passed to ``net``.
        b: Bias tensor passed to ``net``.
        net: Callable ``net(x, w, b)`` returning one raw logit per example
            (no sigmoid applied), matching a model trained with a
            logits-based loss such as ``BCEWithLogitsLoss``.

    Returns:
        Accuracy as a Python float in [0, 1].
    """
    with tc.no_grad():
        logits = net(x, w, b)
        # The outputs are logits, so the decision boundary is 0:
        # sigmoid(logit) > 0.5 is equivalent to logit > 0. Thresholding the
        # logit itself at 0.5 would misclassify weakly positive examples.
        predictions = (logits > 0).to(y.dtype)
        acc = (predictions == y).float().mean().item()
    return acc


evaluate_accuracy = accuracy_binary_classification

#### 4.2.2.6 训练函数

In [None]:
def train(x, y, w, b, lr, batch_size, epochs):
    """Train the parameters ``w`` and ``b`` with mini-batch gradient descent.

    Uses the module-level ``data_iter``, ``net``, ``loss`` and ``optimizer``.
    After every epoch the average per-example loss of that epoch is printed.

    Args:
        x: Training features.
        y: Training labels.
        w: Weight tensor, updated in place.
        b: Bias tensor, updated in place.
        lr: Learning rate passed to ``optimizer``.
        batch_size: Mini-batch size passed to ``data_iter``.
        epochs: Number of passes over the data.

    Returns:
        The tuple ``(w, b)``.
    """
    for epoch in range(epochs):
        loss_total, num_seen = 0.0, 0
        for x_batch, y_batch in data_iter(batch_size, x, y):
            y_hat = net(x_batch, w, b)
            los = loss(y_hat, y_batch)
            los.backward()
            optimizer([w, b], lr)
            # The loss is assumed to be a per-batch mean (as the note in
            # optim_sgd indicates), so weight it by the batch length to get a
            # true per-example average across batches of unequal size.
            loss_total += los.item() * len(x_batch)
            num_seen += len(x_batch)
        # Report the epoch average, not just the last batch; an empty dataset
        # yields NaN instead of raising NameError on an unbound loss.
        epoch_loss = loss_total / num_seen if num_seen else float("nan")
        print(f"Epoch {epoch + 1}, Loss {epoch_loss}")
    return w, b

#### 4.2.2.7 训练过程

In [None]:
lr = 0.03  # learning rate
batch_size = 32
epochs = 1024

w, b = train(x, y, w, b, lr, batch_size, epochs)
# Accuracy is measured on the training data itself.
# NOTE: this rebinds the name ``accuracy`` from the function defined earlier to a float.
accuracy = evaluate_accuracy(x, y, w, b, net)
print(f"Accuracy: {accuracy}")

### 4.2.3 torch.nn实现

In [None]:
import torch as tc
import torch.nn as nn
from torch.nn import init
import torch.optim as optim
import torch.utils.data as tata

#### 4.2.3.1 数据迭代器

In [None]:
def get_data_iter(x, y, batch_size):
    """Wrap ``x`` and ``y`` in a ``TensorDataset`` and return a shuffling ``DataLoader``."""
    paired = tata.TensorDataset(x, y)
    loader = tata.DataLoader(dataset=paired, batch_size=batch_size, shuffle=True)
    return loader

#### 4.2.3.2 模型

In [None]:
class LinearNet(nn.Module):
    """One linear layer producing a single logit per example."""

    def __init__(self, n_features):
        super().__init__()
        self.linear = nn.Linear(in_features=n_features, out_features=1)

    def forward(self, x):
        """Apply the linear layer and squeeze the trailing dimension of size 1."""
        logits = self.linear(x)
        return logits.squeeze(-1)


def get_net(n_features, device):
    """Create a ``LinearNet`` on ``device`` with re-initialised parameters.

    Weights are drawn from a normal distribution (mean 0, std 0.01) and the
    bias is set to 0.
    """
    model = LinearNet(n_features).to(device)
    layer = model.linear
    init.normal_(layer.weight, mean=0, std=0.01)
    init.constant_(layer.bias, val=0)
    return model

#### 4.2.3.3 损失函数

In [None]:
# Binary cross-entropy on raw logits (the sigmoid is applied inside the loss).
loss_fn = nn.BCEWithLogitsLoss()

#### 4.2.3.4 优化器

In [None]:
def get_optimizer(net, lr):
    """Return an SGD optimizer over all parameters of ``net`` with learning rate ``lr``."""
    trainable = net.parameters()
    return optim.SGD(trainable, lr=lr)

#### 4.2.3.5 准确度函数

In [None]:
def evaluate_accuracy(data_iter, net, device):
    """Return the accuracy of ``net`` over all batches of ``data_iter``.

    The model is put in eval mode while predicting and switched back to train
    mode afterwards. A logit above 0 counts as a prediction of class 1.
    """
    correct = 0.0
    seen = 0
    net.eval()
    with tc.no_grad():
        for X, y in data_iter:
            X = X.to(device)
            y = y.to(device)
            hits = (net(X) > 0) == y
            correct += hits.float().sum().item()
            seen += y.shape[0]
    net.train()
    return correct / seen

#### 4.2.3.6 训练函数

In [None]:
def train(net, data_iter, loss_fn, optimizer, epochs, device):
    """Run ``epochs`` passes of mini-batch training over ``data_iter``.

    Every batch is moved to ``device`` before the forward pass. After each
    epoch the loss of the last processed batch is printed.
    """
    net.train()
    for epoch in range(epochs):
        for inputs, targets in data_iter:
            inputs = inputs.to(device)
            targets = targets.to(device)
            l = loss_fn(net(inputs), targets)
            # Clear stale gradients before computing new ones.
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
        print(f"Epoch {epoch + 1}, Loss {l.item()}")

#### 4.2.3.7 训练过程

In [None]:
lr = 0.03  # learning rate
batch_size = 32
epochs = 1024

# Create the components
data_iter = get_data_iter(x, y, batch_size)
net = get_net(x.shape[1], devc)
optimizer = get_optimizer(net, lr)

# Train
train(net, data_iter, loss_fn, optimizer, epochs, devc)

# Evaluate (on the same data the model was trained on)
acc = evaluate_accuracy(data_iter, net, devc)
print(f"Accuracy: {acc}")

## 4.3 动手实现softmax回归

### 4.3.1 获取数据

In [None]:
import torch as tc
import torchvision as tcvs
import torchvision.transforms as trans

In [None]:
# Fashion-MNIST training and test splits, read from the local "Datasets" directory.
# download=False: the dataset is not fetched here.
# ToTensor converts each image to a tensor.
mnist_train = tcvs.datasets.FashionMNIST(
    root="Datasets",
    train=True,
    download=False,
    transform=trans.ToTensor(),
)
mnist_test = tcvs.datasets.FashionMNIST(
    root="Datasets",
    train=False,
    download=False,
    transform=trans.ToTensor(),
)

In [None]:
batch_size = 256
# This section imports PyTorch under the alias ``tc``; use that alias so the
# cell does not depend on an ``import torch`` from an earlier section.
# Only the training loader shuffles; the test loader keeps a fixed order.
train_iter = tc.utils.data.DataLoader(
    mnist_train, batch_size=batch_size, shuffle=True
)
test_iter = tc.utils.data.DataLoader(
    mnist_test, batch_size=batch_size, shuffle=False
)

### 4.3.2 手动实现

### 4.3.3 torch.nn实现