# 3.6 softmax回归的从零开始实现

In [1]:
import torch
import torchvision
import numpy as np
import sys
sys.path.append("..") # Add the parent directory to the path so d2lzh_pytorch can be imported
import d2lzh_pytorch as d2l

# Show the installed library versions.
print(torch.__version__)
print(torchvision.__version__)

1.9.1+cpu
0.10.1+cpu


## 3.6.1 获取和读取数据

In [2]:
# Mini-batch size handed to the data-loading helper (and later to the training loop).
batch_size = 256
# The helper returns a (train, test) pair; both are iterated as (X, y) batches below.
# NOTE(review): presumably these are torch DataLoaders — confirm in d2lzh_pytorch.
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


## 3.6.2 初始化模型参数

In [3]:
# Model dimensions: 784 input features per sample and 10 outputs.
num_inputs, num_outputs = 784, 10

# Weights are drawn with NumPy from a normal distribution (mean 0, std 0.01) and
# converted to float32; the bias vector starts at zero.
W = torch.from_numpy(
    np.random.normal(loc=0, scale=0.01, size=(num_inputs, num_outputs))
).float()
b = torch.zeros(num_outputs, dtype=torch.float)

In [4]:
# Display the weight matrix W.
W

tensor([[ 0.0025,  0.0134,  0.0139,  ...,  0.0054, -0.0002,  0.0108],
        [ 0.0051,  0.0062,  0.0020,  ...,  0.0170,  0.0046,  0.0122],
        [-0.0061, -0.0210, -0.0106,  ..., -0.0208,  0.0212, -0.0160],
        ...,
        [ 0.0138, -0.0051,  0.0013,  ...,  0.0004,  0.0044, -0.0056],
        [ 0.0032,  0.0188,  0.0101,  ...,  0.0100,  0.0136,  0.0115],
        [-0.0016, -0.0039,  0.0036,  ...,  0.0109, -0.0076, -0.0087]])

In [6]:
# Display the shape of W.
W.shape

torch.Size([784, 10])

In [5]:
# Display the bias vector b.
b

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [7]:
# Turn on gradient tracking in place for both parameters so backward() fills their .grad.
W.requires_grad_(requires_grad=True)
b.requires_grad_(requires_grad=True) 

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], requires_grad=True)

In [8]:
# keepdim=True keeps the summed-over dimension in the result, with size 1
X = torch.tensor([[1, 2, 3], [4, 5, 6]])
print(X.sum(dim=0, keepdim=True))
print(X.sum(dim=1, keepdim=True))


tensor([[5, 7, 9]])
tensor([[ 6],
        [15]])


## 3.6.3 实现softmax运算

In [9]:
def softmax(X):
    """Apply a numerically stable softmax along dim 1 of ``X``.

    Args:
        X: tensor with at least two dimensions; every slice along dim 1
            (each row of a matrix) is normalized independently.

    Returns:
        Tensor of the same shape as ``X`` whose entries are non-negative and
        sum to 1 along dim 1.
    """
    if X.size(1) == 0:
        # Nothing to normalize; max() cannot reduce over an empty dimension.
        return X.exp()
    # Subtracting the per-row maximum leaves the result mathematically unchanged
    # but keeps exp() from overflowing to inf, which would turn into nan after
    # the division below.
    shifted = X - X.max(dim=1, keepdim=True).values
    X_exp = shifted.exp()
    partition = X_exp.sum(dim=1, keepdim=True)
    return X_exp / partition  # broadcasting divides each row by its own sum

In [10]:
# Sanity check on random input: the second printed tensor holds the row sums,
# which should all be 1.
X = torch.rand((2, 5))
X_prob = softmax(X)
print(X_prob, X_prob.sum(dim=1))

tensor([[0.1550, 0.2261, 0.1406, 0.2190, 0.2593],
        [0.2574, 0.1541, 0.2498, 0.1172, 0.2215]]) tensor([1.0000, 1.0000])


In [12]:
# A random NumPy array of the same shape, used to compare the ndarray and tensor APIs.
a = np.random.rand(2,5)
a

array([[0.18134264, 0.93282624, 0.59987693, 0.37352347, 0.65445965],
       [0.17734857, 0.16597106, 0.9781978 , 0.36188388, 0.83188441]])

In [13]:
# numpy.ndarray has no .exp() method (np.exp(a) is the NumPy spelling). The failure
# is the point of this cell, so catch it and print the message instead of letting
# the exception interrupt "Restart & Run All".
try:
    a.exp()
except AttributeError as err:
    print(f"AttributeError: {err}")

AttributeError: 'numpy.ndarray' object has no attribute 'exp'

In [14]:
# The same .exp() call is available as a method on a torch tensor.
torch.rand((2, 5)).exp()

tensor([[1.7844, 1.2579, 2.3124, 1.5675, 1.0130],
        [1.0225, 2.2718, 1.0110, 2.0718, 2.0584]])

**与 numpy.ndarray 相比，torch.Tensor 增加了很多方法：例如 `exp()` 可以直接作为张量的方法调用，而 ndarray 没有该方法，需要写成 `np.exp(a)`。**

## 3.6.4 定义模型

In [49]:
def net(X):
    """Softmax-regression forward pass.

    Flattens ``X`` into rows of ``num_inputs`` features, applies the affine map
    given by the notebook-level parameters ``W`` and ``b``, and normalizes the
    result with ``softmax``.
    """
    flat = X.view((-1, num_inputs))
    logits = torch.mm(flat, W) + b
    return softmax(logits)

In [16]:
# Two integer (int64) class labels, then viewed as a column of shape (2, 1).
y = torch.tensor([0, 2], dtype=torch.long)
y.view(-1, 1)

tensor([[0],
        [2]])

## 3.6.5 定义损失函数

In [17]:
# Build a 3x3 integer matrix holding 3..11 in row-major order.
tensor_0 = torch.arange(3, 12).reshape(3, 3)
print(tensor_0)

tensor([[ 3,  4,  5],
        [ 6,  7,  8],
        [ 9, 10, 11]])


In [18]:
# gather along dim 0: out[i][j] = tensor_0[index[i][j]][j], i.e. the index supplies
# the row while the column comes from the output position.
index = torch.tensor([[2, 1, 0]])
tensor_1 = tensor_0.gather(0, index)
print(tensor_1)

tensor([[9, 7, 5]])


In [19]:
# Same dim-0 gather with a two-row index: the output has the shape of the index,
# and each entry is tensor_0[index[i][j]][j].
index = torch.tensor([[2, 1, 0],
                     [0,1,2]])
tensor_0.gather(0, index)

tensor([[ 9,  7,  5],
        [ 3,  7, 11]])

In [20]:
# gather along dim 1: out[i][j] = tensor_0[i][index[i][j]], i.e. the index supplies
# the column while the row comes from the output position.
index = torch.tensor([[2, 1, 0]])
tensor_1 = tensor_0.gather(1, index)
print(tensor_1)

tensor([[5, 4, 3]])


In [24]:
# Toy predictions: two samples over three classes, plus their integer labels.
y_hat = torch.tensor([
    [0.1, 0.3, 0.6],
    [0.3, 0.2, 0.5],
])
y = torch.tensor([0, 2], dtype=torch.long)
# For every row, pick the predicted value at that row's label index.
torch.gather(y_hat, 1, y.view(-1, 1))

tensor([[0.1000],
        [0.5000]])

In [29]:
# Display y_hat again for reference.
y_hat

tensor([[0.1000, 0.3000, 0.6000],
        [0.3000, 0.2000, 0.5000]])

In [38]:
# Replace the labels with [1, 2] and show them as a column of shape (2, 1).
y = torch.tensor([1, 2], dtype=torch.long)
y.view(-1, 1)

tensor([[1],
        [2]])

In [39]:
# For each row of y_hat, pick the entry at that row's label in y.
c = y_hat.gather(1, y.view(-1, 1))
c

tensor([[0.3000],
        [0.5000]])

In [34]:
# Negative natural log of the gathered entries.
- torch.log(c)

tensor([[2.3026],
        [0.6931]])

### torch.gather()用法

从上面的例子可以看出，`input.gather(dim, index)` 中的 `dim` 指定沿哪一维按 `index` 取值：该维的下标由 `index` 中的元素给出，其余维度的下标则与输出位置保持一致（固定不变）。


In [41]:
# Display y_hat again for reference.
y_hat

tensor([[0.1000, 0.3000, 0.6000],
        [0.3000, 0.2000, 0.5000]])

In [43]:
# Same dim-1 gather with the index written out literally: picks y_hat[0][1] and y_hat[1][2].
y_hat.gather(1, torch.tensor([[1],[2]])) 

tensor([[0.3000],
        [0.5000]])

In [None]:
"""
dim = 1 时，行下标由输出位置决定，依次为 0、1（固定自增）；
index 即 torch.tensor([[1],[2]])，给出每一行要取的列下标 1、2；
组合得到的坐标是 [0,1] 和 [1,2]，因此取出来的结果是 [[0.3],[0.5]]
"""

In [44]:
def cross_entropy(y_hat, y):
    """Per-sample cross-entropy between predictions and integer labels.

    Args:
        y_hat: 2-D tensor; row ``i`` holds the predicted values for sample ``i``.
        y: integer tensor of label indices, one per row of ``y_hat``.

    Returns:
        Column tensor (one row per label) containing the negative log of the
        entry of ``y_hat`` selected by each label.
    """
    labels_col = y.view(-1, 1)
    picked = torch.gather(y_hat, 1, labels_col)
    return picked.log().neg()

## 3.6.6 计算分类准确率

In [45]:
def accuracy(y_hat, y):
    """Return, as a Python float, the fraction of rows of ``y_hat`` whose
    arg-max column equals the corresponding label in ``y``."""
    predicted = y_hat.argmax(dim=1)
    hits = predicted.eq(y)
    return hits.float().mean().item()

In [46]:
# Accuracy of the toy predictions y_hat against the current labels y.
print(accuracy(y_hat, y))

0.5


In [47]:
# This function is also saved in the d2lzh_pytorch package for later use. It will be
# refined step by step; its complete implementation is described in the
# "image augmentation" section.
def evaluate_accuracy(data_iter, net):
    """Return the fraction of samples in ``data_iter`` that ``net`` classifies correctly.

    Args:
        data_iter: iterable yielding ``(X, y)`` batches, where ``y`` holds one
            integer label per sample.
        net: callable mapping a batch ``X`` to per-class scores; the prediction
            is the arg-max along dim 1.

    Returns:
        Number of correct predictions divided by the number of samples seen.

    Raises:
        ZeroDivisionError: if ``data_iter`` yields no samples.
    """
    acc_sum, n = 0.0, 0
    # Evaluation never calls backward(), so skip autograd bookkeeping: without
    # this, every forward pass would record a graph for parameters that
    # require grad.
    with torch.no_grad():
        for X, y in data_iter:
            acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
            n += y.shape[0]
    return acc_sum / n

In [50]:
# Test-set accuracy of the model before any training has been run.
print(evaluate_accuracy(test_iter, net))

0.0691


## 3.6.7 训练模型

In [51]:
# Number of training epochs and the learning rate passed to train_ch3 below.
num_epochs, lr = 5, 0.1

# This function is also saved in the d2lzh_pytorch package for later use
def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              params=None, lr=None, optimizer=None):
    """Train ``net`` on mini-batches and print metrics after every epoch.

    Args:
        net: callable mapping a batch ``X`` to predictions ``y_hat``.
        train_iter: iterable yielding ``(X, y)`` training batches.
        test_iter: iterable handed to ``evaluate_accuracy`` after each epoch.
        loss: callable ``loss(y_hat, y)``; its result is summed over the batch.
        num_epochs: number of passes over ``train_iter``.
        batch_size: forwarded to ``d2l.sgd`` when no optimizer is given.
        params: parameter tensors updated by ``d2l.sgd``; used only when
            ``optimizer`` is None.
        lr: learning rate forwarded to ``d2l.sgd``.
        optimizer: optional object exposing ``zero_grad()`` and ``step()``;
            when given, it replaces the manual ``d2l.sgd`` update.

    Returns:
        None. One line per epoch is printed with the mean training loss,
        training accuracy and test accuracy.
    """
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            y_hat = net(X)
            l = loss(y_hat, y).sum()

            # Reset gradients so they do not accumulate across batches
            if optimizer is not None:
                optimizer.zero_grad()
            elif params is not None:
                for param in params:
                    # Check every parameter on its own: .grad is None until a
                    # backward pass has reached that tensor, and this need not
                    # coincide with the state of params[0].
                    if param.grad is not None:
                        param.grad.data.zero_()

            l.backward()
            if optimizer is None:
                d2l.sgd(params, lr, batch_size)
            else:
                optimizer.step()  # used in the "concise implementation of softmax regression" section

            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))

# No optimizer is passed, so train_ch3 updates [W, b] through d2l.sgd with learning rate lr.
train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs, batch_size, [W, b], lr)

epoch 1, loss 0.7855, train acc 0.750, test acc 0.794
epoch 2, loss 0.5708, train acc 0.813, test acc 0.807
epoch 3, loss 0.5249, train acc 0.826, test acc 0.817
epoch 4, loss 0.5017, train acc 0.832, test acc 0.821
epoch 5, loss 0.4853, train acc 0.837, test acc 0.829


## 3.6.8 预测

In [1]:
# Take one batch from the test set. The built-in next() is used because the
# Python 3 iterator protocol defines __next__, not a .next() method.
X, y = next(iter(test_iter))

# Map true and predicted class indices to text labels via the d2l helper.
true_labels = d2l.get_fashion_mnist_labels(y.numpy())
pred_labels = d2l.get_fashion_mnist_labels(net(X).argmax(dim=1).numpy())
# One title per image: true label on the first line, prediction on the second.
titles = [true + '\n' + pred for true, pred in zip(true_labels, pred_labels)]

# Show the first nine images together with their titles.
d2l.show_fashion_mnist(X[0:9], titles[0:9])

NameError: name 'test_iter' is not defined

**上面的 NameError 是因为内核挂掉重启后（执行计数回到了 In [1]）之前定义的变量全部丢失，`test_iter` 需要从头重新运行前面的单元格后才可用。内核老是动不动就挂掉，具体原因还不清楚。**