In [2]:
import torch
import torchvision
from torch.utils import data
from torchvision import transforms
from d2l import torch as d2l
# Presumably switches notebook figure output to SVG (inferred from the helper's
# name; its implementation lives in d2l) — TODO confirm.
d2l.use_svg_display()

In [None]:
# Load Fashion-MNIST: build the training split first, then the test split.
# Both share the same ToTensor transform and the same ../data root, and both
# are created with download=True.
trans = transforms.ToTensor()
mnist_train, mnist_test = (
    torchvision.datasets.FashionMNIST(
        root="../data", train=is_train, transform=trans, download=True
    )
    for is_train in (True, False)
)

In [None]:
# Number of examples in the training split.
len(mnist_train)

In [None]:
# Convert numeric labels into their text labels
def get_fashion_mnist_labels(labels):
    """Return the Fashion-MNIST text label for every numeric label.

    Args:
        labels: iterable of values accepted by ``int()`` (plain ints, 0-d
            tensors, ...). Each one is used as an index into the ten
            class names.

    Returns:
        A list of class-name strings, in the same order as ``labels``.
    """
    class_names = ('t-shirt', 'trouser', 'pullover', 'dress', 'coat',
                   'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot')
    names = []
    for label in labels:
        names.append(class_names[int(label)])
    return names

def show_images(imgs, num_rows, num_cols, titles=None, scale=1.5):
    """Draw a grid of images and return the flattened array of axes.

    Args:
        imgs: iterable of images; tensors are converted to NumPy arrays
            before drawing, anything else is handed to ``imshow`` as is.
        num_rows: number of rows in the grid.
        num_cols: number of columns in the grid.
        titles: optional sequence of titles, indexed by image position.
        scale: size of one grid cell; the figure size is
            ``(num_cols * scale, num_rows * scale)``.

    Returns:
        The 1-D array of axes, in row-major order.
    """
    figsize = (num_cols * scale, num_rows * scale)
    # squeeze=False always yields a 2-D array of axes. Without it a 1x1 grid
    # comes back as a bare Axes object, which has no .flatten().
    _, axes = d2l.plt.subplots(num_rows, num_cols, figsize=figsize,
                               squeeze=False)
    axes = axes.flatten()
    for i, (ax, img) in enumerate(zip(axes, imgs)):
        if torch.is_tensor(img):
            # .numpy() raises for tensors that require grad or live on a
            # non-CPU device; detach/cpu are no-ops for plain CPU tensors.
            ax.imshow(img.detach().cpu().numpy())
        else:
            ax.imshow(img)
        # Hide the tick marks and labels on both axes.
        ax.axes.get_xaxis().set_visible(False)
        ax.axes.get_yaxis().set_visible(False)
        if titles:
            ax.set_title(titles[i])
    return axes


In [None]:
# Take one batch of 18 training images and draw them in a 2 x 9 grid,
# titling each image with its text label.
X, y = next(iter(
    data.DataLoader(mnist_train, batch_size=18)
))
show_images(
    X.reshape(18, 28, 28),
    2,
    9,
    titles=get_fashion_mnist_labels(y),
);


In [None]:
# Measure how long one full pass over the training data takes.
train_iter = data.DataLoader(mnist_train, batch_size=256, shuffle=True, num_workers=4)

timer = d2l.Timer()
# Keep the loop body empty: printing every batch shape here would add console
# I/O to the measured time and flood the cell output with one line per batch.
for x, y in train_iter:
    pass
f'{timer.stop():.2f} sec'


In [None]:
# Bundle all of the data-loading steps into one helper
def load_data_fashion_mnist(batch_size, resize=None, num_workers=4, root="../data"): #@save
    """Download Fashion-MNIST if needed and return train/test data loaders.

    Args:
        batch_size: number of examples per batch for both loaders.
        resize: optional size passed to ``transforms.Resize``; when given,
            the resize runs before ``ToTensor``.
        num_workers: worker processes used by each ``DataLoader``. Defaults
            to 4, the value that used to be hard-coded; pass 0 to load in
            the main process.
        root: directory the dataset is stored in. Defaults to ``"../data"``,
            the value that used to be hard-coded.

    Returns:
        A ``(train_loader, test_loader)`` tuple. The training loader
        shuffles its data; the test loader does not.
    """
    steps = [transforms.ToTensor()]
    if resize:
        # Resize has to come first so that it runs before ToTensor.
        steps.insert(0, transforms.Resize(resize))
    trans = transforms.Compose(steps)
    mnist_train = torchvision.datasets.FashionMNIST(
        root=root, train=True, transform=trans, download=True)
    mnist_test = torchvision.datasets.FashionMNIST(
        root=root, train=False, transform=trans, download=True)
    return (data.DataLoader(mnist_train, batch_size, shuffle=True,
                            num_workers=num_workers),
            data.DataLoader(mnist_test, batch_size, shuffle=False,
                            num_workers=num_workers))


In [None]:
#  从零开始实现softmax回归
import torch
import torchvision
from IPython import display
from torch.utils import data
from d2l import torch as d2l
from torchvision import transforms


# Load the images and set the batch size
batch_size = 256
train_iter, test_iter = load_data_fashion_mnist(batch_size)
# Disabled sanity check: prints the shape and dtype of every batch.
# for x,y in train_iter:
#     print(x.shape, x.dtype, y.shape, y.dtype)

In [None]:
# Set up the model parameters

num_inputs = 784   # each 28*28 image is flattened into a vector of this length
num_outputs = 10   # the network outputs one value per class (10 classes)

# Weights are sampled from a normal distribution (mean 0, std 0.01) and the
# biases start at zero; both are marked as requiring gradients.
w = torch.normal(0, 0.01, size=(num_inputs, num_outputs)).requires_grad_()
b = torch.zeros(num_outputs).requires_grad_()





In [None]:
# Sum a 2 x 3 tensor along dimension 0 and along dimension 1, keeping the
# reduced dimension in the result each time.
x = torch.tensor([[1.0, 2.0, 3.0],
                  [4.0, 5.0, 6.0]])
x.sum(dim=0, keepdim=True), x.sum(dim=1, keepdim=True)

## softmax 计算公式

$$\mathrm{softmax}(x)_{ij} = \frac{\exp(x_{ij})}{\sum_{k} \exp(x_{ik})}$$

In [None]:
# Softmax implementation
def softmax(x):
    """Apply softmax along dimension 1 of ``x``.

    Every entry is exponentiated and divided by the sum of exponentials in
    its row, so each row of the result sums to 1.

    Args:
        x: tensor with at least two dimensions; normalisation runs over
            dimension 1.

    Returns:
        A tensor of the same shape as ``x`` holding the softmax values.
    """
    # Subtract the per-row maximum before exponentiating. Softmax is
    # invariant to a constant shift per row, so the result is mathematically
    # unchanged, but exp() can no longer overflow to inf (inf / inf = NaN)
    # for large inputs. The shift is detached because it acts as a constant.
    row_max = x.max(dim=1, keepdim=True).values.detach()
    x_exp = torch.exp(x - row_max)
    partition = x_exp.sum(1, keepdim=True)
    return x_exp / partition   # broadcasting divides each row by its own sum

In [None]:
# Try softmax on a random 2 x 5 input and display, in order: the input,
# exp(x), the row sums of exp(x), the softmax output, and its row sums.
x = torch.normal(0, 1, (2, 5))

x_prob = softmax(x)
(
    x,
    torch.exp(x),
    torch.exp(x).sum(1, keepdim=True),
    x_prob,
    x_prob.sum(1),
)


In [None]:
def net(x):
    """Forward pass of the softmax-regression model.

    ``x`` is reshaped into rows of length ``w.shape[0]``, multiplied by the
    weight matrix ``w``, offset by the bias ``b``, and passed through
    ``softmax``.
    """
    flat = x.reshape((-1, w.shape[0]))
    logits = torch.matmul(flat, w) + b
    return softmax(logits)

In [None]:
# Background note
# y holds the correct labels of two samples, namely 0 and 2: for the first
# sample the first class is the correct prediction, and for the second sample
# the third class is.
# y_hat holds the predicted probabilities of the three classes for each sample.
# Using y as an index into y_hat picks the probability of the first class for
# the first sample and of the third class for the second sample.
y = torch.tensor([0, 2])
y_hat = torch.tensor([[0.1, 0.3, 0.6],
                      [0.3, 0.2, 0.5]])
y_hat[[0, 1], y]  # for samples 0 and 1 of y_hat, the entry at that sample's label in y

In [None]:
# 交叉熵损失
def cross_entropy(y_hat, y):
    