In [11]:
import torch
import torchvision
from torch.utils.data.dataset import Dataset
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import sys
import gzip
import os
sys.path.append("..")
import d2lzh_pytorch as d2l

In [12]:
"""
    data_folder: 文件目录
    data_name： 数据文件名
    label_name：标签数据文件名
"""

def load_data(data_folder, data_name, label_name):
    with gzip.open(os.path.join(data_folder,label_name), 'rb') as lbpath: # rb表示的是读取二进制数据
        y_train = np.frombuffer(lbpath.read(), np.uint8, offset=8)

    with gzip.open(os.path.join(data_folder,data_name), 'rb') as imgpath:
        x_train = np.frombuffer(imgpath.read(), np.uint8, offset=16).reshape(len(y_train), 28, 28)
    return (x_train, y_train)


In [13]:
class DealDataset(Dataset):
    """
        读取数据、初始化数据
    """
    def __init__(self, folder, data_name, label_name,transform=None):
        (train_set, train_labels) = load_data(folder, data_name, label_name) # 其实也可以直接使用torch.load(),读取之后的结果为torch.Tensor形式
        self.train_set = train_set
        self.train_labels = train_labels
        self.transform = transform

    def __getitem__(self, index):

        img, target = self.train_set[index], int(self.train_labels[index])
        if self.transform is not None:
            img = self.transform(img)
        return img, target

    def __len__(self):
        return len(self.train_set)


In [14]:
mnist_train = DealDataset('./fashion-mnist-master/data/fashion/', "train-images-idx3-ubyte.gz","train-labels-idx1-ubyte.gz",transform=transforms.ToTensor())
mnist_test = DealDataset('./fashion-mnist-master/data/fashion/', "t10k-images-idx3-ubyte.gz","t10k-labels-idx1-ubyte.gz",transform=transforms.ToTensor())

In [16]:
batch_size = 256#批量读取大小为256
if sys.platform.startswith('win'):
    num_workers = 0  # 0表示不用额外的进程来加速读取数据
else:
    num_workers = 4
train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=num_workers)
test_iter = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False, num_workers=num_workers)


已知每个样本输入是高和宽均为28像素的图像。模型的输入向量的长度是 28×28=78428×28=784：该向量的每个元素对应图像中每个像素。由于图像有10个类别，单层神经网络输出层的输出个数为10，因此softmax回归的权重和偏差参数分别为784×10784×10和1×101×10的矩阵。

In [20]:
num_inputs = 784
num_outputs=10
W = torch.tensor(np.random.normal(0,0.01,
        (num_inputs,num_inputs,num_outputs))
                        ,dtype=torch.float)
b=torch.zeros(num_outputs,dtype=torch.float)

In [21]:
W.requires_grad_(requires_grad=True)
b.requires_grad_(requires_grad=True) 


tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], requires_grad=True)

In [23]:
X = torch.tensor([[1, 2, 3], [4, 5, 6]])
X

tensor([[1, 2, 3],
        [4, 5, 6]])

In [27]:
X.sum(dim=0,keepdim=True)


tensor([[5, 7, 9]])

In [28]:
X.sum(dim=1,keepdim=True)

tensor([[ 6],
        [15]])

In [34]:
def softmax(O):
    O_exp=O.exp()
    partition=O_exp.sum(dim=1,keepdim=True)
    return O_exp/partition

可以看到，对于随机输入，我们将每个元素变成了非负数，且每一行和为1。

In [43]:
#test检验softmax
X = torch.rand((2, 5))
X_prob = softmax(X)
print(X_prob, X_prob.sum(dim=1))

tensor([[0.2200, 0.2828, 0.1557, 0.1903, 0.1512],
        [0.1210, 0.1318, 0.3059, 0.1416, 0.2997]]) tensor([1., 1.])


## softmax

$O=x^TW+b$

In [52]:
def net(X):
    return softmax(torch.mm(X.view(-1,num_inputs),W)+b)

## 定义损失函数

gather函数的总用相当于，得出对应类别的对应概率，比如y=0类别，在[0.1,0.3,0.6]当中对应概率为0.1

In [62]:
y_hat = torch.tensor([[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]])
y = torch.LongTensor([0, 2])
y_hat.gather(1, y.view(-1, 1))


tensor([[0.1000],
        [0.5000]])

In [53]:
def accuracy(y_hat, y):
    return (y_hat.argmax(dim=1) == y).float().mean().item()

![image.png](attachment:image.png)

In [54]:
def accuracy(y_hat, y):
    return (y_hat.argmax(dim=1) == y).float().mean().item()


In [60]:
# 本函数已保存在d2lzh_pytorch包中方便以后使用。该函数将被逐步改进：它的完整实现将在“图像增广”一节中描述
def evaluate_accuracy(data_iter,net):
    acc_sum, n = 0.0, 0
    for X, y in data_iter:
        acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
        n += y.shape[0]
    return acc_sum / n


In [63]:
print(evaluate_accuracy(test_iter,net))

RuntimeError: matrices expected, got 2D, 3D tensors at C:\w\1\s\tmp_conda_3.7_105232\conda\conda-bld\pytorch_1579085620499\work\aten\src\TH/generic/THTensorMath.cpp:131

In [65]:
num_epochs, lr = 5, 0.1#迭代周期数和学习率

# 本函数已保存在d2lzh包中方便以后使用
def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              params=None, lr=None, optimizer=None):
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            y_hat = net(X)
            l = loss(y_hat, y).sum()

            # 梯度清零
            if optimizer is not None:
                optimizer.zero_grad()
            elif params is not None and params[0].grad is not None:
                for param in params:
                    param.grad.data.zero_()

            l.backward()
            if optimizer is None:
                d2l.sgd(params, lr, batch_size)
            else:
                optimizer.step()  # “softmax回归的简洁实现”一节将用到


            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))

train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs, batch_size, [W, b], lr)


NameError: name 'cross_entropy' is not defined

In [66]:
X, y = iter(test_iter).next()

true_labels = d2l.get_fashion_mnist_labels(y.numpy())
pred_labels = d2l.get_fashion_mnist_labels(net(X).argmax(dim=1).numpy())
titles = [true + '\n' + pred for true, pred in zip(true_labels, pred_labels)]

d2l.show_fashion_mnist(X[0:9], titles[0:9])


RuntimeError: matrices expected, got 2D, 3D tensors at C:\w\1\s\tmp_conda_3.7_105232\conda\conda-bld\pytorch_1579085620499\work\aten\src\TH/generic/THTensorMath.cpp:131