In [11]:
import torch
import numpy as np
import sys
import torchvision.transforms as transforms
sys.path.append("..")
import d2lzh_pytorch as d2l
import torchvision 
from torch.utils.data.dataset import Dataset
import os
import d2lzh_pytorch as d2l
import torch
import sys
import gzip
import numpy as np

In [12]:
def load_data(data_folder, data_name, label_name):
    """Load a gzip-compressed IDX image/label file pair (MNIST-style layout).

    The image dimensions and sample counts are read from the IDX headers
    instead of being assumed, so files that are not 28x28 load correctly and
    malformed or mismatched files fail with a clear error.

    Args:
        data_folder: Directory that contains both files.
        data_name: File name of the gzip-compressed image file.
        label_name: File name of the gzip-compressed label file.

    Returns:
        A tuple ``(images, labels)`` where ``images`` is a uint8 array of
        shape ``(count, rows, cols)`` and ``labels`` is a uint8 array of shape
        ``(count,)``. Both are read-only views over the decompressed bytes.

    Raises:
        ValueError: If a header is truncated, a magic number is wrong, or the
            header sizes disagree with the payload or with each other.
    """
    label_magic, image_magic = 2049, 2051  # 0x00000801 / 0x00000803
    label_header, image_header = 8, 16     # header sizes in bytes

    # 'rb' = read the decompressed stream as raw bytes.
    with gzip.open(os.path.join(data_folder, label_name), 'rb') as lbpath:
        label_bytes = lbpath.read()
    with gzip.open(os.path.join(data_folder, data_name), 'rb') as imgpath:
        image_bytes = imgpath.read()

    if len(label_bytes) < label_header:
        raise ValueError("label file %r is too short to hold an IDX header" % label_name)
    # Header fields are big-endian 32-bit integers.
    magic, n_labels = (int(v) for v in np.frombuffer(label_bytes, dtype='>u4', count=2))
    if magic != label_magic:
        raise ValueError("label file %r has magic number %d, expected %d"
                         % (label_name, magic, label_magic))
    y_train = np.frombuffer(label_bytes, np.uint8, offset=label_header)
    if y_train.size != n_labels:
        raise ValueError("label file %r declares %d labels but contains %d"
                         % (label_name, n_labels, y_train.size))

    if len(image_bytes) < image_header:
        raise ValueError("image file %r is too short to hold an IDX header" % data_name)
    magic, n_images, rows, cols = (
        int(v) for v in np.frombuffer(image_bytes, dtype='>u4', count=4)
    )
    if magic != image_magic:
        raise ValueError("image file %r has magic number %d, expected %d"
                         % (data_name, magic, image_magic))
    if n_images != n_labels:
        raise ValueError("image file %r holds %d images but label file %r holds %d labels"
                         % (data_name, n_images, label_name, n_labels))
    pixels = np.frombuffer(image_bytes, np.uint8, offset=image_header)
    if pixels.size != n_images * rows * cols:
        raise ValueError("image file %r declares %d x %d x %d pixels but contains %d"
                         % (data_name, n_images, rows, cols, pixels.size))

    x_train = pixels.reshape(n_images, rows, cols)
    return (x_train, y_train)


In [13]:
class DealDataset(Dataset):
    """Map-style dataset backed by a gzip-compressed image/label file pair.

    The arrays returned by ``load_data`` are kept in memory on the instance;
    ``transform`` (when given) is applied to an image each time it is fetched.
    """

    def __init__(self, folder, data_name, label_name,transform=None):
        """Read the image and label files under ``folder`` and store the transform."""
        # load_data returns an (images, labels) pair; keep both unchanged.
        self.train_set, self.train_labels = load_data(folder, data_name, label_name)
        self.transform = transform

    def __getitem__(self, index):
        """Return ``(image, label)`` for ``index``; the label is a Python int."""
        sample = self.train_set[index]
        label = int(self.train_labels[index])
        if self.transform is None:
            return sample, label
        return self.transform(sample), label

    def __len__(self):
        """Return the number of samples, taken from the image array."""
        return len(self.train_set)


In [26]:
# Build the training and test datasets from the gzip-compressed files in the
# Fashion-MNIST data folder; ToTensor is applied to each image when fetched.
mnist_train = DealDataset(
    './fashion-mnist-master/data/fashion/',
    "train-images-idx3-ubyte.gz",
    "train-labels-idx1-ubyte.gz",
    transform=transforms.ToTensor(),
)
mnist_test = DealDataset(
    './fashion-mnist-master/data/fashion/',
    "t10k-images-idx3-ubyte.gz",
    "t10k-labels-idx1-ubyte.gz",
    transform=transforms.ToTensor(),
)

In [27]:
batch_size = 256

# On Windows use 0 workers (no extra processes are used to speed up data
# loading); on every other platform use 4 worker processes.
num_workers = 0 if sys.platform.startswith('win') else 4

# Only the training loader shuffles; the test loader keeps file order.
train_iter = torch.utils.data.DataLoader(
    mnist_train,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers,
)
test_iter = torch.utils.data.DataLoader(
    mnist_test,
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
)


In [28]:
# Layer sizes: 784 inputs, 10 outputs, and 256 hidden units (the hidden size
# is a hyperparameter chosen for this experiment).
num_inputs, num_outputs, num_hiddens = 784, 10, 256

# Weights are drawn from a normal distribution with mean 0 and standard
# deviation 0.01; biases start at zero. Everything is stored as float32.
W1 = torch.tensor(
    np.random.normal(loc=0, scale=0.01, size=(num_inputs, num_hiddens)),
    dtype=torch.float,
)
b1 = torch.zeros(num_hiddens, dtype=torch.float)
W2 = torch.tensor(
    np.random.normal(loc=0, scale=0.01, size=(num_hiddens, num_outputs)),
    dtype=torch.float,
)
b2 = torch.zeros(num_outputs, dtype=torch.float)

# Collect the parameters and switch on gradient tracking for each of them.
params = [W1, b1, W2, b2]
for param in params:
    param.requires_grad_()


## 定义激活函数

In [29]:
def relu(X):
    """Return the element-wise maximum of ``X`` and a scalar zero tensor."""
    zero = torch.tensor(0.0)
    return torch.max(X, zero)


## 定义模型

同softmax回归一样，我们通过view函数将每张原始图像改成长度为num_inputs的向量。

In [30]:
def net(X):
    """Forward pass of the one-hidden-layer MLP.

    ``X`` is viewed as rows of length ``num_inputs``, passed through the
    hidden layer (``W1``, ``b1``) with ``relu``, then through the output
    layer (``W2``, ``b2``). The output layer's result is returned as-is.
    """
    flat = X.view(-1, num_inputs)
    hidden = relu(flat @ W1 + b1)
    return hidden @ W2 + b2


## 定义损失函数

In [31]:
# Cross-entropy loss; 'mean' (the library default, spelled out here) averages
# the per-sample losses over the batch.
loss = torch.nn.CrossEntropyLoss(reduction='mean')


## 训练模型

![image.png](attachment:image.png)

In [32]:
# Training hyperparameters.
num_epochs = 5
# NOTE(review): lr = 100.0 is unusually large. Presumably d2l.train_ch3's
# update step divides gradients by batch_size while the loss is already a
# batch mean -- confirm against d2lzh_pytorch before changing it.
lr = 100.0

d2l.train_ch3(
    net, train_iter, test_iter, loss,
    num_epochs, batch_size, params, lr,
)


epoch 1, loss 0.0030, train acc 0.717, test acc 0.713
epoch 2, loss 0.0019, train acc 0.822, test acc 0.828
epoch 3, loss 0.0017, train acc 0.845, test acc 0.846
epoch 4, loss 0.0015, train acc 0.856, test acc 0.850
epoch 5, loss 0.0015, train acc 0.864, test acc 0.860
