In [None]:
# AlexNet（２０１２），Alex Krizhevsky 
# 贡献：首个深度神经网络（８层）．它是浅层神经网络和深度神经网络的分界线。
#      首次证明了学习到的特征可以超越手工设计的特征．

In [None]:
# 早期图像分类任务的主要流程是：
# 1. 获取图像数据集
# ２．使用已有的　特征提取函数　生成图像的特征
# 3. 使用机器学习模型对图像分类

# 其中第三步是早期ＭＬ里面仅限的一步．当前ＣＶ研究者则认为，
# ＣＶ中真正重要的是数据和特征．或者说，较干净的数据集和较有效的特征
# 比机器学习模型的选择对分类的结果影响更大．

![AlexNet](./5.6_alexnet.png)

In [None]:
### AlNet网络结构
# 与LeNet的不同之处：
# 1.使用了较大的卷积窗口11x11（这是因为两者的训练数据集图像尺寸不一样），后边使用较小尺寸的kernel
# 2. 将sigmoid替换为简单的relu．一方面，sigmoid在input值较大/过小时，梯度更新值常常接近零，这不利于参数的训练．
#   其二，sigmoid涉及幂运算，计算量较大．
# 3.在fc-fc-fc的前两层使用了dropout防止过拟合
# 4.使用了丰富的数据增广方式来增加数据量，防止过拟合．

In [1]:
import time
import torch
from torch import nn, optim
import torchvision
import sys
sys.path.append('../d2lzh/')
import d2lzh_pytorch as d2l 

class AlexNet(nn.Module):
    def __init__(self):
        super(AlexNet, self).__init__()
        self.conv = nn.Sequential(
            # layer 1
            nn.Conv2d(1, 96, 11, 4), # in, out, k_s, stride, pad
            nn.ReLU(),
            nn.MaxPool2d(3, 2),  # kernel_size, stride
            
            # layer 2
            nn.Conv2d(96, 256, 5, 1, 2),
            nn.ReLU(),
            nn.MaxPool2d(3, 2),
            
            # layer 3            
            nn.Conv2d(256, 384, 3, 1, 1),
            nn.ReLU(),
            
            # layer 4
            nn.Conv2d(384, 384, 3, 1, 1),
            nn.ReLU(),
            
            # layer 5
            nn.Conv2d(384, 256, 3, 1, 1),
            nn.ReLU(),
            nn.MaxPool2d(3, 2),
        )
        
        self.fc = nn.Sequential(
            
            # layer 6
            nn.Linear(256*5*5, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            
            # layer 7
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            
            # layer 8
            nn.Linear(4096, 10),
        )
        
    def forward(self, img):
        feature = self.conv(img)
#         print(feature.shape)      # 128x256x5x5, 所以layer6输入是256x5x5,不用考虑batchsize
#         d2l.FlattenLayer(feature)
        output = self.fc(feature.view(img.shape[0], -1)) # batch
#         print(output.shape)
        return output
        

In [2]:
net = AlexNet()
print(net)

AlexNet(
  (conv): Sequential(
    (0): Conv2d(1, 96, kernel_size=(11, 11), stride=(4, 4))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU()
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Sequential(
    (0): Linear(in_features=6400, out_features=4096, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=4096, out_features=4096, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.5, inplace=False)
    (

In [None]:
### 读取数据（此处加入了对数据进行增广的操作－resize）

In [3]:
def load_data_fashion_mnist(batch_size, resize=None, root='../Datasets/FashionMINST/'):
    trans = []
    if resize:
        trans.append(torchvision.transforms.Resize(resize)) # 变换１
    trans.append(torchvision.transforms.ToTensor())         # 变换２
    
    transform = torchvision.transforms.Compose(trans) # 组合１/2等多种变换
    mnist_train = torchvision.datasets.FashionMNIST(root=root, train=True, download=False, transform=transform)
    mnist_test  = torchvision.datasets.FashionMNIST(root=root, train=True, download=False, transform=transform)
    
    train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=4)
    test_iter  = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False, num_workers=4)
    
    return train_iter, test_iter


In [None]:
### 开始训练

In [4]:
# 做数据增广（把input尺寸由28x28放大到224x224）
batch_size = 128
train_iter, test_iter = load_data_fashion_mnist(batch_size, resize=224)

lr, num_epochs = 0.001, 10
optimizer = torch.optim.Adam(net.parameters(), lr=lr)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
d2l.train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs)

training on  cuda
epoch 1, loss 0.6360, train_acc 0.760, test acc 0.847, time 62.4 sec
epoch 2, loss 0.3368, train_acc 0.876, test acc 0.898, time 62.7 sec
epoch 3, loss 0.2833, train_acc 0.895, test acc 0.907, time 63.0 sec
epoch 4, loss 0.2530, train_acc 0.907, test acc 0.921, time 63.5 sec
epoch 5, loss 0.2278, train_acc 0.915, test acc 0.920, time 63.4 sec
epoch 6, loss 0.2151, train_acc 0.920, test acc 0.932, time 63.4 sec
epoch 7, loss 0.1990, train_acc 0.926, test acc 0.933, time 63.3 sec
epoch 8, loss 0.1869, train_acc 0.930, test acc 0.938, time 63.4 sec
epoch 9, loss 0.1749, train_acc 0.935, test acc 0.942, time 63.6 sec
epoch 10, loss 0.1616, train_acc 0.939, test acc 0.953, time 63.2 sec


In [None]:
# 不做数据增广(失败了，因为输入尺寸太小，而且中间还做了三次maxpooling)
# batch_size = 128
# train_iter, test_iter = load_data_fashion_mnist(batch_size)

# lr, num_epochs = 0.001, 10
# optimizer = torch.optim.Adam(net.parameters(), lr=lr)
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# d2l.train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs)