In [None]:
# AlexNet（2012），Alex Krizhevsky
# 贡献：首个深度神经网络（8层）。它是浅层神经网络和深度神经网络的分界线。
#       首次证明了学习到的特征可以超越手工设计的特征。

In [None]:
# 早期图像分类任务的主要流程是：
# 1. 获取图像数据集
# 2. 使用已有的特征提取函数生成图像的特征
# 3. 使用机器学习模型对图像分类

# 早期人们认为的“机器学习”部分仅限于其中的第三步。而当前CV研究者则认为，
# CV中真正重要的是数据和特征。或者说，较干净的数据集和较有效的特征
# 比机器学习模型的选择对分类的结果影响更大。

![AlexNet](./5.6_alexnet.png)

In [None]:
### AlexNet网络结构
# 与LeNet的不同之处：
# 1. 第一层使用了较大的卷积窗口11x11（这是因为两者的训练数据集图像尺寸不一样），后边使用较小尺寸的kernel
# 2. 将sigmoid替换为更简单的relu。其一，sigmoid在输入值过大或过小时，梯度常常接近零，这不利于参数的训练。
#    其二，sigmoid涉及指数运算，计算量较大。
# 3. 在fc-fc-fc的前两层使用了dropout防止过拟合
# 4. 使用了丰富的数据增广方式来增加数据量，防止过拟合。

In [None]:
import time
import torch
from torch import nn, optim
import torchvision
import sys
sys.path.append('../d2lzh/')
import d2lzh_pytorch as d2l 


class AlexNet(nn.Module):
    """AlexNet-style CNN: five convolutional layers followed by three fully connected layers.

    The first fully connected layer is sized for a 256x5x5 feature map, which
    is what the convolutional stack produces for 224x224 inputs
    (224 -> 54 -> 26 -> 26 -> 12 -> 12 -> 12 -> 12 -> 5 along each spatial axis).

    Args:
        in_channels: Number of channels of the input images. Defaults to 1
            (single-channel images).
        num_classes: Number of output logits. Defaults to 10.
    """

    def __init__(self, in_channels=1, num_classes=10):
        super().__init__()
        self.conv = nn.Sequential(
            # layer 1: large 11x11 window with stride 4 and no padding
            nn.Conv2d(in_channels, 96, kernel_size=11, stride=4),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),

            # layer 2: smaller 5x5 window; padding=2 keeps the spatial size
            nn.Conv2d(96, 256, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),

            # layer 3: 3x3 convolutions from here on; padding=1 keeps the spatial size
            nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),

            # layer 4
            nn.Conv2d(384, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),

            # layer 5
            nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )

        self.fc = nn.Sequential(
            # layer 6: expects the flattened 256x5x5 feature map
            nn.Linear(256 * 5 * 5, 4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),

            # layer 7
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),

            # layer 8: one logit per class
            nn.Linear(4096, num_classes),
        )

    def forward(self, img):
        """Return the class logits for a batch of images.

        Args:
            img: Image batch; the first dimension is the batch dimension and
                the second must hold ``in_channels`` channels.

        Returns:
            Tensor with one row per image and ``num_classes`` columns.
        """
        feature = self.conv(img)
        # Flatten everything except the batch dimension before the FC layers.
        output = self.fc(torch.flatten(feature, start_dim=1))
        return output
        

In [None]:
# Instantiate the network.
net = AlexNet()
# Print the module to inspect its layer-by-layer structure.
print(net)

In [None]:
### 读取数据（此处加入了resize操作，用于放大输入图像；严格来说这是预处理，而非数据增广）

In [None]:
def load_data_fashion_mnist(batch_size, resize=None, root='../Datasets/FashionMINST/', num_workers=4):
    """Build DataLoaders over the Fashion-MNIST training and test splits.

    Args:
        batch_size: Number of samples per batch for both loaders.
        resize: Optional size passed to ``torchvision.transforms.Resize``;
            when falsy the images are not resized.
        root: Dataset root directory. The data must already be present there
            because the datasets are created with ``download=False``.
        num_workers: Number of worker processes used by each DataLoader.

    Returns:
        A ``(train_iter, test_iter)`` tuple: a shuffled loader over the
        training split and an unshuffled loader over the test split.
    """
    # NOTE(review): the default directory is spelled 'FashionMINST'; it is kept
    # unchanged because it has to match the existing on-disk folder -- confirm.
    trans = []
    if resize:
        trans.append(torchvision.transforms.Resize(resize))  # transform 1: resize
    trans.append(torchvision.transforms.ToTensor())          # transform 2: convert to tensor

    # Chain the individual transforms into a single callable.
    transform = torchvision.transforms.Compose(trans)
    mnist_train = torchvision.datasets.FashionMNIST(root=root, train=True, download=False, transform=transform)
    # train=False selects the held-out test split, so evaluation does not reuse training data.
    mnist_test = torchvision.datasets.FashionMNIST(root=root, train=False, download=False, transform=transform)

    train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    test_iter = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    return train_iter, test_iter


In [None]:
### 开始训练

In [None]:
# Train with every input image resized from 28x28 up to 224x224.

# Hyperparameters for this run.
batch_size = 128
lr = 0.001
num_epochs = 10

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Data loaders over the resized Fashion-MNIST images.
train_iter, test_iter = load_data_fashion_mnist(batch_size, resize=224)

# Adam over all network parameters, then hand everything to the shared training loop.
optimizer = torch.optim.Adam(net.parameters(), lr=lr)
d2l.train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs)

In [None]:
# 不做resize，直接使用28x28的输入（失败了：输入尺寸太小，经过步幅为4的卷积和多次maxpooling后，特征图小于池化窗口）
# batch_size = 128
# train_iter, test_iter = load_data_fashion_mnist(batch_size)

# lr, num_epochs = 0.001, 10
# optimizer = torch.optim.Adam(net.parameters(), lr=lr)
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# d2l.train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs)