In [1]:
import sys
sys.path.append("..")
import torch
from torch import nn
from torchvision import transforms
from dataloaders.FashionMnist import load_FashionMnist
from tools.model_trainer import train_model
from tools.model_tester import test_model
from torchsummary import summary
from easydict import EasyDict as edict

# 1 加载数据集

In [2]:
device = "cuda" if torch.cuda.is_available() else "cpu"
cfg = edict({
    'device':device,
    'batch_size': 64,
    'num_classes': 10,  # 分类类别
    'lr': 0.01,  # 学习率
    'epoch_size': 20  # 训练次数
})

transform = transforms.Compose([
    transforms.ToTensor(),
    # transforms.Normalize((0.1307,), (0.3081,))
])
train_iter, _, test_iter = load_FashionMnist(batch_size=cfg.batch_size, transform=transform)
print(f"train size: {len(train_iter.dataset)}")
# print(f"valid size: {len(valid_iter.dataset)}")
print(f"test size: {len(test_iter.dataset)}")
for X, y in test_iter:
    print(f"Shape of X [N, C, H, W]: {X.shape}")
    print(f"Shape of y: {y.shape} {y.dtype}")
    break

train size: 60000
test size: 10000
Shape of X [N, C, H, W]: torch.Size([64, 1, 28, 28])
Shape of y: torch.Size([64]) torch.int64


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


# 2 不使用正则化的卷积神经网络

In [6]:
net = nn.Sequential(
    nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=0, bias=False), nn.ReLU(),
    nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=0, bias=False), nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=0, bias=False), nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Flatten(),
    nn.Linear(128*5*5, 120), nn.ReLU(),
    nn.Linear(120, 84), nn.ReLU(),
    nn.Linear(84, cfg.num_classes)
)

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=cfg.lr)
train_model(net, train_iter, test_iter, loss_fn, ['accuracy'], optimizer, num_epochs=cfg.epoch_size, device=device, use_tensorboard=True, print_log=True,comment='_DL_Exp2_without_normalization') 

training on cuda
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Train result
loss 0.2835
final train Accuracy 0.8990
final valid Accuracy 0.8810
4411.0887 examples/sec on cuda
13.5948 sec/epoch on cuda


# 3 对比正则化技术

## 3.1 使用L1正则化

In [3]:
from tools.model_trainer import train_batch_L1

net = nn.Sequential(
    nn.Conv2d(1, 6, kernel_size=5, padding=2), nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Conv2d(6, 16, kernel_size=5), nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Flatten(),
    nn.Linear(16 * 5 * 5, 120), nn.ReLU(),
    nn.Linear(120, 84), nn.ReLU(),
    nn.Linear(84, cfg.num_classes)
)

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=cfg.lr, weight_decay=0.001) # L2正则化
train_model(net, train_iter, test_iter, loss_fn, ['accuracy'], optimizer, num_epochs=cfg.epoch_size, device=device, train_func=train_batch_L1, use_tensorboard=True, print_log=True,comment='_DL_Exp2_with_L1_normalization') 

training on cuda
Epoch 1/20
0/937 [               ]

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Train result
loss 0.7215
final train Accuracy 0.8357
final valid Accuracy 0.8274
3259.2972 examples/sec on cuda
18.3991 sec/epoch on cuda


## 3.2 使用L2正则化

In [4]:
net = nn.Sequential(
    nn.Conv2d(1, 6, kernel_size=5, padding=2), nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Conv2d(6, 16, kernel_size=5), nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Flatten(),
    nn.Linear(16 * 5 * 5, 120), nn.ReLU(),
    nn.Linear(120, 84), nn.ReLU(),
    nn.Linear(84, cfg.num_classes)
)

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=cfg.lr, weight_decay=0.001) # L2正则化
train_model(net, train_iter, test_iter, loss_fn, ['accuracy'], optimizer, num_epochs=cfg.epoch_size, device=device, use_tensorboard=True, print_log=True,comment='_DL_Exp2_with_L2_normalization') 

training on cuda
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Train result
loss 0.4722
final train Accuracy 0.8289
final valid Accuracy 0.8303
4986.0428 examples/sec on cuda
12.0272 sec/epoch on cuda


## 3.3 使用Dropout正则化

In [5]:
net = nn.Sequential(
    nn.Conv2d(1, 6, kernel_size=5, padding=2), nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Conv2d(6, 16, kernel_size=5), nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Flatten(),
    nn.Linear(16 * 5 * 5, 120), nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(120, 84), nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(84, cfg.num_classes)
)

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=cfg.lr) 
train_model(net, train_iter, test_iter, loss_fn, ['accuracy'], optimizer, num_epochs=cfg.epoch_size, device=device, use_tensorboard=True, print_log=True,comment='_DL_Exp2_with_Dropout') 

training on cuda
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Train result
loss 0.6312
final train Accuracy 0.7947
final valid Accuracy 0.8350
5365.2348 examples/sec on cuda
11.1771 sec/epoch on cuda


## 3.4 使用BN正则化

In [None]:
net = nn.Sequential(
    nn.Conv2d(1, 6, kernel_size=5, padding=2),
    nn.BatchNorm2d(6), nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Conv2d(6, 16, kernel_size=5),
    nn.BatchNorm2d(6), nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Flatten(),
    nn.Linear(16 * 5 * 5, 120), nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(120, 84), nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(84, cfg.num_classes)
)

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=cfg.lr) 
train_model(net, train_iter, test_iter, loss_fn, ['accuracy'], optimizer, num_epochs=cfg.epoch_size, device=device, use_tensorboard=True, print_log=True,comment='_DL_Exp2_with_BN') 

## 3.5 使用Layer Normalization正则化

In [7]:
summary(net, (1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 6, 28, 28]             156
              ReLU-2            [-1, 6, 28, 28]               0
         MaxPool2d-3            [-1, 6, 14, 14]               0
            Conv2d-4           [-1, 16, 10, 10]           2,416
              ReLU-5           [-1, 16, 10, 10]               0
         MaxPool2d-6             [-1, 16, 5, 5]               0
           Flatten-7                  [-1, 400]               0
            Linear-8                  [-1, 120]          48,120
              ReLU-9                  [-1, 120]               0
           Linear-10                   [-1, 84]          10,164
             ReLU-11                   [-1, 84]               0
           Linear-12                   [-1, 10]             850
Total params: 61,706
Trainable params: 61,706
Non-trainable params: 0
---------------------------------

In [8]:
net = nn.Sequential(
    nn.Conv2d(1, 6, kernel_size=5, padding=2),
    nn.LayerNorm((6, 28, 28)), nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Conv2d(6, 16, kernel_size=5),
    nn.LayerNorm((16, 10, 10)), nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Flatten(),
    nn.Linear(16 * 5 * 5, 120), nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(120, 84), nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(84, cfg.num_classes)
)

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=cfg.lr) 
train_model(net, train_iter, test_iter, loss_fn, ['accuracy'], optimizer, num_epochs=cfg.epoch_size, device=device, use_tensorboard=True, print_log=True,comment='_DL_Exp2_with_LN') 

training on cuda
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Train result
loss 0.4954
final train Accuracy 0.8290
final valid Accuracy 0.8394
3110.6675 examples/sec on cuda
19.2782 sec/epoch on cuda


## 3.6 SELU

In [9]:
net = nn.Sequential(
    nn.Conv2d(1, 6, kernel_size=5, padding=2), nn.SELU(),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Conv2d(6, 16, kernel_size=5), nn.SELU(),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Flatten(),
    nn.Linear(16 * 5 * 5, 120), nn.SELU(),
    nn.Linear(120, 84), nn.SELU(),
    nn.Linear(84, cfg.num_classes)
)

# 默认是Kaiming初始化

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=cfg.lr)
train_model(net, train_iter, test_iter, loss_fn, ['accuracy'], optimizer, num_epochs=cfg.epoch_size, device=device, use_tensorboard=True, print_log=True,comment='_DL_Exp2_with_SELU') 

training on cuda
Epoch 1/20
107/937 [=              ]

KeyboardInterrupt: 