In [15]:
import moxing as mox

# Copy the local "runs" directory to the OBS bucket.
# NOTE(review): "runs" is presumably the TensorBoard log directory written by
# train_model(use_tensorboard=True) — confirm. This cell was executed as In [15],
# i.e. after the training cells, although it sits first in the notebook; on a
# top-to-bottom run the directory may not exist yet.
local_runs_dir = "runs"
obs_runs_uri = "obs://lxh-bigdata/runs"
mox.file.copy_parallel(local_runs_dir, obs_runs_uri)

In [1]:
# Install the third-party packages used by this notebook into the kernel's environment.
# torchsummary and easydict are imported by the import cell below; ipywidgets is not
# imported directly in the visible cells (presumably needed for notebook widgets — confirm).
# NOTE(review): versions are not pinned. The recorded output shows torchsummary 1.5.1
# was installed; consider pinning all three for reproducibility.
%pip install torchsummary
%pip install easydict
%pip install ipywidgets

Looking in indexes: http://repo.myhuaweicloud.com/repository/pypi/simple
Collecting torchsummary
  Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/7d/18/1474d06f721b86e6a9b9d7392ad68bed711a02f3b61ac43f13c719db50a6/torchsummary-1.5.1-py3-none-any.whl (2.8 kB)
Installing collected packages: torchsummary
Successfully installed torchsummary-1.5.1
You should consider upgrading via the '/home/ma-user/anaconda3/envs/PyTorch-1.8/bin/python -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.
Looking in indexes: http://repo.myhuaweicloud.com/repository/pypi/simple
You should consider upgrading via the '/home/ma-user/anaconda3/envs/PyTorch-1.8/bin/python -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.
Looking in indexes: http://repo.myhuaweicloud.com/repository/pypi/simple
Collecting ipywidgets
  Downloading http://repo.myhuaweicloud.com/repository/pypi/packages

In [1]:
import sys
sys.path.append("..")
import torch
from torch import nn
from torchvision import transforms
from dataloaders.FashionMnist import load_FashionMnist
from tools.model_trainer import train_model
from tools.model_tester import test_model
from torchsummary import summary
from easydict import EasyDict as edict

# 0 超参数设置

In [2]:
# Hyperparameters shared by every experiment in this notebook.

# Seed PyTorch's global RNG before any model or loader is created, so that a fresh
# "Restart Kernel -> Run All" starts from the same random state every time.
# This does not by itself guarantee bit-identical results on CUDA.
SEED = 42
torch.manual_seed(SEED)

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
cfg = edict({
    'device': device,
    'batch_size': 64,
    'num_classes': 10,  # number of target classes
    'lr': 0.001,  # learning rate handed to the optimizer
    'epoch_size': 30,  # number of training epochs
    'seed': SEED,  # RNG seed applied above
})

# 1 加载数据集

In [3]:
# Build the FashionMNIST loaders. Images are only converted to tensors;
# Normalize((0.1307,), (0.3081,)) is not applied.
transform = transforms.Compose([transforms.ToTensor()])

# The middle element of the returned triple is discarded
# (presumably the validation loader — confirm in dataloaders.FashionMnist).
loaders = load_FashionMnist(batch_size=cfg.batch_size, transform=transform)
train_iter, _, test_iter = loaders

train_size = len(train_iter.dataset)
print(f"train size: {train_size}")
test_size = len(test_iter.dataset)
print(f"test size: {test_size}")

# Peek at the first test batch (if there is one) to show tensor shapes and label dtype.
first_batch = next(iter(test_iter), None)
if first_batch is not None:
    X, y = first_batch
    print(f"Shape of X [N, C, H, W]: {X.shape}")
    print(f"Shape of y: {y.shape} {y.dtype}")

train size: 60000
test size: 10000
Shape of X [N, C, H, W]: torch.Size([64, 1, 28, 28])
Shape of y: torch.Size([64]) torch.int64


# 2 不使用正则化的卷积神经网络

In [4]:
# Baseline CNN without any regularization.
# For a 1x28x28 input the unpadded 3x3 convolutions and 2x2 poolings give
# 26x26 -> 24x24 -> 12x12 -> 10x10 -> 5x5, hence 128*5*5 flattened features.
conv_layers = [
    nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=0, bias=False),
    nn.ReLU(),
    nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=0, bias=False),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=0, bias=False),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),
]
classifier_layers = [
    nn.Flatten(),
    nn.Linear(128 * 5 * 5, 120),
    nn.ReLU(),
    nn.Linear(120, 84),
    nn.ReLU(),
    nn.Linear(84, cfg.num_classes),
]
net = nn.Sequential(*conv_layers, *classifier_layers)

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=cfg.lr)

# The test loader is passed in the evaluation slot of train_model.
train_model(
    net, train_iter, test_iter, loss_fn, ['accuracy'], optimizer,
    num_epochs=cfg.epoch_size,
    device=device,
    use_tensorboard=True,
    print_log=True,
    comment='_DL_Exp2_without_normalization',
)

training on cuda
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Train result
loss 0.0177
final train Accuracy 0.9940
final valid Accuracy 0.9116
24055.9773 examples/sec on cuda
2.4929 sec/epoch on cuda


# 3 对比正则化技术

## 3.1 使用L1正则化

### λ = 1e-5（L1 正则化系数，对应代码中的 `lamda` 参数）

In [5]:
from tools.model_trainer import train_batch_L1

# Same architecture as the baseline CNN; only the per-batch training step differs.
conv_layers = [
    nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=0, bias=False),
    nn.ReLU(),
    nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=0, bias=False),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=0, bias=False),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),
]
classifier_layers = [
    nn.Flatten(),
    nn.Linear(128 * 5 * 5, 120),
    nn.ReLU(),
    nn.Linear(120, 84),
    nn.ReLU(),
    nn.Linear(84, cfg.num_classes),
]
net = nn.Sequential(*conv_layers, *classifier_layers)

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=cfg.lr)


def train_func(*step_args, **step_kwargs):
    """Forward every argument to train_batch_L1 with ``lamda`` fixed at 1e-5.

    ``lamda`` is presumably the L1 penalty coefficient — confirm in
    tools.model_trainer.
    """
    return train_batch_L1(*step_args, **step_kwargs, lamda=1e-5)


train_model(
    net, train_iter, test_iter, loss_fn, ['accuracy'], optimizer,
    num_epochs=cfg.epoch_size,
    device=device,
    train_func=train_func,
    use_tensorboard=True,
    print_log=True,
    comment='_DL_Exp2_with_L1_normalization',
)

training on cuda
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Train result
loss 0.1468
final train Accuracy 0.9854
final valid Accuracy 0.9153
18468.6631 examples/sec on cuda
3.2470 sec/epoch on cuda


### λ = 1e-4（L1 正则化系数，对应代码中的 `lamda` 参数）

In [13]:
from tools.model_trainer import train_batch_L1

# Baseline architecture again, trained with a larger coefficient (1e-4) in the L1 step.
conv_layers = [
    nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=0, bias=False),
    nn.ReLU(),
    nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=0, bias=False),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=0, bias=False),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),
]
classifier_layers = [
    nn.Flatten(),
    nn.Linear(128 * 5 * 5, 120),
    nn.ReLU(),
    nn.Linear(120, 84),
    nn.ReLU(),
    nn.Linear(84, cfg.num_classes),
]
net = nn.Sequential(*conv_layers, *classifier_layers)

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=cfg.lr)


def train_func(*step_args, **step_kwargs):
    """Forward every argument to train_batch_L1 with ``lamda`` fixed at 1e-4.

    ``lamda`` is presumably the L1 penalty coefficient — confirm in
    tools.model_trainer.
    """
    return train_batch_L1(*step_args, **step_kwargs, lamda=1e-4)


train_model(
    net, train_iter, test_iter, loss_fn, ['accuracy'], optimizer,
    num_epochs=cfg.epoch_size,
    device=device,
    train_func=train_func,
    use_tensorboard=True,
    print_log=True,
    comment='_DL_Exp2_with_L1_normalization_1e-4',
)

training on cuda
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Train result
loss 0.3067
final train Accuracy 0.9303
final valid Accuracy 0.9053
18536.0503 examples/sec on cuda
3.2352 sec/epoch on cuda


## 3.2 使用L2正则化

In [6]:
# Baseline architecture; the only change is weight_decay on the optimizer.
conv_layers = [
    nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=0, bias=False),
    nn.ReLU(),
    nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=0, bias=False),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=0, bias=False),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),
]
classifier_layers = [
    nn.Flatten(),
    nn.Linear(128 * 5 * 5, 120),
    nn.ReLU(),
    nn.Linear(120, 84),
    nn.ReLU(),
    nn.Linear(84, cfg.num_classes),
]
net = nn.Sequential(*conv_layers, *classifier_layers)

loss_fn = nn.CrossEntropyLoss()
# L2 regularization: Adam's weight_decay is set to 0.001 here.
optimizer = torch.optim.Adam(
    net.parameters(),
    lr=cfg.lr,
    weight_decay=0.001,
)
train_model(
    net, train_iter, test_iter, loss_fn, ['accuracy'], optimizer,
    num_epochs=cfg.epoch_size,
    device=device,
    use_tensorboard=True,
    print_log=True,
    comment='_DL_Exp2_with_L2_normalization',
)

training on cuda
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Train result
loss 0.1531
final train Accuracy 0.9451
final valid Accuracy 0.9047
23666.4952 examples/sec on cuda
2.5339 sec/epoch on cuda


## 3.3 使用Dropout正则化

In [7]:
# Baseline convolutional stack; Dropout(p=0.5) follows each hidden fully connected layer.
conv_layers = [
    nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=0, bias=False),
    nn.ReLU(),
    nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=0, bias=False),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=0, bias=False),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),
]
classifier_layers = [
    nn.Flatten(),
    nn.Linear(128 * 5 * 5, 120),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(120, 84),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(84, cfg.num_classes),
]
net = nn.Sequential(*conv_layers, *classifier_layers)

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=cfg.lr)
train_model(
    net, train_iter, test_iter, loss_fn, ['accuracy'], optimizer,
    num_epochs=cfg.epoch_size,
    device=device,
    use_tensorboard=True,
    print_log=True,
    comment='_DL_Exp2_with_Dropout',
)

training on cuda
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Train result
loss 0.0980
final train Accuracy 0.9660
final valid Accuracy 0.9217
23411.3607 examples/sec on cuda
2.5615 sec/epoch on cuda


## 3.4 使用BN正则化

In [8]:
# Each convolution is followed by BatchNorm2d over its output channels, then ReLU.
# The fully connected head is the same as in the baseline.
conv_layers = [
    nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=0, bias=False),
    nn.BatchNorm2d(32),
    nn.ReLU(),
    nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=0, bias=False),
    nn.BatchNorm2d(64),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=0, bias=False),
    nn.BatchNorm2d(128),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),
]
classifier_layers = [
    nn.Flatten(),
    nn.Linear(128 * 5 * 5, 120),
    nn.ReLU(),
    nn.Linear(120, 84),
    nn.ReLU(),
    nn.Linear(84, cfg.num_classes),
]
net = nn.Sequential(*conv_layers, *classifier_layers)

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=cfg.lr)
train_model(
    net, train_iter, test_iter, loss_fn, ['accuracy'], optimizer,
    num_epochs=cfg.epoch_size,
    device=device,
    use_tensorboard=True,
    print_log=True,
    comment='_DL_Exp2_with_BN',
)

training on cuda
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Train result
loss 0.0118
final train Accuracy 0.9960
final valid Accuracy 0.9269
18241.0340 examples/sec on cuda
3.2875 sec/epoch on cuda


## 3.5 使用Layer Normalization正则化

In [5]:
# Print a per-layer summary of whichever model `net` currently refers to,
# for a single 1x28x28 input.
# NOTE(review): this cell was executed as In [5]; its recorded output lists no
# BatchNorm/LayerNorm layers, so it was run against the plain conv net rather
# than the model defined in the cell just above it.
summary(net, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 26, 26]             288
              ReLU-2           [-1, 32, 26, 26]               0
            Conv2d-3           [-1, 64, 24, 24]          18,432
              ReLU-4           [-1, 64, 24, 24]               0
         MaxPool2d-5           [-1, 64, 12, 12]               0
            Conv2d-6          [-1, 128, 10, 10]          73,728
              ReLU-7          [-1, 128, 10, 10]               0
         MaxPool2d-8            [-1, 128, 5, 5]               0
           Flatten-9                 [-1, 3200]               0
           Linear-10                  [-1, 120]         384,120
             ReLU-11                  [-1, 120]               0
           Linear-12                   [-1, 84]          10,164
             ReLU-13                   [-1, 84]               0
           Linear-14                   

In [9]:
# Each convolution is followed by LayerNorm over its full (C, H, W) output, then ReLU.
# The normalized shapes are hard-coded to the conv output sizes for 28x28 inputs
# (26x26, 24x24, 10x10), so this model only accepts that input resolution.
conv_layers = [
    nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=0, bias=False),
    nn.LayerNorm((32, 26, 26)),
    nn.ReLU(),
    nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=0, bias=False),
    nn.LayerNorm((64, 24, 24)),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=0, bias=False),
    nn.LayerNorm((128, 10, 10)),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),
]
classifier_layers = [
    nn.Flatten(),
    nn.Linear(128 * 5 * 5, 120),
    nn.ReLU(),
    nn.Linear(120, 84),
    nn.ReLU(),
    nn.Linear(84, cfg.num_classes),
]
net = nn.Sequential(*conv_layers, *classifier_layers)

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=cfg.lr)
train_model(
    net, train_iter, test_iter, loss_fn, ['accuracy'], optimizer,
    num_epochs=cfg.epoch_size,
    device=device,
    use_tensorboard=True,
    print_log=True,
    comment='_DL_Exp2_with_LN',
)

training on cuda
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Train result
loss 0.0205
final train Accuracy 0.9928
final valid Accuracy 0.9167
18832.6801 examples/sec on cuda
3.1843 sec/epoch on cuda


## 3.6 SELU & alpha dropout

In [10]:
# Baseline layout with every ReLU replaced by SELU; no dropout in this variant.
conv_layers = [
    nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=0, bias=False),
    nn.SELU(),
    nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=0, bias=False),
    nn.SELU(),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=0, bias=False),
    nn.SELU(),
    nn.MaxPool2d(kernel_size=2, stride=2),
]
classifier_layers = [
    nn.Flatten(),
    nn.Linear(128 * 5 * 5, 120),
    nn.SELU(),
    nn.Linear(120, 84),
    nn.SELU(),
    nn.Linear(84, cfg.num_classes),
]
net = nn.Sequential(*conv_layers, *classifier_layers)

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=cfg.lr)
train_model(
    net, train_iter, test_iter, loss_fn, ['accuracy'], optimizer,
    num_epochs=cfg.epoch_size,
    device=device,
    use_tensorboard=True,
    print_log=True,
    comment='_DL_Exp2_with_SELU',
)

training on cuda
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Train result
loss 0.0307
final train Accuracy 0.9911
final valid Accuracy 0.9144
24366.6471 examples/sec on cuda
2.4611 sec/epoch on cuda


In [11]:
# SELU network with AlphaDropout(p=0.5) after each hidden fully connected layer.
conv_layers = [
    nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=0, bias=False),
    nn.SELU(),
    nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=0, bias=False),
    nn.SELU(),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=0, bias=False),
    nn.SELU(),
    nn.MaxPool2d(kernel_size=2, stride=2),
]
classifier_layers = [
    nn.Flatten(),
    nn.Linear(128 * 5 * 5, 120),
    nn.SELU(),
    nn.AlphaDropout(0.5),
    nn.Linear(120, 84),
    nn.SELU(),
    nn.AlphaDropout(0.5),
    nn.Linear(84, cfg.num_classes),
]
net = nn.Sequential(*conv_layers, *classifier_layers)

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=cfg.lr)
train_model(
    net, train_iter, test_iter, loss_fn, ['accuracy'], optimizer,
    num_epochs=cfg.epoch_size,
    device=device,
    use_tensorboard=True,
    print_log=True,
    comment='_DL_Exp2_with_SELU_alphaDropout',
)

training on cuda
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Train result
loss 0.1121
final train Accuracy 0.9621
final valid Accuracy 0.9188
22046.3982 examples/sec on cuda
2.7201 sec/epoch on cuda
