# BTVN: Training Neural Networks (Tiếp)
Trong phần này các bạn sẽ làm quen với kỹ thuật model ensemble để tăng độ chính xác khi suy diễn

In [1]:
!nvidia-smi



import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import glob
import cv2
import torch.nn.functional as F
from torch.autograd import Variable
import os

import torchvision
import torchvision.transforms as transforms

from torch.nn import CrossEntropyLoss, Dropout, Softmax, Linear, Conv2d, LayerNorm
import matplotlib.pyplot as plt
from torchsummary import summary

zsh:1: command not found: nvidia-smi


Tải dữ liệu và cài đặt một kiến trúc mạng nơ-ron đơn giản theo mô tả phía dưới

In [18]:
def load_data(data_dir="./data"):
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    trainset = torchvision.datasets.CIFAR10(
        root=data_dir, train=True, download=True, transform=transform)

    testset = torchvision.datasets.CIFAR10(
        root=data_dir, train=False, download=True, transform=transform)

    return trainset, testset


class Net(nn.Module):
    ######################
    def __init__(self, l1= 132, l2 = 84,  out_size = 10):
        super(Net, self).__init__()
        self.net_CNN = nn.Sequential(nn.Conv2d(3, 6, kernel_size=5),
                                 nn.MaxPool2d(2, 2),
                                 nn.Conv2d(6, 16, 5),
                                 nn.MaxPool2d(2,2),
                                 )
        self.fc1 = nn.Linear(16*5*5, l1)
        self.fc2 = nn.Linear(l1, l2)    
        self.fc3 = nn.Linear(l2, out_size)
    
    def forward(self, x):
        x = self.net_CNN (x)

        x = x.reshape(x.size(0), -1)
        x = self.fc1(x)
        x = self.fc2(x)
        x_bar = self.fc3(x)

        return x_bar
    ######################

model = Net()
if torch.cuda.is_available():
    model.cuda()
summary(model, (3, 32, 32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 6, 28, 28]             456
         MaxPool2d-2            [-1, 6, 14, 14]               0
            Conv2d-3           [-1, 16, 10, 10]           2,416
         MaxPool2d-4             [-1, 16, 5, 5]               0
            Linear-5                  [-1, 132]          52,932
            Linear-6                   [-1, 84]          11,172
            Linear-7                   [-1, 10]             850
Total params: 67,826
Trainable params: 67,826
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 0.06
Params size (MB): 0.26
Estimated Total Size (MB): 0.33
----------------------------------------------------------------


Hàm đánh giá độ chính xác trên tập test

In [22]:
def test_accuracy(net, testloader, device="cpu"):
    correct = 0
    total = 0

    with torch.no_grad():
        for data in testloader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            ######################
            outputs = net(images).to(device)
            _, predicted = torch.max(outputs.data, 1)
            total = labels.size(0)
            correct += (predicted == labels).sum().item()
            ######################

    return correct / total

Hàm huấn luyện mô hình

In [20]:
def train(net, criterion, optimizer, save_path, device="cpu"):
    T_cur = 0
    for epoch in range(1, epochs+1):  # loop over the dataset multiple times
        running_loss = 0.0
        epoch_steps = 0
        T_cur += 1
        
        # warm-up
        if epoch <= warm_epoch:
            optimizer.param_groups[0]['lr'] = (1.0 * epoch) / warm_epoch  * init_lr
        else: 
            # cosine annealing lr
            optimizer.param_groups[0]['lr'] = last_lr + (init_lr - last_lr) * (1 + np.cos(T_cur * np.pi / T_max)) / 2

        for i, data in enumerate(train_iter, 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            epoch_steps += 1
            if i + 1 == len(train_iter):
                print("[Epoch %d] loss: %.3f" % (epoch, running_loss / epoch_steps))
                running_loss = 0.0
                
    print("Finished Training")
    print("Test accuracy:", test_accuracy(net, test_iter, device))
    torch.save(net.state_dict(), save_path)

Thiết lập các tham số và hai kiến trúc mạng khác nhau

In [13]:
epochs = 10
warm_epoch = 5
init_lr = 1e-2
last_lr = 1e-4
T_max = epochs

configs = [{'l1': 64, 'l2': 32}, {'l1': 128, 'l2': 64}]

trainset, testset = load_data('./data')
train_iter = torch.utils.data.DataLoader(
    trainset,
    batch_size=128,
    shuffle=True,
)
test_iter = torch.utils.data.DataLoader(
    testset, batch_size=4, shuffle=False, num_workers=2)

Files already downloaded and verified
Files already downloaded and verified


Huấn luyện hai mạng mô tả trong configs

In [23]:
os.makedirs('./snapshot', exist_ok=True)

for i, cfg in enumerate(configs):
    print(cfg)
    net = Net(cfg['l1'], cfg['l2'])
    ######################
    device = 'cpu'
    if torch.cuda.is_available():
        device = 'cuda:0'
        if torch.cuda.device_count() > 1:
            net = nn.DataParallel(net)
    net.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr = init_lr)

    save_path = f'./snapshot/model{i}.pth'
    train(net, criterion, optimizer, save_path, device)
    ######################

{'l1': 64, 'l2': 32}
[Epoch 1] loss: 1.609
[Epoch 2] loss: 1.393
[Epoch 3] loss: 1.348
[Epoch 4] loss: 1.336
[Epoch 5] loss: 1.342
[Epoch 6] loss: 1.150
[Epoch 7] loss: 1.092
[Epoch 8] loss: 1.047
[Epoch 9] loss: 1.019
[Epoch 10] loss: 1.006
Finished Training
Test accuracy: 1552.5
{'l1': 128, 'l2': 64}
[Epoch 1] loss: 1.583
[Epoch 2] loss: 1.414
[Epoch 3] loss: 1.394
[Epoch 4] loss: 1.402
[Epoch 5] loss: 1.416
[Epoch 6] loss: 1.194
[Epoch 7] loss: 1.122
[Epoch 8] loss: 1.073
[Epoch 9] loss: 1.040
[Epoch 10] loss: 1.027
Finished Training
Test accuracy: 1554.25


Kết hợp kết quả hai mạng (ensemble)

In [None]:
from tqdm import tqdm

def test_ensemble(device="cpu"):
    correct = 0
    total = 0

    # Load tất cả model vào bộ nhớ trước
    nets = []
    for i, cfg in enumerate(configs):
        net = Net(cfg['l1'], cfg['l2'])
        net.to(device)
        net.load_state_dict(torch.load(f'./snapshot/model{i}.pth'))
        net.eval()
        nets.append(net)

    with torch.no_grad():
        for data in tqdm(test_iter):
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            final_outputs = torch.zeros((images.size(0), 10)).to(device)
            for net in nets:
                outputs = net(images)
                final_outputs += outputs
            final_outputs /= len(nets)
            _, predicted = torch.max(final_outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    return correct / total

In [27]:
test_ensemble()

AssertionError: Torch not compiled with CUDA enabled