In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
import numpy as np
import matplotlib.pyplot as plt
import torchvision
import torchvision.transforms as transforms
import time
import os
import copy
import csv

from PIL import Image

# Turn on matplotlib's interactive mode.
plt.ion()

# Use the first CUDA device when one is available; otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

## 데이터셋

In [None]:
# Preprocessing shared by training, validation and test images:
# resize to 224x224, convert to a tensor, then normalize every channel as
# (x - 0.5) / 0.5.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[.5, .5, .5], std=[.5, .5, .5]),
])

# Dataset: one sub-directory per class under the training image folder.
target_dir = "../input/state-farm-distracted-driver-detection/imgs/train"
dataset = torchvision.datasets.ImageFolder(root=target_dir,
                                           transform=transform)
class_names = dataset.classes

# 95% of the images go to training, the remainder to validation.
dataset_sizes = {}
dataset_sizes["train"] = int(0.95 * len(dataset))
dataset_sizes["val"] = len(dataset) - dataset_sizes["train"]

# Seed the split so the same images land in train/val on every run; without a
# fixed generator the validation set (and every reported metric) changes
# between kernel restarts.
split_generator = torch.Generator().manual_seed(42)
datasets = {}
datasets["train"], datasets["val"] = torch.utils.data.random_split(
    dataset,
    [dataset_sizes["train"], dataset_sizes["val"]],
    generator=split_generator)

# Dataloaders: large shuffled batches for training, small batches for
# validation (the validation loader is also used to draw preview images).
dataloaders = {}
dataloaders["train"] = torch.utils.data.DataLoader(datasets["train"],
                                                   batch_size=128,
                                                   shuffle=True,
                                                   num_workers=8)
dataloaders["val"] = torch.utils.data.DataLoader(datasets["val"],
                                                 batch_size=4,
                                                 shuffle=True,
                                                 num_workers=2)

for x in ["train", "val"]:
    print("Loaded {} images under {}".format(dataset_sizes[x], x))

print("Classes: ")
print(class_names)

## Utils

In [None]:
def imshow(inp, title=None):
    """Show a normalized image tensor with matplotlib.

    Args:
        inp: Tensor whose first axis is moved to the last position before
            plotting (i.e. a channels-first image). It must live on the CPU
            because it is converted with ``.numpy()``.
        title: Optional title placed above the image.
    """
    image = inp.numpy().transpose((1, 2, 0))

    # Undo the mean 0.5 / std 0.5 normalization, then clamp to the [0, 1]
    # range before plotting.
    channel_mean = np.array([.5, .5, .5])
    channel_std = np.array([.5, .5, .5])
    image = np.clip(channel_std * image + channel_mean, 0, 1)

    plt.imshow(image)
    if title is not None:
        plt.title(title)
    # Short pause so the figure gets a chance to refresh.
    plt.pause(0.001)

def show_databatch(inputs, classes):
    """Plot a batch of images as one grid, titled with their class names.

    Args:
        inputs: Batch of image tensors handed to ``torchvision.utils.make_grid``.
        classes: Iterable of class indices, one per image, looked up in the
            notebook-level ``class_names`` list.
    """
    titles = [class_names[idx] for idx in classes]
    grid = torchvision.utils.make_grid(inputs)
    imshow(grid, title=titles)

# Fetch one batch from the *validation* loader (small batch size, so the grid
# stays readable) for a quick visual sanity check of the data pipeline.
inputs, classes = next(iter(dataloaders['val']))

# Reuse the helper instead of duplicating the make_grid + imshow logic here.
show_databatch(inputs, classes)

In [None]:
def visualize_model(model, num_images=6):
    """Plot validation images with their ground-truth and predicted classes.

    Images are drawn from the notebook-level ``dataloaders['val']`` and laid
    out in a two-column grid. The model is switched to eval mode while
    predicting and its previous train/eval mode is restored afterwards, even
    if plotting raises.

    Args:
        model: Network to run; expected to already be on ``device``.
        num_images: Maximum number of images to display. Values <= 0 show
            nothing.
    """
    if num_images <= 0:
        return

    was_training = model.training
    model.eval()  # eval mode: disables dropout / batch-norm updates
    images_so_far = 0
    # Round up so an odd num_images still fits in the 2-column grid; the
    # original floor division left too few subplot slots in that case.
    num_rows = (num_images + 1) // 2
    plt.figure()

    try:
        with torch.no_grad():
            for inputs, labels in dataloaders['val']:
                inputs = inputs.to(device)
                labels = labels.to(device)

                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)

                for j in range(inputs.size()[0]):
                    images_so_far += 1
                    ax = plt.subplot(num_rows, 2, images_so_far)
                    ax.axis('off')
                    ax.set_title(f'ground truth:{class_names[labels[j]]}, predicted: {class_names[preds[j]]}')
                    imshow(inputs.cpu().data[j])

                    if images_so_far == num_images:
                        return
    finally:
        # Single restore point for every exit path (return, exhaustion, error).
        model.train(mode=was_training)

In [None]:
def eval_model(model, criterion):
    """Evaluate ``model`` on the validation loader and print loss/accuracy.

    Uses the notebook-level ``dataloaders["val"]`` and ``device``. The model
    is put in eval mode and is left in eval mode when the function returns.

    Args:
        model: Network to evaluate; expected to already be on ``device``.
        criterion: Loss callable ``criterion(outputs, labels)``.
            NOTE(review): the per-sample average assumes the criterion returns
            the batch *mean* (the default reduction of ``nn.CrossEntropyLoss``).
    """
    since = time.time()
    loss_sum = 0.0
    correct = 0
    seen = 0

    model.eval()
    # No gradients are needed for evaluation; skipping them avoids building
    # an autograd graph for every batch.
    with torch.no_grad():
        for data in dataloaders["val"]:
            inputs, labels = data[0].to(device), data[1].to(device)

            outputs = model(inputs)

            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)

            # Weight the batch-mean loss by the batch size so that dividing
            # by the number of samples yields a true per-sample average
            # (the last batch may be smaller than the others).
            n_batch = inputs.size(0)
            loss_sum += loss.item() * n_batch
            correct += torch.sum(preds == labels).item()
            seen += n_batch

    # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
    avg_loss = loss_sum / max(seen, 1)
    avg_acc = correct / max(seen, 1)

    elapsed_time = time.time() - since
    print()
    print("Evaluation completed in {:.0f}m {:.0f}s".format(elapsed_time // 60, elapsed_time % 60))
    print("Avg loss (test): {:.4f}".format(avg_loss))
    print("Avg acc (test): {:.4f}".format(avg_acc))

## 모델 생성

In [None]:
class VGG11(nn.Module):
    """VGG11 image classifier: conv feature extractor, pooling, MLP head.

    The sub-modules are registered as ``features`` (conv / ReLU / max-pool
    stack), ``avgpool`` and ``classifier`` (three linear layers), so the
    ``state_dict`` keys are ``features.<idx>.*`` and ``classifier.<idx>.*``.

    Args:
        num_classes: Number of output logits produced by the last linear layer.
    """

    def __init__(
        self,
        num_classes: int = 1000,
    ) -> None:
        super().__init__()

        # Feature extraction layers.
        # Each integer is the output channel count of a 3x3 convolution
        # (padding 1) followed by an in-place ReLU; "M" is a 2x2 max-pool
        # with stride 2.
        layout = (64, "M", 128, "M", 256, 256, "M",
                  512, 512, "M", 512, 512, "M")
        layers = []
        in_channels = 3
        for spec in layout:
            if spec == "M":
                layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            else:
                layers.append(nn.Conv2d(in_channels, spec, kernel_size=3, padding=1))
                layers.append(nn.ReLU(inplace=True))
                in_channels = spec
        self.features = nn.Sequential(*layers)

        # Compared with feeding the conv output straight into the fully
        # connected layers, inserting an adaptive average pool
        #   1. accepts a range of input image sizes, and
        #   2. reduces the number of trainable parameters.
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))

        # The conv layers extract the features (512 channels); the fully
        # connected layers below use them for classification.
        hidden_units = 4096
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, hidden_units),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(hidden_units, hidden_units),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(hidden_units, num_classes),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the class logits for the image batch ``x``."""
        pooled = self.avgpool(self.features(x))
        # Keep dim 0 and flatten the rest into one vector per sample.
        flat = torch.flatten(pooled, 1)
        return self.classifier(flat)

In [None]:
# Build the model and load the pretrained weights published at WEIGHTS_URL.
WEIGHTS_URL = "https://download.pytorch.org/models/vgg11-bbd30ac9.pth"
model = VGG11()
model.load_state_dict(torch.utils.model_zoo.load_url(WEIGHTS_URL, progress=True))
print(model.classifier[6].out_features) # 1000


# Freeze the convolutional feature extractor so only the classifier trains.
# The attribute is `requires_grad`; the previous `require_grad` spelling just
# attached an unused attribute and left every layer trainable.
for param in model.features.parameters():
    param.requires_grad = False

# Replace the last classifier layer with a fresh one that outputs one logit
# per dataset class; the other classifier layers keep their loaded weights.
num_features = model.classifier[6].in_features
model.classifier[6] = nn.Linear(num_features, len(class_names))
print(model)

In [None]:
# Move the model's parameters onto the selected device.
model = model.to(device)

# Loss function used for training and evaluation.
criterion = nn.CrossEntropyLoss()

# SGD with momentum over the model's parameters, plus a schedule that
# multiplies the learning rate by 0.1 every 7 scheduler steps.
optimizer_ft = optim.SGD(
    model.parameters(),
    lr=0.001,
    momentum=0.9,
)
exp_lr_scheduler = lr_scheduler.StepLR(
    optimizer_ft,
    step_size=7,
    gamma=0.1,
)

## 훈련 전 모델 평가 및 시각화

In [None]:
# Baseline: measure and visualize the model before any fine-tuning.
print("Test before training")
eval_model(model=model, criterion=criterion)
visualize_model(model=model)

## 훈련

In [None]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=10):
    """Train ``model`` and return it loaded with its best validation weights.

    Each epoch runs one pass over the notebook-level ``dataloaders["train"]``
    followed by one pass over ``dataloaders["val"]``, printing the average
    loss and accuracy of both. The weights from the epoch with the highest
    validation accuracy are restored before returning.

    Args:
        model: Network to train; expected to already be on ``device``.
        criterion: Loss callable ``criterion(outputs, labels)``.
            NOTE(review): the per-sample averages assume the criterion returns
            the batch *mean* (the default reduction of ``nn.CrossEntropyLoss``).
        optimizer: Optimizer that updates ``model``'s parameters.
        scheduler: Learning-rate scheduler stepped once per epoch; may be
            ``None`` to keep the learning rate fixed.
        num_epochs: Number of epochs to run.

    Returns:
        The same ``model`` object, holding the best-scoring weights and left
        in eval mode.
    """
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print("Epoch {}/{}".format(epoch, num_epochs))
        print('-' * 10)

        # ---- training phase ----
        loss_train = 0.0
        correct_train = 0
        seen_train = 0

        model.train(True)

        for data in dataloaders["train"]:
            inputs, labels = data[0].to(device), data[1].to(device)

            # Reset gradients accumulated by the previous step.
            optimizer.zero_grad()

            outputs = model(inputs)

            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)

            # Backpropagate and update the parameters.
            loss.backward()
            optimizer.step()

            # Weight the batch-mean loss by the batch size so the epoch
            # average is per sample, not per sample divided by batch size.
            n_batch = inputs.size(0)
            loss_train += loss.item() * n_batch
            correct_train += torch.sum(preds == labels).item()
            seen_train += n_batch

        # Advance the learning-rate schedule once per epoch. The scheduler was
        # previously accepted but never stepped, so the rate never decayed.
        if scheduler is not None:
            scheduler.step()

        # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
        avg_loss = loss_train / max(seen_train, 1)
        avg_acc = correct_train / max(seen_train, 1)

        # ---- validation phase ----
        loss_val = 0.0
        correct_val = 0
        seen_val = 0

        model.eval()

        # Validation needs no gradients; no_grad avoids building an autograd
        # graph for every batch.
        with torch.no_grad():
            for data in dataloaders["val"]:
                inputs, labels = data[0].to(device), data[1].to(device)

                outputs = model(inputs)

                _, preds = torch.max(outputs, 1)
                loss = criterion(outputs, labels)

                n_batch = inputs.size(0)
                loss_val += loss.item() * n_batch
                correct_val += torch.sum(preds == labels).item()
                seen_val += n_batch

        avg_loss_val = loss_val / max(seen_val, 1)
        avg_acc_val = correct_val / max(seen_val, 1)

        print("Epoch {} result: ".format(epoch))
        print("Avg loss (train): {:.4f}".format(avg_loss))
        print("Avg acc (train): {:.4f}".format(avg_acc))
        print("Avg loss (val): {:.4f}".format(avg_loss_val))
        print("Avg acc (val): {:.4f}".format(avg_acc_val))
        print('-' * 10)
        print()

        # Keep a copy of the weights whenever validation accuracy improves.
        if avg_acc_val > best_acc:
            best_acc = avg_acc_val
            best_model_wts = copy.deepcopy(model.state_dict())

    elapsed_time = time.time() - since
    print()
    print("Training completed in {:.0f}m {:.0f}s".format(elapsed_time // 60, elapsed_time % 60))
    print("Best acc: {:.4f}".format(best_acc))

    model.load_state_dict(best_model_wts)
    return model

In [None]:
# Fine-tune for three epochs; train_model returns the model holding the
# weights of its best validation epoch.
model = train_model(
    model,
    criterion,
    optimizer_ft,
    exp_lr_scheduler,
    num_epochs=3,
)

# Save the model's state_dict to disk.
trained_weights = model.state_dict()
torch.save(trained_weights, 'model.pt')

## 모델 평가

In [None]:
# Report validation loss and accuracy of the fine-tuned model.
eval_model(model=model, criterion=criterion)

In [None]:
# Show eight validation images with ground-truth and predicted labels.
visualize_model(model=model, num_images=8)

## 테스트

In [None]:
# Prepare the model for inference.
# The output size follows the dataset's class list (the checkpoint was trained
# with a head of len(class_names) outputs) instead of a hard-coded 10.
model = VGG11(len(class_names))
# map_location lets a checkpoint saved on a GPU be loaded on a CPU-only host.
model.load_state_dict(torch.load("model.pt", map_location=device))
model.to(device)
model.eval()

In [None]:
# Collect the test image paths in a deterministic (sorted) order.
paths = []
target_dir = "../input/state-farm-distracted-driver-detection/imgs/test"
for root, _, fnames in sorted(os.walk(target_dir, followlinks=True)):
    # Join with `root` (the directory currently being walked), not
    # `target_dir`: files found in a sub-directory would otherwise get a path
    # that does not exist.
    paths.extend(os.path.join(root, fname) for fname in sorted(fnames))

In [None]:
# One softmax-probability array per batch, in the same order as `paths`.
predictions = []

# Process the test images batch_size at a time.
batch_size = 16

# Inference only: no_grad avoids building an autograd graph for every batch,
# so no manual tensor deletion or cache flushing is needed to bound memory.
with torch.no_grad():
    # Stepping the start index by batch_size yields exactly
    # ceil(len(paths) / batch_size) batches. The previous round(x + 0.5)
    # relied on banker's rounding and produced an extra, empty batch whenever
    # len(paths) was an odd multiple of batch_size, crashing torch.stack.
    for start in range(0, len(paths), batch_size):
        batch = paths[start:start + batch_size]
        inputs = []
        for path in batch:
            # The context manager closes the file handle once the pixels
            # have been converted.
            with Image.open(path) as img:
                inputs.append(transform(img.convert('RGB')))

        inputs = torch.stack(inputs).to(device)

        # Convert the logits to per-class probabilities.
        preds = F.softmax(model(inputs), dim=1)
        predictions += [preds.cpu().numpy()]

In [None]:
# Write the submission file: a header row, then one row per test image with
# the file name followed by its ten class probabilities.
with open('submission.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile, delimiter=',')
    writer.writerow(["img","c0","c1","c2","c3","c4","c5","c6","c7","c8","c9"])
    for batch_idx, batch in enumerate(predictions):
        # Index in `paths` of the first image of this batch.
        first = batch_idx * batch_size
        for offset, pred in enumerate(batch):
            img_name = os.path.basename(paths[first + offset])
            writer.writerow([img_name] + [str(prob) for prob in pred])