In [1]:
import numpy as np
import os
from PIL import Image
import csv

import re
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models
from torchsummary import summary
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import time
import matplotlib.pyplot as plt
import torch.utils.data as data
import json
from torchvision.datasets import ImageFolder
from pathlib import Path
from sklearn.model_selection import train_test_split

##### Efficient Net V1
from efficientnet_pytorch import EfficientNet

try:
    from tqdm.notebook import tqdm
except ImportError:
    print('tqdm could not be imported. If you want to use progress bar during training,'
          'install tqdm from https://github.com/tqdm/tqdm.')

# File Path

In [2]:
train_img_path = './train_images'
test_img_path = './test_images'
train_label_file = 'train.csv'
test_label_file = 'test.csv'

# Read list

In [3]:
train_img = []
train_label = []

In [4]:
# img list
for file in os.listdir(train_img_path):
    train_img.append(train_img_path + '/' + file)

# read label
with open(train_label_file, newline = '') as csvfile:
    rows = csv.reader(csvfile)
    for i, row in enumerate(rows):
        if i: train_label.append(int(row[1]))
train_label = np.array(train_label)

# Custom Dataset

In [5]:
class AOIDataset(Dataset):
    def __init__(self, img_list, label_list = None, transform = None):
        self.img_list = img_list
        self.label = label_list
        self.transform = transform

    def __getitem__(self, index):
        # Read img
        img = Image.open(self.img_list[index])
        if self.transform:
            img = self.transform(img)
        if len(self.label):
            return img, self.label[index]
        return img

    def __len__(self):
        return len(self.img_list)

# 超參數

In [6]:
split_rate = 0.5
BATCH_SIZE = 24
num_workers = 0
Epoch = 100
lr = 1e-4
img_size = 224
patience = 8 # 耐心程度

# dataloader

In [7]:
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.Resize((img_size, img_size)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

valid_transform = transforms.Compose([
    transforms.Resize((img_size, img_size)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

In [8]:
X_train, X_test, y_train, y_test = train_test_split(train_img, train_label, test_size=1-split_rate, random_state=42)

train_dataset = AOIDataset(img_list = X_train, label_list = y_train,
                    transform = transform)

valid_dataset = AOIDataset(img_list = X_test, label_list = y_test,
                    transform = valid_transform )

train_set_size = len(train_dataset)
valid_set_size = len(valid_dataset)

In [9]:
train_dataloader = DataLoader(
    train_dataset, batch_size=BATCH_SIZE, shuffle=True, pin_memory=True, num_workers=num_workers)

valid_dataloader = DataLoader(
    valid_dataset, batch_size=64, pin_memory=True, num_workers=num_workers)

# Training

In [10]:
device = torch.device('cuda')
torch.backends.cudnn.benchmark = True

In [11]:
# Efficient Net V1 B0
# model = EfficientNet.from_pretrained(
#             'efficientnet-b0', in_channels=1, num_classes=6)
model = torchvision.models.vgg16(pretrained = True)

# model = torch.nn.Sequential(
#       nn.Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
#       nn.ReLU(inplace=True),
#       nn.Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
#       nn.ReLU(inplace=True),
#       nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
#       nn.ReLU(),
#       nn.Flatten(),
#       nn.Linear(in_features=262144, out_features=4096, bias=True),
#       nn.ReLU(inplace=True),
#       nn.Dropout(p=0.5, inplace=False),
#       nn.Linear(in_features=4096, out_features=4096, bias=True),
#       nn.ReLU(inplace=True),
#       nn.Dropout(p=0.5, inplace=False),
#       nn.Linear(in_features=4096, out_features=6, bias=True),
#     )
# for param in model.parameters():
#     param.requires_grad = False


model.features = torch.nn.Sequential(
    torch.nn.Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
    torch.nn.ReLU(inplace=True),
    torch.nn.Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
    torch.nn.ReLU(inplace=True),
    torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
    torch.nn.Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
    torch.nn.ReLU(inplace=True),
    torch.nn.Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
    torch.nn.ReLU(inplace=True),
    torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
    torch.nn.Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
    torch.nn.ReLU(inplace=True),
    torch.nn.Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
    torch.nn.ReLU(inplace=True),
    torch.nn.Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
    torch.nn.ReLU(inplace=True),
    torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
    torch.nn.Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
    torch.nn.ReLU(inplace=True),
    torch.nn.Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
    torch.nn.ReLU(inplace=True),
    torch.nn.Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
    torch.nn.ReLU(inplace=True),
    torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
    torch.nn.Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
    torch.nn.ReLU(inplace=True),
    torch.nn.Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
    torch.nn.ReLU(inplace=True),
    torch.nn.Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
    torch.nn.ReLU(inplace=True),
    torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
)

model.classifier = torch.nn.Sequential(
    torch.nn.Linear(in_features=25088, out_features=4096, bias=True),
    torch.nn.ReLU(inplace=True),
    torch.nn.Dropout(p=0.5, inplace=False),
    torch.nn.Linear(in_features=4096, out_features=4096, bias=True),
    torch.nn.ReLU(inplace=True),
    torch.nn.Dropout(p=0.5, inplace=False),
    torch.nn.Linear(in_features=4096, out_features=6, bias=True),
  )

model.to(device)

VGG(
  (features): Sequential(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [12]:
loss = nn.CrossEntropyLoss().to(device)
optimizer = optim.AdamW(model.parameters(), lr = lr)
# optimizer = optim.SGD(model.parameters(),
#                     lr=lr,
#                     weight_decay=1e-5,
#                     momentum=0.9,
#                     nesterov=True)



In [13]:
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max',factor=0.1, patience=patience, verbose=True, threshold=1e-4, min_lr=1e-7)

In [14]:
result_param = {'training_loss': [], 'training_accuracy': [],
                    'validation_loss': [], 'validation_accuracy': []}

for epoch in range(Epoch):

    since = time.time()
    running_training_loss = 0
    running_training_correct = 0
    running_valid_loss = 0
    running_valid_correct = 0
    model.train()
    train_bar = tqdm(train_dataloader)
    for imgs, label in train_bar:
        imgs = imgs.to(device)
        label = label.to(device, dtype=torch.long)

        optimizer.zero_grad()
        out = model(imgs)
        loss_val = loss(out, label)
        _, pred_class = torch.max(out.data, 1)

        running_training_correct += torch.sum(pred_class == label)
        running_training_loss += loss_val

        loss_val.backward()
        optimizer.step()
        train_bar.set_description(desc='[%d/%d] | Train Loss:%.4f' %
                                        (epoch + 1, Epoch, loss_val.item()))

    with torch.no_grad():
            model.eval()
            val_bar = tqdm(valid_dataloader)
            for imgs, label in val_bar:
                imgs = imgs.to(device)
                label = label.to(device, dtype=torch.long)

                out = model(imgs)
                loss_val = loss(out, label)

                val_bar.set_description(desc='[%d/%d] | Validation Loss:%.4f' % (epoch + 1, Epoch, loss_val.item()))
                _, pred_class = torch.max(out.data, 1)
                running_valid_correct += torch.sum(pred_class == label)
                running_valid_loss += loss_val
    
    valid_acc = running_valid_correct.item() /  valid_set_size

    result_param['training_loss'].append(
            running_training_loss.item() / train_set_size)
    result_param['training_accuracy'].append(running_training_correct.item() /
                                                 train_set_size)
    result_param['validation_loss'].append(
            running_valid_loss.item() / valid_set_size)
    result_param['validation_accuracy'].append(valid_acc)

    scheduler.step(valid_acc)

    print(
        "Epoch:{} Train Loss:{:.4f},  Train Accuracy:{:.4f},  Validation Loss:{:.4f},  Validation Accuracy:{:.4f}".format(
                epoch + 1, result_param['training_loss'][-1], result_param['training_accuracy'][-1],
                result_param['validation_loss'][-1], result_param['validation_accuracy'][-1]))

    now_time = time.time() - since
    print("Training time is:{:.0f}m {:.0f}s".format(
        now_time // 60, now_time % 60))

    if valid_acc > 0.995:
        print('Validation Acc is above 99.5!!')
        break

    # torch.save(model.state_dict(), str(
    #     './checkpoints/' + METHOD + '/' + "EPOCH_" + str(epoch) + ".pkl"))
    # out_file = open(str(
    #     './checkpoints/' + METHOD + '/' + 'result_param.json'), "w+")
    # json.dump(result_param, out_file, indent=4)


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

Epoch:1 Train Loss:0.0707,  Train Accuracy:0.2524,  Validation Loss:0.0264,  Validation Accuracy:0.2650
Training time is:0m 58s


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

Epoch:2 Train Loss:0.0702,  Train Accuracy:0.2571,  Validation Loss:0.0264,  Validation Accuracy:0.2650
Training time is:0m 16s


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

Epoch:3 Train Loss:0.0697,  Train Accuracy:0.2650,  Validation Loss:0.0264,  Validation Accuracy:0.2650
Training time is:0m 16s


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

Epoch:4 Train Loss:0.0698,  Train Accuracy:0.2666,  Validation Loss:0.0262,  Validation Accuracy:0.2650
Training time is:0m 16s


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

Epoch:5 Train Loss:0.0698,  Train Accuracy:0.2381,  Validation Loss:0.0262,  Validation Accuracy:0.2722
Training time is:0m 16s


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

Epoch:6 Train Loss:0.0698,  Train Accuracy:0.2492,  Validation Loss:0.0262,  Validation Accuracy:0.2650
Training time is:0m 16s


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

Epoch:7 Train Loss:0.0697,  Train Accuracy:0.2642,  Validation Loss:0.0262,  Validation Accuracy:0.2650
Training time is:0m 16s


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

Epoch:8 Train Loss:0.0698,  Train Accuracy:0.2714,  Validation Loss:0.0262,  Validation Accuracy:0.2650
Training time is:0m 16s


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

Epoch:9 Train Loss:0.0698,  Train Accuracy:0.2532,  Validation Loss:0.0262,  Validation Accuracy:0.2650
Training time is:0m 16s


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

Epoch:10 Train Loss:0.0697,  Train Accuracy:0.2508,  Validation Loss:0.0262,  Validation Accuracy:0.2650
Training time is:0m 16s


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

Epoch:11 Train Loss:0.0700,  Train Accuracy:0.2587,  Validation Loss:0.0262,  Validation Accuracy:0.2722
Training time is:0m 16s


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

Epoch:12 Train Loss:0.0696,  Train Accuracy:0.2658,  Validation Loss:0.0263,  Validation Accuracy:0.2650
Training time is:0m 16s


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

Epoch:13 Train Loss:0.0697,  Train Accuracy:0.2682,  Validation Loss:0.0262,  Validation Accuracy:0.2650
Training time is:0m 16s


  0%|          | 0/53 [00:00<?, ?it/s]

# Draw Result

In [None]:
import itertools
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import numpy as np

In [None]:
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()

In [None]:
y_true, y_pred = [], []
with torch.no_grad():
    model.eval()

    val_bar = tqdm(valid_dataset)
    for imgs, label in val_bar:
        imgs = imgs.unsqueeze(0).to(device)
        y_true.append(int(label))

        label = label.to(device, dtype=torch.long)
        out = model(imgs)
        _, pred_class = torch.max(out.data, 1)

        val_bar.set_description(desc='[%d/%d]' % (i, len(valid_dataset) ) )
        
        y_pred.append(int(pred_class))

In [None]:
plt.figure()
cnf_matrix = confusion_matrix(y_true, y_pred)
plot_confusion_matrix(cnf_matrix, classes=target_names,normalize=False,
                    title='CNN confusion matrix')

plt.show()

# Testing

In [15]:
test_img = []
# img list
for file in os.listdir(test_img_path):
    test_img.append(test_img_path + '/' + file)

In [16]:

transform = transforms.Compose([
    transforms.Resize((img_size, img_size)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

test_data = AOIDataset(img_list = test_img, label_list = [],
                    transform = transform)

In [17]:
import pandas as pd

In [18]:
submission = pd.read_csv(test_label_file)

In [19]:
import logging
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

In [20]:
with torch.no_grad():
    test_bar = tqdm(test_data)
    for i, imgs in enumerate(test_bar):
        imgs = imgs.unsqueeze(0).to(device)
        out = model(imgs)
        _, pred_class = torch.max(out.data, 1)
        submission.loc[i, 'Label'] = str(int(pred_class))
        test_bar.set_description(desc='[%d/%d]' % (i, len(test_data) ) )

  0%|          | 0/10142 [00:00<?, ?it/s]

In [21]:
submission.describe()

Unnamed: 0,ID,Label
count,10142,10142
unique,10142,6
top,test_00360.png,0
freq,1,2717


In [22]:
submission.to_csv('submissionVGG16_512sp.csv', index=False)