In [1]:
## Upload Data ##
# Extract the training and test image archives stored on Google Drive into
# local folders of the Colab runtime ("./dirty_mnist_2nd/" and
# "./test_dirty_mnist_2nd/").
# NOTE(review): assumes Drive is already mounted at /content/drive; no mount
# call is visible in this notebook, so confirm before running on a fresh runtime.
from google.colab import output
!unzip /content/drive/MyDrive/computer_vision_comp/dataset/dirty_mnist_2nd.zip -d "./dirty_mnist_2nd/"
!unzip /content/drive/MyDrive/computer_vision_comp/dataset/test_dirty_mnist_2nd.zip -d "./test_dirty_mnist_2nd/"
# Clear this cell's output once both archives have been extracted.
output.clear()

In [2]:
## Import Library ##
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import cv2
from tqdm import tqdm
import imutils
import zipfile
import os
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
import torchvision.transforms as T
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import KFold

# Device selection: the first CUDA GPU when one is available, otherwise the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print(device)

cuda:0


In [3]:
## Seed ##
import random
def seed_everything(seed: int = 42):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and all CUDA
    devices), exports ``PYTHONHASHSEED`` and configures cuDNN for
    deterministic execution.

    Args:
        seed: Seed value applied to every generator.
    """
    random.seed(seed)
    np.random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)  # type: ignore
    torch.cuda.manual_seed_all(seed)  # type: ignore
    torch.backends.cudnn.deterministic = True  # type: ignore
    # Benchmark mode auto-tunes the convolution algorithm at run time and may
    # select a different one on each run, so it has to be off for results to
    # be reproducible.
    torch.backends.cudnn.benchmark = False  # type: ignore

seed_everything()

In [4]:
## Load Data ##
# Label table for the training images, read from Google Drive.
dirty_mnist_answer = pd.read_csv(
    "/content/drive/MyDrive/computer_vision_comp/dataset/dirty_mnist_2nd_answer.csv"
)
# Names of the entries in the extracted training image folder.
namelist = os.listdir('./dirty_mnist_2nd/')

In [5]:
## Transform ##
class ToTensor(object):
    """Convert the numpy arrays of a sample dict into torch float tensors."""

    def __call__(self, sample):
        """Return a new dict holding the image and label as ``FloatTensor``s.

        Args:
            sample: dict with an 'image' array laid out as H x W x C and a
                'label' array.

        Returns:
            dict with 'image' rearranged to C x H x W and 'label', both as
            ``torch.FloatTensor``.
        """
        raw_image = sample['image']
        raw_label = sample['label']
        # numpy keeps the channel axis last; torch expects it first.
        image_chw = raw_image.transpose(2, 0, 1)
        return dict(image=torch.FloatTensor(image_chw),
                    label=torch.FloatTensor(raw_label))

# Sample-level pipeline: turns the {'image', 'label'} dict of numpy arrays
# into a dict of tensors.
to_tensor = T.Compose([ToTensor()])

# Image-level pipeline: random horizontal flip followed by a random rotation
# drawn from the range 0 to 180 degrees.
augmentation = T.Compose([
    T.RandomHorizontalFlip(),
    T.RandomRotation(degrees=(0, 180)),
])

In [6]:
## Dataset ##
class DatasetMNIST(torch.utils.data.Dataset):
    """Dataset of grayscale PNG images with a multi-label target per image.

    Each row of ``meta_df`` describes one sample: column 0 holds the image
    index (the file is ``<index zero-padded to 5 digits>.png`` inside
    ``dir_path``) and the remaining columns hold the label vector.

    Args:
        dir_path: Directory that contains the PNG files.
        meta_df: DataFrame with the image index in column 0 and the labels in
            the following columns.
        transforms: Callable applied to the ``{'image', 'label'}`` dict of
            numpy arrays; a falsy value returns the numpy sample unchanged.
        augmentations: Callable applied to the PIL image before it is turned
            into a numpy array; ``None`` disables augmentation.
    """

    def __init__(self,
                 dir_path,
                 meta_df,
                 transforms=to_tensor,
                 augmentations=augmentation):

        self.dir_path = dir_path
        self.meta_df = meta_df

        self.transforms = transforms  # sample-level transform (numpy -> tensor)
        self.augmentations = augmentations  # image-level augmentation (PIL -> PIL)

    def __len__(self):
        """Return the number of rows in ``meta_df``."""
        return len(self.meta_df)

    def __getitem__(self, index):
        """Load sample ``index`` and return its ``{'image', 'label'}`` dict."""
        file_name = str(self.meta_df.iloc[index, 0]).zfill(5) + '.png'
        # os.path.join works whether or not dir_path ends with a separator.
        # The context manager closes the file handle; convert() returns an
        # independent in-memory grayscale copy that stays valid afterwards.
        with Image.open(os.path.join(self.dir_path, file_name)) as raw_image:
            image = raw_image.convert("L")

        # Augmentation is optional so that evaluation data can be left as is.
        if self.augmentations is not None:
            image = self.augmentations(image)

        image = np.array(image)
        # Scale pixel values to [0, 1] and add a trailing channel axis (H x W x 1).
        image = (image / 255).astype('float')[..., np.newaxis]

        label = self.meta_df.iloc[index, 1:].values.astype('float')
        sample = {'image': image, 'label': label}

        if self.transforms:
            sample = self.transforms(sample)  # numpy -> tensor

        return sample

In [7]:
## Model ##
class MultiLabelModel(nn.Module):
    """Pretrained MobileNetV3-Small wrapped for 1-channel input and 26 sigmoid outputs."""

    def __init__(self):
        super().__init__()
        # 3x3 convolution that lifts the single input channel to the three
        # channels the backbone receives.
        self.conv2d = nn.Conv2d(in_channels=1, out_channels=3, kernel_size=3, stride=1)
        self.model = models.mobilenet_v3_small(pretrained=True)  # pretrained backbone
        self.FC = nn.Linear(in_features=1000, out_features=26)
        self.norm1 = nn.BatchNorm2d(num_features=3)
        self.norm2 = nn.BatchNorm2d(num_features=1000)  # not used in forward()

    def forward(self, x):
        """Return one probability per label for a batch of 1-channel images."""
        # Stem: 1 channel -> 3 channels, batch norm, ReLU.
        stem_out = self.conv2d(x)
        stem_out = F.relu(self.norm1(stem_out))
        # Backbone output passed through ReLU before the classifier.
        features = F.relu(self.model(stem_out))
        # Classifier: linear layer followed by an element-wise sigmoid.
        scores = self.FC(features)
        return torch.sigmoid(scores)

In [9]:
## Training ##
# 4-fold cross-validation. One model is trained per fold; the weights of the
# epoch with the highest validation accuracy are written to disk and restored
# into the model that is kept in `best_models`.
kfold = KFold(n_splits=4, shuffle=True, random_state=0)

MODEL = "mobilenet"
path = "/content/drive/MyDrive/computer_vision_comp/output/"
os.makedirs(path, exist_ok=True)  # torch.save does not create missing folders

best_models = []
for fold_index, (trn_idx, val_idx) in enumerate(kfold.split(dirty_mnist_answer), 1):
    print(f'[fold: {fold_index}]')
    torch.cuda.empty_cache()

    train_answer = dirty_mnist_answer.iloc[trn_idx]
    test_answer = dirty_mnist_answer.iloc[val_idx]  # validation rows of this fold

    # Dataset
    train_dataset = DatasetMNIST(dir_path="dirty_mnist_2nd/", meta_df=train_answer)
    # Validation images must not be randomly flipped/rotated: an empty Compose
    # is an identity transform that replaces the dataset's default augmentation.
    valid_dataset = DatasetMNIST(dir_path="dirty_mnist_2nd/",
                                 meta_df=test_answer,
                                 augmentations=T.Compose([]))

    # DataLoader
    train_data_loader = DataLoader(
        train_dataset,
        batch_size=64,
        shuffle=True,  # reshuffle the training samples every epoch
        num_workers=2
    )
    valid_data_loader = DataLoader(
        valid_dataset,
        batch_size=32,
        shuffle=False,
        num_workers=2
    )

    model = MultiLabelModel()
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=0.01)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=5,
                                                   gamma=0.75)
    criterion = torch.nn.BCELoss()

    valid_acc_max = 0
    best_state = None  # snapshot of the weights of the best epoch so far
    for epoch in range(7):
        # Train
        model.train()
        train_acc_list = []
        with tqdm(train_data_loader,
                  total=len(train_data_loader),
                  unit="batch") as train_bar:
            for sample in train_bar:
                train_bar.set_description(f"Train Epoch {epoch}")

                images = sample['image'].to(device)
                labels = sample['label'].to(device)

                optimizer.zero_grad()
                probs = model(images)
                loss = criterion(probs, labels)
                loss.backward()
                optimizer.step()

                # Element-wise accuracy over every (sample, label) pair.
                preds = probs.detach().cpu().numpy() > 0.5
                batch_acc = (labels.cpu().numpy() == preds).mean()
                train_acc_list.append(batch_acc)
                train_acc = np.mean(train_acc_list)

                train_bar.set_postfix(train_loss=loss.item(),
                                      train_acc=train_acc)

        # Validation
        model.eval()
        valid_acc_list = []
        valid_loss_list = []
        with tqdm(valid_data_loader,
                  total=len(valid_data_loader),
                  unit="batch") as valid_bar:
            for sample in valid_bar:
                valid_bar.set_description(f"Valid Epoch {epoch}")

                images = sample['image'].to(device)
                labels = sample['label'].to(device)

                with torch.no_grad():
                    probs = model(images)
                    valid_loss = criterion(probs, labels)

                preds = probs.cpu().numpy() > 0.5
                batch_acc = (labels.cpu().numpy() == preds).mean()
                valid_acc_list.append(batch_acc)
                valid_loss_list.append(valid_loss.item())

                valid_acc = np.mean(valid_acc_list)
                valid_bar.set_postfix(valid_loss=valid_loss.item(),
                                      valid_acc=valid_acc)

        lr_scheduler.step()
        if valid_acc_max < valid_acc:
            valid_acc_max = valid_acc
            # state_dict() hands out references to the live parameters, which
            # keep changing in later epochs, so the snapshot must be a copy.
            best_state = {name: tensor.detach().cpu().clone()
                          for name, tensor in model.state_dict().items()}
            # Mean of the per-batch losses of this epoch, not just the last batch.
            epoch_valid_loss = np.mean(valid_loss_list)
            torch.save(best_state, f'{path}{fold_index}_{MODEL}_{epoch_valid_loss:2.4f}_epoch_{epoch}.pth')

    # Roll the model back to its best epoch before keeping it for the ensemble.
    if best_state is not None:
        model.load_state_dict(best_state)
    best_model = model
    best_models.append(best_model)

[fold: 1]


Train Epoch 0:  29%|██▊       | 168/586 [00:49<02:03,  3.37batch/s, train_acc=0.533, train_loss=0.691]


KeyboardInterrupt: ignored

In [None]:
## Test Dataset ##
sample_submission = pd.read_csv("/content/drive/MyDrive/computer_vision_comp/dataset/sample_submission.csv")
# Inference has to be deterministic: the identity Compose replaces the
# dataset's default random flip/rotation so test images are fed unmodified.
test_dataset = DatasetMNIST("./test_dirty_mnist_2nd/",
                            sample_submission,
                            augmentations=T.Compose([]))
batch_size = 128
test_data_loader = DataLoader(
    test_dataset,
    batch_size = batch_size,
    shuffle = False,  # row order must match the submission file
    num_workers = 2,
    drop_last = False
)

In [None]:
## Eval ##
predictions_list = []
prediction_df = pd.read_csv("/content/drive/MyDrive/computer_vision_comp/dataset/sample_submission.csv")

# One prediction row per submission row, one column per label
# (every column of the submission file except the first).
n_rows, n_columns = prediction_df.shape

for model in best_models:
    model.eval()
    prediction_array = np.zeros([n_rows, n_columns - 1])
    with torch.no_grad():
        for idx, sample in enumerate(test_data_loader):
            images = sample['image'].to(device)
            probs = model(images).cpu().numpy()
            preds = probs > 0.5

            # Rows covered by this batch; the final batch may be shorter.
            batch_index = batch_size * idx
            batch_end = batch_index + images.shape[0]
            prediction_array[batch_index:batch_end, :] = preds.astype(int)

    # The trailing axis lets the per-model arrays be concatenated afterwards.
    predictions_list.append(prediction_array[..., np.newaxis])

In [None]:
# Majority vote across models: join the per-model 0/1 predictions on the last
# axis, average them, and set a label only when more than half of the models
# predicted it.
predictions_array = np.concatenate(predictions_list, axis=-1)
vote_share = np.mean(predictions_array, axis=-1)
predictions_mean = (vote_share > 0.5).astype(int)

In [None]:
## Submission File ##
# Reload the submission template, overwrite every column after the first with
# the ensemble predictions, and write the result to Google Drive.
submission_template_csv = "/content/drive/MyDrive/computer_vision_comp/dataset/sample_submission.csv"
sample_submission = pd.read_csv(submission_template_csv)
sample_submission.iloc[:, 1:] = predictions_mean
sample_submission.to_csv(
    "/content/drive/MyDrive/computer_vision_comp/mobilenet_prediction10.csv",
    index=False,
)
sample_submission.head(3)