## 작업 환경
- 로컬 (NVIDIA GeForce RTX 3080)
- Google Colab Pro (gpu)

## 학습 방법
- 사용 모델 : EfficientNet-b7
- 5-fold로 나누어 학습하였고, 에포크마다 모델을 저장하였습니다.
- 폴드 별로 생성된 모델의 loss와 accuracy를 비교하면서 폴드당 모델을 11개씩 선정하여 직접 불러와 앙상블을 진행하였습니다.
- Augmentation은 RandomFlip 등을 써서 테스트해 봤지만, 성능이 향상되지 않아 RandomRotation만 적용했습니다.
- 29일 이후 동일한 모델의 에포크를 늘려 학습을 진행했고, 앙상블하는 모델의 개수를 늘려 스코어를 향상시켰습니다.

# GPU 사용여부 확인

In [1]:
import torch

# Use the first CUDA device when one is visible, otherwise fall back to CPU.
USE_CUDA = torch.cuda.is_available()
device = torch.device('cuda:0' if USE_CUDA else 'cpu')

print('CUDA 사용 가능 여부 :', USE_CUDA)
print('현재 사용 device :', device)
# current_device() and get_device_name() raise on a machine without CUDA,
# so they are only queried when a GPU is actually available. With a GPU the
# printed output is identical to the unguarded version.
if USE_CUDA:
    print('CUDA Index :', torch.cuda.current_device())
    print('GPU 이름 :', torch.cuda.get_device_name())
# device_count() is safe everywhere; it reports 0 when no GPU is present.
print('GPU 개수 :', torch.cuda.device_count())

CUDA 사용 가능 여부 : True
현재 사용 device : cuda:0
CUDA Index : 0
GPU 이름 : NVIDIA GeForce RTX 3080
GPU 개수 : 1


# Library Import

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import cv2
from tqdm import tqdm
import imutils
import zipfile
import os
from PIL import Image

import torch
import gc
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
import torchvision.transforms as T
from torch.utils.data import DataLoader, Dataset
from efficientnet_pytorch import EfficientNet
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Dataset 구성

In [3]:
# Answer sheet for the training images (read from the working directory).
dirty_mnist_answer = pd.read_csv(filepath_or_buffer="dirty_mnist_2nd_answer.csv")

# Names of every entry found inside the dirty_mnist directory.
namelist = os.listdir(path='./dirty_mnist/')

# numpy를 tensor로 변환하는 ToTensor 정의
class ToTensor(object):
    """Convert the numpy arrays of a sample dict into float torch tensors.

    The sample is a dict holding an 'image' array and a 'label' array; a new
    dict with the same two keys is returned.
    """

    def __call__(self, sample):
        pixels = sample['image']
        target = sample['label']
        # numpy images are channel-last (H x W x C) whereas torch expects
        # channel-first (C x H x W), so the channel axis is moved to the front.
        channel_first = pixels.transpose(2, 0, 1)
        return {
            'image': torch.FloatTensor(channel_first),
            'label': torch.FloatTensor(target),
        }
# Default sample transform: only the numpy -> tensor conversion defined by
# the custom ToTensor class of this file.
to_tensor = T.Compose(transforms=[ToTensor()])

# Training-time augmentation applied to the image tensor alone: random
# rotation with degrees=40. Note that T.ToTensor here is torchvision's
# transform (PIL image -> tensor), not the custom ToTensor class.
augmentations = T.Compose(transforms=[
    T.ToPILImage(),
    T.RandomRotation(degrees=40),
    T.ToTensor(),
])

class DatasetMNIST(torch.utils.data.Dataset):
    """Dataset of grayscale PNG images plus their multi-label targets.

    Args:
        dir_path: Directory that contains the ``<index>.png`` images.
        meta_df: DataFrame whose first column is the image index and whose
            remaining columns are the per-class labels (per the original
            notes: 1 when present, 0 otherwise).
        transforms: Callable applied to the ``{'image', 'label'}`` sample
            dict. Defaults to the module-level ``to_tensor``.
        augmentations: Optional callable applied to the transformed image
            only (the label is left untouched).
    """

    def __init__(self,
                 dir_path,
                 meta_df,
                 transforms=to_tensor,
                 augmentations=None):
        self.dir_path = dir_path
        self.meta_df = meta_df
        self.transforms = transforms
        self.augmentations = augmentations

    def __len__(self):
        """Return the number of rows in the metadata DataFrame."""
        return len(self.meta_df)

    def __getitem__(self, index):
        """Load one image/label pair.

        Returns:
            A dict with the keys 'image' and 'label' after the transform and
            (optional) augmentation have been applied.

        Raises:
            FileNotFoundError: If the image file is missing or unreadable.
        """
        # File name is the index zero-padded to five digits, e.g.
        #   "12".zfill(5)  => "00012"
        #   "146".zfill(5) => "00146"
        file_name = str(self.meta_df.iloc[index, 0]).zfill(5) + '.png'
        # os.path.join also works when dir_path has no trailing separator.
        image_path = os.path.join(self.dir_path, file_name)

        # cv2.IMREAD_GRAYSCALE reads the PNG as a single-channel image.
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising;
            # fail here with the offending path rather than on the division.
            raise FileNotFoundError(f"Could not read image: {image_path}")

        # Scale 0..255 pixel values to 0..1 floats and append a channel axis,
        # turning an (H, W) array into (H, W, 1).
        image = (image / 255).astype('float32')[..., np.newaxis]

        # Every column after the first holds the label values.
        label = self.meta_df.iloc[index, 1:].values.astype('float')
        sample = {'image': image, 'label': label}

        # numpy -> tensor conversion (or whatever transform was supplied).
        if self.transforms:
            sample = self.transforms(sample)
        # Augmentation only touches the image.
        if self.augmentations:
            sample['image'] = self.augmentations(sample['image'])
        return sample

# 학습 모델 구성

In [None]:
# nn.Module을 상속 받아 MultiLabelEfficientnet를 정의
class MultiLabelEfficientnet(nn.Module):
    """EfficientNet-b7 wrapped for single-channel input and 26 sigmoid outputs."""

    def __init__(self):
        super().__init__()
        # 3x3 convolution that expands the 1-channel input to 3 channels
        # before it is handed to the pretrained backbone.
        self.conv2d = nn.Conv2d(in_channels=1, out_channels=3,
                                kernel_size=3, stride=1)
        self.efficientnet = EfficientNet.from_pretrained('efficientnet-b7')
        # Dropout layer added to improve generalization.
        self.drop = nn.Dropout(p=0.2)
        # Maps the 1000 backbone outputs onto the 26 target labels.
        self.FC = nn.Linear(in_features=1000, out_features=26)

    def forward(self, x):
        """Return per-label probabilities for a batch of images."""
        expanded = F.silu(self.conv2d(x))
        backbone_out = F.silu(self.efficientnet(expanded))
        scores = self.FC(self.drop(backbone_out))
        # Multi-label prediction: every output gets its own sigmoid rather
        # than sharing a softmax.
        return torch.sigmoid(scores)


# Instantiate the model; the bare name echoes its structure in a notebook.
model = MultiLabelEfficientnet()
model

# 학습

In [5]:
from sklearn.model_selection import KFold

# Checkpoint naming: "<path><MODEL>_<fold>_<valid_loss>.pth".
MODEL = "efficientnetb7"
path = "./models/"
# torch.save does not create missing directories; create it up front so the
# first checkpoint does not fail after a full epoch of training.
os.makedirs(path, exist_ok=True)

kfold = KFold(n_splits=5, shuffle=True, random_state=42)

# Split dirty_mnist_answer into train/validation indices, one pair per fold.
for fold_index, (trn_idx, val_idx) in enumerate(kfold.split(dirty_mnist_answer), 1):
    print(f'[fold: {fold_index}]')
    # Release cached GPU memory left over from the previous fold.
    torch.cuda.empty_cache()

    # Train / validation rows of this fold.
    train_answer = dirty_mnist_answer.iloc[trn_idx]
    test_answer = dirty_mnist_answer.iloc[val_idx]

    # Only the training set is augmented.
    train_dataset = DatasetMNIST("dirty_mnist/", train_answer, augmentations=augmentations)
    valid_dataset = DatasetMNIST("dirty_mnist/", test_answer)

    train_data_loader = DataLoader(
        train_dataset,
        batch_size=8,
        shuffle=True,
        num_workers=0
    )
    valid_data_loader = DataLoader(
        valid_dataset,
        batch_size=8,
        shuffle=False,
        num_workers=0
    )

    # A fresh model per fold.
    model = MultiLabelEfficientnet()

    torch.cuda.empty_cache()
    gc.collect()

    model.to(device)

    # Training configuration.
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=5,
                                                   gamma=0.9)
    criterion = torch.nn.BCELoss()

    for epoch in range(150):
        # ---- one training epoch ----
        # Clearing the allocator cache once per epoch is enough; doing it on
        # every batch only forces re-allocation and slows training down.
        torch.cuda.empty_cache()
        gc.collect()

        # Dropout / batch-norm in training mode for the whole epoch.
        model.train()
        train_acc_list = []
        train_loss_list = []
        with tqdm(train_data_loader,
                  total=len(train_data_loader),
                  unit="batch") as train_bar:
            train_bar.set_description(f"Train Epoch {epoch}")
            for sample in train_bar:
                optimizer.zero_grad()
                images = sample['image'].to(device)
                labels = sample['label'].to(device)

                # Forward pass, loss, backpropagation, weight update.
                probs = model(images)
                loss = criterion(probs, labels)
                loss.backward()
                optimizer.step()

                # Element-wise accuracy over all labels of the batch.
                preds = probs.cpu().detach().numpy() > 0.5
                batch_acc = (labels.cpu().detach().numpy() == preds).mean()
                train_acc_list.append(batch_acc)
                train_loss_list.append(loss.item())
                train_acc = np.mean(train_acc_list)
                train_loss = np.mean(train_loss_list)

                # Running means of the epoch so far.
                train_bar.set_postfix(train_loss=train_loss,
                                      train_acc=train_acc)

        # ---- validation after the epoch ----
        # Dropout / batch-norm in eval mode; no gradients are needed, so no
        # optimizer call is made here either.
        model.eval()
        valid_acc_list = []
        valid_loss_list = []
        with tqdm(valid_data_loader,
                  total=len(valid_data_loader),
                  unit="batch") as valid_bar:
            valid_bar.set_description(f"Valid Epoch {epoch}")
            with torch.no_grad():
                for sample in valid_bar:
                    images = sample['image'].to(device)
                    labels = sample['label'].to(device)

                    probs = model(images)
                    batch_loss = criterion(probs, labels)

                    # Validation accuracy, computed like the training one.
                    preds = probs.cpu().detach().numpy() > 0.5
                    batch_acc = (labels.cpu().detach().numpy() == preds).mean()
                    valid_acc_list.append(batch_acc)
                    valid_loss_list.append(batch_loss.item())
                    valid_acc = np.mean(valid_acc_list)
                    valid_loss = np.mean(valid_loss_list)
                    valid_bar.set_postfix(valid_loss=valid_loss,
                                          valid_acc=valid_acc)

        # Learning-rate schedule step.
        lr_scheduler.step()

        # Save the model after every epoch. The previous `best_model = model`
        # was only an alias of the live model (never a snapshot), so the
        # object written was always the current model; that is now explicit.
        # NOTE: the whole module is pickled because the ensemble cells load it
        # with torch.load and call it directly. Two epochs whose validation
        # loss rounds to the same 4 decimals overwrite each other.
        torch.save(model, f'{path}{MODEL}_{fold_index}_{valid_loss:2.4f}.pth')

[fold: 1]
Loaded pretrained weights for efficientnet-b7


Train Epoch 0:   5%|█▎                         | 247/5000 [01:51<35:44,  2.22batch/s, train_acc=0.52, train_loss=0.698]


KeyboardInterrupt: 

# 앙상블 적용

## 사용된 모델
- 아래의 코드에서 보이듯이 폴드 당 11개씩의 모델을 사용하여 앙상블을 진행했습니다.
- GPU 메모리 문제로 25개와 30개로 나누어 추론을 하고 나온 두 가지의 결과를 바탕으로 최종 제출 파일을 생성하였습니다.

In [6]:
# Release cached GPU memory before the checkpoints are loaded.
torch.cuda.empty_cache()
gc.collect()

# First ensemble group: five checkpoints per fold (25 in total), listed in
# rounds of fold 1..5. The number in each file name is the validation loss.
# NOTE: torch.load unpickles whole model objects - only load trusted files.
best_models = [
    torch.load(checkpoint)
    for checkpoint in (
        './models/efficientnetb7_1_0.1604.pth',
        './models/efficientnetb7_2_0.1511.pth',
        './models/efficientnetb7_3_0.1512.pth',
        './models/efficientnetb7_4_0.1411.pth',
        './models/efficientnetb7_5_0.1510.pth',

        './models/efficientnetb7_1_0.1610.pth',
        './models/efficientnetb7_2_0.1520.pth',
        './models/efficientnetb7_3_0.1550.pth',
        './models/efficientnetb7_4_0.1421.pth',
        './models/efficientnetb7_5_0.1547.pth',

        './models/efficientnetb7_1_0.1624.pth',
        './models/efficientnetb7_2_0.1532.pth',
        './models/efficientnetb7_3_0.1553.pth',
        './models/efficientnetb7_4_0.1430.pth',
        './models/efficientnetb7_5_0.1615.pth',

        './models/efficientnetb7_1_0.1647.pth',
        './models/efficientnetb7_2_0.1541.pth',
        './models/efficientnetb7_3_0.1560.pth',
        './models/efficientnetb7_4_0.1434.pth',
        './models/efficientnetb7_5_0.1620.pth',

        './models/efficientnetb7_1_0.1661.pth',
        './models/efficientnetb7_2_0.1554.pth',
        './models/efficientnetb7_3_0.1573.pth',
        './models/efficientnetb7_4_0.1435.pth',
        './models/efficientnetb7_5_0.1642.pth',
    )
]

In [None]:
# Second ensemble group: six checkpoints per fold (30 in total), listed in
# rounds of fold 1..5. The number in each file name is the validation loss.
# NOTE: torch.load unpickles whole model objects - only load trusted files.
best_models2 = [
    torch.load(checkpoint)
    for checkpoint in (
        './models/efficientnetb7_1_0.1662.pth',
        './models/efficientnetb7_2_0.1558.pth',
        './models/efficientnetb7_3_0.1602.pth',
        './models/efficientnetb7_4_0.1447.pth',
        './models/efficientnetb7_5_0.1643.pth',

        './models/efficientnetb7_1_0.1664.pth',
        './models/efficientnetb7_2_0.1570.pth',
        './models/efficientnetb7_3_0.1608.pth',
        './models/efficientnetb7_4_0.1450.pth',
        './models/efficientnetb7_5_0.1683.pth',

        './models/efficientnetb7_1_0.1671.pth',
        './models/efficientnetb7_2_0.1585.pth',
        './models/efficientnetb7_3_0.1609.pth',
        './models/efficientnetb7_4_0.1459.pth',
        './models/efficientnetb7_5_0.1706.pth',

        './models/efficientnetb7_1_0.1675.pth',
        './models/efficientnetb7_2_0.1662.pth',
        './models/efficientnetb7_3_0.1655.pth',
        './models/efficientnetb7_4_0.1477.pth',
        './models/efficientnetb7_5_0.1707.pth',

        './models/efficientnetb7_1_0.1681.pth',
        './models/efficientnetb7_2_0.1706.pth',
        './models/efficientnetb7_3_0.1661.pth',
        './models/efficientnetb7_4_0.1487.pth',
        './models/efficientnetb7_5_0.1715.pth',

        './models/efficientnetb7_1_0.1720.pth',
        './models/efficientnetb7_2_0.1717.pth',
        './models/efficientnetb7_3_0.1667.pth',
        './models/efficientnetb7_4_0.1490.pth',
        './models/efficientnetb7_5_0.1717.pth',
    )
]

In [7]:
# Test set: the sample submission provides the image indices (its label
# columns are only placeholders that get overwritten later).
sample_submission = pd.read_csv("sample_submission.csv")
test_dataset = DatasetMNIST(dir_path="test_dirty_mnist/", meta_df=sample_submission)

# Keep the original order and every sample so that predictions line up with
# the rows of the submission file.
batch_size = 32
test_data_loader = DataLoader(dataset=test_dataset,
                              batch_size=batch_size,
                              shuffle=False,
                              num_workers=0,
                              drop_last=False)

# 추론

In [None]:
# Release cached GPU memory before inference.
torch.cuda.empty_cache()
gc.collect()

predictions_list = []
# The submission template fixes the output size: one row per test image and
# one column per label (every column except the first).
prediction_df = pd.read_csv("sample_submission.csv")

for model in best_models:
    # Switch dropout / batch-norm to inference behaviour for this model.
    model.eval()
    # Zero-filled buffer that receives this model's 0/1 votes.
    prediction_array = np.zeros([prediction_df.shape[0],
                                 prediction_df.shape[1] - 1])
    with torch.no_grad():
        for idx, sample in enumerate(test_data_loader):
            images = sample['image'].to(device)
            probs = model(images).cpu().detach().numpy()
            # Hard vote: threshold each model's probabilities at 0.5.
            preds = probs > 0.5

            # The loader is not shuffled, so batch idx covers the rows
            # starting at batch_size * idx.
            batch_index = batch_size * idx
            batch_end = batch_index + images.shape[0]
            prediction_array[batch_index:batch_end, :] = preds.astype(int)

    # Add a trailing "model" axis so the per-model votes can be concatenated.
    predictions_list.append(prediction_array[..., np.newaxis])

# Concatenate along the model axis: (n_samples, n_labels, n_models).
predictions_array = np.concatenate(predictions_list, axis=2)
np.save('predictions_array', predictions_array)

In [9]:
predictions_list2 = []
# The submission template fixes the output size: one row per test image and
# one column per label (every column except the first).
prediction_df2 = pd.read_csv("sample_submission.csv")

for model in best_models2:
    # Switch dropout / batch-norm to inference behaviour for this model.
    model.eval()
    # Zero-filled buffer that receives this model's 0/1 votes.
    prediction_array2 = np.zeros([prediction_df2.shape[0],
                                  prediction_df2.shape[1] - 1])
    with torch.no_grad():
        for idx, sample in enumerate(test_data_loader):
            images = sample['image'].to(device)
            probs = model(images).cpu().detach().numpy()
            # Hard vote: threshold each model's probabilities at 0.5.
            preds = probs > 0.5

            # The loader is not shuffled, so batch idx covers the rows
            # starting at batch_size * idx.
            batch_index = batch_size * idx
            batch_end = batch_index + images.shape[0]
            prediction_array2[batch_index:batch_end, :] = preds.astype(int)

    # Add a trailing "model" axis so the per-model votes can be concatenated.
    predictions_list2.append(prediction_array2[..., np.newaxis])

# Concatenate along the model axis: (n_samples, n_labels, n_models).
predictions_array2 = np.concatenate(predictions_list2, axis=2)
np.save('predictions_array2', predictions_array2)

In [12]:
# Reload the per-model votes that the two inference passes wrote to disk.
predictions_array = np.load(file='predictions_array.npy')
predictions_array2 = np.load(file='predictions_array2.npy')

In [13]:
# Join both groups of votes along the model axis and average them, which
# gives the fraction of models that voted 1 for every (sample, label) pair.
predictions_array_final = np.concatenate((predictions_array, predictions_array2), axis=2)
predictions_mean = np.mean(predictions_array_final, axis=2)

# Majority vote: 1 when strictly more than half of the models voted 1,
# otherwise 0.
predictions_mean = (predictions_mean > 0.5).astype(int)
predictions_mean

array([[1, 0, 1, ..., 1, 0, 1],
       [0, 1, 0, ..., 0, 0, 0],
       [0, 0, 1, ..., 0, 0, 1],
       ...,
       [1, 0, 0, ..., 0, 0, 1],
       [0, 0, 1, ..., 0, 0, 1],
       [1, 1, 0, ..., 1, 1, 1]])

# 제출파일 생성

In [14]:
# Start from a fresh copy of the template, overwrite every column after the
# first with the ensemble votes, and write the submission file.
sample_submission = pd.read_csv(filepath_or_buffer="sample_submission.csv")
sample_submission.iloc[:, 1:] = predictions_mean
sample_submission.to_csv(path_or_buf="Lee_Woon_jae_save2.csv", index=False)
sample_submission

Unnamed: 0,index,a,b,c,d,e,f,g,h,i,...,q,r,s,t,u,v,w,x,y,z
0,50000,1,0,1,0,1,1,0,1,1,...,0,0,0,1,1,1,0,1,0,1
1,50001,0,1,0,0,1,0,1,0,1,...,0,1,0,0,1,1,0,0,0,0
2,50002,0,0,1,1,1,0,1,0,1,...,0,1,1,0,1,0,1,0,0,1
3,50003,1,1,0,0,0,1,1,1,0,...,0,0,1,1,0,0,0,1,0,1
4,50004,0,0,1,0,1,1,0,0,0,...,1,0,1,1,1,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,54995,0,1,1,1,0,0,0,1,0,...,0,1,0,1,0,1,1,0,1,0
4996,54996,1,1,1,0,0,0,0,0,1,...,0,0,1,0,0,1,0,1,0,1
4997,54997,1,0,0,1,0,1,0,0,0,...,0,0,0,1,1,1,1,0,0,1
4998,54998,0,0,1,0,0,0,1,0,1,...,1,1,0,1,0,0,1,0,0,1
