In [22]:
import os
import glob
import time
import datetime
import sys
import io
import base64
from genericpath import isfile
import hashlib
import shutil
import itertools
import tqdm
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image, ImageDraw
from collections import Counter

import torch
from torch.optim import SGD, Adam, AdamW
from adamp import AdamP
import torch.nn as nn
import torch.nn.functional as F
from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau, CosineAnnealingLR
from torch.utils.data import Dataset, DataLoader

import torchvision.models as models
from torchvision import transforms, datasets, models
from torchvision.transforms import Resize, ToTensor, Normalize

import timm
from sklearn.model_selection import StratifiedKFold

In [3]:
from modules.dataset import *
from modules.transformation import *
from modules.imbalancedsampler import *
from modules.loss import *
from modules.config import Config as conf
from modules.config import HyperParameter as params
from modules.utils import *
from modules.train import *
from modules.ensemble import *

In [5]:
# Fix the experiment seed; the helper reports the chosen seed when it runs.
# NOTE(review): which RNGs (random / numpy / torch) are seeded is decided inside
# the project helper `seed_everything` - confirm there.
SEED = 2021
seed_everything(SEED)

이 실험은 seed 2021로 고정되었습니다.


## 0. Prepare Dataset

In [6]:
# Build the training dataset from train.csv located under conf.train_dir.
# The path is assembled with os.path.join, like every other path in this
# notebook, instead of manual string concatenation.
# NOTE(review): the positional arguments `False` and 'Label' are passed through
# unchanged; their meaning is defined by PreprocessedDataset (presumably in
# modules.dataset) - confirm there.
dataset = PreprocessedDataset(os.path.join(conf.train_dir, 'train.csv'), False, 'Label')

2700it [00:05, 478.39it/s]


In [7]:
# Preview the first 10 rows of the training table held by the dataset object.
dataset.train_df.head(10)

Unnamed: 0,Mask,Gender,Age,Path,Label,Fold
0,Incorrect,Female,>=30 and <60,/opt/ml/input/data/train/images/000001_female_...,10,2
1,Wear,Female,>=30 and <60,/opt/ml/input/data/train/images/000001_female_...,4,2
2,Wear,Female,>=30 and <60,/opt/ml/input/data/train/images/000001_female_...,4,2
3,Wear,Female,>=30 and <60,/opt/ml/input/data/train/images/000001_female_...,4,2
4,Wear,Female,>=30 and <60,/opt/ml/input/data/train/images/000001_female_...,4,2
5,Wear,Female,>=30 and <60,/opt/ml/input/data/train/images/000001_female_...,4,2
6,Not Wear,Female,>=30 and <60,/opt/ml/input/data/train/images/000001_female_...,16,2
7,Incorrect,Female,>=30 and <60,/opt/ml/input/data/train/images/000002_female_...,10,1
8,Wear,Female,>=30 and <60,/opt/ml/input/data/train/images/000002_female_...,4,1
9,Wear,Female,>=30 and <60,/opt/ml/input/data/train/images/000002_female_...,4,1


## 1. Soft Voting

In [9]:
# Load the per-model prediction tables used for soft voting. All three files
# live in conf.ensemble_dir; the names a / b / c are kept because the soft
# voting cell below refers to them.
a, b, c = (
    pd.read_csv(os.path.join(conf.ensemble_dir, csv_name))
    for csv_name in (
        'resnet50_cv_best.csv',
        'resnet50_no_cv_best.csv',
        'resnext50.csv',
    )
)

In [16]:
# Soft voting: for every test image, add up the per-class score vectors of the
# three models (a, b, c) and take the arg-max class as the ensemble prediction.
sv_submission = pd.read_csv(os.path.join(conf.test_dir, 'info.csv'))
image_dir = os.path.join(conf.test_dir, 'images')

# Full paths of the test images, in the row order of info.csv. Only their count
# is used below; rows of a / b / c are matched to info.csv purely by position.
image_paths = [os.path.join(image_dir, img_id) for img_id in sv_submission.ImageID]


def _parse_logits(text):
    """Parse a stringified 1-D array such as "[0.1  0.2\n 0.3]" into a float array.

    The first and last characters (assumed to be the enclosing brackets) are
    dropped. ``str.split()`` with no argument splits on any run of whitespace,
    so padded spaces and embedded line breaks never produce empty tokens and
    never fuse two numbers together.
    """
    return np.array([float(token) for token in text[1:-1].split()], dtype=float)


# The three tables are paired row by row, so a length mismatch would silently
# misalign the ensemble - fail loudly instead.
for frame_name, frame in (('a', a), ('b', b), ('c', c)):
    if len(frame) != len(image_paths):
        raise ValueError(
            f"prediction table '{frame_name}' has {len(frame)} rows, "
            f"expected {len(image_paths)} (rows of info.csv)"
        )

softmax = nn.Softmax(dim=-1)  # built once instead of once per row
all_predictions = []

for idx in range(len(image_paths)):
    logit_a = _parse_logits(a['logit'][idx])
    # Only b is passed through softmax before summing; a and c are added as
    # stored. NOTE(review): presumably a and c already hold probabilities while
    # b holds raw logits - confirm against the code that wrote these files.
    logit_b = softmax(torch.from_numpy(_parse_logits(b['logit'][idx]))).numpy()
    logit_c = _parse_logits(c['logit'][idx])
    total_logits = logit_a + logit_b + logit_c
    pred = torch.argmax(torch.from_numpy(total_logits), dim=-1)
    all_predictions.append(int(pred))

sv_submission['ans'] = all_predictions
# Saving is disabled here. The hard voting cell reads '0408_ensemble.csv' from
# conf.submission_dir, so that file must already exist from an earlier run.
# sv_submission.to_csv(os.path.join(conf.submission_dir, '0408_ensemble.csv'), index=False)

In [17]:
# Show the first rows of the soft-voting submission (ImageID plus predicted class).
sv_submission.head()

Unnamed: 0,ImageID,ans
0,cbc5c6e168e63498590db46022617123f1fe1268.jpg,14
1,0e72482bf56b3581c081f7da2a6180b8792c7089.jpg,2
2,b549040c49190cedc41327748aeb197c1670f14d.jpg,13
3,4f9cb2a045c6d5b9e50ad3459ea7b791eb6e18bc.jpg,13
4,248428d9a4a5b6229a7081c32851b90cb8d38d0c.jpg,12


## 2. Hard Voting

In [18]:
# Hard voting over the nine submission files that scored best on the leaderboard.
vote_files = (
    "0407_resnet50_correct_cv.csv",
    "0406_resnet50_no_crop_no_crop.csv",
    "0407_resnext50_madgrad_cv.csv",
    "0408_jh.csv",
    "0405_resnet_king_sub.csv",
    "0408_resnext50.csv",
    "0406_efficientnet_64.csv",
    "0408_last_soft_voting.csv",
    "0408_ensemble.csv",
)
(output1, output2, output3, output4, output5,
 output6, output7, output8, output9) = (
    pd.read_csv(os.path.join(conf.submission_dir, file_name))
    for file_name in vote_files
)

hv_submission = pd.read_csv(os.path.join(conf.test_dir, 'info.csv'))

# Votes are collected in file order for each row. Counter.most_common keeps
# equally frequent labels in first-seen order, so ties go to the label that
# appears earliest in that order.
voters = (output1, output2, output3, output4, output5,
          output6, output7, output8, output9)
all_predictions = [
    Counter(frame["ans"][row] for frame in voters).most_common(1)[0][0]
    for row in range(len(output1))
]

hv_submission["ans"] = all_predictions
# Saving is disabled. To write the submission, use conf.submission_dir:
# hv_submission.to_csv(os.path.join(conf.submission_dir, '0408_hard_voting_9.csv'), index=False)

In [20]:
# Show the first rows of the hard-voting submission (ImageID plus majority class).
hv_submission.head()

Unnamed: 0,ImageID,ans
0,cbc5c6e168e63498590db46022617123f1fe1268.jpg,14
1,0e72482bf56b3581c081f7da2a6180b8792c7089.jpg,2
2,b549040c49190cedc41327748aeb197c1670f14d.jpg,14
3,4f9cb2a045c6d5b9e50ad3459ea7b791eb6e18bc.jpg,13
4,248428d9a4a5b6229a7081c32851b90cb8d38d0c.jpg,12
