# **📄 Document type classification baseline code**
> 문서 타입 분류 대회에 오신 여러분 환영합니다! 🎉     
> 아래 baseline에서는 ResNet 모델을 로드하여 학습하고, 예측 파일을 생성하는 과정을 알아보겠습니다.

## Contents
- Prepare Environments
- Import Library & Define Functions
- Hyper-parameters
- Load Data
- Train Model
- Inference & Save File


## 1. Prepare Environments

* 데이터는 `/root/cv_data/` 경로에서 불러옵니다. (Google Colab 등에서 실행한다면 먼저 구글 드라이브를 마운트하고 데이터 경로를 맞춰 주세요.)
* 필요한 라이브러리를 설치합니다.

In [1]:
# Install timm, which a later cell uses to create the model.
!pip install timm

[0m

## 2. Import Library & Define Functions
* 학습 및 추론에 필요한 라이브러리를 로드합니다.
* 학습 및 추론에 필요한 함수와 클래스를 정의합니다.

In [2]:
import os
import time

import timm
import torch
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn
from albumentations.pytorch import ToTensorV2
from torch.optim import Adam
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from torchvision.datasets import ImageFolder
from torch.optim.lr_scheduler import CosineAnnealingLR
from PIL import Image
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score

In [3]:
# Dataset that pairs each image file listed in a CSV with its target.
class ImageDataset(Dataset):
    """Map-style dataset backed by a CSV of ``(file name, target)`` rows.

    Args:
        csv: Path of the CSV file. Every row must hold exactly two values:
            the image file name followed by its target.
        path: Directory that the file names are resolved against.
        transform: Optional callable invoked as ``transform(image=array)``
            that returns a mapping holding the result under ``'image'``.
    """

    def __init__(self, csv, path, transform=None):
        # Keep the rows as a plain array so __getitem__ can unpack them.
        self.df = pd.read_csv(csv).values
        self.path = path
        self.transform = transform

    def __len__(self):
        """Return the number of rows read from the CSV."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, target)`` for row ``idx``.

        The image is an RGB array, or whatever ``transform`` produces from
        it when a transform was supplied.
        """
        name, target = self.df[idx]
        # The context manager closes the file handle as soon as the pixels
        # are read. Forcing RGB gives grayscale, palette and RGBA files the
        # same three-channel layout as colour images, which per-channel
        # transforms with three mean/std values rely on.
        with Image.open(os.path.join(self.path, name)) as im:
            img = np.array(im.convert("RGB"))
        if self.transform:
            img = self.transform(image=img)['image']
        return img, target

In [4]:
# Runs a single training epoch and reports its metrics.
def train_one_epoch(loader, model, optimizer, loss_fn, device):
    """Train ``model`` for one pass over ``loader``.

    Args:
        loader: Iterable of ``(images, targets)`` batches that also
            supports ``len()``.
        model: Network to optimise; it is switched to training mode here.
        optimizer: Optimizer whose ``step()`` is called once per batch.
        loss_fn: Callable applied as ``loss_fn(predictions, targets)``.
        device: Device every batch is moved to before the forward pass.

    Returns:
        dict with ``train_loss`` (sum of the per-batch losses divided by
        the number of batches), ``train_acc`` (accuracy) and ``train_f1``
        (macro-averaged F1), all computed over the batches seen.
    """
    model.train()

    running_loss = 0
    all_preds, all_targets = [], []

    progress = tqdm(loader)
    for batch_images, batch_targets in progress:
        batch_images = batch_images.to(device)
        batch_targets = batch_targets.to(device)

        # Drop the gradients left over from the previous step.
        model.zero_grad(set_to_none=True)

        logits = model(batch_images)
        batch_loss = loss_fn(logits, batch_targets)
        batch_loss.backward()
        optimizer.step()

        # Read the scalar once and reuse it for the total and the bar.
        loss_value = batch_loss.item()
        running_loss += loss_value
        all_preds.extend(logits.argmax(dim=1).detach().cpu().numpy())
        all_targets.extend(batch_targets.detach().cpu().numpy())

        progress.set_description(f"Loss: {loss_value:.4f}")

    return {
        "train_loss": running_loss / len(loader),
        "train_acc": accuracy_score(all_targets, all_preds),
        "train_f1": f1_score(all_targets, all_preds, average='macro'),
    }

## 3. Hyper-parameters
* 학습 및 추론에 필요한 하이퍼파라미터들을 정의합니다.

In [5]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Data location.
data_path = 'datasets_fin/'

# Architecture name handed to timm.create_model; other timm names such as
# 'resnet50' or 'efficientnet_b0' can be substituted.
model_name = 'resnet34'

# Training settings.
img_size = 224  # images are resized to img_size x img_size
LR = 1e-3  # learning rate passed to the optimizer
EPOCHS = 20  # number of passes over the training set
BATCH_SIZE = 64  # samples per DataLoader batch
num_workers = 0  # DataLoader worker processes

## 4. Load Data
* 학습, 테스트 데이터셋과 로더를 정의합니다.

In [6]:
# Both pipelines run the same three steps: resize, normalise, convert to a
# tensor. Nothing else is added on top of them.
def _resize_normalize_to_tensor(size):
    """Return fresh resize -> normalize -> tensor steps for a square ``size``."""
    return [
        # Resize the image to size x size.
        A.Resize(height=size, width=size),
        # Normalise each channel with the given mean and std.
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        # Convert the array into a PyTorch tensor.
        ToTensorV2(),
    ]


# Transform applied to training images.
trn_transform = A.Compose(_resize_normalize_to_tensor(img_size))

# Transform applied to test images.
tst_transform = A.Compose(_resize_normalize_to_tensor(img_size))

In [9]:
# Build the training dataset from the labelled CSV and its image folder.
trn_dataset = ImageDataset(
    csv="/root/cv_data/train.csv",
    path="/root/cv_data/train/",
    transform=trn_transform,
)
# The test dataset reads its rows from the sample submission file; the
# inference loop discards the target values it yields.
tst_dataset = ImageDataset(
    csv="/root/cv_data/sample_submission.csv",
    path="/root/cv_data/test/",
    transform=tst_transform,
)
# Report how many samples each split holds.
print(len(trn_dataset), len(tst_dataset))

1570 3140


In [10]:
# Build the DataLoaders.
# Pinned (page-locked) host memory only speeds up host-to-GPU copies, so it
# is requested only when CUDA is actually available.
trn_loader = DataLoader(
    trn_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,  # new sample order every epoch
    num_workers=num_workers,
    pin_memory=torch.cuda.is_available(),
    drop_last=False  # keep the final, smaller batch
)
tst_loader = DataLoader(
    tst_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,  # keep CSV order so predictions line up with the IDs
    # Follow the shared worker setting instead of a separate hard-coded 0.
    num_workers=num_workers,
    pin_memory=torch.cuda.is_available()
)

## 5. Train Model
* 모델을 로드하고, 학습을 진행합니다.

In [11]:
# Create the pretrained network with a 17-class output, then move it to
# the selected device.
model = timm.create_model(model_name, pretrained=True, num_classes=17)
model = model.to(device)

# Cross-entropy loss, optimised with Adam over all model parameters.
loss_fn = nn.CrossEntropyLoss()
optimizer = Adam(params=model.parameters(), lr=LR)

In [12]:
# Train for EPOCHS epochs and print every metric after each one.
for epoch in range(EPOCHS):
    ret = train_one_epoch(
        loader=trn_loader,
        model=model,
        optimizer=optimizer,
        loss_fn=loss_fn,
        device=device,
    )
    ret['epoch'] = epoch

    # One "name: value" line per entry, each formatted to four decimals.
    log = "".join(f"{k}: {v:.4f}\n" for k, v in ret.items())
    print(log)

Loss: 1.2232: 100%|██████████| 25/25 [00:08<00:00,  3.09it/s]


train_loss: 2.0887
train_acc: 0.4790
train_f1: 0.4221
epoch: 0.0000



Loss: 0.4661: 100%|██████████| 25/25 [00:07<00:00,  3.28it/s]


train_loss: 0.6631
train_acc: 0.8236
train_f1: 0.7689
epoch: 1.0000



Loss: 0.1127: 100%|██████████| 25/25 [00:07<00:00,  3.24it/s]


train_loss: 0.2127
train_acc: 0.9363
train_f1: 0.9278
epoch: 2.0000



Loss: 0.1238: 100%|██████████| 25/25 [00:07<00:00,  3.25it/s]


train_loss: 0.1102
train_acc: 0.9624
train_f1: 0.9611
epoch: 3.0000



Loss: 0.0643: 100%|██████████| 25/25 [00:07<00:00,  3.26it/s]


train_loss: 0.0598
train_acc: 0.9847
train_f1: 0.9844
epoch: 4.0000



Loss: 0.0069: 100%|██████████| 25/25 [00:07<00:00,  3.27it/s]


train_loss: 0.0170
train_acc: 0.9981
train_f1: 0.9976
epoch: 5.0000



Loss: 0.0093: 100%|██████████| 25/25 [00:07<00:00,  3.25it/s]


train_loss: 0.0076
train_acc: 0.9987
train_f1: 0.9985
epoch: 6.0000



Loss: 0.0056: 100%|██████████| 25/25 [00:07<00:00,  3.25it/s]


train_loss: 0.0062
train_acc: 0.9994
train_f1: 0.9991
epoch: 7.0000



Loss: 0.0082: 100%|██████████| 25/25 [00:07<00:00,  3.29it/s]


train_loss: 0.0049
train_acc: 0.9994
train_f1: 0.9994
epoch: 8.0000



Loss: 0.0052: 100%|██████████| 25/25 [00:07<00:00,  3.29it/s]


train_loss: 0.0060
train_acc: 1.0000
train_f1: 1.0000
epoch: 9.0000



Loss: 0.0052: 100%|██████████| 25/25 [00:07<00:00,  3.25it/s]


train_loss: 0.0129
train_acc: 0.9962
train_f1: 0.9962
epoch: 10.0000



Loss: 0.0078: 100%|██████████| 25/25 [00:07<00:00,  3.24it/s]


train_loss: 0.0314
train_acc: 0.9911
train_f1: 0.9915
epoch: 11.0000



Loss: 0.1222: 100%|██████████| 25/25 [00:07<00:00,  3.25it/s]


train_loss: 0.0430
train_acc: 0.9873
train_f1: 0.9873
epoch: 12.0000



Loss: 0.0164: 100%|██████████| 25/25 [00:07<00:00,  3.26it/s]


train_loss: 0.0329
train_acc: 0.9904
train_f1: 0.9903
epoch: 13.0000



Loss: 0.0096: 100%|██████████| 25/25 [00:07<00:00,  3.26it/s]


train_loss: 0.0145
train_acc: 0.9987
train_f1: 0.9985
epoch: 14.0000



Loss: 0.0018: 100%|██████████| 25/25 [00:07<00:00,  3.25it/s]


train_loss: 0.0110
train_acc: 0.9968
train_f1: 0.9968
epoch: 15.0000



Loss: 0.0072: 100%|██████████| 25/25 [00:07<00:00,  3.27it/s]


train_loss: 0.0143
train_acc: 0.9968
train_f1: 0.9969
epoch: 16.0000



Loss: 0.0049: 100%|██████████| 25/25 [00:07<00:00,  3.23it/s]


train_loss: 0.0144
train_acc: 0.9975
train_f1: 0.9971
epoch: 17.0000



Loss: 0.1726: 100%|██████████| 25/25 [00:07<00:00,  3.22it/s]


train_loss: 0.0181
train_acc: 0.9968
train_f1: 0.9967
epoch: 18.0000



Loss: 0.0023: 100%|██████████| 25/25 [00:07<00:00,  3.24it/s]

train_loss: 0.0253
train_acc: 0.9924
train_f1: 0.9924
epoch: 19.0000






## 6. Inference & Save File
* 테스트 이미지에 대한 추론을 진행하고, 결과 파일을 저장합니다.

In [13]:
# Collect one predicted class index per test image, in loader order.
preds_list = []

model.eval()
# No gradients are needed for prediction, so tracking is disabled for the
# whole loop.
with torch.no_grad():
    for image, _ in tqdm(tst_loader):
        image = image.to(device)
        preds = model(image)
        # The index of the largest output per row is the predicted class.
        preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())

100%|██████████| 50/50 [00:13<00:00,  3.80it/s]


In [14]:
# Start from the test rows (ID, target), then replace the target column
# with the model's predictions.
pred_df = pd.DataFrame(
    data=tst_dataset.df,
    columns=['ID', 'target'],
).assign(target=preds_list)

In [16]:
# Reload the sample submission and verify that the prediction rows carry
# the same IDs in the same order before the file is written.
sample_submission_df = pd.read_csv("/root/cv_data/sample_submission.csv")
assert (sample_submission_df['ID'] == pred_df['ID']).all()

In [17]:
# Write the predictions to a CSV file, leaving out the DataFrame index.
pred_df.to_csv("code3_pred.csv", index=False)

In [18]:
# Display the first rows of the prediction table as a quick sanity check.
pred_df.head()

Unnamed: 0,ID,target
0,0008fdb22ddce0ce.jpg,2
1,00091bffdffd83de.jpg,0
2,00396fbc1f6cc21d.jpg,5
3,00471f8038d9c4b6.jpg,0
4,00901f504008d884.jpg,2
