## 1. 사용할 패키지 불러오기

In [None]:
import os
import numpy as np
import pandas as pd
from data_gen.data_gen import dataset_generator
from model.loss_factory import LossFactory
from model.model_factory import ModelFactory
from torch.optim import Adam
from torch.nn import CrossEntropyLoss
from model.trainer_factory import SupervisedTraining
from inference.tester import Tester
import torch

## 2. Arguments 정의

#### (1) 경로 Parameter

- train_data_dir: 학습 데이터 경로
- test_data_dir: 테스트 데이터 경로
- save_model_path: 학습된 모델 저장 경로
- inference_result_path: 테스트 추론 결과(CSV 리포트, confusion matrix) 저장 경로

#### (2) 학습 Parameter
- model_structure: 사용할 모델 (vgg16, resnet50, mobilenet_v2)
- epochs: 학습 Epoch
- batch_size: 배치 사이즈
- learning_rate: 학습률
- weight_decay: Optimizer(Adam)에 적용할 weight decay (L2 정규화 계수)

In [None]:
# --- Paths ------------------------------------------------------------------
# Input folders; prepare_data() below reads one sub-directory per class.
train_data_dir = "./data/example/cat_dog_classify/train"
test_data_dir = "./data/example/cat_dog_classify/test"
# Output folders for the trained weights and for the inference reports.
save_model_path = "./experiment/saved_model/"
inference_result_path = "./experiment/result/"

# --- Training hyper-parameters ----------------------------------------------
model_structure = "resnet50"
epochs = 100
batch_size = 16
learning_rate = 1e-3
weight_decay = 1e-6

# Create both output folders up front so later cells can write into them.
for _output_dir in (save_model_path, inference_result_path):
    os.makedirs(_output_dir, exist_ok=True)

## 3. Dataset 구성

### (1) File 리스트 불러오기

In [None]:
def prepare_data(data_path):
    """
    Collect file paths and one-hot targets from a class-per-folder directory.

    Every sub-directory of ``data_path`` is treated as one class, and every
    entry inside it as one sample of that class. Entries of ``data_path``
    that are not directories (e.g. stray metadata files) are ignored.

    Parameters
    ----------
    data_path : str
        Root directory that contains one sub-directory per class.

    Returns
    --------
    files : list of str
        Sample paths, ordered by class name and then by file name.
    uni_label : numpy.ndarray
        Sorted unique class names.
    y : numpy.ndarray
        One-hot targets of shape ``(len(files), len(uni_label))``; row ``i``
        belongs to ``files[i]`` and column ``j`` to ``uni_label[j]``.
    """

    files = []
    labels = []
    # Sorting makes the sample order reproducible; os.listdir order is arbitrary.
    for class_name in sorted(os.listdir(data_path)):
        class_dir = os.path.join(data_path, class_name)
        if not os.path.isdir(class_dir):
            continue
        for file_name in sorted(os.listdir(class_dir)):
            files.append(os.path.join(class_dir, file_name))
            # Record the label directly instead of parsing it back out of the
            # joined path, which depends on the platform's path separator.
            labels.append(class_name)

    # label_index[i] is the position of labels[i] inside uni_label.
    uni_label, label_index = np.unique(labels, return_inverse=True)
    print("There are {} classes: {}".format(len(uni_label), uni_label))
    # Row-select from the identity matrix to build all one-hot rows at once.
    y = np.eye(len(uni_label))[label_index.reshape(-1)]

    return files, uni_label, y


In [None]:
# Collect file paths, class names and one-hot targets for the training split.
train_file_list, uni_label, train_target_list = prepare_data(data_path=train_data_dir)
# The test split's class names go into their own variable: `uni_label` is used
# later to size the model output, so it must keep describing the training classes.
test_file_list, test_uni_label, test_target_list = prepare_data(data_path=test_data_dir)

# Each split derives its one-hot column order from its own folder names, so
# differing class sets would silently misalign train and test targets.
if list(uni_label) != list(test_uni_label):
    raise ValueError(
        "Train and test class folders differ: train={} test={}".format(
            list(uni_label), list(test_uni_label)
        )
    )

### (2) DataLoader 생성

In [None]:
# Training split: file paths plus the one-hot targets built by prepare_data().
train_dataset_generator = dataset_generator(
    train_file_list,
    train_target_list,
    batch_size=batch_size,
    phase='train',
    train_valid_split=False,
)
# The returned object is indexed by phase name ('train') in the training cell.
train_dataloader = train_dataset_generator.dataloader()

# Test split, built the same way with phase='test'.
test_dataset_generator = dataset_generator(
    test_file_list,
    test_target_list,
    batch_size=batch_size,
    phase='test',
    train_valid_split=False,
)
# Indexed by phase name ('test') in the training and inference cells.
test_dataloader = test_dataset_generator.dataloader()

## 4. Model 생성

In [None]:
# Build the network chosen by `model_structure`, requesting pretrained weights
# and one output per class found by prepare_data().
vision_model = ModelFactory(
    model_name=model_structure,
    pretrained=True,
    class_num=len(uni_label),
)

In [None]:
# Adam over all model parameters, configured from the notebook arguments.
optimizer = Adam(
    params=vision_model.parameters(),
    lr=learning_rate,
    weight_decay=weight_decay,
)

# Plain torch CrossEntropyLoss with default settings (LossFactory is not used here).
# NOTE(review): prepare_data() yields one-hot targets — confirm the data
# pipeline hands the loss a target format it accepts.
loss_fn = CrossEntropyLoss()

## 5. 학습

In [None]:
# Configure the trainer with the epoch count and the output folder for models.
# (The inference cell later loads 'Best_model.pth' from this same folder.)
trainer = SupervisedTraining(epoch=epochs,
                             result_model_path=save_model_path)

# Train. GPU use is requested only when CUDA is actually available, so the
# notebook also runs on CPU-only machines instead of hard-coding gpu=True.
# NOTE(review): the test loader is passed as the second data loader; if the
# trainer uses it for validation / best-model selection, the test metrics
# reported afterwards are optimistic — confirm against SupervisedTraining.
trainer.train(vision_model, train_dataloader['train'], test_dataloader['test'], loss_fn, optimizer, gpu=torch.cuda.is_available())

## 6. Test Inference

### (1) Best model 불러오기

In [None]:
# Checkpoint file expected inside the model output folder.
load_model_path = os.path.join(save_model_path, 'Best_model.pth')
# map_location='cpu' lets a checkpoint saved from a CUDA run be read on a
# machine without a GPU. load_state_dict copies the values into the model's
# existing parameters, so they stay on whatever device the model already uses.
vision_model.load_state_dict(torch.load(load_model_path, map_location='cpu'))

### (2) Inference 결과 저장하기

In [None]:
# Run the (re-loaded) model over the test loader.
tester = Tester(model = vision_model, test_data_gen = test_dataloader['test'])
# Request GPU inference only when CUDA is available instead of hard-coding it,
# so this cell also works on CPU-only machines.
prediction = tester.inference(gpu = torch.cuda.is_available())
# Write the per-file CSV report and the confusion matrix plot into the
# inference result folder.
tester.make_csv_report(test_file_list, prediction, test_target_list, inference_result_path)
tester.plot_confusion_matrix(test_target_list, prediction, inference_result_path)