# 데이콘 Basic Summer
## 서울 랜드마크 이미지 분류 경진대회

## 성능 개선 - <span style="color:red">ResNet 전이학습</span>

# I. 데이터 살펴보기
## 1. 데이터 준비

In [1]:
import pandas as pd

# Load the training-set label table (columns: file_name, label) and preview the first rows
label_df = pd.read_csv('dataset/train.csv')
label_df.head()

Unnamed: 0,file_name,label
0,001.PNG,9
1,002.PNG,4
2,003.PNG,1
3,004.PNG,1
4,005.PNG,6


In [14]:
# 이미지 데이터

import os
from glob import glob

def get_train_data(data_dir):
    """Collect the training image paths and the training labels.

    Args:
        data_dir: Directory that holds the training ``*.PNG`` files.

    Returns:
        A ``(img_path_list, label_list)`` tuple. ``img_path_list`` contains
        forward-slash paths ordered by the integer value of each file stem.
        ``label_list`` contains the ``label`` column of the module-level
        ``label_df`` in row order.

    Raises:
        ValueError: If a matched file name does not start with an integer
            stem (for example ``abc.PNG``).
    """
    # Replace every backslash (not a capped number of them) so that deeply
    # nested Windows paths are fully normalised before the '/'-based sort key.
    img_path_list = [
        path.replace('\\', '/')
        for path in glob(os.path.join(data_dir, '*.PNG'))
    ]
    # Numeric sort on the file stem: '10.PNG' must come after '2.PNG'.
    img_path_list.sort(key=lambda path: int(path.rsplit('/', 1)[-1].split('.')[0]))

    # NOTE(review): labels are taken in CSV row order and are not matched
    # against the file names found on disk -- presumably the rows of
    # train.csv follow the same numeric file order; confirm.
    label_list = list(label_df['label'])

    return img_path_list, label_list

def get_test_data(data_dir):
    """Collect the test image paths.

    Args:
        data_dir: Directory that holds the test ``*.PNG`` files.

    Returns:
        A list of forward-slash paths ordered by the integer value of each
        file stem. The list is empty when nothing matches.

    Raises:
        ValueError: If a matched file name does not start with an integer
            stem (for example ``abc.PNG``).
    """
    # Replace every backslash (not a capped number of them) so that deeply
    # nested Windows paths are fully normalised before the '/'-based sort key.
    img_path_list = [
        path.replace('\\', '/')
        for path in glob(os.path.join(data_dir, '*.PNG'))
    ]
    # Numeric sort on the file stem: '10.PNG' must come after '2.PNG'.
    img_path_list.sort(key=lambda path: int(path.rsplit('/', 1)[-1].split('.')[0]))

    return img_path_list

In [15]:
# Print the label table's column dtypes and non-null counts
label_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 723 entries, 0 to 722
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   file_name  723 non-null    object
 1   label      723 non-null    int64 
dtypes: int64(1), object(1)
memory usage: 11.4+ KB


In [16]:
# Gather the numerically sorted image paths (and the training labels) from the dataset folders
all_img_path,all_label = get_train_data('dataset/train')
test_img_path = get_test_data('dataset/test')

In [17]:
# Preview the first five training labels
all_label[:5]

[9, 4, 1, 1, 6]

In [18]:
# Preview the first five training image paths
all_img_path[:5]

['dataset/train/001.PNG',
 'dataset/train/002.PNG',
 'dataset/train/003.PNG',
 'dataset/train/004.PNG',
 'dataset/train/005.PNG']

In [19]:
# Preview the first five test image paths
test_img_path[:5]

['dataset/test/001.PNG',
 'dataset/test/002.PNG',
 'dataset/test/003.PNG',
 'dataset/test/004.PNG',
 'dataset/test/005.PNG']

## 환경 설정

In [20]:
import torch
import torch.nn as nn

# Use CUDA when PyTorch reports it as available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [21]:
# Check for a GPU and report which device will be used.
if not torch.cuda.is_available():
    # No CUDA device: pin the computation to the CPU.
    device = torch.device("cpu")
    print('No GPU available, using the CPU instead.')
else:
    print('Device:', device)
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))

No GPU available, using the CPU instead.


In [22]:
# Hyperparameter settings
CFG = {
    # Image size
    'IMG_SIZE': 128,
    # Number of epochs
    'EPOCHS': 60,
    # Learning rate
    'LEARNING_RATE': 2e-2,
    # Batch size
    'BATCH_SIZE': 32,
    # Random seed
    'SEED': 41,
}

## 데이터 전처리

In [23]:
import torchvision.datasets as datasets
import torchvision.transforms as transforms

from torch.utils.data import DataLoader
from torch.utils.data import DataLoader, Dataset

import cv2

In [25]:
class CustomDataset(Dataset):
    """Map-style dataset that reads images from disk with OpenCV.

    Args:
        img_path_list: Sequence of image file paths.
        label_list: Sequence of labels indexed like ``img_path_list``; it is
            only read when ``train_mode`` is true.
        train_mode: When true, ``__getitem__`` returns ``(image, label)``;
            otherwise it returns only the image.
        transforms: Optional callable applied to every loaded image.
    """

    def __init__(self, img_path_list, label_list, train_mode=True, transforms=None):
        self.transforms = transforms
        self.train_mode = train_mode
        self.img_path_list = img_path_list
        self.label_list = label_list

    def __getitem__(self, index):
        """Return the sample stored at position ``index``.

        Returns:
            ``(image, label)`` in train mode, otherwise just ``image``.

        Raises:
            FileNotFoundError: If OpenCV cannot read the image file.
        """
        img_path = self.img_path_list[index]
        # cv2.imread reports an unreadable/missing file by returning None
        # instead of raising, so fail loudly here with the offending path.
        image = cv2.imread(img_path)
        if image is None:
            raise FileNotFoundError(f'Could not read image: {img_path}')

        # NOTE(review): OpenCV loads colour images in BGR channel order;
        # confirm that the transforms / pretrained weights expect that.
        if self.transforms is not None:
            # Must go through the instance attribute; the bare name
            # `self_transforms` does not exist and raised NameError.
            image = self.transforms(image)

        if self.train_mode:
            label = self.label_list[index]
            return image, label

        return image

    def __len__(self):
        """Return the number of image paths in the dataset."""
        return len(self.img_path_list)