In [1]:
#!python -m pip install --upgrade pip
#!pip install --upgrade opencv-python

Defaulting to user installation because normal site-packages is not writeable
Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
[0mDefaulting to user installation because normal site-packages is not writeable
Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
[0m

In [2]:
#ResNet 모델을 구현하고자 한다. 2024-10-10

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data
import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import copy
from collections import namedtuple
import os
import random
import time

# !pip install opencv-python==4.8.0.74
import cv2
from torch.utils.data import DataLoader, Dataset
from PIL import Image

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
class ImageTransform():    
    def __init__(self, resize, mean, std):
        self.data_transform = {
            'train': transforms.Compose([
                transforms.RandomResizedCrop(resize, scale=(0.5, 1.0)),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize(mean, std)
            ]),
            'val': transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(resize),
                transforms.ToTensor(),
                transforms.Normalize(mean, std)
            ])
        }
        
    def __call__(self, img, phase):
        return self.data_transform[phase](img)

In [4]:
size = 224 # 정사각형 이미지 기준 한 변의 길이
mean = (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)
batch_size = 32

In [5]:
train_csv_path = './data/train/csv'
val_csv_path = './data/val/csv'
# train 디렉토리의 모든 CSV 파일 읽기
train_csv_files = [os.path.join(train_csv_path, f) for f in os.listdir(train_csv_path) if f.endswith('.csv')]

# 각 CSV 파일을 DataFrame으로 읽고 리스트에 저장
train_dfs = [pd.read_csv(f) for f in train_csv_files]

# train 데이터프레임 합치기
train_df = pd.concat(train_dfs, ignore_index=True)

# val 디렉토리의 모든 CSV 파일 읽기
val_csv_files = [os.path.join(val_csv_path, f) for f in os.listdir(val_csv_path) if f.endswith('.csv')]

# 각 CSV 파일을 DataFrame으로 읽고 리스트에 저장
val_dfs = [pd.read_csv(f) for f in val_csv_files]

# val 데이터프레임 합치기
val_df = pd.concat(val_dfs, ignore_index=True)

In [6]:
train_df.head(1)

Unnamed: 0,inspRejectYn,labelingInfo,metaData.Raw data ID,metaData.copyrighter,metaData.resolution,metaData.date,metaData.breed,metaData.age,metaData.gender,metaData.region,...,metaData.species,metaData.lesions,metaData.diagnosis,metaData.Path,metaData.identifier,metaData.src_path,metaData.label_path,metaData.type,metaData.fileformat,metaData.합성유무
0,N,"[{'polygon': {'color': '#27b73c', 'location': ...",IMG_D_A2_004644.jpg,㈜미소정보기술,1920X1080,2021-11-09,말티즈,3,F,L,...,D,A2,,유증상,피부질환,/라벨링데이터/반려견/피부/일반카메라/유증상/A2_비듬_각질_상피성잔고리,/라벨링데이터/반려견/피부/일반카메라/유증상/A2_비듬_각질_상피성잔고리,json,jpg,


In [7]:
# train data 전처리

train_df['imgID'] = train_df['metaData.Raw data ID']
train_df['breed'] = train_df['metaData.breed']
train_df['age'] = train_df['metaData.age']
train_df['gender'] = train_df['metaData.gender']
train_df['species'] = train_df['metaData.species']
train_df['lesions'] = train_df['metaData.lesions']
train_df['symptoms'] = train_df['metaData.Path']
train_df['identifier'] = train_df['metaData.identifier']

train_df.drop(['metaData.species', 'metaData.gender', 'metaData.lesions', 'metaData.Path', 'metaData.identifier', 'inspRejectYn', 'labelingInfo', 'metaData.copyrighter', 'metaData.resolution', 'metaData.date','metaData.region', 'metaData.camera type', 'metaData.diagnosis', 'metaData.src_path', 'metaData.label_path', 'metaData.type', 'metaData.fileformat', 'metaData.합성유무', 'metaData.Raw data ID', 'metaData.breed', 'metaData.age', 'metaData.gender'], axis=1, inplace=True)
train_df.head(1)

Unnamed: 0,imgID,breed,age,gender,species,lesions,symptoms,identifier
0,IMG_D_A2_004644.jpg,말티즈,3,F,D,A2,유증상,피부질환


In [8]:
# val data 전처리

val_df = val_df.copy()  # 만약 기존에 val_df가 없다면 빈 데이터프레임을 사용

val_df['imgID'] = val_df['metaData.Raw data ID']
val_df['breed'] = val_df['metaData.breed']
val_df['age'] = val_df['metaData.age']
val_df['gender'] = val_df['metaData.gender']
val_df['species'] = val_df['metaData.species']
val_df['lesions'] = val_df['metaData.lesions']
val_df['symptoms'] = val_df['metaData.Path']
val_df['identifier'] = val_df['metaData.identifier']

val_df.drop(['metaData.species', 'metaData.gender', 'metaData.lesions', 'metaData.Path', 'metaData.identifier', 'inspRejectYn', 'labelingInfo', 'metaData.copyrighter', 'metaData.resolution', 'metaData.date','metaData.region', 'metaData.camera type', 'metaData.diagnosis', 'metaData.src_path', 'metaData.label_path', 'metaData.type', 'metaData.fileformat', 'metaData.합성유무', 'metaData.Raw data ID', 'metaData.breed', 'metaData.age', 'metaData.gender'], axis=1, inplace=True)

val_df.head(1)


Unnamed: 0,imgID,breed,age,gender,species,lesions,symptoms,identifier
0,IMG_D_A4_445781.jpg,말티즈,4,F,D,A4,유증상,피부질환


In [19]:
val_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 54233 entries, 0 to 54232
Data columns (total 9 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   imgID       54233 non-null  object
 1   breed       54233 non-null  object
 2   age         54233 non-null  int64 
 3   gender      54233 non-null  object
 4   species     54233 non-null  object
 5   lesions     54233 non-null  object
 6   symptoms    54233 non-null  object
 7   identifier  53387 non-null  object
 8   image_path  0 non-null      object
dtypes: int64(1), object(8)
memory usage: 3.7+ MB


In [23]:
def find_image_path(row, phase):
    base_path = f'./data/{phase}/image'
    species = str(row['species'])  # species를 문자열로 변환
    symptoms = row['symptoms']  # 유증상/무증상
    imgID = row['imgID']  # 이미지 파일 이름
    
    # species 폴더 정의
    if species == 'D':  # 반려견
        species_folders = ['반려견_01', '반려견_02'] if phase == 'train' else ['반려견']
    elif species == 'C':  # 반려묘
        species_folders = ['반려묘']
    else:
        return None  # 잘못된 species 값
    
    # 폴더들에서 이미지 경로 탐색
    for folder in species_folders:
        target_folder = os.path.join(base_path, folder, symptoms)
        
        # symptoms 폴더 내의 하위 폴더를 탐색
        if os.path.exists(target_folder):
            # symptoms 폴더 내의 하위 폴더들을 리스트업
            for sub_folder in os.listdir(target_folder):
                sub_folder_path = os.path.join(target_folder, sub_folder)
                if os.path.isdir(sub_folder_path):
                    img_path = os.path.join(sub_folder_path, imgID)
                    if os.path.exists(img_path):
                        return img_path
    return None  # 파일이 없으면 None 반환

# train과 val 데이터에 적용하는 함수
def apply_image_path(df, phase):
    df['image_path'] = df.apply(lambda row: find_image_path(row, phase), axis=1)
    print(f"{phase.capitalize()} None Count:", df['image_path'].isna().sum())
    print(f"{phase.capitalize()} Data")
    print(df[['species', 'imgID', 'image_path']].head())

# train과 val 데이터에 함수 적용
apply_image_path(train_df, phase='train')
apply_image_path(val_df, phase='val')

Train None Count: 5880
Train Data
  species                imgID  \
0       D  IMG_D_A2_004644.jpg   
1       D  IMG_D_A2_004645.jpg   
2       D  IMG_D_A2_004646.jpg   
3       D  IMG_D_A2_004647.jpg   
4       D  IMG_D_A2_004648.jpg   

                                          image_path  
0  ./data/train/image/반려견_01/유증상/A2_비듬_각질_상피성잔고리/...  
1  ./data/train/image/반려견_01/유증상/A2_비듬_각질_상피성잔고리/...  
2  ./data/train/image/반려견_01/유증상/A2_비듬_각질_상피성잔고리/...  
3  ./data/train/image/반려견_01/유증상/A2_비듬_각질_상피성잔고리/...  
4  ./data/train/image/반려견_01/유증상/A2_비듬_각질_상피성잔고리/...  
Val None Count: 1294
Val Data
  species                imgID  \
0       D  IMG_D_A4_445781.jpg   
1       D  IMG_D_A4_445782.jpg   
2       D  IMG_D_A4_445783.jpg   
3       D  IMG_D_A4_445785.jpg   
4       D  IMG_D_A4_445787.jpg   

                                          image_path  
0  ./data/val/image/반려견/유증상/A4_농포_여드름/IMG_D_A4_44...  
1  ./data/val/image/반려견/유증상/A4_농포_여드름/IMG_D_A4_44...  
2  ./data/val/image/반려견/유증상/A4_

In [24]:
train_df.to_csv("./data/train_data.csv", index=False)
val_df.to_csv("./data/val_data.csv", index=False)

In [11]:
class Dataset(Dataset):
    def __init__(self, dataframe, transform=None, phase=train):
        self.dataframe = dataframe
        self.transform = transform
        self.phase = phase
        
    def __len__(self):
        return len(self.dataframe)
    
    def __getitem__(self, idx):
        row = self.dataframe.iloc[idx]
        
        img_path = find_image_path(row, phase=self.phase)
        
        image = Image.open(img_path).convert('RGB')
        
        label = row['lesions']
        
        if self.transform:
            image = self.transform(image)
        
        return image, label

SyntaxError: incomplete input (2950442330.py, line 3)