In [1]:
# import list
import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from PIL import Image

In [2]:
# 데이터 전처리
def clean_and_validate_images(directory):
    """잘못된 이미지를 제거하는 함수"""
    for root, _, files in os.walk(directory):
        for file in files:
            file_path = os.path.join(root, file)
            try:
                with Image.open(file_path) as img:
                    img.verify()  # 유효성 검사
            except Exception as e:
                print(f"Removing invalid file: {file_path} ({e})")
                os.remove(file_path)

def convert_images_to_jpeg(directory):
    """이미지를 JPEG 형식으로 변환하고 원본 파일을 삭제"""
    for root, _, files in os.walk(directory):
        for file in files:
            file_path = os.path.join(root, file)
            try:
                with Image.open(file_path) as img:
                    img = img.convert("RGB")  # RGB 모드로 변환
                    new_file_path = os.path.splitext(file_path)[0] + ".jpg"
                    img.save(new_file_path, format="JPEG")
                    if file_path != new_file_path:
                        os.remove(file_path)
            except Exception as e:
                print(f"Skipping invalid file: {file_path} ({e})")

In [3]:
# 데이터 경로
train_dir = "img/train"  # 학습 데이터 디렉토리
val_dir = "img/val"  # 검증 데이터 디렉토리

clean_and_validate_images(train_dir)
clean_and_validate_images(val_dir)
convert_images_to_jpeg(train_dir)
convert_images_to_jpeg(val_dir)

In [4]:
# 데이터 로더 생성
batch_size = 32
img_size = (150, 150)

train_ds = tf.keras.utils.image_dataset_from_directory(
    train_dir,
    validation_split=0.2,
    subset="training",
    seed=123,
    image_size=img_size,
    batch_size=batch_size
)

val_ds = tf.keras.utils.image_dataset_from_directory(
    val_dir,
    validation_split=0.2,
    subset="validation",
    seed=123,
    image_size=img_size,
    batch_size=batch_size
)

# 데이터 증강
data_augmentation = tf.keras.Sequential([
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.1),
])

# Prefetch for performance
AUTOTUNE = tf.data.AUTOTUNE
train_ds = train_ds.prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.prefetch(buffer_size=AUTOTUNE)

Found 1501 files belonging to 48 classes.
Using 1201 files for training.
Found 718 files belonging to 48 classes.
Using 143 files for validation.


In [5]:
# 모델 정의
model = models.Sequential([
    layers.Input(shape=(150, 150, 3)),
    data_augmentation,
    layers.Rescaling(1./255),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(256, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(512, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(48, activation='softmax') 
])

In [None]:
# 모델 컴파일 및 학습
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

model.summary()

history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=60
)

Epoch 1/60
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m549s[0m 15s/step - accuracy: 0.0156 - loss: 5.3886 - val_accuracy: 0.0210 - val_loss: 3.8327
Epoch 2/60
[1m 3/38[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m10:24[0m 18s/step - accuracy: 0.0312 - loss: 3.8295

In [None]:
# 시각화
plt.plot(history.history['accuracy'], label='Accuracy')
plt.plot(history.history['val_accuracy'], label = 'Val Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.ylim([0, 1])
plt.legend(loc='lower right')
plt.show()

In [None]:
# 저장
model.save('model.h5') 