In [None]:
import os
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

# Dataset location (one sub-directory per class, images directly inside).
dataset_dir = "/home/yycho/deepMUC/202230727_Image/HILOW"
dataset_dir = Path(dataset_dir)

# Collect the image file paths (<class_dir>/<image>.jpg).
# Path.glob yields entries in an arbitrary, filesystem-dependent order, so the
# list is sorted: otherwise the seeded shuffles/splits applied later would
# still produce different results from run to run or machine to machine.
filepaths = sorted(dataset_dir.glob(r'*/*.jpg'))
print(f"총 {len(filepaths)}개의 이미지 파일을 찾았습니다.")

In [None]:
# Build the image DataFrame
def create_image_df(filepaths):
    """Return a shuffled DataFrame with one row per image.

    Args:
        filepaths: Sequence of ``pathlib.Path`` objects pointing at images.

    Returns:
        DataFrame with columns 'Filepath' (the paths as given) and 'Label'
        (name of each path's parent directory), rows shuffled with
        ``random_state=42`` and the index reset to 0..n-1.
    """
    columns = {
        'Filepath': filepaths,
        'Label': [path.parent.name for path in filepaths],
    }
    # frac=1 samples every row, i.e. a seeded full shuffle.
    shuffled = pd.DataFrame(columns).sample(frac=1, random_state=42)
    return shuffled.reset_index(drop=True)

# Build the shuffled (Filepath, Label) table and print its first rows.
image_df = create_image_df(filepaths)
print(image_df.head())

# Label encoding: map each class name in 'Label' to an integer code.
label_encoder_img = LabelEncoder()
image_df['Encoded_Label'] = label_encoder_img.fit_transform(image_df['Label'])  # 'High' => 0, 'Low' => 1 (assumes the class folders are named 'High' and 'Low' — TODO confirm)

# One-hot encoding: list() splits the encoded array into one vector per row
# so that each DataFrame cell holds that row's one-hot vector.
image_df['Categorical_Label'] = list(to_categorical(image_df['Encoded_Label']))


In [None]:

def proc_img(filepath):
    """Build a shuffled DataFrame of image paths and their class labels.

    The label of an image is the name of the directory that directly
    contains it.

    Args:
        filepath: Iterable of image paths (``str`` or ``pathlib.Path``).

    Returns:
        pandas.DataFrame with columns 'Filepath' (paths as strings) and
        'Label', rows shuffled with ``random_state=0`` and the index reset
        to 0..n-1.
    """
    paths = list(filepath)

    # Derive the label with pathlib rather than splitting on "/", so it is
    # extracted correctly whatever path separator the platform uses.
    labels = pd.Series([Path(p).parent.name for p in paths], name='Label')
    filepath = pd.Series(paths, name='Filepath').astype(str)

    # Put path and label side by side.
    df = pd.concat([filepath, labels], axis=1)

    # Seeded full shuffle, then renumber the rows.
    df = df.sample(frac=1, random_state=0).reset_index(drop=True)

    return df
# Build the path/label table with proc_img and preview its first two rows.
df = proc_img(filepaths)
df.head(2)


In [None]:
from sklearn.model_selection import train_test_split

# Inputs are the file paths as strings; targets are the per-row one-hot
# vectors stacked back into a single array.
X_image_paths = image_df['Filepath'].astype(str).values
y_image = np.stack(image_df['Categorical_Label'].values)

# Split into training and test sets (20% test, fixed seed, stratified on the labels).
X_image_train_paths, X_image_test_paths, y_image_train, y_image_test = train_test_split(
    X_image_paths, y_image, test_size=0.2, random_state=42, stratify=y_image
)

# Report the size of each split.
print(f"학습용 이미지 데이터 수: {len(X_image_train_paths)}")
print(f"테스트용 이미지 데이터 수: {len(X_image_test_paths)}")


from tensorflow.keras.preprocessing.image import ImageDataGenerator
import tensorflow as tf

# ImageDataGenerator configuration for data augmentation and preprocessing.
# Training: DenseNet input preprocessing plus random rotation (range 20),
# zoom (range 0.15) and horizontal flips.
# NOTE(review): the random settings presumably only take effect where
# random_transform()/flow*() is used; standardize() alone looks like it applies
# just the preprocessing function — confirm against the Keras documentation.
train_datagen = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.densenet.preprocess_input,
    rotation_range=20,
    zoom_range=0.15,
    horizontal_flip=True
)

# Test: the same DenseNet preprocessing, with no augmentation options set.
test_datagen = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.densenet.preprocess_input
)


In [None]:
# Batch generator definition
def create_generator(filepaths, labels, datagen, batch_size, target_size=(299, 299), shuffle=True):
    """Yield ``(images, labels)`` batches forever, loading images lazily from disk.

    Every pass visits each sample exactly once; the last batch of a pass may
    hold fewer than ``batch_size`` samples, so one pass is
    ``ceil(len(filepaths) / batch_size)`` batches long.

    Args:
        filepaths: Array-like of image paths.
        labels: Array-like of targets, aligned with ``filepaths``.
        datagen: Object providing ``random_transform(image)`` and
            ``standardize(batch)`` (e.g. a Keras ``ImageDataGenerator``).
        batch_size: Maximum number of samples per batch (must be > 0).
        target_size: ``(height, width)`` each image is resized to on load.
        shuffle: If True, draw a new random order at the start of every pass.

    Yields:
        Tuple ``(batch_images, batch_labels)``.

    Raises:
        ValueError: On the first ``next()`` if there are no samples, if
            ``labels`` and ``filepaths`` differ in length, or if
            ``batch_size`` is not positive. Without these checks the loop
            below would spin forever without yielding anything.
    """
    filepaths = np.asarray(filepaths)
    labels = np.asarray(labels)
    num_samples = len(filepaths)

    if num_samples == 0:
        raise ValueError("create_generator received no file paths")
    if len(labels) != num_samples:
        raise ValueError(
            f"filepaths and labels differ in length: {num_samples} vs {len(labels)}"
        )
    if batch_size <= 0:
        raise ValueError(f"batch_size must be positive, got {batch_size}")

    while True:
        indices = np.arange(num_samples)
        if shuffle:
            np.random.shuffle(indices)
        for start in range(0, num_samples, batch_size):
            # Slicing past the end is safe and yields the final partial batch.
            batch_indices = indices[start:start + batch_size]
            batch_images = []
            for filepath in filepaths[batch_indices]:
                img = tf.keras.preprocessing.image.load_img(filepath, target_size=target_size)
                img_array = tf.keras.preprocessing.image.img_to_array(img)
                # Apply the generator's configured random augmentation
                # (rotation/zoom/flip ...). standardize() alone never does
                # this, so the augmentation settings used to be silently
                # ignored. With no augmentation configured this is a no-op.
                img_array = datagen.random_transform(img_array)
                batch_images.append(img_array)
            # Preprocessing / normalisation is applied after augmentation.
            batch_images = datagen.standardize(np.array(batch_images))
            yield batch_images, labels[batch_indices]

from tensorflow.keras.applications import DenseNet201
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping

# Model definition: DenseNet201 backbone with ImageNet weights and no
# classification top, taking 299x299 3-channel inputs.
base_model = DenseNet201(weights='imagenet', include_top=False, input_shape=(299, 299, 3))
# Classification head: global average pooling -> Dense(256, relu) -> Dropout(0.5) -> softmax.
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)
predictions = Dense(2, activation='softmax')(x)  # adjust to the number of classes
image_model = Model(inputs=base_model.input, outputs=predictions)

# Compile the model: Adam with learning rate 1e-4, categorical cross-entropy
# (matches the one-hot targets), accuracy as the reported metric.
image_model.compile(optimizer=Adam(1e-4), loss='categorical_crossentropy', metrics=['accuracy'])

# Callbacks
# Save the model to 'image_model.h5' only when val_accuracy improves.
checkpoint = ModelCheckpoint('image_model.h5', monitor='val_accuracy', save_best_only=True, verbose=1)
# Multiply the learning rate by 0.1 after 5 epochs without val_loss improvement (floor 1e-6).
reduce_lr = ReduceLROnPlateau(monitor='val_loss', patience=5, factor=0.1, min_lr=1e-6, verbose=1)
# Stop after 10 epochs without val_loss improvement and restore the best weights.
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True, verbose=1)

# Batch size and number of steps per pass over each split.
batch_size = 32
# Round up instead of flooring: create_generator also yields the final partial
# batch, so one full pass takes ceil(n / batch_size) steps. With floor division
# the partial batch was never accounted for (passes drifted out of step with
# epochs, and validation saw a shifting subset of the test set), and the step
# count became 0 whenever a split held fewer than batch_size samples.
train_steps = int(np.ceil(len(X_image_train_paths) / batch_size))
test_steps = int(np.ceil(len(X_image_test_paths) / batch_size))

# Create the (endless) batch generators for the training and test splits.
train_generator = create_generator(X_image_train_paths, y_image_train, train_datagen, batch_size)
test_generator = create_generator(X_image_test_paths, y_image_test, test_datagen, batch_size)

# Train the model for up to 50 epochs; the generators never terminate, so the
# number of batches per epoch is given explicitly via the *_steps arguments.
# NOTE(review): the test split is used as validation data here, so the
# checkpoint / early-stopping choices are made on the same data that is later
# reported as the test result — confirm this is intended.
history_img = image_model.fit(
    train_generator,
    steps_per_epoch=train_steps,
    validation_data=test_generator,
    validation_steps=test_steps,
    epochs=50,
    callbacks=[checkpoint, reduce_lr, early_stop]
)




In [None]:

# Load the best model, i.e. the checkpoint that ModelCheckpoint writes during
# training. (This used to load 'na_HI_LOW.h5', a file this notebook never
# creates, so a fresh run either failed or evaluated an unrelated model.)
best_image_model = tf.keras.models.load_model('image_model.h5')

# Evaluate on the test data.
# Use a fresh generator and a rounded-up step count so that evaluation covers
# every test image exactly once: the generator used for validation during
# training is already part-way through a reshuffled pass, and a floored step
# count would skip the final partial batch.
eval_steps = int(np.ceil(len(X_image_test_paths) / batch_size))
eval_generator = create_generator(X_image_test_paths, y_image_test, test_datagen, batch_size)
test_loss, test_accuracy = best_image_model.evaluate(eval_generator, steps=eval_steps)
print(f"테스트 손실: {test_loss}")
print(f"테스트 정확도: {test_accuracy}")
