# In [None]:
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Flatten, Dense, GlobalAveragePooling2D
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import KFold
from sklearn.metrics import confusion_matrix, f1_score, precision_score, recall_score
from imblearn.over_sampling import SMOTE
from sklearn.utils.class_weight import compute_class_weight
import matplotlib.pyplot as plt
import seaborn as sns

# Dataset directories: one fail/pass folder pair for each of the train and test splits.
_dataset_root = '/content/drive/MyDrive/data/태림산업 이미지셋/Processed_Data_TUBE/iteration_1'
train_fail_dir = f'{_dataset_root}/train/fail'
train_pass_dir = f'{_dataset_root}/train/pass'
test_fail_dir = f'{_dataset_root}/test/fail'
test_pass_dir = f'{_dataset_root}/test/pass'

# Load images and attach labels
def load_images_from_folder(folder, label):
    """Load every ``.bmp`` image in *folder*, preprocess it, and pair it with *label*.

    Each readable image is resized to 128x128 and passed through
    ``remove_glare``, ``normalize_image`` and ``enhance_contrast`` (in that
    order) before being converted with Keras ``img_to_array``.

    Args:
        folder: Directory to scan (not recursive).
        label: Class label stored alongside every image from this folder.

    Returns:
        A list of ``(image_array, label)`` tuples. Files that OpenCV cannot
        decode are skipped with a printed warning.
    """
    images = []
    for filename in os.listdir(folder):
        if not filename.endswith('.bmp'):
            continue
        img_path = os.path.join(folder, filename)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread reports an unreadable or corrupt file by returning
            # None instead of raising; without this guard cv2.resize crashes.
            print(f"Warning: could not read image, skipping: {img_path}")
            continue
        img = cv2.resize(img, (128, 128))  # resize
        img = remove_glare(img)  # glare compensation
        # NOTE(review): enhance_contrast converts non-uint8 input back to
        # uint8 (x255), so the 0-1 scaling applied here does not survive into
        # the returned array.
        img = normalize_image(img)  # scale to 0-1
        img = enhance_contrast(img)  # contrast enhancement
        img_array = tf.keras.preprocessing.image.img_to_array(img)
        images.append((img_array, label))
    return images

# Glare compensation (CLAHE on the lightness channel)
def remove_glare(img):
    """Return *img* with CLAHE applied to its L channel in LAB space.

    The image is converted BGR -> LAB, the L channel is passed through CLAHE
    (clip limit 2.0, 8x8 tile grid), and the result is converted back to BGR.
    """
    lightness, chan_a, chan_b = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2LAB))
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    merged = cv2.merge((equalizer.apply(lightness), chan_a, chan_b))
    return cv2.cvtColor(merged, cv2.COLOR_LAB2BGR)

# Image normalization
def normalize_image(img):
    """Divide *img* by 255.0 so that 0-255 pixel values map onto 0-1."""
    max_pixel_value = 255.0
    scaled = img / max_pixel_value
    return scaled

# Contrast enhancement
def enhance_contrast(img):
    """Equalize the histogram of the lightness channel of a BGR image.

    Args:
        img: BGR image array. ``uint8`` input is used as is; any other dtype
            is treated as being scaled to 0-1 and is converted to ``uint8``
            first.

    Returns:
        The image after its LAB L channel has been passed through
        ``cv2.equalizeHist`` and converted back to BGR.
    """
    if img.dtype != np.uint8:
        # Round and clip rather than truncate: a bare astype(np.uint8) drops
        # the fractional part (so a value that lands just below an integer
        # after the x255 rescale loses a whole intensity level) and does not
        # saturate values that fall outside 0-255.
        img = np.clip(np.rint(img * 255), 0, 255).astype(np.uint8)
    lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
    l, a, b = cv2.split(lab)
    l = cv2.equalizeHist(l)
    lab = cv2.merge((l, a, b))
    equalized = cv2.cvtColor(lab, cv2.COLOR_LAB2BGR)
    return equalized

# Training-time image augmentation (no zoom; shift ranges set to 0.1)
augmentation_options = {
    'rotation_range': 20,          # random rotation
    'shear_range': 0.2,            # shear
    'horizontal_flip': True,       # left-right flip
    'width_shift_range': 0.1,      # horizontal shift range
    'height_shift_range': 0.1,     # vertical shift range
    'fill_mode': 'nearest',        # how newly exposed pixels are filled
}
datagen = ImageDataGenerator(**augmentation_options)

# Load and preprocess the training images (label 0 = fail, 1 = pass)
train_fail_images = load_images_from_folder(train_fail_dir, 0)
train_pass_images = load_images_from_folder(train_pass_dir, 1)

# Fail early with a clear message: an empty class would otherwise surface as
# a cryptic unpacking or SMOTE error further down.
if not train_fail_images or not train_pass_images:
    raise ValueError(
        "Training data must contain both fail and pass images "
        f"(fail={len(train_fail_images)}, pass={len(train_pass_images)})."
    )

# Combine both classes and shuffle. The shuffle is seeded like SMOTE and
# KFold so that the whole run is reproducible.
train_dataset = train_fail_images + train_pass_images
np.random.RandomState(42).shuffle(train_dataset)

# Split into image and label arrays
X_train, y_train = zip(*train_dataset)
X_train = np.array(X_train)
y_train = np.array(y_train)

# Oversample the minority class with SMOTE; it works on 2-D feature
# matrices, so every image is flattened first.
# NOTE(review): oversampling happens before the K-fold split, so synthetic
# samples interpolated from a validation image can end up in the training
# fold; fold metrics are therefore optimistic.
X_train_flattened = X_train.reshape(len(X_train), -1)
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train_flattened, y_train)

# Class weights must describe the data the model is actually trained on.
# Deriving them from the pre-SMOTE labels would compensate for the imbalance
# a second time on top of the oversampling.
class_labels = np.unique(y_train_resampled)
class_weights = compute_class_weight('balanced', classes=class_labels, y=y_train_resampled)
class_weight_dict = {int(c): float(w) for c, w in zip(class_labels, class_weights)}

# Restore the image shape and one-hot encode the labels (two classes)
X_train_resampled = X_train_resampled.reshape(-1, 128, 128, 3)
y_train_resampled = to_categorical(y_train_resampled, num_classes=2)

# ResNet-50 based classifier
def resnet50_model(input_shape):
    """Build a two-class classifier on top of an ImageNet-initialized ResNet50.

    The backbone is created without its top and is followed by global average
    pooling, a 1024-unit ReLU layer and a 2-unit softmax output layer.
    """
    backbone = ResNet50(weights='imagenet', include_top=False, input_shape=input_shape)
    pooled = GlobalAveragePooling2D()(backbone.output)
    hidden = Dense(1024, activation='relu')(pooled)
    class_probs = Dense(2, activation='softmax')(hidden)  # output layer: 2 classes
    return Model(inputs=backbone.input, outputs=class_probs)

# Build the model
model = resnet50_model(input_shape=(128, 128, 3))

# Freeze the pretrained backbone only. Freezing every layer would also lock
# the newly added Dense head, leaving the model with nothing to train. The
# Dense layers are the only layers added on top of the backbone that carry
# weights, so they are the ones kept trainable.
for layer in model.layers:
    layer.trainable = isinstance(layer, Dense)

# Compile
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

# Cross-validation setup
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Snapshot of the weights before any training, so every fold starts from the
# same state instead of continuing from the previous fold (which would let
# the model train on data it is later validated on).
initial_weights = model.get_weights()


def _reset_model():
    """Restore the pre-training weights and attach a fresh optimizer to ``model``."""
    model.set_weights(initial_weights)
    # Same settings as the initial compile; recompiling discards the
    # optimizer state accumulated during the previous training run.
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])


for fold, (train_index, val_index) in enumerate(kf.split(X_train_resampled), start=1):
    print(f"\nTraining fold {fold}...")
    _reset_model()

    X_train_fold, X_val_fold = X_train_resampled[train_index], X_train_resampled[val_index]
    y_train_fold, y_val_fold = y_train_resampled[train_index], y_train_resampled[val_index]

    # Augment the training split only; validation data is passed through unchanged
    train_generator = datagen.flow(X_train_fold, y_train_fold, batch_size=32)
    val_generator = ImageDataGenerator().flow(X_val_fold, y_val_fold, batch_size=32)

    # Train
    history = model.fit(train_generator, epochs=10, validation_data=val_generator, class_weight=class_weight_dict)

    # Evaluate
    val_loss, val_accuracy = model.evaluate(X_val_fold, y_val_fold)
    print(f"Fold {fold} Validation Accuracy: {val_accuracy * 100:.2f}%")

    # Predicted classes (plain argmax, no custom threshold) and metrics.
    # The binary metrics below treat label 1 (pass) as the positive class.
    y_val_pred = model.predict(X_val_fold)
    y_val_pred_classes = np.argmax(y_val_pred, axis=1)
    y_val_true = np.argmax(y_val_fold, axis=1)

    # Fix the label set so the matrix is always 2x2 and lines up with the
    # Fail/Pass tick labels even when a class is missing from this fold.
    cm = confusion_matrix(y_val_true, y_val_pred_classes, labels=[0, 1])
    f1 = f1_score(y_val_true, y_val_pred_classes, zero_division=0)
    precision = precision_score(y_val_true, y_val_pred_classes, zero_division=0)
    recall = recall_score(y_val_true, y_val_pred_classes, zero_division=0)

    print(f"F1 Score: {f1:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")

    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=['Fail', 'Pass'], yticklabels=['Fail', 'Pass'])
    plt.ylabel('Actual')
    plt.xlabel('Predicted')
    plt.title(f'Confusion Matrix - Fold {fold}')
    plt.show()

# Final model: cross-validation only estimates performance, so retrain from
# the initial weights on the full resampled training set. Without this step
# `model` would be whatever the last fold left behind.
print("\nTraining final model on the full training set...")
_reset_model()
final_history = model.fit(
    datagen.flow(X_train_resampled, y_train_resampled, batch_size=32),
    epochs=10,
    class_weight=class_weight_dict,
)

# Final model evaluation on the test split (label 0 = fail, 1 = pass)
test_fail_images = load_images_from_folder(test_fail_dir, 0)
test_pass_images = load_images_from_folder(test_pass_dir, 1)

test_dataset = test_fail_images + test_pass_images
if not test_dataset:
    # zip(*[]) below would otherwise fail with an unhelpful unpacking error.
    raise ValueError("No test images were loaded; check the test fail/pass directories.")
X_test, y_test = zip(*test_dataset)
X_test = np.array(X_test)
y_test = np.array(y_test)

# Pin the one-hot width to the model's two outputs: without num_classes the
# width is inferred from the largest label present, so a test set containing
# only label 0 would produce a single column.
y_test = to_categorical(y_test, num_classes=2)

test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")
