# In [None]:
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import KFold
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Dataset locations
train_dir = '/content/drive/MyDrive/data/태림산업 이미지셋/Processed_Data_SHAFT/iteration_1/train'
test_dir = '/content/drive/MyDrive/data/태림산업 이미지셋/Processed_Data_SHAFT/iteration_1/Test'

# Images are resized to the VGG-16 input resolution; batches hold 16 images
img_height = img_width = 224
batch_size = 16

# Stream training images from the `pass` and `augmentationfail` sub-folders of
# `train` only. The explicit `classes` list fixes the label mapping by list
# order: 'pass' -> 0, 'augmentationfail' -> 1. Pixel values are multiplied by
# 1/255 on load.
train_datagen = ImageDataGenerator(rescale=1.0 / 255)
train_generator = train_datagen.flow_from_directory(
    directory=train_dir,
    classes=['pass', 'augmentationfail'],  # the 'fail' folder is left out
    class_mode='binary',
    target_size=(img_height, img_width),
    batch_size=batch_size,
    shuffle=True,
)

# Load VGG-16 and stack a custom binary classifier on top of it
def create_model(freeze_base=False, learning_rate=0.001):
    """Build and compile a VGG-16 based binary classifier.

    The network is the ImageNet-pretrained VGG-16 convolutional base (without
    its original top layers) followed by Flatten -> Dense(256, relu) ->
    Dropout(0.5) -> Dense(1, sigmoid). It is compiled with the Adam optimizer,
    binary cross-entropy loss and the accuracy metric.

    Args:
        freeze_base: When True, the VGG-16 base is marked non-trainable so
            only the new classifier head is updated during training. The
            default (False) keeps every layer trainable, which is the
            behaviour this function has always had.
        learning_rate: Learning rate passed to Adam. The default, 0.001, is
            Adam's own default value.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    # Input size comes from the module-level img_height / img_width settings.
    base_model = VGG16(weights='imagenet', include_top=False, input_shape=(img_height, img_width, 3))
    if freeze_base:
        # Must be set before compile() for the change to take effect.
        base_model.trainable = False

    model = Sequential([
        base_model,
        Flatten(),
        Dense(256, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid')  # single-unit output for binary classification
    ])
    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return model

# K-Fold cross-validation.
#
# flow_from_directory always serves every file under the directory, so it
# cannot honour the index split produced by KFold. The file list discovered by
# `train_generator` is therefore put into a DataFrame, and each fold's
# train/validation rows are served with flow_from_dataframe. This keeps a
# fold's validation images out of that fold's training data.
kfold = KFold(n_splits=5, shuffle=True, random_state=42)
fold_no = 1
accuracies = []
epochs = 10

# Same class list as the directory generator ('fail' folder excluded); the
# list order fixes the label mapping: 'pass' -> 0, 'augmentationfail' -> 1.
class_names = ['pass', 'augmentationfail']
index_to_label = {index: label for label, index in train_generator.class_indices.items()}

# `filenames` and `classes` are parallel, directory-ordered listings of the
# discovered images; they are not affected by the generator's shuffle flag.
samples = pd.DataFrame({
    'filename': [os.path.join(train_dir, name) for name in train_generator.filenames],
    'class': [index_to_label[int(index)] for index in train_generator.classes],
})

# Settings shared by the per-fold training and validation generators.
flow_kwargs = dict(
    x_col='filename',
    y_col='class',
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='binary',
    classes=class_names,
)

for train_idx, val_idx in kfold.split(samples):
    print(f"Training fold {fold_no}...")

    # Build generators restricted to this fold's rows.
    train_gen = train_datagen.flow_from_dataframe(
        samples.iloc[train_idx], shuffle=True, **flow_kwargs
    )
    val_gen = train_datagen.flow_from_dataframe(
        samples.iloc[val_idx], shuffle=False, **flow_kwargs
    )

    # A fresh model per fold so no weights carry over between folds.
    model = create_model()

    # Train on the fold's training rows, monitoring the held-out rows.
    model.fit(train_gen, epochs=epochs, validation_data=val_gen)

    # Score the fold on its held-out rows only.
    val_loss, val_accuracy = model.evaluate(val_gen)
    print(f"Validation accuracy for fold {fold_no}: {val_accuracy:.4f}")
    accuracies.append(val_accuracy)

    fold_no += 1

# Report the mean validation accuracy across folds
print(f"Average accuracy: {np.mean(accuracies):.4f}")

# Refit on the complete training set so that `model`, which later code
# evaluates on the test data, has seen every training image rather than only
# the last fold's training subset.
model = create_model()
model.fit(train_generator, epochs=epochs)

# Evaluate on the test data
test_datagen = ImageDataGenerator(rescale=1.0/255)

# The classifier was trained with 'pass' mapped to label 0. Without an explicit
# `classes` list, flow_from_directory assigns label indices in alphanumeric
# folder order, so a test folder that sorts before 'pass' (e.g. 'fail') would
# take index 0 and invert the labels relative to training. Pin 'pass' to
# index 0 whenever that folder exists; otherwise keep the default inference.
# NOTE(review): assumes the remaining test folder holds the defect class -
# confirm against the actual layout of `test_dir`.
test_subdirs = sorted(
    entry for entry in os.listdir(test_dir)
    if os.path.isdir(os.path.join(test_dir, entry))
)
test_classes = None
if 'pass' in test_subdirs:
    test_classes = ['pass'] + [name for name in test_subdirs if name != 'pass']

test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(img_height, img_width),
    batch_size=1,
    class_mode='binary',
    classes=test_classes,
    shuffle=False  # keep prediction order aligned with test_generator.classes
)

# Score the final model on the test data; sigmoid outputs above 0.5 count as class 1
predictions = model.predict(test_generator)
predicted_classes = (predictions > 0.5).astype("int32").flatten()
true_classes = test_generator.classes
class_labels = list(test_generator.class_indices.keys())
# Explicit label ids keep the matrix/report rows aligned with `class_labels`
# even when a class is missing from the predictions or from the ground truth.
label_ids = list(range(len(class_labels)))

# Compute and plot the confusion matrix
conf_matrix = confusion_matrix(true_classes, predicted_classes, labels=label_ids)
print("\nConfusion Matrix:")
print(conf_matrix)

plt.figure(figsize=(6, 6))
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", xticklabels=class_labels, yticklabels=class_labels)
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.title("Confusion Matrix")
plt.show()

# Print the per-class classification report
print("\nClassification Report:")
print(classification_report(true_classes, predicted_classes, labels=label_ids, target_names=class_labels))
