# In [None]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from sklearn.utils.class_weight import compute_class_weight
from sklearn.model_selection import KFold
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Dataset directories: one per split (train/test) and label (fail/pass),
# all located under the same iteration root.
_DATASET_ROOT = '/content/drive/MyDrive/data/태림산업 이미지셋/Processed_Data_TUBE/iteration_1'
train_fail_dir = f'{_DATASET_ROOT}/train/fail'
train_pass_dir = f'{_DATASET_ROOT}/train/pass'
test_fail_dir = f'{_DATASET_ROOT}/test/fail'
test_pass_dir = f'{_DATASET_ROOT}/test/pass'

# Image loading and label assignment
def load_images_from_folder(folder, label):
    """Load every regular file in ``folder`` as an image and pair it with ``label``.

    Args:
        folder: Directory to read. Every regular file in it is passed to
            ``load_img``, so it is expected to contain only images.
        label: Value attached to each image loaded from ``folder``.

    Returns:
        A list of ``(img_array, label)`` tuples in sorted filename order, where
        ``img_array`` is the ``img_to_array`` result of the image loaded with
        ``target_size=(128, 128)``. The list is empty when the folder holds no
        regular files.
    """
    images = []
    # os.listdir yields entries in arbitrary order; sorting keeps runs reproducible.
    for filename in sorted(os.listdir(folder)):
        path = os.path.join(folder, filename)
        # Skip sub-directories (e.g. notebook checkpoint folders): load_img
        # can only open files and would otherwise abort the whole load.
        if not os.path.isfile(path):
            continue
        img = tf.keras.preprocessing.image.load_img(path, target_size=(128, 128))
        img_array = tf.keras.preprocessing.image.img_to_array(img)
        images.append((img_array, label))
    return images

# Prepare the training data
train_fail_images = load_images_from_folder(train_fail_dir, 0)  # 0: fail
train_pass_images = load_images_from_folder(train_pass_dir, 1)  # 1: pass
train_dataset = train_fail_images + train_pass_images
# Shuffle the (image, label) pairs together, in place, before splitting them
# into separate arrays.
np.random.shuffle(train_dataset)
X, y = zip(*train_dataset)
X = np.array(X)
y = np.array(y)  # integer labels; one-hot encoding happens per fold later
# NOTE(review): no pixel rescaling is applied anywhere in this script, so the
# arrays presumably keep the raw img_to_array value range -- confirm intended.

# Prepare the test data
test_fail_images = load_images_from_folder(test_fail_dir, 0)  # 0: fail
test_pass_images = load_images_from_folder(test_pass_dir, 1)  # 1: pass
test_dataset = test_fail_images + test_pass_images
X_test, y_test = zip(*test_dataset)
X_test = np.array(X_test)
# Pin the one-hot width to the two classes (fail/pass). Without num_classes,
# to_categorical infers the width from the largest label present, so a test
# set without any 'pass' image would produce a single column.
y_test = to_categorical(np.array(y_test), num_classes=2)

# Compute class weights to compensate for class imbalance
classes = np.unique(y)
class_weights = compute_class_weight('balanced', classes=classes, y=y)
# Key each weight by its actual class label rather than by its position in
# the weight array, so the mapping stays correct whatever labels are present.
class_weight_dict = {int(c): float(w) for c, w in zip(classes, class_weights)}

# Model factory (the caller keeps the returned instance and continues training
# it instead of re-initialising it)
def create_model():
    """Build and compile the CNN used for fail/pass classification.

    The network is three Conv2D + MaxPooling2D stages (32, 64 and 128 filters)
    on a (128, 128, 3) input, followed by Flatten, Dense(256, relu),
    Dropout(0.5) and a 2-unit softmax output. It is compiled with Adam
    (learning rate 0.0001), categorical cross-entropy loss and the accuracy
    metric.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Convolutional feature extractor; only the first layer declares the input shape.
    feature_layers = [
        Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)),
        MaxPooling2D(pool_size=(2, 2)),
    ]
    for filters in (64, 128):
        feature_layers.append(Conv2D(filters, (3, 3), activation='relu'))
        feature_layers.append(MaxPooling2D(pool_size=(2, 2)))

    # Dense classification head with dropout.
    head_layers = [
        Flatten(),
        Dense(256, activation='relu'),
        Dropout(0.5),
        Dense(2, activation='softmax'),
    ]

    network = Sequential(feature_layers + head_layers)
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
    network.compile(
        optimizer=optimizer,
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

# Create the single model instance that is trained across all folds
model = create_model()

# KFold cross-validation setup
kf = KFold(n_splits=5, shuffle=True, random_state=42)
fold_no = 1
acc_per_fold = []
loss_per_fold = []

# Cross-validation loop.
# NOTE: the same model is fitted on every fold and its weights are carried
# over on purpose. From the second fold on, the validation split therefore
# contains samples the model was already trained on in earlier folds, so the
# per-fold metrics are not independent generalisation estimates.
for train_index, val_index in kf.split(X):
    # Split into training and validation subsets for this fold
    X_train, X_val = X[train_index], X[val_index]
    # Pin the one-hot width to 2 so that a fold containing no 'pass' (label 1)
    # sample still matches the model's 2-unit softmax output; without
    # num_classes the width is inferred from the largest label present.
    y_train = to_categorical(y[train_index], num_classes=2)
    y_val = to_categorical(y[val_index], num_classes=2)

    # Continue training the existing weights on this fold
    print(f"Training for fold {fold_no} ...")
    history = model.fit(X_train, y_train,
                        epochs=5,  # incremental training on top of earlier folds, so keep epochs low
                        batch_size=32,
                        validation_data=(X_val, y_val),
                        class_weight=class_weight_dict,
                        verbose=1)

    # Record this fold's validation loss and accuracy (accuracy as a percentage)
    scores = model.evaluate(X_val, y_val, verbose=0)
    acc_per_fold.append(scores[1] * 100)
    loss_per_fold.append(scores[0])

    print(f"Fold {fold_no} - Loss: {scores[0]} - Accuracy: {scores[1] * 100}%")
    fold_no += 1

# Evaluate on the test set after cross-validation.
# The accuracy reported here is the compiled 'accuracy' metric from
# model.evaluate; it does not use the custom threshold applied further below.
print("\nEvaluating on test set after cross-validation...")
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"Overall Test accuracy: {test_accuracy * 100:.2f}%")

# Predict on the test set and apply a custom decision threshold to the
# 'fail' probability before building the confusion matrix.
y_pred_probs = model.predict(X_test)
threshold = 0.35
# Column 0 is the probability of label 0 ('fail'). A sample is predicted
# 'fail' (0) when that probability exceeds the threshold, otherwise 'pass' (1).
# (Casting the boolean mask straight to int would label those samples 1, i.e.
# 'pass', which inverts the prediction.)
y_pred_classes = np.where(y_pred_probs[:, 0] > threshold, 0, 1)
y_true = np.argmax(y_test, axis=1)

# Compute and plot the confusion matrix. labels=[0, 1] keeps it 2x2 and
# aligned with the Fail/Pass tick labels even if a class is absent.
cm = confusion_matrix(y_true, y_pred_classes, labels=[0, 1])
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=['Fail', 'Pass'], yticklabels=['Fail', 'Pass'])
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.title('Confusion Matrix on Test Data')
plt.show()

# Print the cross-validation summary
print("\nAverage accuracy across folds:", np.mean(acc_per_fold))
print("Average loss across folds:", np.mean(loss_per_fold))
