# In [None]:
import numpy as np
import os
import tensorflow as tf
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from sklearn.metrics import confusion_matrix, f1_score
from imblearn.over_sampling import SMOTE
import matplotlib.pyplot as plt
import seaborn as sns

def load_data(data_dir, target_size=(150, 150)):
    """Load the 'fail' and 'pass' image folders under *data_dir*.

    Each class folder is walked recursively. Every file whose name ends in
    .png, .jpg, .jpeg or .bmp (case-insensitive) is loaded with Keras'
    ``image.load_img`` at ``target_size`` and converted with
    ``image.img_to_array``.

    Args:
        data_dir: Directory containing the ``fail`` and ``pass`` sub-folders.
        target_size: ``(height, width)`` passed to ``image.load_img``.
            Defaults to ``(150, 150)``, the size previously hard-coded here.

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays. ``labels`` holds 0 for
        'fail' and 1 for 'pass', aligned with ``images``. Both arrays are
        empty when no image could be loaded.
    """
    images = []
    labels = []
    label_map = {'fail': 0, 'pass': 1}
    extensions = ('.png', '.jpg', '.jpeg', '.bmp')

    for label_str, label_num in label_map.items():
        class_dir = os.path.join(data_dir, label_str)
        print(f"Searching in directory: {class_dir}")

        # os.walk yields nothing for a missing directory, which would make a
        # wrong path indistinguishable from an empty class; say so explicitly.
        if not os.path.isdir(class_dir):
            print(f"Warning: directory not found: {class_dir}")
            continue

        for root, dirs, files in os.walk(class_dir):
            # Sort in place so traversal (and therefore sample order) does
            # not depend on the file system's listing order.
            dirs.sort()
            for fname in sorted(files):
                if not fname.lower().endswith(extensions):
                    continue
                img_path = os.path.join(root, fname)
                try:
                    img = image.load_img(img_path, target_size=target_size)
                    img_array = image.img_to_array(img)
                except Exception as e:
                    # Best effort: report the unreadable file and keep going.
                    print(f"Error loading {img_path}: {e}")
                else:
                    images.append(img_array)
                    labels.append(label_num)

    return np.array(images), np.array(labels)

def focal_loss(gamma=2.0, alpha=0.25):
    """Build a binary focal loss for a single sigmoid output.

    The returned callable computes, per sample,
    ``-alpha_t * (1 - p_t) ** gamma * log(p_t)`` where ``p_t`` is the
    predicted probability of the true class, and averages over all elements.

    Args:
        gamma: Focusing exponent; larger values down-weight samples the
            model already classifies confidently.
        alpha: Weight applied to label-1 samples; label-0 samples are
            weighted by ``1 - alpha``.

    Returns:
        A function ``(y_true, y_pred) -> scalar tensor`` usable as a Keras
        loss.
    """
    def focal_loss_fixed(y_true, y_pred):
        y_pred = tf.convert_to_tensor(y_pred, tf.float32)
        # Cast (rather than convert with a dtype) so integer label tensors
        # are accepted, and align the label shape with the predictions so a
        # (batch,) vs (batch, 1) pair cannot broadcast to (batch, batch).
        y_true = tf.reshape(tf.cast(y_true, tf.float32), tf.shape(y_pred))

        # Keep probabilities away from 0 and 1 so the log stays finite.
        eps = tf.keras.backend.epsilon()
        y_pred = tf.clip_by_value(y_pred, eps, 1 - eps)

        # Probability assigned to the true class, and the matching class
        # weight. The previous form used only -y_true * log(y_pred), which
        # is zero for every label-0 sample, so that class never contributed
        # to the loss.
        p_t = y_true * y_pred + (1 - y_true) * (1 - y_pred)
        alpha_t = y_true * alpha + (1 - y_true) * (1 - alpha)

        loss = -alpha_t * tf.pow(1 - p_t, gamma) * tf.math.log(p_t)
        return tf.reduce_mean(loss)
    return focal_loss_fixed

# Dataset locations
train_dir = '/content/drive/MyDrive/data/태림산업 이미지셋/Processed_Data_TUBE/iteration_1/train'
test_dir = '/content/drive/MyDrive/data/태림산업 이미지셋/Processed_Data_TUBE/iteration_1/test'

# Load images and labels
X_train, y_train = load_data(train_dir)
X_test, y_test = load_data(test_dir)

# Fail fast with a clear message: an empty split (e.g. wrong or unmounted
# path) would otherwise surface later as an obscure reshape error.
if X_train.size == 0:
    raise ValueError(f"No training images found under: {train_dir}")
if X_test.size == 0:
    raise ValueError(f"No test images found under: {test_dir}")

# Scale pixel values to [0, 1]
X_train = X_train / 255.0
X_test = X_test / 255.0

# Oversample the minority class with SMOTE.
# SMOTE works on 2-D feature matrices, so the images are flattened first and
# restored to their original per-image shape afterwards.
image_shape = X_train.shape[1:]
X_train_flat = X_train.reshape((X_train.shape[0], -1))
smote = SMOTE(random_state=42)
X_resampled_flat, y_resampled = smote.fit_resample(X_train_flat, y_train)
X_resampled = X_resampled_flat.reshape((-1,) + image_shape)

# CNN model definition
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=image_shape),
    MaxPooling2D(2, 2),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

# Compile the model with the hand-written focal loss
model.compile(
    loss=focal_loss(gamma=2.0, alpha=0.25),
    optimizer='adam',
    metrics=['accuracy']
)

# Print the model summary
model.summary()

# Class weights: give the "fail" class (label 0) a higher weight.
# NOTE(review): this is applied on top of SMOTE oversampling and the focal
# loss, so class imbalance is compensated in several places at once —
# confirm that this stacking is intended.
class_weight = {0: 3.0, 1: 1.0}

# Train the model.
# NOTE(review): the test split is also used as validation data here, so the
# validation curves are not independent of the final test metrics.
history = model.fit(
    X_resampled, y_resampled,
    batch_size=32,
    epochs=60,  # increased number of epochs
    validation_data=(X_test, y_test),
    class_weight=class_weight  # apply the class weights
)

# Predict on the test set and threshold the probabilities at 0.5
y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob > 0.5).astype(int)

# Compute and plot the confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)

plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=['Fail', 'Pass'], yticklabels=['Fail', 'Pass'])
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix')
plt.show()

# F1 score.
# NOTE(review): called with default arguments, so presumably this scores
# label 1 as the positive class — confirm whether the F1 of the "fail"
# class (label 0) is the one actually wanted.
f1 = f1_score(y_test, y_pred)
print(f"F1 Score: {f1:.2f}")

# Report test accuracy
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy * 100:.2f}%")
