In [None]:
import numpy as np
import os
import tensorflow as tf
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from sklearn.metrics import confusion_matrix, f1_score
from imblearn.over_sampling import SMOTE
import matplotlib.pyplot as plt
import seaborn as sns

def load_data(data_dir, target_size=(150, 150)):
    """Load the images under ``data_dir/fail`` and ``data_dir/pass`` as arrays.

    Each class directory is walked recursively in sorted order so the sample
    order is reproducible across filesystems. Files whose path ends with
    .png/.jpg/.jpeg/.bmp (case-insensitive) are loaded and resized; any other
    file is reported and skipped. A file that fails to load is reported and
    skipped so one bad image does not abort the whole run.

    Args:
        data_dir: Directory containing the ``fail`` and ``pass``
            sub-directories.
        target_size: ``(height, width)`` every image is resized to.

    Returns:
        A ``(images, labels)`` tuple of NumPy arrays. ``labels`` holds 0 for
        "fail" and 1 for "pass". When no image could be loaded, ``images`` is
        an empty array of shape ``(0, height, width, 3)`` and ``labels`` has
        shape ``(0,)``.
    """
    image_exts = ('.png', '.jpg', '.jpeg', '.bmp')
    label_map = {'fail': 0, 'pass': 1}
    images = []
    labels = []

    for label_str, label_num in label_map.items():
        class_dir = os.path.join(data_dir, label_str)
        print(f"Searching in directory: {class_dir}")
        if not os.path.isdir(class_dir):
            # os.walk() silently yields nothing for a missing directory.
            print(f"Directory not found: {class_dir}")
            continue
        for root, dirs, files in os.walk(class_dir):
            # Sorting in place makes os.walk descend in a fixed order.
            dirs.sort()
            print(f"Current directory: {root}, Number of files: {len(files)}")
            for fname in sorted(files):
                img_path = os.path.join(root, fname)
                if not img_path.lower().endswith(image_exts):
                    print(f"Skipped non-image file: {img_path}")
                    continue
                try:
                    img = image.load_img(img_path, target_size=target_size)
                    img_array = image.img_to_array(img)
                except Exception as e:
                    # Deliberately broad: loading is best-effort per file.
                    print(f"Error loading {img_path}: {e}")
                else:
                    images.append(img_array)
                    labels.append(label_num)
                    print(f"Loaded: {img_path}")

    if not images:
        # Keep the image rank so callers can still reshape/inspect the result.
        # float32 is presumably what img_to_array yields by default - confirm.
        height, width = target_size
        return (np.empty((0, height, width, 3), dtype=np.float32),
                np.empty((0,), dtype=int))
    return np.array(images), np.array(labels)

# Dataset locations
train_dir = '/content/drive/MyDrive/data/태림산업 이미지셋/Processed_Data_TUBE/iteration_1/train'
test_dir = '/content/drive/MyDrive/data/태림산업 이미지셋/Processed_Data_TUBE/iteration_1/test'

# Load the train and test splits
X_train, y_train = load_data(train_dir)
X_test, y_test = load_data(test_dir)

# Fail fast with a readable message instead of a cryptic reshape/SMOTE error
# further down when a path is wrong or a split contains no loadable images.
if len(X_train) == 0 or len(X_test) == 0:
    raise ValueError(
        f"No images loaded (train: {len(X_train)}, test: {len(X_test)}); "
        f"check {train_dir} and {test_dir}"
    )

# Normalise pixel values by dividing by 255
X_train = X_train / 255.0
X_test = X_test / 255.0

# Oversample the training set with SMOTE. SMOTE is given a 2-D matrix, so each
# image is flattened to one row and the result is restored to the image shape.
image_shape = X_train.shape[1:]  # taken from the data, not repeated as a literal
X_train_flat = X_train.reshape((X_train.shape[0], -1))
smote = SMOTE(random_state=42)
X_resampled_flat, y_resampled = smote.fit_resample(X_train_flat, y_train)
X_resampled = X_resampled_flat.reshape((-1, *image_shape))

# Build the CNN layer by layer: three convolution/pooling stages followed by
# a dense classifier head.
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)))
model.add(MaxPooling2D(2, 2))
for n_filters in (64, 128):
    model.add(Conv2D(n_filters, (3, 3), activation='relu'))
    model.add(MaxPooling2D(2, 2))
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
# Single sigmoid unit for binary classification
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Print the model summary
model.summary()

from sklearn.utils.class_weight import compute_class_weight

# Labels are assumed to be 0 = "fail" and 1 = "pass".
# NOTE: these balanced weights are computed from the pre-SMOTE labels and are
# not passed to fit() below; the manual dictionary is used instead.
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(y_train),
    y=y_train,
)

# Manually chosen weights: the "fail" class (0) counts twice as much as "pass".
class_weight_dict = {0: 2.0, 1: 1.0}

# Train on the SMOTE-resampled data with the manual class weights applied.
history = model.fit(
    x=X_resampled,
    y=y_resampled,
    validation_data=(X_test, y_test),
    class_weight=class_weight_dict,
    epochs=6,
    batch_size=32,
)
# Evaluate the model.
# The network ends in a single sigmoid unit, so predict() yields one column
# per sample; flatten the thresholded predictions to 1-D so they have the same
# shape as y_test when handed to the scikit-learn metrics.
y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob > 0.5).astype(int).ravel()

# Compute and plot the confusion matrix. Passing labels explicitly keeps the
# matrix 2x2 (matching the Fail/Pass tick labels) even if one class is absent
# from both y_test and y_pred.
conf_matrix = confusion_matrix(y_test, y_pred, labels=[0, 1])

plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=['Fail', 'Pass'], yticklabels=['Fail', 'Pass'])
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix')
plt.show()

# F1 score.
# NOTE(review): f1_score presumably scores label 1 ("pass") by default - confirm
# whether the "fail" class (pos_label=0) is the one of interest.
f1 = f1_score(y_test, y_pred)
print(f"F1 Score: {f1:.2f}")

# Report test accuracy
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

