# In [None]:  (Jupyter notebook cell marker)
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns

# Load the raw dataset and drop every row that contains at least one
# missing value (in any column).
data = (
    pd.read_csv(r'C:\Users\user\Desktop\DataSet\total.csv')
    .dropna()
)

# Names of the amplitude columns: Amplitude_1 ... Amplitude_89.
amplitude_columns = [f'Amplitude_{i + 1}' for i in range(89)]

# Reduce each sample (row) to four summary statistics of its amplitudes.
# The reductions are computed column-wise across the whole frame at once
# instead of iterating row by row with `iterrows()`.
amplitude_values = data[amplitude_columns]

# `.to_numpy()` discards the (possibly gapped, post-dropna) index of `data`
# so that `features_df` gets a fresh 0..n-1 index.
features_df = pd.DataFrame({
    'mean': amplitude_values.mean(axis=1).to_numpy(),
    # Sample standard deviation (pandas default ddof=1).
    'std': amplitude_values.std(axis=1).to_numpy(),
    'max': amplitude_values.max(axis=1).to_numpy(),
    'min': amplitude_values.min(axis=1).to_numpy(),
    # HARD is encoded as 0; every other TYPE value (i.e. SOFT) as 1.
    'label': (data['TYPE'] != 'HARD').astype(int).to_numpy(),
})

# Persist the engineered features to disk (without the index column).
features_df.to_csv(r'C:\Users\user\Desktop\DataSet\processed_features.csv', index=False)

# Split the frame into the integer class labels and the feature matrix.
y = features_df['label'].to_numpy().astype(int)
X = features_df.drop(columns=['label']).to_numpy()

# Add a trailing channel axis for Conv1D: (n_samples, n_features, 1).
n_samples, n_features = X.shape
X = X.reshape((n_samples, n_features, 1))

# 5-fold cross-validation; shuffled with a fixed seed.
kf = KFold(n_splits=5, random_state=42, shuffle=True)

# Accumulators filled by the training loop: one per-epoch history list per
# fold for each metric, plus the running sum of the 2x2 confusion matrices.
all_train_loss, all_val_loss = [], []
all_train_accuracy, all_val_accuracy = [], []
all_cm = np.zeros(shape=(2, 2))

# Train and evaluate a freshly built CNN on every fold.
# Class ids and their display names are fixed up front so that the confusion
# matrix is always 2x2, even when a validation fold (and its predictions)
# happens to contain only one class. Without explicit labels a 1x1 matrix
# would be silently broadcast into all four cells of `all_cm`.
class_ids = [0, 1]
class_names = ['HARD', 'SOFT']

for fold, (train_index, val_index) in enumerate(kf.split(X)):
    X_train, X_val = X[train_index], X[val_index]
    y_train, y_val = y[train_index], y[val_index]

    # Build the model
    model = Sequential()
    model.add(Conv1D(32, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], 1)))  # input layer
    model.add(MaxPooling1D(pool_size=2))  # pooling layer
    model.add(Flatten())  # flatten to a vector
    model.add(Dense(32, activation='relu'))  # hidden layer
    model.add(Dense(1, activation='sigmoid'))  # output layer (binary classification)

    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    # Train; the validation fold is passed so that per-epoch validation
    # loss/accuracy are recorded in `history`.
    history = model.fit(X_train, y_train, epochs=40, batch_size=16, validation_data=(X_val, y_val))

    # Final performance on the validation fold
    loss, accuracy = model.evaluate(X_val, y_val)
    print(f'Fold {fold + 1} - Validation Loss: {loss}, Validation Accuracy: {accuracy}')

    # Confusion matrix: threshold the sigmoid output at 0.5
    y_val_pred = (model.predict(X_val) >= 0.5).astype(int)
    cm = confusion_matrix(y_val, y_val_pred, labels=class_ids)
    all_cm += cm  # accumulate across folds

    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
    plt.title(f'Confusion Matrix for Fold {fold + 1}')
    plt.xlabel('Predicted Label')
    plt.ylabel('True Label')
    plt.show()

    # Classification report; explicit labels keep it aligned with
    # `class_names` even if one class is absent from this fold.
    print(classification_report(y_val, y_val_pred, labels=class_ids, target_names=class_names))

    # Store the per-epoch loss and accuracy curves of this fold
    all_train_loss.append(history.history['loss'])
    all_val_loss.append(history.history['val_loss'])
    all_train_accuracy.append(history.history['accuracy'])
    all_val_accuracy.append(history.history['val_accuracy'])

# Average each per-epoch curve across the folds (axis 0 indexes the fold).
mean_train_loss = np.asarray(all_train_loss).mean(axis=0)
mean_val_loss = np.asarray(all_val_loss).mean(axis=0)
mean_train_accuracy = np.asarray(all_train_accuracy).mean(axis=0)
mean_val_accuracy = np.asarray(all_val_accuracy).mean(axis=0)

# Plot the mean learning curves side by side: loss on the left,
# accuracy on the right.
curve_panels = (
    (
        1,
        'Mean Loss per Epoch',
        'Loss',
        ((mean_train_loss, 'Mean Train Loss'), (mean_val_loss, 'Mean Validation Loss')),
    ),
    (
        2,
        'Mean Accuracy per Epoch',
        'Accuracy',
        ((mean_train_accuracy, 'Mean Train Accuracy'), (mean_val_accuracy, 'Mean Validation Accuracy')),
    ),
)

plt.figure(figsize=(12, 4))
for panel_position, panel_title, y_axis_label, curves in curve_panels:
    plt.subplot(1, 2, panel_position)
    for curve_values, curve_label in curves:
        plt.plot(curve_values, label=curve_label)
    plt.title(panel_title)
    plt.xlabel('Epochs')
    plt.ylabel(y_axis_label)
    plt.legend()

plt.show()

# Average confusion matrix over all folds. The divisor is taken from the
# KFold splitter itself so it stays correct if the number of splits changes.
n_folds = kf.get_n_splits()
average_cm = all_cm / n_folds
plt.figure(figsize=(8, 6))
sns.heatmap(average_cm, annot=True, fmt='.2f', cmap='Blues', xticklabels=['HARD', 'SOFT'], yticklabels=['HARD', 'SOFT'])
plt.title('Average Confusion Matrix')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()
