In [1]:
import librosa
import pandas as pd
import numpy as np
import random as pyrandom
import tensorflow as tf
import seaborn as sns
from matplotlib import pyplot as plt
from tqdm import tqdm
from sklearn.metrics import *
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import RobustScaler
from tensorflow.keras import *


# Configuration
# Dataset file
FILE_PATH = "all_dataset.csv"

# Augmentation multipliers, passed to augment_dataset() for label 0 (NORMAL_AUG)
# and label 1 (FALLING_AUG). augment() adds len(X) * (factor - 1) synthetic samples,
# so each class ends up at `factor` times its original size.
NORMAL_AUG = 4
FALLING_AUG = 6

# Training settings
# NOTE(review): the patience equals EPOCHS, so the EarlyStopping callback cannot
# halt training before the epoch budget is used up -- confirm this is intended.
EARLY_STOPPING_PATIENCE = 50
EPOCHS= 50
BATCH_SIZE= 32
LEARNING_RATE = 0.0001

# Other hyperparameters
THRESHOLD = 0.5  # probability threshold applied to the predicted probabilities


# Model
def build_model():
    """Build and compile the Conv1D + bidirectional-LSTM binary classifier.

    The network takes inputs of shape (250, 13), applies two Conv1D/MaxPooling1D
    stages, a bidirectional LSTM and a dense layer, and ends in a single sigmoid unit.
    It is compiled with Adam at LEARNING_RATE, binary cross-entropy loss and the
    "accuracy" and "AUC" metrics.

    Returns:
        The compiled Sequential model.
    """
    # Convolutional front end operating along the 250-step axis.
    conv_stage = [
        layers.InputLayer(shape=(250, 13)),
        layers.Conv1D(32, kernel_size=20, padding="same", activation="relu"),
        layers.MaxPooling1D(pool_size=2),
        layers.Conv1D(64, kernel_size=10, padding="same", activation="relu"),
        layers.MaxPooling1D(pool_size=2),
        layers.Dropout(0.2),
    ]
    # Recurrent summary followed by the dense classification head.
    head_stage = [
        layers.Bidirectional(layers.LSTM(64, return_sequences=False)),
        layers.Dropout(0.2),
        layers.Dense(64, activation="relu"),
        layers.Dropout(0.4),
        layers.Dense(1, activation="sigmoid"),
    ]
    classifier = models.Sequential(conv_stage + head_stage)

    adam = tf.keras.optimizers.Adam(LEARNING_RATE)
    classifier.compile(
        optimizer=adam,
        loss="binary_crossentropy",
        metrics=["accuracy", "AUC"],
    )

    return classifier

In [2]:
# @title 각종 함수들(이것도 꼭 실행해줘야함!)
def load_dataset(file):
    """Read the CSV dataset and return shuffled features and binary labels.

    Args:
        file: Path or file-like object accepted by ``pd.read_csv``. It must contain
            a ``label`` column; every column whose name starts with "v" is a feature.

    Returns:
        (X_all, y_all): feature matrix built from the "v*" columns and the label
        vector, where a label equal to 1 stays 1 and every other value becomes 0.
        Rows are returned in a random order.
    """
    frame = pd.read_csv(file)

    # Binarise: 1 stays 1, anything else collapses to 0.
    frame["label"] = frame["label"].eq(1).astype("int64")

    # Shuffle all rows and renumber them.
    frame = frame.sample(frac=1).reset_index(drop=True)

    sensor_columns = [name for name in frame.columns if name.startswith("v")]

    return frame[sensor_columns].values, frame["label"].values

def split_balanced_val(X, y, val_ratio=0.1):
    """Carve a class-balanced validation set out of (X, y).

    The validation set receives ``int(len(y) * val_ratio) // 2`` randomly chosen
    samples from label 0 and the same number from label 1; every remaining sample
    goes to the training set. Both index sets are shuffled before use.

    Args:
        X: Sample array indexed along its first axis.
        y: Label array containing 0 and 1.
        val_ratio: Fraction of ``len(y)`` that sizes the validation set.

    Returns:
        (X_train, y_train, X_val, y_val)
    """
    # One shuffled index array per class, label 0 first and then label 1.
    per_class_indices = [np.where(y == label)[0] for label in (0, 1)]
    for indices in per_class_indices:
        np.random.shuffle(indices)

    take = int(len(y) * val_ratio) // 2

    val_idx = np.concatenate([indices[:take] for indices in per_class_indices])
    train_idx = np.concatenate([indices[take:] for indices in per_class_indices])

    # Mix the classes so neither split is ordered by label.
    np.random.shuffle(val_idx)
    np.random.shuffle(train_idx)

    return X[train_idx], y[train_idx], X[val_idx], y[val_idx]

# Return the trailing window of sensor values that ends at a given position.
def get_window(v, size, end):
    """Return up to ``size`` consecutive values of ``v`` ending at index ``end`` (inclusive).

    Args:
        v: Sliceable sequence of sensor values.
        size: Requested window length.
        end: Index of the last element of the window.

    Returns:
        ``v[end - size + 1 : end + 1]``. When fewer than ``size`` values precede
        ``end`` the window is shortened to start at index 0.
    """
    # Clamp the start: a negative start would be read as "count from the end"
    # by Python slicing and yield an empty or unrelated window.
    start = max(end - size + 1, 0)
    return v[start : end + 1]

# F0: V(t) - median (window: 50)
def calculate_Detrend(v):
    """Subtract a trailing-window median from each of the samples at indices 50..299.

    For every position ``p`` in 50..299 the median of ``get_window(v, 50, p)``
    (the 50 values ending at ``p``) is subtracted from ``v[p]``.

    Args:
        v: Raw signal; index 299 is read, so at least 300 samples are required.

    Returns:
        Float array of shape (250,).
    """
    history = 50
    residuals = [
        v[pos] - np.median(get_window(v, history, pos))
        for pos in range(history, history + 250)
    ]
    return np.array(residuals, dtype=float)

# F1: MovingMAD (window: 50)
def calculate_MovingMAD(v):
    """Compute the median absolute deviation over a trailing 50-sample window.

    One value is produced for each end position 50..299, using
    ``get_window(v, 50, position)`` as the window.

    Args:
        v: Raw signal (array-like supporting slicing and arithmetic).

    Returns:
        Float array of shape (250,).
    """
    def _mad(segment):
        # Median of the absolute distances to the segment's own median.
        center = np.median(segment)
        return np.median(np.abs(segment - center))

    return np.array(
        [_mad(get_window(v, 50, pos)) for pos in range(50, 300)],
        dtype=float,
    )

# F2: MovingKurtosis (window: 50)
def calculate_MovingKurtosis(v):
    """Compute the excess kurtosis over a trailing 50-sample window.

    One value is produced for each end position 50..299; the window for output
    index ``t`` is ``v[t + 1 : t + 51]``. The kurtosis uses population moments:
    ``mean(((w - mean(w)) / std(w)) ** 4) - 3``.

    A window whose standard deviation is exactly zero (all values equal) has no
    defined kurtosis; it is reported as 0.0 instead of dividing by zero, so that
    no NaN/inf reaches the scaler or the model.

    Args:
        v: Raw signal with at least 300 samples.

    Returns:
        Float array of shape (250,), always finite for finite input.

    Raises:
        IndexError: If ``v`` has fewer than 300 samples.
    """
    values = np.asarray(v, dtype=float)

    # Row t lists the indices t+1 .. t+50, i.e. the 50 samples ending at t + 50.
    window_idx = np.arange(1, 251)[:, None] + np.arange(50)[None, :]
    windows = values[window_idx]

    centered = windows - windows.mean(axis=1, keepdims=True)
    std = windows.std(axis=1)

    output = np.zeros(250)
    valid = std > 0  # constant windows keep the neutral value 0.0
    output[valid] = np.mean((centered[valid] / std[valid, None]) ** 4, axis=1) - 3

    return output

# F3: first difference (slope)
def calculate_Gradient(v):
    """Return the difference between each sample at indices 50..299 and its predecessor.

    Args:
        v: Raw signal (array-like); converted to float.

    Returns:
        Float array where element ``t`` equals ``v[50 + t] - v[49 + t]``.
    """
    samples = np.asarray(v, dtype=float)
    current, previous = samples[50:300], samples[49:299]

    return current - previous

# F4: windowed integral (window: 15)
def calculate_Integral(v):
    """Sum the 15 most recent samples for each end position 50..299.

    The sums are obtained as differences of the cumulative sum, so element ``t``
    equals ``v[36 + t] + ... + v[50 + t]``.

    Args:
        v: Raw signal (array-like); converted to float.

    Returns:
        Float array of windowed sums.
    """
    running_total = np.cumsum(np.asarray(v, dtype=float))

    return running_total[50:300] - running_total[35:285]

# F5~F12: STFT (window: 50)
def calculate_STFT(v):
    """Compute eight band-energy features from a short-time Fourier transform.

    The STFT is taken with ``n_fft=50``, ``win_length=50``, ``hop_length=1`` and
    ``center=False``; frames 1..250 of its magnitude are kept. For each band
    ``(f_min, f_max)`` the magnitudes of all bins whose frequency (computed for
    ``sr=50``) lies in ``[f_min, f_max)`` are summed per frame.

    Args:
        v: Raw signal as a NumPy array (``astype`` is called on it).
            NOTE(review): ``sr=50`` presumably reflects the sensor's sampling rate -- confirm.

    Returns:
        float32 array of shape (250, 8); a band with no matching bin stays all zeros.
    """
    n_fft = 50
    band_edges = [(1, 2), (2, 3), (3, 4), (4, 5),
                  (5, 6), (6, 7), (7, 8), (8, 15)]

    spectrum = librosa.stft(y=v.astype(float), n_fft=n_fft, win_length=50, hop_length=1, center=False)
    magnitude = np.abs(spectrum)[:, 1:251]

    # Frequency associated with each STFT bin, used to map bins onto the bands above.
    bin_freqs = librosa.fft_frequencies(sr=50, n_fft=n_fft)

    band_energy = np.zeros((250, len(band_edges)), dtype=np.float32)

    for column, (low, high) in enumerate(band_edges):
        bins = np.where((bin_freqs >= low) & (bin_freqs < high))[0]
        if len(bins) > 0:
            band_energy[:, column] = magnitude[bins, :].sum(axis=0)

    return band_energy

# Convert one (300,) raw signal into a (250, 13) feature map
def preprocess_data(v):
    """Assemble the 13-column feature map for a single raw signal.

    Columns 0-4 hold the detrend, moving MAD, moving kurtosis, gradient and
    windowed-integral features; columns 5-12 hold the STFT band energies.

    Args:
        v: Raw signal passed unchanged to every feature function.

    Returns:
        Float array of shape (250, 13).
    """
    # Extractors that each fill exactly one column, in column order.
    single_column_extractors = (
        calculate_Detrend,
        calculate_MovingMAD,
        calculate_MovingKurtosis,
        calculate_Gradient,
        calculate_Integral,
    )

    feature_map = np.zeros((250, 13))
    for column, extractor in enumerate(single_column_extractors):
        feature_map[:, column] = extractor(v)

    # The remaining columns come from the multi-band STFT output.
    feature_map[:, len(single_column_extractors):] = calculate_STFT(v)

    return feature_map

def create_feature(dataset):
    """Run ``preprocess_data`` on every row of ``dataset``.

    Args:
        dataset: 2-D array with one raw signal per row.

    Returns:
        Float array of shape (N, 250, 13), where N is ``dataset.shape[0]``.
    """
    sample_count = dataset.shape[0]

    # Pre-allocate the (N, 250, 13) tensor and fill it one sample at a time.
    features = np.zeros((sample_count, 250, 13))
    for row in range(sample_count):
        features[row, :, :] = preprocess_data(dataset[row, :])

    return features

def scale(X_train, X_val, X_test):
    """Scale the three feature tensors with a RobustScaler fitted on the training set.

    Each tensor is flattened to ``(-1, n_features)`` so that every time step counts
    as one row, transformed, and reshaped back to its original shape. The feature
    count is taken from the last axis of ``X_train`` rather than hard-coded, so the
    function keeps working if features are added or removed.

    Args:
        X_train: Training features; the scaler statistics are fitted on this tensor only.
        X_val: Validation features with the same last-axis size as ``X_train``.
        X_test: Test features with the same last-axis size as ``X_train``.

    Returns:
        (X_train_scaled, X_val_scaled, X_test_scaled, scaler)
    """
    n_features = X_train.shape[-1]

    scaler = RobustScaler()
    # Fit on training data only so no validation/test statistics leak into the scaler.
    scaler.fit(X_train.reshape(-1, n_features))

    def _transform(X):
        # Flatten to 2-D for the scaler, then restore the original tensor shape.
        return scaler.transform(X.reshape(-1, n_features)).reshape(X.shape)

    return _transform(X_train), _transform(X_val), _transform(X_test), scaler

def make_spike_noise(v_data):
    """Return an integer copy of ``v_data`` with one short spike written into it.

    A run of 1-3 consecutive samples at a random position is overwritten with a
    single random value, drawn with equal probability from 1..10 (low spike) or
    2900..3000 (high spike).

    Args:
        v_data: Raw signal (array-like); it is copied and cast to int.

    Returns:
        Integer array of the same length as ``v_data``.
    """
    spiked = np.array(v_data, dtype=int)

    # Pick the spike width first, then a start that keeps it inside the signal.
    width = pyrandom.randint(1, 3)
    first = pyrandom.randint(0, len(spiked) - width)

    # Low spike with probability 0.5, otherwise high spike.
    low, high = (1, 10) if pyrandom.random() < 0.5 else (2900, 3000)
    spiked[first:first + width] = pyrandom.randint(low, high)

    return spiked

def make_gaussian_noise(v_data):
    """Return ``v_data`` with zero-mean Gaussian noise (standard deviation 10) added.

    The noisy signal is clipped to the range [1, 3000] and cast to int.

    Args:
        v_data: Raw signal (array-like).

    Returns:
        Integer array with the same shape as ``v_data``.
    """
    clean = np.array(v_data, dtype=float)

    # Draw one independent noise value per sample.
    jitter = np.random.normal(scale=10.0, size=clean.shape)

    # Keep the result inside the allowed value range before truncating to int.
    return np.clip(clean + jitter, 1.0, 3000.0).astype(int)

def make_time_warping(v_data):
    """Stretch or compress the signal in time by a random factor in [0.8, 1.2].

    The signal is linearly resampled to ``int(len * factor)`` points. A longer
    result is cropped around its centre; a shorter one is padded on both sides by
    repeating its first and last values. The output always has the input length.

    Args:
        v_data: Raw signal (array-like).

    Returns:
        Integer array of the same length as ``v_data``.
    """
    signal = np.array(v_data, dtype=float)
    n_samples = len(signal)

    # Random playback speed and the resulting resampled length.
    speed = pyrandom.uniform(0.8, 1.2)
    n_resampled = int(n_samples * speed)

    resampled = np.interp(
        np.linspace(0, n_samples - 1, n_resampled),
        np.arange(n_samples),
        signal,
    )

    if n_resampled > n_samples:
        # Too long: keep the central n_samples points.
        offset = (n_resampled - n_samples) // 2
        warped = resampled[offset:offset + n_samples]
    else:
        # Too short (or equal): extend both ends with the edge values.
        missing = n_samples - n_resampled
        left = missing // 2
        right = missing - left
        warped = np.concatenate((
            np.full(left, resampled[0]),
            resampled,
            np.full(right, resampled[-1]),
        ))

    return warped.astype(int)

def make_time_shift(v_data):
    """Shift the signal by a random offset of -10..10 samples, padding with edge values.

    A positive offset delays the signal and fills the start with the first value;
    a negative offset advances it and fills the end with the last value. An offset
    of zero returns the integer copy unchanged.

    Args:
        v_data: Raw signal (array-like); it is copied and cast to int.

    Returns:
        Integer array of the same length as ``v_data``.
    """
    original = np.array(v_data, dtype=int)

    offset = pyrandom.randint(-10, 10)
    if offset == 0:
        return original

    shifted = np.zeros_like(original, dtype=int)

    if offset < 0:
        # Move left: drop the first |offset| samples, repeat the last value at the end.
        shifted[:offset] = original[-offset:]
        shifted[offset:] = original[-1]
        return shifted

    # Move right: drop the last `offset` samples, repeat the first value at the start.
    shifted[offset:] = original[:-offset]
    shifted[:offset] = original[0]
    return shifted

def make_scale_different(v_data):
    """Rescale the signal's deviation from its mean by a random gain.

    The gain is ``(1.5 / d) ** 2`` with ``d`` drawn uniformly from [1, 2.5]. The
    rescaled signal is clipped to [1, u], with ``u`` drawn uniformly from
    [2950, 3000], and cast to int.

    Args:
        v_data: Raw signal (array-like).

    Returns:
        Integer array with the same shape as ``v_data``.
    """
    center = np.mean(v_data)
    deviation = np.array(v_data, dtype=float) - center

    # Inverse-square gain relative to the reference value 1.5.
    distance = pyrandom.uniform(1, 2.5)
    gain = (1.5 / distance) ** 2
    rescaled = deviation * gain + center

    # The upper clipping bound is itself randomised.
    upper = pyrandom.uniform(2950, 3000)

    return np.clip(rescaled, 1.0, upper).astype(int)

def augment(X, factor, label):
    """Create synthetic samples so that a class grows to ``factor`` times its size.

    ``int(len(X) * (factor - 1))`` new samples are generated. Each one starts from a
    randomly chosen row of ``X`` and is passed through 1-3 distinct, randomly
    selected augmentation functions.

    Args:
        X: 2-D array with one raw signal per row (all rows of one class).
        factor: Target size multiplier. Values <= 1 add nothing; fractional values
            are truncated to a whole number of added samples.
        label: Label assigned to every generated sample.

    Returns:
        (X_augmented, y_augmented). When nothing is generated, ``X_augmented`` has
        shape ``(0,) + X.shape[1:]`` so that it can still be stacked with ``X``.
    """
    n_to_add = max(int(len(X) * (factor - 1)), 0)

    if n_to_add == 0:
        # Keep the feature dimension: a bare empty array of shape (0,) cannot be
        # vstack-ed with the original (N, n_features) data by the caller.
        empty_X = np.zeros((0,) + tuple(np.shape(X)[1:]), dtype=int)
        return empty_X, np.zeros(0, dtype=int)

    aug_functions = [make_spike_noise, make_gaussian_noise, make_time_warping, make_time_shift, make_scale_different]

    X_augmented = []
    for _ in range(n_to_add):
        # Start from a random existing sample of this class.
        sample_data = X[pyrandom.randint(0, len(X) - 1)].copy()

        # Chain 1-3 different augmentations in random order.
        n_to_augment = pyrandom.randint(1, 3)
        for func in pyrandom.sample(aug_functions, n_to_augment):
            sample_data = func(sample_data)

        X_augmented.append(sample_data)

    X_augmented = np.array(X_augmented)
    y_augmented = np.full(len(X_augmented), label, dtype=int)

    return X_augmented, y_augmented

def augment_dataset(X, y, normal_aug, falling_aug):
    """Augment both classes and return the combined, shuffled dataset.

    The original samples are kept. Synthetic samples produced by ``augment`` are
    appended first for label 0 (multiplier ``normal_aug``) and then for label 1
    (multiplier ``falling_aug``), after which all rows are randomly permuted.

    Args:
        X: 2-D array with one raw signal per row.
        y: Label array aligned with ``X``.
        normal_aug: Size multiplier for label-0 samples.
        falling_aug: Size multiplier for label-1 samples.

    Returns:
        (X_final, y_final)
    """
    sample_parts = [X]
    label_parts = [y]

    # Augment each class separately with its own multiplier.
    for class_label, multiplier in ((0, normal_aug), (1, falling_aug)):
        extra_X, extra_y = augment(X[y == class_label], multiplier, class_label)
        sample_parts.append(extra_X)
        label_parts.append(extra_y)

    # Merge originals with synthetic samples, then shuffle rows and labels together.
    X_final = np.vstack(sample_parts)
    y_final = np.concatenate(label_parts)

    order = np.random.permutation(len(X_final))

    return X_final[order], y_final[order]

In [3]:
# Main script
# Number of cross-validation folds. A single constant drives the splitter, the
# progress bar, the log messages and the confusion-matrix average so they stay in sync.
N_SPLITS = 5

# Seed Python's `random`, NumPy and the Keras backend in one call so that the
# dataset shuffle, augmentation, validation split and weight initialisation are
# repeatable between runs (as far as the compute backend allows).
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Load the dataset
X_all, y_all = load_dataset(FILE_PATH)

# Prepare the stratified K-fold splitter
k_fold = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=SEED)

# Result accumulators
history_results = []
metrics_results = {"acc": [], "auc": [], "recall": [], "f1": []}
cm_results = np.zeros((2, 2))  # confusion matrix summed over all folds

# Start K-fold cross-validation
print(f"\n{N_SPLITS}-Fold 검증 시작...")

# tqdm wraps the split generator and is given the total explicitly: the generator
# has no length, so without `total` the bar cannot show progress or an ETA.
for fold, (train_idx, test_idx) in enumerate(tqdm(k_fold.split(X_all, y_all), total=N_SPLITS)):
    print(f"\n========== Fold {fold + 1} / {N_SPLITS} ==========")

    # Split into train / test for this fold
    X_train = X_all[train_idx]
    y_train = y_all[train_idx]
    X_test = X_all[test_idx]
    y_test = y_all[test_idx]

    # Carve a class-balanced validation set out of the training portion
    X_train, y_train, X_val, y_val = split_balanced_val(X_train, y_train, 0.1)
    print(f"  - Split 결과:")
    print(f"    > Train: {len(y_train)}")
    print(f"    > Val: {len(y_val)}")
    print(f"    > Test: {len(y_test)}")

    # Augment the training data only (validation and test stay untouched)
    print("  - Train 데이터 증강 중...")
    X_train_aug, y_train_aug = augment_dataset(X_train, y_train, NORMAL_AUG, FALLING_AUG)

    # Feature extraction and scaling (scaler statistics come from the training set)
    X_train_feature = create_feature(X_train_aug)
    X_val_feature = create_feature(X_val)
    X_test_feature = create_feature(X_test)

    X_train_scaled, X_val_scaled, X_test_scaled, scaler = scale(X_train_feature, X_val_feature, X_test_feature)

    print("  - 모델 학습 시작...")
    early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_AUC', mode='max', patience=EARLY_STOPPING_PATIENCE,
                                                  restore_best_weights=True, verbose=0)
    model = build_model()
    history = model.fit(
        X_train_scaled, y_train_aug,
        validation_data=(X_val_scaled, y_val),
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        callbacks=[early_stop],
        verbose=0
    )

    # Evaluate on the held-out test fold
    history_results.append(history.history)

    y_pred_prob = model.predict(X_test_scaled, verbose=0).ravel()
    y_pred = (y_pred_prob >= THRESHOLD).astype(int)

    acc = accuracy_score(y_test, y_pred)
    auc = roc_auc_score(y_test, y_pred_prob)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    metrics_results["acc"].append(acc)
    metrics_results["auc"].append(auc)
    metrics_results["recall"].append(recall)
    metrics_results["f1"].append(f1)
    # Pin the label order so the matrix is always 2x2 and cannot be broadcast
    # into the accumulator with a different shape.
    cm_results += confusion_matrix(y_test, y_pred, labels=[0, 1])

    print(f"  >> Fold {fold + 1}: ACC={acc:.4f}, AUC={auc:.4f}\n")

# The visualisation section below was written with the help of an AI assistant.
# Average the training curves over the folds, truncated to the shortest history.
min_epochs = min([len(h['loss']) for h in history_results])
epochs_range = range(1, min_epochs + 1)

avg_loss = np.mean([h['loss'][:min_epochs] for h in history_results], axis=0)
avg_val_loss = np.mean([h['val_loss'][:min_epochs] for h in history_results], axis=0)
avg_acc = np.mean([h['accuracy'][:min_epochs] for h in history_results], axis=0)
avg_val_acc = np.mean([h['val_accuracy'][:min_epochs] for h in history_results], axis=0)
avg_auc = np.mean([h['AUC'][:min_epochs] for h in history_results], axis=0)
avg_val_auc = np.mean([h['val_AUC'][:min_epochs] for h in history_results], axis=0)

avg_cm = cm_results / N_SPLITS
labels = ['Accuracy', 'AUC', 'Recall', 'F1-Score']
means = [np.mean(metrics_results[k]) for k in ['acc', 'auc', 'recall', 'f1']]
stds = [np.std(metrics_results[k]) for k in ['acc', 'auc', 'recall', 'f1']]

# Figure layout: a 2 x 6 grid so the top row holds three plots and the bottom row two.
fig = plt.figure(figsize=(18, 12))
gs = fig.add_gridspec(2, 6)

# --- Top row: three curves, two grid columns each ---
ax1 = fig.add_subplot(gs[0, 0:2])
ax1.plot(epochs_range, avg_loss, 'b-', label='Train')
ax1.plot(epochs_range, avg_val_loss, 'r--', label='Val')
ax1.set_title('Average Loss')
ax1.legend()
ax1.grid(True, alpha=0.3)

ax2 = fig.add_subplot(gs[0, 2:4])
ax2.plot(epochs_range, avg_acc, 'b-', label='Train')
ax2.plot(epochs_range, avg_val_acc, 'r--', label='Val')
ax2.set_title('Average Accuracy')
ax2.legend()
ax2.grid(True, alpha=0.3)

ax3 = fig.add_subplot(gs[0, 4:6])
ax3.plot(epochs_range, avg_auc, 'b-', label='Train')
ax3.plot(epochs_range, avg_val_auc, 'r--', label='Val')
ax3.set_title('Average AUC')
ax3.legend()
ax3.grid(True, alpha=0.3)

# --- Bottom row: two larger plots, three grid columns each ---
# 1. Confusion matrix averaged over the folds (left half)
ax4 = fig.add_subplot(gs[1, 0:3])
sns.heatmap(avg_cm, annot=True, fmt='.1f', cmap='Purples', ax=ax4, annot_kws={"size": 14},  # larger annotation text
            xticklabels=['Pred Normal', 'Pred Fall'],
            yticklabels=['Act Normal', 'Act Fall'])
ax4.set_title('Average Confusion Matrix', fontsize=14)
ax4.set_ylabel('True Label')
ax4.set_xlabel('Predicted Label')

# 2. Mean +/- std of the test metrics as a bar chart (right half)
ax5 = fig.add_subplot(gs[1, 3:6])
bars = ax5.bar(labels, means, yerr=stds, capsize=5, color='skyblue', edgecolor='black')
for bar in bars:
    height = bar.get_height()
    ax5.text(bar.get_x() + bar.get_width() / 2., height + 0.01,
             f'{height:.3f}', ha='center', va='bottom', fontsize=12, fontweight='bold')
ax5.set_title('Final Performance Metrics', fontsize=14)
ax5.set_ylim(0, 1.15)
ax5.grid(axis='y', linestyle='--', alpha=0.5)


5-Fold 검증 시작...


0it [00:00, ?it/s]


  - Split 결과:
    > Train: 440
    > Val: 48
    > Test: 122
  - Train 데이터 증강 중...
  - 모델 학습 시작...


0it [01:22, ?it/s]


KeyboardInterrupt: 