In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install nptdms

Collecting nptdms
  Downloading nptdms-1.10.0.tar.gz (181 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/181.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━[0m [32m153.6/181.5 kB[0m [31m4.4 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m181.5/181.5 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: nptdms
  Building wheel for nptdms (pyproject.toml) ... [?25l[?25hdone
  Created wheel for nptdms: filename=nptdms-1.10.0-py3-none-any.whl size=108456 sha256=7225cab5b06df343f1b68f8fc0e04c35735db8149a65481adb3424cf3e37becd
  Stored in directory: /root/.cache/pip/wheels/1b/4b/17/21e8b03b37ea51ce7ec9f5570cdf0decca93f537d61c06880f
Successfully b

전처리 파이프라인

In [7]:
import numpy as np
import os
import pandas as pd
from scipy.fft import rfft
import pywt
from nptdms import TdmsFile
import joblib
from tensorflow.keras.models import load_model

In [5]:
def load_tdms_file(file_path):
    tdms_file = TdmsFile.read(file_path)

    group_name_vibration = tdms_file.groups()[0].name
    group_name_operation = tdms_file.groups()[1].name

    vib_channels = tdms_file[group_name_vibration].channels()
    vib_data = {ch.name.strip(): ch.data for ch in vib_channels}

    operation_channels = tdms_file[group_name_operation].channels()
    operation_data = {ch.name.strip(): ch.data for ch in operation_channels}

    return vib_data, operation_data

In [6]:
# 슬라이딩 윈도우 분할 (window_size 샘플, overlap 비율)
def sliding_window(data, window_size=25600, overlap=0.5):
    step = int(window_size * (1 - overlap))
    return np.array([
        data[start:start + window_size]
        for start in range(0, len(data) - window_size + 1, step)
    ])  # (윈도우 수, window_size, 채널 수)

# 1차원 신호에 대해 WPT+FFT top_k 에너지 특징 추출
def extract_wpt_fft_features(signal, wavelet='db4', level=3, top_k=10):
    wp = pywt.WaveletPacket(data=signal, wavelet=wavelet, mode='symmetric', maxlevel=level)
    nodes = [node.path for node in wp.get_level(level, 'freq')]
    features = []
    for node in nodes:
        coeffs = wp[node].data
        fft_vals = np.abs(rfft(coeffs))
        top_features = np.sort(fft_vals)[-top_k:]
        features.extend(top_features)
    return np.array(features)  # (노드 수 × top_k, )

# TDMS 파일 경로로부터 진동 데이터 읽어, 윈도우별 WPT+FFT 특징 추출 함수
def extract_vibration_array_with_features(file_path, window_size=25600, overlap=0.5, wavelet='db4', level=3, top_k=10):
    vib_data, _ = load_tdms_file(file_path)
    channels = ["CH1", "CH2", "CH3", "CH4"]
    vib_arrays = [vib_data[ch] for ch in channels if ch in vib_data]
    vib_matrix = np.vstack(vib_arrays).T  # (샘플 수, 4)

    windows = sliding_window(vib_matrix, window_size=window_size, overlap=overlap)  # (윈도우 수, window_size, 4)

    all_features = []
    for window in windows:
        window_features = []
        for ch_idx in range(window.shape[1]):
            signal = window[:, ch_idx]
            feat = extract_wpt_fft_features(signal, wavelet=wavelet, level=level, top_k=top_k)
            window_features.extend(feat)
        all_features.append(window_features)

    return np.array(all_features)  # (윈도우 수, 채널 수 * 특징 수)

def process_all_train_folders(base_path, folder_names):
    all_feature_rows = []

    for folder_name in folder_names:
        folder_path = os.path.join(base_path, folder_name)
        tdms_files = sorted([f for f in os.listdir(folder_path) if f.endswith('.tdms')])

        for tdms_file in tdms_files:
            file_path = os.path.join(folder_path, tdms_file)
            features = extract_vibration_array_with_features(file_path)

            for i, feature in enumerate(features):
                all_feature_rows.append({
                    'file_name': tdms_file,
                    'window_index': i,
                    'features': feature,
                    'folder': folder_name
                })

    return pd.DataFrame(all_feature_rows)


# --- 설정값 ---
base_path = "/content/drive/MyDrive/KSPHM-data-challenge/Validation Set"
folder_names = [f"Validation{i}" for i in range(1, 7)]  # Train1 ~ Train8

# --- 특징 추출 수행 ---
features_df = process_all_train_folders(base_path, folder_names)

features_df['file_name'] = features_df['file_name'].str.replace(r'\.tdms$', '', regex=True)

print(f"총 윈도우 수: {len(features_df)}")
print(type(features_df['features'].iloc[0]))

총 윈도우 수: 5947
<class 'numpy.ndarray'>


모델 로드 및 예측 수행 후 평가 점수 출력

In [11]:
# 저장된 모델 로드
model_path = "/content/drive/MyDrive/KSPHM-data-challenge/model/simple_cnn_lstm_model.h5"
model = load_model(model_path, compile=False)
print("모델 로드 완료:", model_path)

# Validation 데이터셋 폴더 경로
validation_base_path = "/content/drive/MyDrive/KSPHM-data-challenge/Validation Set"
validation_folders = [f"Validation{i}" for i in range(1, 7)]

# Scaler 로드
X_scaler = joblib.load("/content/drive/MyDrive/KSPHM-data-challenge/scalers/X_scaler.pkl")
y_scaler = joblib.load("/content/drive/MyDrive/KSPHM-data-challenge/scalers/y_scaler.pkl")

# Validation 특징 추출
X_val = np.stack(features_df['features'].values)  # (samples, features)
X_val = X_val[..., np.newaxis]

# Train에서 사용했던 스케일러 사용
samples_val, feat_dim, channels = X_val.shape
X_val_reshaped = X_val.reshape(samples_val, feat_dim * channels)  # (samples, feat_dim)

X_val_scaled = X_scaler.transform(X_val_reshaped)  # Train에서 fit된 scaler로 transform만

# 모델 입력 형식으로 reshape
X_val_final = X_val_scaled.reshape(samples_val, feat_dim, channels)

# 모델 예측
y_val_pred_scaled = model.predict(X_val_final, verbose=1)

# 역스케일링
y_val_pred = y_scaler.inverse_transform(y_val_pred_scaled)

# 결과 저장
features_df['RUL_pred_sec'] = y_val_pred.flatten()

# 확인
print(features_df[['file_name', 'window_index', 'RUL_pred_sec']].head())

모델 로드 완료: /content/drive/MyDrive/KSPHM-data-challenge/model/simple_cnn_lstm_model.h5
[1m186/186[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step
                                           file_name  window_index  \
0  modified_KIMM Simulator_KIMM Bearing Test_2016...             0   
1  modified_KIMM Simulator_KIMM Bearing Test_2016...             1   
2  modified_KIMM Simulator_KIMM Bearing Test_2016...             2   
3  modified_KIMM Simulator_KIMM Bearing Test_2016...             3   
4  modified_KIMM Simulator_KIMM Bearing Test_2016...             4   

   RUL_pred_sec  
0  11948.326172  
1  10991.068359  
2  11946.134766  
3  12097.870117  
4  13621.589844  


In [12]:
# 각 Validation 폴더별 마지막 윈도우의 예측 RUL 선택
final_rul_scores = (
    features_df.groupby('folder')
    .apply(lambda df: df.sort_values('window_index').iloc[-1])  # 마지막 윈도우
    .reset_index(drop=True)
)

# 원하는 형식으로 컬럼 구성
result_df = pd.DataFrame({
    'File': final_rul_scores['folder'],
    'RUL_Score(sec)': final_rul_scores['RUL_pred_sec']
})

# 엑셀 파일로 저장
output_path = "/content/drive/MyDrive/KSPHM-data-challenge/RUL_Score/rul_scores1.xlsx"
result_df.to_excel(output_path, index=False)

print("엑셀 저장 완료:", output_path)
print(result_df)

  .apply(lambda df: df.sort_values('window_index').iloc[-1])  # 마지막 윈도우


엑셀 저장 완료: /content/drive/MyDrive/KSPHM-data-challenge/RUL_Score/rul_scores1.xlsx
          File  RUL_Score(sec)
0  Validation1    12023.895508
1  Validation2    71448.632812
2  Validation3    48502.957031
3  Validation4    73356.601562
4  Validation5    74936.945312
5  Validation6    29772.058594


평가 파이프라인

In [14]:
# 오차 계산
def calculate_error(ActRUL, PredRUL):
    error = 100 * (ActRUL - PredRUL) / ActRUL
    return error

# 정확도 점수 계산
def calculate_accuracy_score(error):
    ln_0_5 = np.log(0.5)
    score = np.where(
        error <= 0,
        np.exp(-ln_0_5 * error / 20),
        np.exp(+ln_0_5 * error / 20)
    )
    return score

# 최종 점수 계산
def calculate_final_score(accuracy_scores):
    return np.mean(accuracy_scores)

# 전체 평가 파이프라인
def evaluate_rul_prediction(ActRUL, PredRUL):
    error = calculate_error(ActRUL, PredRUL)
    accuracy_scores = calculate_accuracy_score(error)
    final_score = calculate_final_score(accuracy_scores)
    return {
        "Error": error,
        "Accuracy_scores": accuracy_scores,
        "Final_Score": final_score
    }

In [None]:
# 예측 결과 예시 (features_df에서 정리한 최종 결과 요약)
pred_df = pd.DataFrame({
    "File": ["Validation1", "Validation2", "Validation3", "Validation4", "Validation5", "Validation6"],
    "RUL_Score(sec)": [12023.895508, 71448.632812, 48502.957031, 73356.601562, 74936.945312, 29772.058594]
})

# 실제 RUL 값 (예시, 실제 값으로 바꿔야 함)
true_rul_dict = {
    "Validation1": 12423,
    "Validation2": 70200,
    "Validation3": 48800,
    "Validation4": 72000,
    "Validation5": 75500,
    "Validation6": 29500,
}

# 실제 RUL 추가
pred_df["Actual_RUL"] = pred_df["File"].map(true_rul_dict)

# 오차 및 정확도 점수 계산
pred_df["Error(%)"] = calculate_error(pred_df["Actual_RUL"], pred_df["RUL_Score(sec)"])
pred_df["Accuracy_Score"] = calculate_accuracy_score(pred_df["Error(%)"])

# 최종 평균 점수 계산
final_score = calculate_final_score(pred_df["Accuracy_Score"])

# 결과 출력
print(pred_df)
print("\n📊 Final Accuracy Score:", round(final_score, 4))
