In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install nptdms

Collecting nptdms
  Downloading nptdms-1.10.0.tar.gz (181 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m181.5/181.5 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: nptdms
  Building wheel for nptdms (pyproject.toml) ... [?25l[?25hdone
  Created wheel for nptdms: filename=nptdms-1.10.0-py3-none-any.whl size=108456 sha256=01694b188df59e9cd6f64bf912c220e6c85f82d8a12e34f2e9154237aac31bcb
  Stored in directory: /root/.cache/pip/wheels/1b/4b/17/21e8b03b37ea51ce7ec9f5570cdf0decca93f537d61c06880f
Successfully built nptdms
Installing collected packages: nptdms
Successfully installed nptdms-1.10.0


전처리 파이프라인

In [3]:
import numpy as np
import os
import pandas as pd
from scipy.fft import rfft
import pywt
from nptdms import TdmsFile
import joblib
from tensorflow.keras.models import load_model

from tensorflow.keras import regularizers
from tqdm import tqdm

In [4]:
def load_tdms_file(file_path):
    tdms_file = TdmsFile.read(file_path)

    group_name_vibration = tdms_file.groups()[0].name
    group_name_operation = tdms_file.groups()[1].name

    vib_channels = tdms_file[group_name_vibration].channels()
    vib_data = {ch.name.strip(): ch.data for ch in vib_channels}

    operation_channels = tdms_file[group_name_operation].channels()
    operation_data = {ch.name.strip(): ch.data for ch in operation_channels}

    return vib_data, operation_data

operation

In [5]:
# tdms 파일에서 operation data 불러오기
def load_summary_from_tdms(train_root, target_folders):
    summary = {}

    for folder in target_folders:
        folder_path = os.path.join(train_root, folder)
        tdms_files = sorted([f for f in os.listdir(folder_path) if f.endswith(".tdms")])

        records = []

        for idx, tdms_file in enumerate(tdms_files):
            file_path = os.path.join(folder_path, tdms_file)
            _, operation_data = load_tdms_file(file_path)

            time_sec = idx * 600  # 10분 간격

            record = {
                "time_sec": time_sec,
                "TC SP Front[℃]": operation_data["TC SP Front[℃]"][0],
                "TC SP Rear[℃]": operation_data["TC SP Rear[℃]"][0],
                "Torque[Nm]": operation_data["Torque[Nm]"][0],
                "file_name": os.path.splitext(tdms_file)[0],  # 확장자 제거
                "folder": folder
            }
            records.append(record)

        df = pd.DataFrame(records)
        summary[folder] = df

    return summary

# slope 계산
def compute_slope_feature(df, column_name, N, interval_sec=600):
    """
    주어진 데이터프레임에서 column_name에 대한 slope 피처 계산
    """
    slope_col = f"{column_name}_Slope_{N}cycle"
    slopes = [np.nan] * len(df)
    for i in range(N, len(df)):
        delta_y = df[column_name].iloc[i] - df[column_name].iloc[i - N]
        delta_x = df["time_sec"].iloc[i] - df["time_sec"].iloc[i - N]
        slopes[i] = delta_y / delta_x
    df[slope_col] = slopes
    return df

# TC_slope x Torque_slope
def compute_combined_slope(df, temp_col, torque_col, N):
    """
    온도 slope와 토크 slope의 곱 (복합 지표)
    """
    temp_slope_col = f"{temp_col}_Slope_{N}cycle"
    torque_slope_col = f"{torque_col}_Slope_{N}cycle"
    combined_col = f"Combined_Slope_{N}cycle"
    if temp_slope_col in df.columns and torque_slope_col in df.columns:
        df[combined_col] = df[temp_slope_col] * df[torque_slope_col]
    return df

def apply_slope_features(summary, estimation_targets, N_list=[3, 5], interval_sec=600):
    """
    모든 summary 데이터프레임에 대해 slope 및 combined slope 피처 추가
    """
    for train_id, df in summary.items():
        front_col, rear_col = estimation_targets.get(train_id, ("TC SP Front[℃]", "TC SP Rear[℃]"))
        df = df.copy()
        for N in N_list:
            df = compute_slope_feature(df, "Torque[Nm]", N, interval_sec)
            df = compute_slope_feature(df, front_col, N, interval_sec)
            df = compute_slope_feature(df, rear_col, N, interval_sec)
            df = compute_combined_slope(df, front_col, "Torque[Nm]", N)
            df = compute_combined_slope(df, rear_col, "Torque[Nm]", N)

        df = df.dropna().reset_index(drop=True)  # NaN 행 제거
        summary[train_id] = df
    return summary

In [6]:
# vibration data와 합치기 위해 펼치기
def expand_summary_to_windows(summary_df, window_size=25600, overlap=0.5, fixed_total_samples=256000):
    step = int(window_size * (1 - overlap))
    num_windows = (fixed_total_samples - window_size) // step + 1
    expanded_rows = []

    for idx, row in summary_df.iterrows():
        file_name = row.get("file_name", None)
        folder = row.get("folder", None)

        if file_name is None:
            raise ValueError("summary_df에 'file_name' 컬럼이 없습니다.")
        if folder is None:
            raise ValueError("summary_df에 'folder' 컬럼이 없습니다.")

        for w in range(num_windows):
            new_row = row.copy()
            new_row["window_index"] = w
            new_row["file_name"] = file_name
            new_row["folder"] = folder
            expanded_rows.append(new_row)

    expanded_df = pd.DataFrame(expanded_rows)
    return expanded_df

In [7]:
base_path = "/content/drive/MyDrive/KSPHM-data-challenge/Validation Set"
folder_names = [f"Validation{i}" for i in range(1, 7)]  # Train1 ~ Train8
targets = {folder: ("TC SP Front[℃]", "TC SP Rear[℃]") for folder in folder_names}

summary = load_summary_from_tdms(base_path, folder_names)
operation_summary = apply_slope_features(summary, targets, N_list=[3, 5], interval_sec=600)

# summary는 dict이므로 하나의 DataFrame으로 병합
summary_df = pd.concat(
    [df.assign(folder=train_id) for train_id, df in operation_summary.items()],
    ignore_index=True
)

# 윈도우 단위로 확장
summary_expanded = expand_summary_to_windows(summary_df)

In [8]:
print("summary_expanded columns:", summary_expanded.columns.tolist())

summary_expanded columns: ['time_sec', 'TC SP Front[℃]', 'TC SP Rear[℃]', 'Torque[Nm]', 'file_name', 'folder', 'Torque[Nm]_Slope_3cycle', 'TC SP Front[℃]_Slope_3cycle', 'TC SP Rear[℃]_Slope_3cycle', 'Combined_Slope_3cycle', 'Torque[Nm]_Slope_5cycle', 'TC SP Front[℃]_Slope_5cycle', 'TC SP Rear[℃]_Slope_5cycle', 'Combined_Slope_5cycle', 'window_index']


vibration

In [9]:
# 고장 주파수 정의 (단위: Hz)
FAULT_FREQUENCIES = {
    "BPFI": 140,
    "BPFO": 93,
    "BSF": 78,
    "Cage": 6.7
}

# 슬라이딩 윈도우 생성
# (window_size 샘플, overlap 비율)
def sliding_window(data, window_size=25600, overlap=0.5):
    step = int(window_size * (1 - overlap))
    return np.array([
        data[start:start + window_size]
        for start in range(0, len(data) - window_size + 1, step)
    ])  # (윈도우 수, window_size, 채널 수)

# WPT+FFT 특징 추출 함수
def extract_wpt_fft_features(signal, wavelet='db4', level=3, top_k=10):
    wp = pywt.WaveletPacket(data=signal, wavelet=wavelet, mode='symmetric', maxlevel=level)
    nodes = [node.path for node in wp.get_level(level, 'freq')]
    features = []
    for node in nodes:
        coeffs = wp[node].data
        fft_vals = np.abs(rfft(coeffs))
        top_features = np.sort(fft_vals)[-top_k:]
        features.extend(top_features)
    return np.array(features)  # (노드 수 × top_k, )

# TDMS 파일 경로로부터 진동 데이터 읽어, 윈도우별 WPT+FFT 특징 추출 함수
def extract_vibration_array_with_features(file_path, window_size=25600, overlap=0.5, wavelet='db4', level=3, top_k=10):
    vib_data, _ = load_tdms_file(file_path)
    channels = ["CH1", "CH2", "CH3", "CH4"]
    vib_arrays = [vib_data[ch] for ch in channels if ch in vib_data]
    vib_matrix = np.vstack(vib_arrays).T  # (샘플 수, 4)

    windows = sliding_window(vib_matrix, window_size=window_size, overlap=overlap)  # (윈도우 수, window_size, 4)
    all_features = []

    for window in windows:
        window_features = []
        for ch_idx in range(window.shape[1]):
            signal = window[:, ch_idx]
            feat = extract_wpt_fft_features(signal, wavelet=wavelet, level=level, top_k=top_k)
            window_features.extend(feat)
        all_features.append(window_features)

    return np.array(all_features)  # (윈도우 수, 채널 수 * 특징 수)

# 고장 주파수 기반 FFT 특징 추출
# 단일 채널 고장 주파수 특징 추출(FFT)
def extract_fault_frequency_features(signal, Fs, fault_freqs=FAULT_FREQUENCIES, bandwidth=5):
    N = len(signal)
    freqs = np.fft.fftfreq(N, d=1/Fs)[:N//2]
    fft_magnitude = np.abs(np.fft.fft(signal))[:N//2]

    features = {}

    for fault_name, f_center in fault_freqs.items():
        lower = f_center - bandwidth
        upper = f_center + bandwidth
        band_mask = (freqs >= lower) & (freqs <= upper)
        band_energy = np.sum(fft_magnitude[band_mask] ** 2)

        if np.any(band_mask):
            peak_idx = np.argmax(fft_magnitude[band_mask])
            band_mags = fft_magnitude[band_mask]
            peak_amp = band_mags[peak_idx]
        else:
            peak_amp = 0.0

        features[f"{fault_name}_Energy"] = band_energy
        features[f"{fault_name}_Peak_Amplitude"] = peak_amp

    return features


# TDMS 파일 단일 처리, 멀티 채널 고장 주파수 특징 추출
def extract_fault_features_from_tdms(vib_data, duration_sec=10, bandwidth=5):
    """
    TDMS 진동 데이터(CH1~CH4)에서 고장 주파수 특징 추출
    - vib_data: TDMS에서 읽은 dict ({"CH1": array, ...})
    - duration_sec: 시퀀스 길이 (기본 10초)
    """
    features = {}
    channels = ["CH1", "CH2", "CH3", "CH4"]

    # 샘플 수 → Fs 계산
    sample_counts = [len(vib_data[ch]) for ch in channels if ch in vib_data]
    if not sample_counts:
        raise ValueError("No valid vibration channels found.")
    N = sample_counts[0]
    Fs = N / duration_sec  # 샘플링 주파수

    for ch in channels:
        if ch not in vib_data:
            continue
        signal = vib_data[ch]
        fault_feats = extract_fault_frequency_features(signal, Fs, bandwidth=bandwidth)
        for k, v in fault_feats.items():
            features[f"{ch}_{k}"] = v

    return features

# WPT+FFT 윈도우별 특징 추출
def process_all_train_folders(base_path, folder_names):
    all_feature_rows = []

    for folder_name in folder_names:
        folder_path = os.path.join(base_path, folder_name)
        tdms_files = sorted([f for f in os.listdir(folder_path) if f.endswith('.tdms')])

        for tdms_file in tdms_files:
            file_path = os.path.join(folder_path, tdms_file)
            features = extract_vibration_array_with_features(file_path)

            for i, feature in enumerate(features):
                all_feature_rows.append({
                    'file_name': tdms_file,
                    'window_index': i,
                    'features': feature,
                    'folder': folder_name
                })

    return pd.DataFrame(all_feature_rows)

# 모든 train 폴더 적용
def extract_fault_features_all(train_root, target_folders=None, bandwidth=5):
    """
    Train Set 전체 TDMS 파일에 대해 고장 주파수 기반 FFT 피처 추출
    """
    if target_folders is None:
        target_folders = sorted(os.listdir(train_root))

    all_records = []

    for folder in tqdm(target_folders, desc="Extracting fault features"):
        folder_path = os.path.join(train_root, folder)
        tdms_files = sorted([f for f in os.listdir(folder_path) if f.endswith(".tdms")])

        for tdms_file in tdms_files:
            file_path = os.path.join(folder_path, tdms_file)
            try:
                vib_data, _ = load_tdms_file(file_path)
                features = extract_fault_features_from_tdms(vib_data, bandwidth=bandwidth)
                features["folder"] = folder
                features["file_name"] = tdms_file.replace('.tdms', '')
                all_records.append(features)
            except Exception as e:
                print(f"Failed to process {file_path}: {e}")

    df = pd.DataFrame(all_records)
    return df

# 파일 단위 summary로 변환(평균, 표준편차)
def summarize_window_features_compact(features_df):
    summary_records = []

    for (folder, file_name), group in features_df.groupby(['folder', 'file_name']):
        feature_array = np.stack(group['features'].values)  # (윈도우 수, 피처 수)
        feature_mean = np.mean(feature_array, axis=0)
        feature_std = np.std(feature_array, axis=0)

        record = {
            'folder': folder,
            'file_name': file_name,
            'WPTFFT_Feature_mean': feature_mean,
            'WPTFFT_Feature_std': feature_std,
        }

        summary_records.append(record)

    return pd.DataFrame(summary_records)

In [10]:
# --- 설정값 ---
base_path = "/content/drive/MyDrive/KSPHM-data-challenge/Validation Set"
folder_names = [f"Validation{i}" for i in range(1, 7)]  # Train1 ~ Train8

# --- WPT+FFT 윈도우 특징 추출 수행 ---
features_df = process_all_train_folders(base_path, folder_names)
features_df['file_name'] = features_df['file_name'].str.replace(r'\.tdms$', '', regex=True)

wptfft_summary_df = summarize_window_features_compact(features_df)
fault_summary_df = extract_fault_features_all(base_path, folder_names, bandwidth=5)

# --- 두 summary 병합 ---
summary_df = pd.merge(wptfft_summary_df, fault_summary_df, on=['folder', 'file_name'], how='inner')

print(f"요약된 파일 수: {len(summary_df)}")
print(f"총 윈도우 수: {len(features_df)}")


Extracting fault features: 100%|██████████| 6/6 [00:22<00:00,  3.69s/it]

요약된 파일 수: 313
총 윈도우 수: 5947





In [11]:
print("features_df columns:", features_df.columns.tolist())

features_df columns: ['file_name', 'window_index', 'features', 'folder']


병합

In [12]:
# 1) 먼저 features_df와 summary_expanded가 모두 만들어졌다고 가정
# 2) key: ['folder', 'file_name', 'window_index']

merged_df = pd.merge(
    summary_expanded,
    features_df,
    on=['folder', 'file_name', 'window_index'],
    how='inner'  # 윈도우 단위로 완전히 겹치는 데이터만 남음
)

print("병합된 데이터 개수:", len(merged_df))
print(merged_df.columns)


병합된 데이터 개수: 5377
Index(['time_sec', 'TC SP Front[℃]', 'TC SP Rear[℃]', 'Torque[Nm]',
       'file_name', 'folder', 'Torque[Nm]_Slope_3cycle',
       'TC SP Front[℃]_Slope_3cycle', 'TC SP Rear[℃]_Slope_3cycle',
       'Combined_Slope_3cycle', 'Torque[Nm]_Slope_5cycle',
       'TC SP Front[℃]_Slope_5cycle', 'TC SP Rear[℃]_Slope_5cycle',
       'Combined_Slope_5cycle', 'window_index', 'features'],
      dtype='object')


모델 로드 및 예측 수행 후 평가 점수 출력

### 초기

In [None]:
# --- 모델 로드 ---
model_path = "/content/drive/MyDrive/KSPHM-data-challenge/model/simple_cnn_lstm_model_FE.h5"
model = load_model(model_path, compile=False)
print("모델 로드 완료:", model_path)

# --- 스케일러 로드 ---
X_scaler = joblib.load("/content/drive/MyDrive/KSPHM-data-challenge/scalers/X_scaler.pkl")
y_scaler = joblib.load("/content/drive/MyDrive/KSPHM-data-challenge/scalers/y_scaler.pkl")

# --- 특징 배열 준비 ---
X_val = np.stack(merged_df['features'].values)  # (samples, features)
X_val = X_val[..., np.newaxis]  # (samples, features, 1)

# --- reshape 후 스케일링 ---
samples_val, feat_dim, channels = X_val.shape
X_val_reshaped = X_val.reshape(samples_val, feat_dim * channels)  # 2D (samples, features)

X_val_scaled = X_scaler.transform(X_val_reshaped)

# --- 스케일된 데이터를 원래 형태로 reshape ---
X_val_final = X_val_scaled.reshape(samples_val, feat_dim, channels)  # 3D 입력

# --- 예측 ---
y_val_pred_scaled = model.predict(X_val_final, verbose=1)

# --- 역스케일링 ---
y_val_pred = y_scaler.inverse_transform(y_val_pred_scaled)

# --- 결과 병합 ---
merged_df['RUL_pred_sec'] = y_val_pred.flatten()

# --- 예측 결과 확인 ---
print(merged_df[['folder', 'file_name', 'window_index', 'RUL_pred_sec']].head())

모델 로드 완료: /content/drive/MyDrive/KSPHM-data-challenge/model/simple_cnn_lstm_model_FE.h5
[1m169/169[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 15ms/step
        folder                                          file_name  \
0  Validation1  modified_KIMM Simulator_KIMM Bearing Test_2016...   
1  Validation1  modified_KIMM Simulator_KIMM Bearing Test_2016...   
2  Validation1  modified_KIMM Simulator_KIMM Bearing Test_2016...   
3  Validation1  modified_KIMM Simulator_KIMM Bearing Test_2016...   
4  Validation1  modified_KIMM Simulator_KIMM Bearing Test_2016...   

   window_index  RUL_pred_sec  
0             0  30558.949219  
1             1  28211.976562  
2             2  30811.535156  
3             3  30996.062500  
4             4  28681.941406  


In [None]:
# 각 Validation 폴더별 마지막 윈도우의 예측 RUL 선택
final_rul_scores = (
    merged_df.groupby('folder')
    .apply(lambda df: df.sort_values('window_index').iloc[-1])  # 마지막 윈도우
    .reset_index(drop=True)
)

# 원하는 형식으로 컬럼 구성
result_df = pd.DataFrame({
    'File': final_rul_scores['folder'],
    'RUL_Score(sec)': final_rul_scores['RUL_pred_sec']
})

# 엑셀 파일로 저장
output_path = "/content/drive/MyDrive/KSPHM-data-challenge/RUL_Score/rul_scores2.xlsx"
result_df.to_excel(output_path, index=False)

print("엑셀 저장 완료:", output_path)
print(result_df)

  .apply(lambda df: df.sort_values('window_index').iloc[-1])  # 마지막 윈도우


엑셀 저장 완료: /content/drive/MyDrive/KSPHM-data-challenge/RUL_Score/rul_scores2.xlsx
          File  RUL_Score(sec)
0  Validation1    16482.927734
1  Validation2    61926.699219
2  Validation3    42437.132812
3  Validation4    73929.242188
4  Validation5    69461.437500
5  Validation6    37952.039062


### 예측값 클리핑

In [None]:
# --- 모델 로드 ---
model_path = "/content/drive/MyDrive/KSPHM-data-challenge/model/simple_cnn_lstm_model_FE.h5"
model = load_model(model_path, compile=False)
print("모델 로드 완료:", model_path)

# --- 스케일러 로드 ---
X_scaler = joblib.load("/content/drive/MyDrive/KSPHM-data-challenge/scalers/X_scaler.pkl")
y_scaler = joblib.load("/content/drive/MyDrive/KSPHM-data-challenge/scalers/y_scaler.pkl")

# --- 특징 배열 준비 ---
X_val = np.stack(merged_df['features'].values)  # (samples, features)
X_val = X_val[..., np.newaxis]  # (samples, features, 1)

# --- reshape 후 스케일링 ---
samples_val, feat_dim, channels = X_val.shape
X_val_reshaped = X_val.reshape(samples_val, feat_dim * channels)  # 2D (samples, features)

X_val_scaled = X_scaler.transform(X_val_reshaped)

# --- 스케일된 데이터를 원래 형태로 reshape ---
X_val_final = X_val_scaled.reshape(samples_val, feat_dim, channels)  # 3D 입력

# --- 예측 ---
y_val_pred_scaled = model.predict(X_val_final, verbose=1)

# --- 역스케일링 ---
y_val_pred = y_scaler.inverse_transform(y_val_pred_scaled)

# 예: 예측값 클리핑
y_val_pred = np.minimum(y_val_pred, 60000)

# --- 결과 병합 ---
merged_df['RUL_pred_sec'] = y_val_pred.flatten()

# --- 예측 결과 확인 ---
print(merged_df[['folder', 'file_name', 'window_index', 'RUL_pred_sec']].head(6))

모델 로드 완료: /content/drive/MyDrive/KSPHM-data-challenge/model/simple_cnn_lstm_model_FE.h5
[1m169/169[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 15ms/step
        folder                                          file_name  \
0  Validation1  modified_KIMM Simulator_KIMM Bearing Test_2016...   
1  Validation1  modified_KIMM Simulator_KIMM Bearing Test_2016...   
2  Validation1  modified_KIMM Simulator_KIMM Bearing Test_2016...   
3  Validation1  modified_KIMM Simulator_KIMM Bearing Test_2016...   
4  Validation1  modified_KIMM Simulator_KIMM Bearing Test_2016...   
5  Validation1  modified_KIMM Simulator_KIMM Bearing Test_2016...   

   window_index  RUL_pred_sec  
0             0  30558.949219  
1             1  28211.976562  
2             2  30811.535156  
3             3  30996.062500  
4             4  28681.941406  
5             5  30581.914062  


### 예측값 비율 보정

In [16]:
# --- 모델 로드 ---
model_path = "/content/drive/MyDrive/KSPHM-data-challenge/model/simple_cnn_lstm_model_FE.h5"
model = load_model(model_path, compile=False)
print("모델 로드 완료:", model_path)

# --- 스케일러 로드 ---
X_scaler = joblib.load("/content/drive/MyDrive/KSPHM-data-challenge/scalers/X_scaler.pkl")
y_scaler = joblib.load("/content/drive/MyDrive/KSPHM-data-challenge/scalers/y_scaler.pkl")

# --- 특징 배열 준비 ---
X_val = np.stack(merged_df['features'].values)  # (samples, features)
X_val = X_val[..., np.newaxis]  # (samples, features, 1)

# --- reshape 후 스케일링 ---
samples_val, feat_dim, channels = X_val.shape
X_val_reshaped = X_val.reshape(samples_val, feat_dim * channels)  # 2D (samples, features)

X_val_scaled = X_scaler.transform(X_val_reshaped)

# --- 스케일된 데이터를 원래 형태로 reshape ---
X_val_final = X_val_scaled.reshape(samples_val, feat_dim, channels)  # 3D 입력

# --- 예측 ---
y_val_pred_scaled = model.predict(X_val_final, verbose=1)

# --- 역스케일링 ---
y_val_pred = y_scaler.inverse_transform(y_val_pred_scaled)

# 예: 예측값 비율 보정
y_val_pred_corrected = y_val_pred * 0.85

# --- 결과 병합 ---
merged_df['RUL_pred_sec'] = y_val_pred_corrected .flatten()

# --- 예측 결과 확인 ---
print(merged_df[['folder', 'file_name', 'window_index', 'RUL_pred_sec']].head(6))

모델 로드 완료: /content/drive/MyDrive/KSPHM-data-challenge/model/simple_cnn_lstm_model_FE.h5
[1m169/169[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 21ms/step
        folder                                          file_name  \
0  Validation1  modified_KIMM Simulator_KIMM Bearing Test_2016...   
1  Validation1  modified_KIMM Simulator_KIMM Bearing Test_2016...   
2  Validation1  modified_KIMM Simulator_KIMM Bearing Test_2016...   
3  Validation1  modified_KIMM Simulator_KIMM Bearing Test_2016...   
4  Validation1  modified_KIMM Simulator_KIMM Bearing Test_2016...   
5  Validation1  modified_KIMM Simulator_KIMM Bearing Test_2016...   

   window_index  RUL_pred_sec  
0             0  25975.107422  
1             1  23980.181641  
2             2  26189.804688  
3             3  26346.654297  
4             4  24379.650391  
5             5  25994.626953  


In [17]:
# 각 Validation 폴더별 마지막 윈도우의 예측 RUL 선택
final_rul_scores = (
    merged_df.groupby('folder')
    .apply(lambda df: df.sort_values('window_index').iloc[-1])  # 마지막 윈도우
    .reset_index(drop=True)
)

# 원하는 형식으로 컬럼 구성
result_df = pd.DataFrame({
    'File': final_rul_scores['folder'],
    'RUL_Score(sec)': final_rul_scores['RUL_pred_sec']
})

# 엑셀 파일로 저장
output_path = "/content/drive/MyDrive/KSPHM-data-challenge/RUL_Score/rul_scores3.xlsx"
result_df.to_excel(output_path, index=False)

print("엑셀 저장 완료:", output_path)
print(result_df)

엑셀 저장 완료: /content/drive/MyDrive/KSPHM-data-challenge/RUL_Score/rul_scores3.xlsx
          File  RUL_Score(sec)
0  Validation1    14010.489258
1  Validation2    52637.695312
2  Validation3    36071.562500
3  Validation4    62839.859375
4  Validation5    59042.222656
5  Validation6    32259.234375


  .apply(lambda df: df.sort_values('window_index').iloc[-1])  # 마지막 윈도우


### window offset 적용

In [24]:
import os
from nptdms import TdmsFile

def load_tdms_file(file_path):
    tdms_file = TdmsFile.read(file_path)

    group_name_vibration = tdms_file.groups()[0].name
    group_name_operation = tdms_file.groups()[1].name

    vib_channels = tdms_file[group_name_vibration].channels()
    vib_data = {ch.name.strip(): ch.data for ch in vib_channels}

    operation_channels = tdms_file[group_name_operation].channels()
    operation_data = {ch.name.strip(): ch.data for ch in operation_channels}

    return vib_data, operation_data

validation_folder = "/content/drive/MyDrive/KSPHM-data-challenge/Validation Set/Validation6"  # 실제 경로로 바꿔주세요

total_samples = 0

for file_name in os.listdir(validation_folder):
    if file_name.endswith(".tdms"):
        file_path = os.path.join(validation_folder, file_name)
        vib_data, _ = load_tdms_file(file_path)

        # 진동 데이터의 첫 번째 채널 길이 합산
        first_channel_data = next(iter(vib_data.values()))
        samples = len(first_channel_data)
        total_samples += samples

print(f"Validation1 폴더 전체 샘플 수: {total_samples}")

Validation1 폴더 전체 샘플 수: 5376000


In [25]:
# 파라미터 정의 (기존 슬라이딩 윈도우 기준)
window_size = 25600           # 윈도우 샘플 수
overlap = 0.5                # 오버랩 비율
step = int(window_size * (1 - overlap))  # 윈도우 이동 간격 (샘플 수)
sampling_rate = 25600         # 샘플링 주파수 (Hz), 예: 25600 샘플/초

data_length_samples_dict = {
    'Validation1': 38400000,  # 예시 샘플 수
    'Validation2': 12800000,
    'Validation3': 9728000,
    'Validation4': 7936000,
    'Validation5': 5888000,
    'Validation6': 5376000,
}


# merged_df의 마지막 window_index를 구한다 (폴더별)
last_window_indices = merged_df.groupby('folder')['window_index'].max()

offset_dict = {}

for folder, last_idx in last_window_indices.items():
    data_length = data_length_samples_dict[folder]  # 폴더별 실제 raw 데이터 길이 (샘플 수)

    last_window_start_sample = last_idx * step
    last_window_end_sample = last_window_start_sample + window_size

    offset_samples = data_length - last_window_end_sample
    offset_samples = max(0, offset_samples)  # 음수 방지

    offset_sec = offset_samples / sampling_rate

    offset_dict[folder] = offset_sec

print(offset_dict)

{'Validation1': 1490.0, 'Validation2': 490.0, 'Validation3': 370.0, 'Validation4': 300.0, 'Validation5': 220.0, 'Validation6': 200.0}


In [26]:
# 기존 마지막 윈도우별 예측 RUL값을 담은 DataFrame
final_rul_scores = (
    merged_df.groupby('folder')
    .apply(lambda df: df.sort_values('window_index').iloc[-1])  # 마지막 윈도우
    .reset_index(drop=True)
)

# offset 값을 열로 추가
final_rul_scores['offset_sec'] = final_rul_scores['folder'].map(offset_dict)

# offset을 더한 보정 RUL 계산
final_rul_scores['RUL_pred_sec_corrected'] = final_rul_scores['RUL_pred_sec'] + final_rul_scores['offset_sec']

# 결과 확인
print(final_rul_scores[['folder', 'RUL_pred_sec', 'offset_sec', 'RUL_pred_sec_corrected']])


        folder  RUL_pred_sec  offset_sec  RUL_pred_sec_corrected
0  Validation1  14010.489258      1490.0            15500.489258
1  Validation2  52637.695312       490.0            53127.695312
2  Validation3  36071.562500       370.0            36441.562500
3  Validation4  62839.859375       300.0            63139.859375
4  Validation5  59042.222656       220.0            59262.222656
5  Validation6  32259.234375       200.0            32459.234375


  .apply(lambda df: df.sort_values('window_index').iloc[-1])  # 마지막 윈도우


In [27]:
import pandas as pd

# offset 더한 보정 RUL 계산 (이미 계산했다고 가정)
final_rul_scores['offset_sec'] = final_rul_scores['folder'].map(offset_dict)
final_rul_scores['RUL_pred_sec_corrected'] = final_rul_scores['RUL_pred_sec'] + final_rul_scores['offset_sec']

# 제출용 데이터프레임 생성
result_df = pd.DataFrame({
    'File': final_rul_scores['folder'],
    'RUL_Score(sec)': final_rul_scores['RUL_pred_sec_corrected']
})

# 엑셀 파일 경로 지정
output_path = "/content/drive/MyDrive/KSPHM-data-challenge/RUL_Score/rul_scores_offset.xlsx"

# 엑셀 저장
result_df.to_excel(output_path, index=False)

print("보정된 RUL 점수 엑셀 저장 완료:", output_path)
print(result_df)


보정된 RUL 점수 엑셀 저장 완료: /content/drive/MyDrive/KSPHM-data-challenge/RUL_Score/rul_scores_offset.xlsx
          File  RUL_Score(sec)
0  Validation1    15500.489258
1  Validation2    53127.695312
2  Validation3    36441.562500
3  Validation4    63139.859375
4  Validation5    59262.222656
5  Validation6    32459.234375


### 평가 파이프라인

In [None]:
# 오차 계산
def calculate_error(ActRUL, PredRUL):
    error = 100 * (ActRUL - PredRUL) / ActRUL
    return error

# 정확도 점수 계산
def calculate_accuracy_score(error):
    ln_0_5 = np.log(0.5)
    score = np.where(
        error <= 0,
        np.exp(-ln_0_5 * error / 20),
        np.exp(+ln_0_5 * error / 20)
    )
    return score

# 최종 점수 계산
def calculate_final_score(accuracy_scores):
    return np.mean(accuracy_scores)

# 전체 평가 파이프라인
def evaluate_rul_prediction(ActRUL, PredRUL):
    error = calculate_error(ActRUL, PredRUL)
    accuracy_scores = calculate_accuracy_score(error)
    final_score = calculate_final_score(accuracy_scores)
    return {
        "Error": error,
        "Accuracy_scores": accuracy_scores,
        "Final_Score": final_score
    }