1단계 전처리

In [2]:
import pandas as pd
import numpy as np
from pathlib import Path
from tqdm import tqdm
import sys

def calculate_hi_from_csv(file_path):
    """
    단일 CSV 파일에서 수평 진동 신호(5번째 열)를 읽어
    Kurtosis와 RMS를 계산합니다.
    """
    try:
        # 파일 헤더가 없으므로 header=None
        # 5번째 열 (인덱스 4)의 데이터만 읽어옵니다.
        data = pd.read_csv(file_path, header=None).iloc[:, 4]
        
        # Kurtosis 계산 (pandas 기본 함수 사용)
        kurt = data.kurt()
        
        # RMS 계산 (Numpy 사용)
        rms = np.sqrt(np.mean(data**2))
        
        return kurt, rms
    except Exception as e:
        print(f"Error processing file {file_path}: {e}")
        return None, None

# --- 1. 경로 설정 ---
# 스크립트를 실행하는 현재 폴더를 기준으로 합니다.
BASE_PATH = Path(".") 
LEARNING_SET_PATH = BASE_PATH / "Learning_set"
FULL_TEST_SET_PATH = BASE_PATH / "Full_Test_Set" # (Ground Truth RUL 계산을 위해 Full_Test_Set 사용)

# 전처리된 HI 데이터를 저장할 새 폴더 (현재 폴더 하위에 생성)
OUTPUT_PATH = BASE_PATH / "HI_preprocessed"
OUTPUT_PATH.mkdir(parents=True, exist_ok=True) # 폴더가 없으면 생성

# --- 2. 대상 베어링 정의 (Condition 1) ---
learning_bearings = ["Bearing1_1", "Bearing1_2"]
test_bearings = ["Bearing1_3", "Bearing1_4", "Bearing1_5", "Bearing1_6", "Bearing1_7"]
all_bearings = learning_bearings + test_bearings

print(f"Starting HI extraction for {len(all_bearings)} bearings...")
print(f"Base path is: {BASE_PATH.resolve()}")
print(f"Data will be saved in: {OUTPUT_PATH.resolve()}")
print("-" * 30)

# --- 3. 메인 처리 루프 ---
for bearing_name in tqdm(all_bearings, desc="Overall Progress"):
    
    # 베어링이 학습셋에 있는지 테스트셋에 있는지에 따라 경로 설정
    if bearing_name in learning_bearings:
        bearing_path = LEARNING_SET_PATH / bearing_name
    else:
        bearing_path = FULL_TEST_SET_PATH / bearing_name

    if not bearing_path.exists():
        print(f"Warning: Directory not found for {bearing_name} at {bearing_path.resolve()}. Skipping.")
        continue

    # 'acc_*.csv' 파일 목록을 가져오고, 이름순으로 정렬 (필수!)
    file_list = sorted(list(bearing_path.glob("acc_*.csv")))
    
    if not file_list:
        print(f"Warning: No 'acc_*.csv' files found in {bearing_path}. Skipping.")
        continue

    hi_data = [] # (Kurtosis, RMS) 결과를 저장할 리스트

    # 개별 파일 처리 (tqdm으로 진행률 표시)
    for file_path in tqdm(file_list, desc=f"Processing {bearing_name}", leave=False):
        kurt, rms = calculate_hi_from_csv(file_path)
        if kurt is not None:
            hi_data.append({"kurtosis": kurt, "rms": rms})

    # --- 4. 결과 저장 ---
    if hi_data:
        # 리스트를 Pandas DataFrame으로 변환
        hi_df = pd.DataFrame(hi_data)
        
        # 각 스냅샷(파일)의 순서를 인덱스로 하여 CSV로 저장
        save_file_path = OUTPUT_PATH / f"{bearing_name}_hi.csv"
        hi_df.to_csv(save_file_path, index_label="snapshot_index")

print("-" * 30)
print("Step 1: HI (Kurtosis, RMS) extraction complete!")
print(f"Check the results in {OUTPUT_PATH.resolve()}")

Starting HI extraction for 7 bearings...
Base path is: /home/work/RUL_AL/Pronostia dataset
Data will be saved in: /home/work/RUL_AL/Pronostia dataset/HI_preprocessed
------------------------------


Overall Progress:   0% 0/7 [00:00<?, ?it/s]
[Acessing Bearing1_1:   0% 0/2803 [00:00<?, ?it/s]
[Acessing Bearing1_1:   1% 35/2803 [00:00<00:08, 341.14it/s]
[Acessing Bearing1_1:   2% 70/2803 [00:00<00:08, 326.02it/s]
[Acessing Bearing1_1:   4% 103/2803 [00:00<00:08, 324.46it/s]
[Acessing Bearing1_1:   5% 137/2803 [00:00<00:08, 329.15it/s]
[Acessing Bearing1_1:   6% 170/2803 [00:00<00:08, 327.75it/s]
[Acessing Bearing1_1:   7% 204/2803 [00:00<00:07, 330.52it/s]
[Acessing Bearing1_1:   8% 238/2803 [00:00<00:07, 327.86it/s]
[Acessing Bearing1_1:  10% 271/2803 [00:00<00:07, 326.72it/s]
[Acessing Bearing1_1:  11% 305/2803 [00:00<00:07, 326.70it/s]
[Acessing Bearing1_1:  12% 339/2803 [00:01<00:07, 330.23it/s]
[Acessing Bearing1_1:  13% 373/2803 [00:01<00:07, 330.41it/s]
[Acessing Bearing1_1:  15% 407/2803 [00:01<00:07, 331.10it/s]
[Acessing Bearing1_1:  16% 441/2803 [00:01<00:07, 329.92it/s]
[Acessing Bearing1_1:  17% 474/2803 [00:01<00:07, 328.43it/s]
[Acessing Bearing1_1:  

Error processing file Full_Test_Set/Bearing1_4/acc_00001.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00002.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00003.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00004.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00005.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00006.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00007.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00008.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00009.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_0001


[Acessing Bearing1_4:   5% 66/1428 [00:00<00:04, 306.08it/s]
[Acessing Bearing1_4:   7% 97/1428 [00:00<00:04, 305.59it/s]

Error processing file Full_Test_Set/Bearing1_4/acc_00064.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00065.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00066.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00067.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00068.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00069.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00070.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00071.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00072.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_0007


[Acessing Bearing1_4:   9% 128/1428 [00:00<00:04, 303.84it/s]
[Acessing Bearing1_4:  11% 159/1428 [00:00<00:04, 305.24it/s]

Error processing file Full_Test_Set/Bearing1_4/acc_00125.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00126.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00127.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00128.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00129.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00130.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00131.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00132.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00133.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_0013


[Acessing Bearing1_4:  13% 192/1428 [00:00<00:03, 311.99it/s]
[Acessing Bearing1_4:  16% 224/1428 [00:00<00:03, 311.27it/s]

Error processing file Full_Test_Set/Bearing1_4/acc_00189.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00190.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00191.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00192.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00193.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00194.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00195.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00196.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00197.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_0019


[Acessing Bearing1_4:  18% 257/1428 [00:00<00:03, 316.77it/s]
[Acessing Bearing1_4:  20% 289/1428 [00:00<00:03, 316.55it/s]

Error processing file Full_Test_Set/Bearing1_4/acc_00254.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00255.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00256.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00257.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00258.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00259.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00260.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00261.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00262.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_0026


[Acessing Bearing1_4:  22% 321/1428 [00:01<00:03, 314.29it/s]
[Acessing Bearing1_4:  25% 353/1428 [00:01<00:03, 311.11it/s]

Error processing file Full_Test_Set/Bearing1_4/acc_00318.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00319.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00320.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00321.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00322.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00323.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00324.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00325.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00326.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_0032


[Acessing Bearing1_4:  27% 385/1428 [00:01<00:03, 312.44it/s]
[Acessing Bearing1_4:  29% 419/1428 [00:01<00:03, 318.51it/s]

Error processing file Full_Test_Set/Bearing1_4/acc_00380.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00381.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00382.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00383.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00384.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00385.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00386.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00387.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00388.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_0038


[Acessing Bearing1_4:  32% 451/1428 [00:01<00:03, 316.95it/s]
[Acessing Bearing1_4:  34% 483/1428 [00:01<00:02, 315.94it/s]

Error processing file Full_Test_Set/Bearing1_4/acc_00445.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00446.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00447.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00448.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00449.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00450.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00451.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00452.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00453.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_0045


[Acessing Bearing1_4:  36% 516/1428 [00:01<00:02, 318.54it/s]
[Acessing Bearing1_4:  38% 548/1428 [00:01<00:02, 315.60it/s]

Error processing file Full_Test_Set/Bearing1_4/acc_00510.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00511.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00512.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00513.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00514.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00515.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00516.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00517.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00518.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_0051


[Acessing Bearing1_4:  41% 580/1428 [00:01<00:02, 316.48it/s]
[Acessing Bearing1_4:  43% 612/1428 [00:01<00:02, 313.91it/s]

Error processing file Full_Test_Set/Bearing1_4/acc_00574.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00575.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00576.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00577.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00578.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00579.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00580.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00581.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00582.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_0058


[Acessing Bearing1_4:  45% 644/1428 [00:02<00:02, 305.24it/s]
[Acessing Bearing1_4:  47% 675/1428 [00:02<00:02, 298.64it/s]

Error processing file Full_Test_Set/Bearing1_4/acc_00634.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00635.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00636.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00637.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00638.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00639.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00640.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00641.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00642.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_0064


[Acessing Bearing1_4:  49% 705/1428 [00:02<00:02, 294.72it/s]
[Acessing Bearing1_4:  51% 735/1428 [00:02<00:02, 291.42it/s]

Error processing file Full_Test_Set/Bearing1_4/acc_00693.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00694.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00695.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00696.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00697.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00698.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00699.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00700.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00701.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_0070


[Acessing Bearing1_4:  54% 765/1428 [00:02<00:02, 290.79it/s]
[Acessing Bearing1_4:  56% 795/1428 [00:02<00:02, 287.21it/s]

Error processing file Full_Test_Set/Bearing1_4/acc_00751.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00752.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00753.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00754.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00755.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00756.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00757.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00758.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00759.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_0076


[Acessing Bearing1_4:  58% 825/1428 [00:02<00:02, 288.69it/s]
[Acessing Bearing1_4:  60% 858/1428 [00:02<00:01, 299.31it/s]

Error processing file Full_Test_Set/Bearing1_4/acc_00809.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00810.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00811.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00812.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00813.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00814.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00815.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00816.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00817.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_0081


[Acessing Bearing1_4:  62% 888/1428 [00:02<00:01, 298.23it/s]
[Acessing Bearing1_4:  64% 920/1428 [00:03<00:01, 303.69it/s]

Error processing file Full_Test_Set/Bearing1_4/acc_00871.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00872.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00873.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00874.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00875.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00876.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00877.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00878.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00879.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_0088


[Acessing Bearing1_4:  67% 952/1428 [00:03<00:01, 308.32it/s]
[Acessing Bearing1_4:  69% 983/1428 [00:03<00:01, 307.41it/s]

Error processing file Full_Test_Set/Bearing1_4/acc_00936.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00937.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00938.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00939.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00940.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00941.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00942.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00943.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00944.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_0094


[Acessing Bearing1_4:  71% 1014/1428 [00:03<00:01, 304.24it/s]
[Acessing Bearing1_4:  73% 1045/1428 [00:03<00:01, 303.32it/s]

Error processing file Full_Test_Set/Bearing1_4/acc_00997.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00998.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_00999.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01000.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01001.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01002.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01003.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01004.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01005.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_0100


[Acessing Bearing1_4:  75% 1076/1428 [00:03<00:01, 298.02it/s]
[Acessing Bearing1_4:  77% 1106/1428 [00:03<00:01, 297.01it/s]

Error processing file Full_Test_Set/Bearing1_4/acc_01058.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01059.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01060.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01061.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01062.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01063.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01064.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01065.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01066.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_0106


[Acessing Bearing1_4:  80% 1136/1428 [00:03<00:00, 294.34it/s]
[Acessing Bearing1_4:  82% 1166/1428 [00:03<00:00, 291.08it/s]

Error processing file Full_Test_Set/Bearing1_4/acc_01116.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01117.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01118.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01119.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01120.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01121.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01122.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01123.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01124.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_0112


[Acessing Bearing1_4:  84% 1196/1428 [00:03<00:00, 290.18it/s]
[Acessing Bearing1_4:  86% 1226/1428 [00:04<00:00, 286.26it/s]

Error processing file Full_Test_Set/Bearing1_4/acc_01175.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01176.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01177.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01178.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01179.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01180.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01181.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01182.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01183.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_0118


[Acessing Bearing1_4:  88% 1255/1428 [00:04<00:00, 279.44it/s]
[Acessing Bearing1_4:  90% 1283/1428 [00:04<00:00, 274.86it/s]

Error processing file Full_Test_Set/Bearing1_4/acc_01232.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01233.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01234.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01235.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01236.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01237.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01238.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01239.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01240.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_0124


[Acessing Bearing1_4:  92% 1312/1428 [00:04<00:00, 277.39it/s]
[Acessing Bearing1_4:  94% 1343/1428 [00:04<00:00, 284.86it/s]

Error processing file Full_Test_Set/Bearing1_4/acc_01286.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01287.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01288.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01289.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01290.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01291.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01292.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01293.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01294.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_0129


[Acessing Bearing1_4:  96% 1372/1428 [00:04<00:00, 283.46it/s]
[Acessing Bearing1_4:  98% 1401/1428 [00:04<00:00, 283.67it/s]

Error processing file Full_Test_Set/Bearing1_4/acc_01345.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01346.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01347.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01348.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01349.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01350.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01351.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01352.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01353.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_0135


Overall Progress:  57% 4/7 [00:25<00:17,  5.93s/it]            

Error processing file Full_Test_Set/Bearing1_4/acc_01402.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01403.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01404.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01405.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01406.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01407.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01408.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01409.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_01410.csv: single positional indexer is out-of-bounds
Error processing file Full_Test_Set/Bearing1_4/acc_0141


[Acessing Bearing1_5:   0% 0/2463 [00:00<?, ?it/s]
[Acessing Bearing1_5:   1% 30/2463 [00:00<00:08, 299.27it/s]
[Acessing Bearing1_5:   2% 61/2463 [00:00<00:08, 300.10it/s]
[Acessing Bearing1_5:   4% 92/2463 [00:00<00:08, 285.19it/s]
[Acessing Bearing1_5:   5% 121/2463 [00:00<00:08, 285.62it/s]
[Acessing Bearing1_5:   6% 154/2463 [00:00<00:07, 297.31it/s]
[Acessing Bearing1_5:   7% 184/2463 [00:00<00:08, 276.51it/s]
[Acessing Bearing1_5:   9% 213/2463 [00:00<00:08, 277.69it/s]
[Acessing Bearing1_5:  10% 243/2463 [00:00<00:07, 283.99it/s]
[Acessing Bearing1_5:  11% 274/2463 [00:00<00:07, 291.30it/s]
[Acessing Bearing1_5:  12% 307/2463 [00:01<00:07, 301.28it/s]
[Acessing Bearing1_5:  14% 338/2463 [00:01<00:07, 299.92it/s]
[Acessing Bearing1_5:  15% 371/2463 [00:01<00:06, 306.99it/s]
[Acessing Bearing1_5:  16% 402/2463 [00:01<00:06, 305.63it/s]
[Acessing Bearing1_5:  18% 433/2463 [00:01<00:07, 285.33it/s]
[Acessing Bearing1_5:  19% 464/2463 [00:01<00:06, 290.91it/s]
[Ace

------------------------------
Step 1: HI (Kurtosis, RMS) extraction complete!
Check the results in /home/work/RUL_AL/Pronostia dataset/HI_preprocessed





2단계 FPT 계산 코드

In [3]:
import pandas as pd
import numpy as np
from pathlib import Path
import json

# --- 1. 경로 및 파라미터 설정 ---
BASE_PATH = Path(".") 
HI_PATH = BASE_PATH / "HI_preprocessed"
OUTPUT_PATH = BASE_PATH # FPT 결과를 저장할 위치 (동일 폴더)

# FPT 탐지 파라미터 (논문 [55] 기반)
NORMAL_PERIOD_SIZE = 100 # 정상 상태로 간주할 초기 스냅샷 개수 (임의 설정, 조절 가능)
CONSECUTIVE_THRESHOLD = 3  # 3-시그마를 연속으로 3회 초과해야 함 (논문에서 l=2, 즉 l+1=3)

# 1단계에서 처리한 베어링 리스트
bearing_names = [
    "Bearing1_1", "Bearing1_2", "Bearing1_3", "Bearing1_4", 
    "Bearing1_5", "Bearing1_6", "Bearing1_7"
]

fpt_results = {} # FPT 결과를 저장할 딕셔너리

print("Starting Step 2: FPT Identification...")
print(f"Loading HI data from: {HI_PATH.resolve()}")
print(f"Normal period size: {NORMAL_PERIOD_SIZE} snapshots")
print(f"Consecutive threshold: {CONSECUTIVE_THRESHOLD} times")
print("-" * 30)

# --- 2. FPT 탐지 루프 ---
for bearing_name in bearing_names:
    hi_file_path = HI_PATH / f"{bearing_name}_hi.csv"
    
    if not hi_file_path.exists():
        print(f"Warning: HI file not found for {bearing_name}. Skipping.")
        continue

    try:
        # 1단계에서 저장한 HI 파일 로드
        hi_df = pd.read_csv(hi_file_path, index_col="snapshot_index")
        kurtosis_series = hi_df["kurtosis"]

        # 1. '정상' 구간 데이터 정의
        normal_data = kurtosis_series.iloc[:NORMAL_PERIOD_SIZE]

        # 2. 3-시그마 임계값 계산
        mu = normal_data.mean()
        sigma = normal_data.std()
        
        # 논문 [55]는 |kurtosis - mu| > 3*sigma 를 확인
        upper_threshold = mu + 3 * sigma
        lower_threshold = mu - 3 * sigma

        # 3. FPT 탐지
        consecutive_count = 0
        fpt = None # FPT (First Predicting Time)

        # 전체 데이터를 순회 (정상 구간 포함)
        for i, kurt_value in enumerate(kurtosis_series):
            
            # 3-시그마 범위를 벗어났는지 확인
            if not (lower_threshold <= kurt_value <= upper_threshold):
                consecutive_count += 1
            else:
                consecutive_count = 0 # 범위 안에 들어오면 리셋

            # 4. FPT 확정
            # 연속 횟수가 임계값에 도달했는지 확인
            if consecutive_count >= CONSECUTIVE_THRESHOLD:
                # FPT는 *연속 이탈이 시작된 첫 시점*
                fpt = i - CONSECUTIVE_THRESHOLD + 1
                break # 첫 번째 FPT를 찾았으므로 루프 중단
        
        # EoL (End of Life)는 마지막 스냅샷의 인덱스
        eol = len(kurtosis_series) - 1
        
        if fpt is not None:
            print(f"✅ Success: {bearing_name.ljust(12)} | FPT = {fpt} (EoL = {eol})")
            fpt_results[bearing_name] = {"fpt": fpt, "eol": eol}
        else:
            # FPT를 찾지 못한 경우 (EoL에 도달하기 전에 고장 신호가 명확하지 않음)
            print(f"⚠️ Failed:  {bearing_name.ljust(12)} | FPT not found. Using FPT = 0.")
            # FPT를 못찾으면 임시로 0 (첫번째 스냅샷)을 FPT로 설정
            fpt_results[bearing_name] = {"fpt": 0, "eol": eol}

    except Exception as e:
        print(f"Error processing {bearing_name}: {e}")

# --- 3. FPT 결과 저장 ---
output_file = OUTPUT_PATH / "fpt_results.json"
with open(output_file, 'w') as f:
    json.dump(fpt_results, f, indent=4)

print("-" * 30)
print("Step 2: FPT Identification complete!")
print(f"FPT results saved to: {output_file.resolve()}")

Starting Step 2: FPT Identification...
Loading HI data from: /home/work/RUL_AL/Pronostia dataset/HI_preprocessed
Normal period size: 100 snapshots
Consecutive threshold: 3 times
------------------------------
✅ Success: Bearing1_1   | FPT = 1313 (EoL = 2802)
✅ Success: Bearing1_2   | FPT = 152 (EoL = 870)
✅ Success: Bearing1_3   | FPT = 1732 (EoL = 2374)
✅ Success: Bearing1_5   | FPT = 2411 (EoL = 2462)
✅ Success: Bearing1_6   | FPT = 605 (EoL = 2447)
✅ Success: Bearing1_7   | FPT = 1191 (EoL = 2258)
------------------------------
Step 2: FPT Identification complete!
FPT results saved to: /home/work/RUL_AL/Pronostia dataset/fpt_results.json


3단계 CWT(웨이블릿 변환) 코드

In [4]:
import pandas as pd
import numpy as np
from pathlib import Path
import json
from tqdm import tqdm
import h5py
import pywt
from skimage.transform import resize
from scipy import stats

# --- 1. 피처 추출 함수 정의 ---

def calculate_9_features(data):
    """
    하나의 스냅샷(raw_data, 2560개 샘플)에서
    Huang et al. [56] 기준의 9개 피처를 계산합니다.
    """
    features = {}
    
    # f1: RMS
    features['f1_rms'] = np.sqrt(np.mean(data**2))
    
    # f2: Energy
    features['f2_energy'] = np.sum(data**2)
    
    # f3: Entropy
    # 데이터를 100개의 구간으로 나눠 확률 분포를 근사
    counts, _ = np.histogram(data, bins=100)
    probs = counts / len(data)
    features['f3_entropy'] = stats.entropy(probs)
    
    # f4: Max absolute
    features['f4_max_abs'] = np.max(np.abs(data))
    
    # f5: Mean absolute
    features['f5_mean_abs'] = np.mean(np.abs(data))
    
    # f6: Standard deviation
    features['f6_std'] = np.std(data)
    
    # f7: Clearance factor
    # (np.mean(np.sqrt(np.abs(data)))**2)가 0이 되는 것을 방지
    clearance_den = np.mean(np.sqrt(np.abs(data)))**2
    if clearance_den == 0:
        features['f7_clearance'] = 0
    else:
        features['f7_clearance'] = np.max(np.abs(data)) / clearance_den

    # f_skew (f24: arccosh가 모호하여 skewness로 대체)
    features['f_skew'] = stats.skew(data)
    
    # f25: std(arcsinh)
    features['f25_std_arcsinh'] = np.std(np.arcsinh(data))
    
    # 9개 피처를 순서대로 정렬하여 리스트로 반환
    return list(features.values())

def calculate_cwt_image(data, wavelet='morl', scales=np.arange(1, 101), target_size=(80, 80)):
    """
    하나의 스냅샷(raw_data)에서 CWT를 적용하고
    80x80 이미지로 리사이징합니다.
    """
    # 1. CWT 수행 (Morlet 웨이블릿 사용)
    coefficients, _ = pywt.cwt(data, scales, wavelet)
    
    # 2. 크기(magnitude) 계산
    cwt_magnitude = np.abs(coefficients)
    
    # 3. 80x80 크기로 리사이징 (Bilinear Interpolation)
    # anti_aliasing=True로 설정하여 리샘플링 품질 향상
    resized_image = resize(cwt_magnitude, target_size, anti_aliasing=True)
    
    return resized_image

# --- 2. 경로 및 파라미터 설정 ---
BASE_PATH = Path(".")
LEARNING_SET_PATH = BASE_PATH / "Learning_set"
FULL_TEST_SET_PATH = BASE_PATH / "Full_Test_Set"
FPT_FILE = BASE_PATH / "fpt_results.json"
OUTPUT_FILE = BASE_PATH / "PRONOSTIA_preprocessed.h5"

# CWT 파라미터 (논문 [56] 기반)
CWT_WAVELET = 'morl' # Morlet 웨이블릿
CWT_SCALES = np.arange(1, 101) # 1부터 100까지의 스케일

print("Starting Step 3: Feature Extraction and Labeling...")
print(f"Loading FPT/EoL data from: {FPT_FILE.resolve()}")

# --- 3. FPT/EoL 데이터 로드 ---
with open(FPT_FILE, 'r') as f:
    fpt_results = json.load(f)

print(f"Found data for {len(fpt_results)} bearings: {list(fpt_results.keys())}")
print(f"Output will be saved to: {OUTPUT_FILE.resolve()}")
print("-" * 30)

# --- 4. HDF5 파일 열기 ---
# 'w' 모드: 파일이 이미 존재하면 덮어씁니다.
with h5py.File(OUTPUT_FILE, 'w') as hf:
    
    # --- 5. 메인 처리 루프 (베어링별) ---
    for bearing_name, info in fpt_results.items():
        fpt = info['fpt']
        eol = info['eol']
        
        # 열화 구간의 총 스냅샷 수
        total_life_deg = eol - fpt
        if total_life_deg <= 0:
            print(f"Warning: {bearing_name} has no degradation period (fpt={fpt}, eol={eol}). Skipping.")
            continue

        print(f"Processing {bearing_name}: {total_life_deg + 1} snapshots (from {fpt} to {eol})...")
        
        # 베어링 경로 찾기
        if bearing_name in ["Bearing1_1", "Bearing1_2"]:
            bearing_path = LEARNING_SET_PATH / bearing_name
        else:
            bearing_path = FULL_TEST_SET_PATH / bearing_name
            
        # 이 베어링의 데이터를 저장할 리스트 초기화
        X1_images_list = []
        X2_vectors_list = []
        y_labels_list = []
        
        # --- 6. 스냅샷별 루프 (FPT ~ EoL) ---
        for snapshot_idx in tqdm(range(fpt, eol + 1), desc=f"  Extracting {bearing_name}", leave=False):
            try:
                # 1. 레이블 (y) 생성
                # RULP (Remaining Useful Life Percentage)
                current_life_deg = snapshot_idx - fpt
                rul_percentage = 1.0 - (current_life_deg / total_life_deg)
                y_labels_list.append(rul_percentage)
                
                # 2. 원본 데이터 로드
                # 스냅샷 인덱스는 0부터 시작, 파일 이름은 1부터 시작
                file_name = f"acc_{snapshot_idx + 1:05d}.csv"
                file_path = bearing_path / file_name
                
                # 수평 진동(5번째 열) 데이터만 읽어서 Numpy 배열로 변환
                raw_data = pd.read_csv(file_path, header=None).iloc[:, 4].values
                
                # 3. Input 1 (이미지 X1) 생성
                img = calculate_cwt_image(raw_data, CWT_WAVELET, CWT_SCALES)
                X1_images_list.append(img)
                
                # 4. Input 2 (벡터 X2) 생성
                vec = calculate_9_features(raw_data)
                X2_vectors_list.append(vec)

            except Exception as e:
                print(f"Error processing {bearing_name}, file {file_name}: {e}")
                # 리스트에 불일치가 발생하지 않도록, 에러 시 3개 리스트 모두에서 마지막 요소 제거
                if y_labels_list: y_labels_list.pop()
                if X1_images_list: X1_images_list.pop()
                if X2_vectors_list: X2_vectors_list.pop()

        # --- 7. HDF5 파일에 저장 ---
        if X1_images_list: # 처리된 데이터가 있을 경우에만 저장
            # 리스트를 Numpy 배열로 변환
            X1_array = np.array(X1_images_list, dtype=np.float32)
            X2_array = np.array(X2_vectors_list, dtype=np.float32)
            y_array = np.array(y_labels_list, dtype=np.float32)
            
            # HDF5 파일에 베어링 이름으로 그룹 생성
            bearing_group = hf.create_group(bearing_name)
            
            # 각 데이터셋을 그룹에 저장
            bearing_group.create_dataset("X1_images", data=X1_array)
            bearing_group.create_dataset("X2_vectors", data=X2_array)
            bearing_group.create_dataset("y_labels", data=y_array)
            
            print(f"  > Saved {bearing_name} data to HDF5 (Samples: {len(y_array)})")

print("-" * 30)
print("Step 3: Feature Extraction complete!")
print(f"All data saved in: {OUTPUT_FILE.resolve()}")

Starting Step 3: Feature Extraction and Labeling...
Loading FPT/EoL data from: /home/work/RUL_AL/Pronostia dataset/fpt_results.json
Found data for 6 bearings: ['Bearing1_1', 'Bearing1_2', 'Bearing1_3', 'Bearing1_5', 'Bearing1_6', 'Bearing1_7']
Output will be saved to: /home/work/RUL_AL/Pronostia dataset/PRONOSTIA_preprocessed.h5
------------------------------
Processing Bearing1_1: 1489 snapshots (from 1314 to 2802)...


                                                                

  > Saved Bearing1_1 data to HDF5 (Samples: 1489)
Processing Bearing1_2: 45 snapshots (from 826 to 870)...


                                                            

  > Saved Bearing1_2 data to HDF5 (Samples: 45)
Processing Bearing1_3: 649 snapshots (from 1726 to 2374)...


                                                              

  > Saved Bearing1_3 data to HDF5 (Samples: 649)
Processing Bearing1_5: 51 snapshots (from 2412 to 2462)...


                                                            

  > Saved Bearing1_5 data to HDF5 (Samples: 51)
Processing Bearing1_6: 817 snapshots (from 1631 to 2447)...


                                                              

  > Saved Bearing1_6 data to HDF5 (Samples: 817)
Processing Bearing1_7: 49 snapshots (from 2210 to 2258)...


                                                            

  > Saved Bearing1_7 data to HDF5 (Samples: 49)
------------------------------
Step 3: Feature Extraction complete!
All data saved in: /home/work/RUL_AL/Pronostia dataset/PRONOSTIA_preprocessed.h5




4단계(정규화)를 진행하는 코드

In [5]:
import h5py
import numpy as np
import json
from pathlib import Path

# --- 1. 경로 설정 ---
BASE_PATH = Path(".")
INPUT_FILE = BASE_PATH / "PRONOSTIA_preprocessed.h5"
SCALER_FILE = BASE_PATH / "x2_scaler.json" # X2 피처의 min/max 저장
OUTPUT_FILE = BASE_PATH / "PRONOSTIA_normalized.h5" # 최종 정규화된 파일

# --- 4.1: 스케일러(Min/Max) 계산 ---
print("Starting Step 4.1: Calculating global min/max scaler...")
print(f"Loading data from: {INPUT_FILE.resolve()}")

# 9개 피처의 global min/max를 저장할 변수
global_min = np.full(9, np.inf)
global_max = np.full(9, -np.inf)

try:
    with h5py.File(INPUT_FILE, 'r') as hf:
        bearing_names = list(hf.keys())
        if not bearing_names:
            print(f"Error: HDF5 file '{INPUT_FILE.name}' is empty.")
            exit() # 프로그램 중단

        for bearing_name in bearing_names:
            if 'X2_vectors' in hf[bearing_name]:
                x2_data = hf[bearing_name]['X2_vectors'][:]
                
                if x2_data.shape[0] > 0:
                    # 현재 베어링의 min/max 계산 (axis=0 : 피처별)
                    local_min = np.min(x2_data, axis=0)
                    local_max = np.max(x2_data, axis=0)
                    
                    # global min/max 업데이트
                    global_min = np.minimum(global_min, local_min)
                    global_max = np.maximum(global_max, local_max)
                else:
                    print(f"Warning: {bearing_name} X2_vectors is empty.")
            else:
                print(f"Warning: {bearing_name} has no X2_vectors dataset.")

    # --- 스케일러 저장 ---
    # 나중에 0으로 나누는 것을 방지하기 위해 (max - min) 계산
    data_range = global_max - global_min
    
    # 만약 min==max인 피처가 있다면 (값이 모두 동일), range를 1로 설정
    data_range[data_range == 0] = 1.0 
    
    scaler_data = {
        'min': global_min.tolist(),
        'range': data_range.tolist()
    }

    with open(SCALER_FILE, 'w') as f:
        json.dump(scaler_data, f, indent=4)
        
    print(f"Global Min (9 features): {global_min}")
    print(f"Global Max (9 features): {global_max}")
    print(f"Scaler data saved to: {SCALER_FILE.resolve()}")

except FileNotFoundError:
    print(f"Error: Input file not found at {INPUT_FILE.resolve()}. Run Step 3 first.")
    exit()
except Exception as e:
    print(f"An error occurred during scaler calculation: {e}")
    exit()

print("-" * 30)

# --- 4.2: 정규화 적용 및 최종 파일 저장 ---
print("Starting Step 4.2: Applying normalization...")
print(f"Loading scaler from: {SCALER_FILE.resolve()}")

try:
    # 1. 스케일러 로드
    min_vals = np.array(scaler_data['min'])
    range_vals = np.array(scaler_data['range'])

    # 2. 정규화 적용 및 새 HDF5 파일 저장
    with h5py.File(INPUT_FILE, 'r') as hf_in, h5py.File(OUTPUT_FILE, 'w') as hf_out:
        bearing_names = list(hf_in.keys())
        
        for bearing_name in bearing_names:
            if bearing_name not in hf_in or 'X1_images' not in hf_in[bearing_name]:
                print(f"Skipping {bearing_name}, data not found in source file.")
                continue

            # 1. 원본 데이터 로드
            x1_images = hf_in[bearing_name]['X1_images'][:]
            x2_vectors = hf_in[bearing_name]['X2_vectors'][:]
            y_labels = hf_in[bearing_name]['y_labels'][:]
            
            # 2. X2_vectors 정규화 (Min-Max Scaling)
            x2_vectors_norm = (x2_vectors - min_vals) / range_vals
            
            # 3. 새 HDF5 파일에 그룹 생성 및 저장
            bearing_group = hf_out.create_group(bearing_name)
            
            bearing_group.create_dataset("X1_images", data=x1_images, dtype='float32')
            bearing_group.create_dataset("X2_vectors_norm", data=x2_vectors_norm, dtype='float32')
            bearing_group.create_dataset("y_labels", data=y_labels, dtype='float32')

    print(f"Normalization complete!")
    print(f"Final normalized dataset saved to: {OUTPUT_FILE.resolve()}")

except Exception as e:
    print(f"An error occurred during normalization: {e}")

print("-" * 30)
print("Step 4 complete. You are now ready to load this data and train your model.")

Starting Step 4.1: Calculating global min/max scaler...
Loading data from: /home/work/RUL_AL/Pronostia dataset/PRONOSTIA_preprocessed.h5
Global Min (9 features): [  0.28064013 201.62272644   2.06824088   1.04200006   0.22076094
   0.28056908   4.73460436  -6.49813223   0.27056485]
Global Max (9 features): [1.03396826e+01 2.73687125e+05 4.17076254e+00 4.81479988e+01
 7.83344221e+00 1.03220959e+01 4.03467064e+01 1.67827237e+00
 2.55994296e+00]
Scaler data saved to: /home/work/RUL_AL/Pronostia dataset/x2_scaler.json
------------------------------
Starting Step 4.2: Applying normalization...
Loading scaler from: /home/work/RUL_AL/Pronostia dataset/x2_scaler.json
Normalization complete!
Final normalized dataset saved to: /home/work/RUL_AL/Pronostia dataset/PRONOSTIA_normalized.h5
------------------------------
Step 4 complete. You are now ready to load this data and train your model.


5단계 reference 논문 Hybrid DCNN-based model 구축

In [7]:
import h5py
import numpy as np
import json
from pathlib import Path
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
    Input, Conv2D, Dropout, BatchNormalization, Flatten, 
    Dense, Concatenate, ReLU
)
from sklearn.model_selection import train_test_split

# --- 1. 라이브러리 임포트 (위와 동일) ---

# --- 2. 데이터 로드 함수 ---
def load_data_from_hdf5(h5_path):
    """
    정규화된 HDF5 파일에서 6개 베어링 데이터를 모두 로드하여
    하나의 큰 학습 데이터셋으로 합칩니다.
    """
    X1_list = [] # 이미지 (80, 80)
    X2_list = [] # 벡터 (9,)
    y_list = []  # 레이블 (RULP)

    with h5py.File(h5_path, 'r') as hf:
        bearing_names = list(hf.keys())
        for bearing_name in bearing_names:
            X1_list.append(hf[bearing_name]['X1_images'][:])
            X2_list.append(hf[bearing_name]['X2_vectors_norm'][:])
            y_list.append(hf[bearing_name]['y_labels'][:])
    
    # 모든 베어링 데이터를 NumPy 배열로 결합
    X1_all = np.concatenate(X1_list, axis=0)
    X2_all = np.concatenate(X2_list, axis=0)
    y_all = np.concatenate(y_list, axis=0)
    
    # Keras Conv2D는 (batch, height, width, channels) 형식을 기대
    # 채널(grayscale) 차원(1)을 추가
    X1_all = np.expand_dims(X1_all, axis=-1)
    
    return X1_all, X2_all, y_all

# --- 3. DCNN-MLP 듀얼 네트워크 구축 함수 ---
def create_dcnn_mlp_model(input_shape_1=(80, 80, 1), input_shape_2=(9,)):
    """
    참조 논문 [56]의 Figure 8(c) 아키텍처를 기반으로 
    DCNN-MLP 듀얼 입력 모델을 생성합니다. 
    """
    
    # --- Input 1: DCNN (이미지) 타워 ---
    input_1 = Input(shape=input_shape_1, name='Input_1_Image') # [cite: 1373]

    x = Conv2D(12, (4, 4), strides=(2, 2), padding='same', activation='relu')(input_1) # [cite: 1377]
    x = Dropout(0.2)(x) # [cite: 1380]

    x = Conv2D(24, (4, 4), strides=(2, 2), padding='same', activation='relu')(x) # [cite: 1388]
    x = BatchNormalization()(x) # [cite: 1389]
    x = Dropout(0.2)(x) # [cite: 1390]

    x = Conv2D(48, (4, 4), strides=(5, 5), padding='same', activation='relu')(x) # [cite: 1393]
    x = BatchNormalization()(x) # [cite: 1397]
    x = Dropout(0.2)(x) # [cite: 1398]

    x = Conv2D(120, (4, 4), strides=(1, 1), padding='valid', activation='relu')(x) # [cite: 1403]
    
    cnn_out = Flatten(name='cnn_flatten')(x) # [cite: 1406]

    # --- Input 2: MLP (벡터) 타워 ---
    # (참고: 논문 그림 는 25개 피처 기준, 우리는 PRONOSTIA용 9개 피처 사용 )
    input_2 = Input(shape=input_shape_2, name='Input_2_Vector')

    y = Dense(50, activation='relu')(input_2) # [cite: 1417]
    y = Dropout(0.2)(y) # [cite: 1418]
    y = Dense(50, activation='relu')(y) # [cite: 1419]
    mlp_out = y

    # --- 융합 (Concatenate) ---
    concat = Concatenate(name='concatenate')([cnn_out, mlp_out]) # 

    # --- 출력 레이어 ---
    # RULP (0~1 사이)를 예측하므로 'sigmoid' 활성화 함수 사용
    output = Dense(1, activation='sigmoid', name='Output_RULP')(concat) # 

    # 모델 생성
    model = Model(inputs=[input_1, input_2], outputs=output)
    
    return model

# --- 4. 메인 실행 (로드, 빌드, 학습) ---
def main():
    # 4-1. 경로 설정
    HDF5_FILE = Path("PRONOSTIA_normalized.h5")
    
    # 4-2. 데이터 로드
    print("Loading normalized data...")
    if not HDF5_FILE.exists():
        print(f"Error: {HDF5_FILE.name} not found. Please run Step 3 & 4 first.")
        return
        
    X1_data, X2_data, y_data = load_data_from_hdf5(HDF5_FILE)
    print(f"Data loaded successfully.")
    print(f"  X1 (Images) shape: {X1_data.shape}")
    print(f"  X2 (Vectors) shape: {X2_data.shape}")
    print(f"  y (Labels) shape: {y_data.shape}")

    # 4-3. 학습/검증 데이터 분리 (80% 학습, 20% 검증)
    print("Splitting data into train/validation sets...")
    X1_train, X1_val, X2_train, X2_val, y_train, y_val = train_test_split(
        X1_data, X2_data, y_data, 
        test_size=0.2, 
        random_state=42 # 재현성을 위한 random_state
    )

    # 4-4. 모델 빌드 및 컴파일
    print("Building DCNN-MLP model...")
    # 우리 데이터는 9개 피처를 사용 
    model = create_dcnn_mlp_model(input_shape_1=(80, 80, 1), input_shape_2=(9,))
    model.summary() # 모델 구조 출력

    # 논문 [56]의 하이퍼파라미터(Table 4) 참조 
    optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
    
    # RULP 예측은 회귀(regression) 문제이므로 'mean_squared_error' 사용
    model.compile(optimizer=optimizer, loss='mean_squared_error', metrics=['mae'])

    # 4-5. 모델 학습
    print("Starting model training...")
    history = model.fit(
        [X1_train, X2_train], # 입력은 리스트 형태
        y_train,
        epochs=1500, # 논문은 1500 epoch, 여기선 100으로 축소
        batch_size=20, # [cite: 1441]
        validation_data=([X1_val, X2_val], y_val),
        callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)]
    )

    print("Training complete.")

    # 4-6. 모델 저장
    model.save("dcnn_mlp_baseline_model.h5")
    print("Model saved as 'dcnn_mlp_baseline_model.h5'")

if __name__ == "__main__":
    main()

Loading normalized data...
Data loaded successfully.
  X1 (Images) shape: (3100, 80, 80, 1)
  X2 (Vectors) shape: (3100, 9)
  y (Labels) shape: (3100,)
Splitting data into train/validation sets...
Building DCNN-MLP model...


Starting model training...
Epoch 1/1500
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 9ms/step - loss: 0.0685 - mae: 0.2119 - val_loss: 0.0570 - val_mae: 0.1985
Epoch 2/1500
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0554 - mae: 0.1877 - val_loss: 0.0477 - val_mae: 0.1815
Epoch 3/1500
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0494 - mae: 0.1761 - val_loss: 0.0427 - val_mae: 0.1593
Epoch 4/1500
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0448 - mae: 0.1649 - val_loss: 0.0346 - val_mae: 0.1457
Epoch 5/1500
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0451 - mae: 0.1665 - val_loss: 0.0339 - val_mae: 0.1436
Epoch 6/1500
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0420 - mae: 0.1602 - val_loss: 0.0325 - val_mae: 0.1387
Epoch 7/1500
[1m124/124[0m [32m━━━━━━━━━━━━━━━



Training complete.
Model saved as 'dcnn_mlp_baseline_model.h5'


6단계 mc Dropout

In [9]:
import h5py
import numpy as np
import json
from pathlib import Path
import tensorflow as tf
from tensorflow.keras.models import load_model
from sklearn.model_selection import train_test_split
from tqdm import tqdm

# --- 1. 데이터 로드 함수 (동일) ---
def load_data_from_hdf5(h5_path):
    X1_list = [] # 이미지 (80, 80)
    X2_list = [] # 벡터 (9,)
    y_list = []  # 레이블 (RULP)

    with h5py.File(h5_path, 'r') as hf:
        bearing_names = list(hf.keys())
        for bearing_name in bearing_names:
            X1_list.append(hf[bearing_name]['X1_images'][:])
            X2_list.append(hf[bearing_name]['X2_vectors_norm'][:])
            y_list.append(hf[bearing_name]['y_labels'][:])
    
    X1_all = np.concatenate(X1_list, axis=0)
    X2_all = np.concatenate(X2_list, axis=0)
    y_all = np.concatenate(y_list, axis=0)
    
    # 채널 차원 추가
    X1_all = np.expand_dims(X1_all, axis=-1)
    
    return X1_all, X2_all, y_all

# --- 2. MC Dropout 예측 함수 (수정됨) ---
def get_mc_predictions(model, inputs_list, T=50):
    """
    모델의 Dropout을 활성화(training=True)하여
    T번 반복 예측을 수행합니다.
    """
    all_preds = []
    print(f"Running MC Dropout with T={T} iterations...")
    
    # T번 반복 예측
    for _ in tqdm(range(T)):
        # training=True로 설정하여 Dropout을 강제로 활성화!
        preds = model(inputs_list, training=True)
        all_preds.append(preds)
    
    # (T, N_samples, 1) 형태의 텐서를 (T, N_samples)로 변경
    all_preds_stacked = tf.stack(all_preds)
    all_preds_squeezed = tf.squeeze(all_preds_stacked, axis=-1)
    
    # axis=0 (T축)에 대해 평균과 분산을 계산
    mean_preds = tf.reduce_mean(all_preds_squeezed, axis=0).numpy()
    
    # [수정된 부분] tf.reduce_var -> tf.math.reduce_variance
    variance_preds = tf.math.reduce_variance(all_preds_squeezed, axis=0).numpy()
    
    return mean_preds, variance_preds

# --- 3. 메인 실행 (동일) ---
def main():
    # 3-1. 경로 및 파일 설정
    HDF5_FILE = Path("PRONOSTIA_normalized.h5")
    MODEL_FILE = Path("dcnn_mlp_baseline_model.h5")
    T_ITERATIONS = 50 # 몬테카를로 반복 횟수

    # 3-2. 모델 로드
    print(f"Loading baseline model from: {MODEL_FILE.resolve()}")
    if not MODEL_FILE.exists():
        print("Error: Model file not found. Please run Step 5 first.")
        return
    model = load_model(MODEL_FILE)

    # 3-3. 데이터 로드 및 분리 (5단계와 동일하게)
    print(f"Loading data from: {HDF5_FILE.resolve()}")
    X1_data, X2_data, y_data = load_data_from_hdf5(HDF5_FILE)
    
    # 5단계와 동일한 검증셋을 얻기 위해 random_state=42 유지
    _, X1_val, _, X2_val, _, y_val = train_test_split(
        X1_data, X2_data, y_data, 
        test_size=0.2, 
        random_state=42
    )
    print(f"Using {len(y_val)} samples from validation set for demonstration.")

    # 3-4. MC Dropout 실행
    # 전체 검증 데이터셋에 대해 한 번에 계산
    mean_predictions, variance_predictions = get_mc_predictions(
        model, 
        [X1_val, X2_val], 
        T=T_ITERATIONS
    )

    # 3-5. 결과 샘플 확인
    print("\n--- MC Dropout Prediction Results (Sample 0) ---")
    sample_idx = 0
    
    print(f"True RULP:              {y_val[sample_idx]:.6f}")
    print(f"Predicted RULP (Mean):  {mean_predictions[sample_idx]:.6f}")
    print(f"Uncertainty (Variance): {variance_predictions[sample_idx]:.6f}")
    
    print("\n--- MC Dropout Prediction Results (Sample 10) ---")
    sample_idx = 10
    
    print(f"True RULP:              {y_val[sample_idx]:.6f}")
    print(f"Predicted RULP (Mean):  {mean_predictions[sample_idx]:.6f}")
    print(f"Uncertainty (Variance): {variance_predictions[sample_idx]:.6f}")
    
    # 3-6. Active Learning을 위한 데이터 준비
    # Active Learning 시나리오에서는 이 '분산' 값을 기준으로 데이터를 정렬합니다.
    # 분산이 가장 큰 샘플의 인덱스를 찾기
    highest_uncertainty_idx = np.argmax(variance_predictions)
    
    print("\n--- Active Learning Query ---")
    print(f"Sample with highest uncertainty (Index): {highest_uncertainty_idx}")
    print(f"  > True RULP:              {y_val[highest_uncertainty_idx]:.6f}")
    print(f"  > Predicted RULP (Mean):  {mean_predictions[highest_uncertainty_idx]:.6f}")
    print(f"  > Uncertainty (Variance): {variance_predictions[highest_uncertainty_idx]:.6f} (Max)")

if __name__ == "__main__":
    main()



Loading baseline model from: /home/work/RUL_AL/Pronostia dataset/dcnn_mlp_baseline_model.h5
Loading data from: /home/work/RUL_AL/Pronostia dataset/PRONOSTIA_normalized.h5
Using 620 samples from validation set for demonstration.
Running MC Dropout with T=50 iterations...


100% 50/50 [00:01<00:00, 43.91it/s]


--- MC Dropout Prediction Results (Sample 0) ---
True RULP:              0.517473
Predicted RULP (Mean):  0.406294
Uncertainty (Variance): 0.003002

--- MC Dropout Prediction Results (Sample 10) ---
True RULP:              0.931452
Predicted RULP (Mean):  0.924740
Uncertainty (Variance): 0.000378

--- Active Learning Query ---
Sample with highest uncertainty (Index): 471
  > True RULP:              0.000000
  > Predicted RULP (Mean):  0.214359
  > Uncertainty (Variance): 0.018519 (Max)



