In [1]:
# Start from a clean interactive namespace; -f skips the confirmation prompt.
%reset -f

# Imported after the reset on purpose: names bound earlier would presumably
# be removed by %reset, so gc has to be brought in here.
import gc
gc.collect()  # Force Python to reclaim memory

0

In [2]:
import os
import pandas as pd
import numpy as np
from tqdm import tqdm
from sklearn.model_selection import train_test_split

In [3]:
# Dataset tag: it is substituted into the summary file name and the output
# folder names below.
date = 250423
# Presumably the intended waveform length in points; no active code in the
# visible cells reads it (only the disabled down-sampling code mentions it).
target_points = 1024

# Input locations
summary_file = f"./combine/combined_{date}.csv"  # master summary table
waveform_dir = "./waveforms"  # folder holding the waveform CSV files

# Output locations
output_dir = f"./processed_data/{date}"  # root output folder
output_dir_train = f"./processed_data/{date}/train"
output_dir_test = f"./processed_data/{date}/test"

# Create every output folder up front; exist_ok=True makes re-runs harmless.
for _directory in (output_dir, output_dir_train, output_dir_test):
    os.makedirs(_directory, exist_ok=True)


In [4]:
# NOTE: this entire cell is commented out, so the helper below is never
# defined and nothing in the visible notebook calls it.
# # Adaptive down-sampling helper
# def adaptive_downsample_indices(signal, target_points):
#     """
#     Build a cumulative distribution function (CDF) from the absolute
#     first differences of the signal, then take target_points positions
#     evenly spaced on that CDF as the down-sampling indices, so that
#     segments where the signal changes a lot receive more sample points.
#     """
#     # Differences between neighbouring points and their absolute values
#     diff_signal = np.diff(signal)
#     abs_diff = np.abs(diff_signal)

#     # Cumulative distribution function (CDF)
#     cdf = np.cumsum(abs_diff)
#     if cdf[-1] == 0:
#         # Total absolute variation is zero: fall back to uniform sampling
#         indices = np.linspace(0, len(signal) - 1, target_points, dtype=int)
#         return indices
#     cdf = cdf / cdf[-1]  # normalise to [0, 1]

#     # target_points values evenly distributed over [0, 1]
#     desired_vals = np.linspace(0, 1, target_points)
#     indices = np.searchsorted(cdf, desired_vals)

#     # Clamp indices to the valid range and make sure the last point is kept
#     indices[indices >= len(signal)] = len(signal) - 1
#     if indices[-1] != len(signal) - 1:
#         indices[-1] = len(signal) - 1

#     # Flat segments can produce repeated indices; keep only the unique ones
#     # (adjust as needed). NOTE(review): after np.unique the result can hold
#     # fewer than target_points indices.
#     indices = np.unique(indices)
#     return indices


In [5]:
# 讀取總表
df = pd.read_csv(summary_file)

# 讀取總表
df = pd.read_csv(summary_file)

# 建立空的列表來存儲機器學習格式的數據
B_field_list = []
H_field_list = []
Duty_P_list = []
Duty_N_list = []
Turns_list = []
Hdc_list = []
frequency_list = []
temperature_list = []
volumetric_loss_list = []

# 處理每一筆測試點資料
for index, row in tqdm(df.iterrows(), total=len(df), desc="處理測試點"):
    wave_file = str(row["Wave_file_name"])  # 取得對應的波形檔案名稱

    if wave_file.lower() == "none" or wave_file.strip() == "":
        continue  # 跳過無效數據

    wave_path = os.path.join(waveform_dir, wave_file)  # 取得完整路徑

    # 檢查檔案是否存在
    if not os.path.exists(wave_path):
        print(f"檔案 {wave_file} 找不到，跳過...")
        continue

    # 讀取 B、H 波形數據
    wave_data = pd.read_csv(wave_path)

    # 假設 CSV 檔案格式固定：
    # H DATA(A/m) | B DATA(T) | I DATA(A) | V DATA(V)

    H_wave = wave_data.iloc[:, 0].values  # H DATA(A/m)
    B_wave = wave_data.iloc[:, 1].values  # B DATA(T)

    # # 將波形資料降階成 1024 點
    # num_points = 1024
    # n_org = len(H_wave)
    # if n_org != num_points:
    #     x_old = np.linspace(0, 1, n_org)
    #     x_new = np.linspace(0, 1, num_points)
    #     H_wave = np.interp(x_new, x_old, H_wave)
    #     B_wave = np.interp(x_new, x_old, B_wave)

    # 存入列表
    H_field_list.append(H_wave)
    B_field_list.append(B_wave)

    # 存入其他數據
    frequency_list.append(row["Frequency(kHz)"])
    temperature_list.append(row["Temp"])
    volumetric_loss_list.append(row["Core_Loss(Pcv(kW/m3))"])
    Duty_P_list.append(row["Duty_P"])
    Duty_N_list.append(row["Duty_N"])
    Hdc_list.append(row["Hdc(A/m)"])
    Turns_list.append(row["N1"])


處理測試點:   0%|          | 0/2688 [00:00<?, ?it/s]

處理測試點:  53%|█████▎    | 1437/2688 [00:17<00:13, 94.83it/s] 

檔案 CH467160_DC_TRI_100k_20mT_N20_D0.6_ALL-3_Norm..csv 找不到，跳過...


處理測試點: 100%|██████████| 2688/2688 [00:32<00:00, 81.61it/s] 


### 全部合併一個大檔案

In [6]:
# 轉換為 DataFrame，並讓 **每行代表一筆測試點**
H_field_df = pd.DataFrame(H_field_list)
B_field_df = pd.DataFrame(B_field_list)
frequency_df = pd.DataFrame(frequency_list)
temperature_df = pd.DataFrame(temperature_list)
Duty_P_df = pd.DataFrame(Duty_P_list)
Duty_N_df = pd.DataFrame(Duty_N_list)
Turns_df = pd.DataFrame(Turns_list)
Hdc_df = pd.DataFrame(Hdc_list)
volumetric_loss_df = pd.DataFrame(volumetric_loss_list)

# 儲存 CSV
H_field_df.to_csv(os.path.join(output_dir, "H_Field.csv"),
                  index=False,
                  header=False)
B_field_df.to_csv(os.path.join(output_dir, "B_Field.csv"),
                  index=False,
                  header=False)
frequency_df.to_csv(os.path.join(output_dir, "Frequency.csv"),
                    index=False,
                    header=False)
temperature_df.to_csv(os.path.join(output_dir, "Temperature.csv"),
                      index=False,
                      header=False)
Duty_P_df.to_csv(os.path.join(output_dir, "Duty_P.csv"),
                 index=False,
                 header=False)
Duty_N_df.to_csv(os.path.join(output_dir, "Duty_N.csv"),
                 index=False,
                 header=False)
Turns_df.to_csv(os.path.join(output_dir, "Turns.csv"),
                index=False,
                header=False)
Hdc_df.to_csv(os.path.join(output_dir, "Hdc.csv"), index=False, header=False)
volumetric_loss_df.to_csv(os.path.join(output_dir, "Volumetric_Loss.csv"),
                          index=False,
                          header=False)

print(f"數據拆分完成，已存入 ./processed_data/{date} 資料夾")

數據拆分完成，已存入 ./processed_data/250423 資料夾


### 分成Train 跟 Test


In [7]:
# 合併成一個大的DataFrame
full_data = pd.DataFrame({
    'H_field': H_field_list,
    'B_field': B_field_list,
    'Frequency': frequency_list,
    'Temperature': temperature_list,
    'Duty_P': Duty_P_list,
    'Duty_N': Duty_N_list,
    'Turns': Turns_list,
    'Hdc': Hdc_list,
    'Volumetric_Loss': volumetric_loss_list
})

# 隨機打亂並分割數據 (80%訓練，20%測試)
train_df, test_df = train_test_split(full_data,
                                     test_size=0.1,
                                     random_state=42,
                                     shuffle=True)

# 儲存資料
for dataset, output_dir in zip([train_df, test_df],
                               [output_dir_train, output_dir_test]):
    pd.DataFrame(dataset['H_field'].tolist()).to_csv(os.path.join(
        output_dir, "H_Field.csv"),
                                                     index=False,
                                                     header=False)
    pd.DataFrame(dataset['B_field'].tolist()).to_csv(os.path.join(
        output_dir, "B_Field.csv"),
                                                     index=False,
                                                     header=False)
    dataset[['Frequency']].to_csv(os.path.join(output_dir, "Frequency.csv"),
                                  index=False,
                                  header=False)
    dataset[['Temperature']].to_csv(os.path.join(output_dir,
                                                 "Temperature.csv"),
                                    index=False,
                                    header=False)
    dataset[['Duty_P']].to_csv(os.path.join(output_dir, "Duty_P.csv"),
                               index=False,
                               header=False)
    dataset[['Duty_N']].to_csv(os.path.join(output_dir, "Duty_N.csv"),
                               index=False,
                               header=False)
    dataset[['Turns']].to_csv(os.path.join(output_dir, "Turns.csv"),
                              index=False,
                              header=False)
    dataset[['Hdc']].to_csv(os.path.join(output_dir, "Hdc.csv"),
                            index=False,
                            header=False)
    dataset[['Volumetric_Loss']].to_csv(os.path.join(output_dir,
                                                     "Volumetric_Loss.csv"),
                                        index=False,
                                        header=False)

print(
    f"數據拆分完成，已存入 ./processed_data/{date}/train 與 ./processed_data/{date}/test 資料夾"
)


數據拆分完成，已存入 ./processed_data/250423/train 與 ./processed_data/250423/test 資料夾
