In [1]:
import pandas as pd
import numpy as np
from dtaidistance import dtw
import matplotlib.pyplot as plt
import fastdtw
from scipy.spatial.distance import euclidean

In [2]:
data = pd.read_csv('C://python//智慧製造大數據競賽//2024-pre-train//combined_data.csv')

In [3]:
data_test =pd.read_csv("C://python//智慧製造大數據競賽//2024-pre-train//2024-testing.csv")

In [4]:
data_test_subset = data_test.iloc[:50, :11]

In [41]:
results = []
# 獲取所有的 Material 和 Condition 組合
materials = data['Material'].unique()
conditions = data['Condition'].unique()

In [18]:
# 逐一對 y01~y10 進行獨立 DTW 檢測
for y_col in ['y01', 'y02', 'y03', 'y04', 'y05', 'y06', 'y07', 'y08', 'y09', 'y10']:
    for material in materials:
        for condition in conditions:
            # 篩選出對應的 material 和 condition
            data_subset = data[(data['Material'] == material) & (data['Condition'] == condition)]
            data_subset = data_subset.iloc[:50]  # 取前50筆數據
            if data_subset.shape[0] < 50:
                continue  # 如果該 material-condition 沒有足夠的數據，跳過

            # 進行 DTW 比較（將每個 y 序列作為 1-D 向量）
            test_values = data_test_subset[y_col].values  # 1-D array
            train_values = data_subset[y_col]  # 1-D array
            # print(y_col,material,condition,test_values.shape,train_values.shape)
            dist, path = fastdtw(test_values, train_values, dist=2)

            # 保存結果
            results.append({
                'y_col': y_col,
                'material': material,
                'condition': condition,
                'dtw_distance': dist
            })

# 將結果轉換為 DataFrame 並按 DTW 距離排序
results_df = pd.DataFrame(results)
sorted_results = results_df.sort_values(by='dtw_distance')

# 顯示結果
print(sorted_results.head(10))  # 查看DTW距離最小的前10個結果

     y_col  material condition  dtw_distance
81     y02         4         b         109.0
841    y02         4         b         109.0
373    y02         4         b         109.0
386    y02         7         c         163.0
854    y02         7         c         163.0
94     y02         7         c         163.0
1114   y07         7         c         187.0
646    y07         7         c         187.0
438    y03         7         c         203.0
906    y03         7         c         203.0


In [21]:
# 按 Condition 分組，並對每個 Condition 的 DTW 距離求和
sum_dtw_by_condition = results_df.groupby('condition')['dtw_distance'].sum().reset_index()

# 顯示每個 Condition 的 DTW 距離總和
print("每個 Condition 的 DTW 距離總和為:")
print(sum_dtw_by_condition)

每個 Condition 的 DTW 距離總和為:
  condition  dtw_distance
0         a    19653739.0
1         b    16251980.0
2         c    14151841.0
3         d    15997521.0
