In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sns
from sklearn.metrics import classification_report
from sklearn.model_selection import KFold
import json
import os 
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.callbacks import EarlyStopping

from sklearn.preprocessing import StandardScaler
import joblib  # 用於保存模型

# 訓練模型

In [2]:
selected_columns = ['Label',
                        'AP1_Distance (mm)','AP3_Distance (mm)',
                        'AP1_StdDev (mm)','AP3_StdDev (mm)',
                                'AP1_Rssi','AP2_Rssi','AP3_Rssi','AP4_Rssi'
                                ]  

what_data = "DNN_3Layer_2AP_Best_combine_2FTM_20241214_usingearlystopepchoc10000"
# what_data = "DNN_3Layer_1AP_Best_combine_F_S_R_20241214_usingearlystopepchoc10000"

# 'AP1_Distance (mm)',
# 'AP1_StdDev (mm)',

In [3]:
# 1. 讀取資料
file_path = "timestamp_allignment_Balanced_2024_12_14_rtt_logs.csv"

date = "2024_12_14"

data = pd.read_csv(file_path, usecols=selected_columns)

print(data.head())

  Label  AP1_Distance (mm)  AP3_Distance (mm)  AP1_Rssi  AP2_Rssi  AP3_Rssi  \
0  1-11              718.0             7432.0     -60.0     -68.0     -61.0   
1  1-11              639.0             6930.0     -62.0     -70.0     -59.0   
2  1-11              678.0             7471.0     -61.0     -69.0     -62.0   
3  1-11              718.0             7627.0     -62.0     -69.0     -63.0   
4  1-11              561.0             7471.0     -61.0     -70.0     -62.0   

   AP4_Rssi  AP1_StdDev (mm)  AP3_StdDev (mm)  
0     -59.0            264.0            325.0  
1     -59.0            418.0            799.0  
2     -55.0            311.0            385.0  
3     -55.0            324.0            268.0  
4     -55.0            343.0            312.0  


In [4]:
label_to_coordinates = {
    "1-1": (0, 0), "1-2": (0.6, 0), "1-3": (1.2, 0), "1-4": (1.8, 0), "1-5": (2.4, 0), "1-6": (3.0, 0),"1-7": (3.6, 0), "1-8": (4.2, 0), "1-9": (4.8, 0), "1-10": (5.4, 0), "1-11": (6.0, 0),
    "2-1": (0, 0.6), "2-11": (6.0, 0.6),
    "3-1": (0, 1.2), "3-11": (6.0, 1.2),
    "4-1": (0, 1.8), "4-11": (6.0, 1.8),
    "5-1": (0, 2.4), "5-11": (6.0, 2.4),
    "6-1": (0, 3.0), "6-2": (0.6, 3.0), "6-3": (1.2, 3.0), "6-4": (1.8, 3.0), "6-5": (2.4, 3.0),"6-6": (3.0, 3.0), "6-7": (3.6, 3.0), "6-8": (4.2, 3.0), "6-9": (4.8, 3.0), "6-10": (5.4, 3.0), "6-11": (6.0, 3.0),
    "7-1": (0, 3.6), "7-11": (6.0, 3.6),
    "8-1": (0, 4.2), "8-11": (6.0, 4.2),
    "9-1": (0, 4.8), "9-11": (6.0, 4.8),
    "10-1": (0, 5.4), "10-11": (6.0, 5.4),
    "11-1": (0, 6.0), "11-2": (0.6, 6.0), "11-3": (1.2, 6.0), "11-4": (1.8, 6.0), "11-5": (2.4, 6.0),"11-6": (3.0, 6.0), "11-7": (3.6, 6.0), "11-8": (4.2, 6.0), "11-9": (4.8, 6.0), "11-10": (5.4, 6.0), "11-11": (6.0, 6.0)
}

In [5]:
label_mapping = {
    '11': '1-1','10': '1-2','9': '1-3','8': '1-4','7': '1-5','6': '1-6','5': '1-7','4': '1-8','3': '1-9','2': '1-10','1': '1-11',
    '12': '2-1','30': '2-11',
    '13': '3-1','29': '3-11',
    '14': '4-1','28': '4-11',
    '15': '5-1','27': '5-11',
    '16': '6-1','17': '6-2','18': '6-3','19': '6-4','20': '6-5','21': '6-6','22': '6-7','23': '6-8','24': '6-9','25': '6-10','26': '6-11',
    '49': '7-1','31': '7-11',
    '48': '8-1','32': '8-11',
    '47': '9-1','33': '9-11',
    '46': '10-1','34': '10-11',
    '45': '11-1','44': '11-2','43': '11-3','42': '11-4','41': '11-5','40': '11-6','39': '11-7','38': '11-8','37': '11-9','36': '11-10','35': '11-11'
}

In [6]:
# target_column = 'Label'  # 替換成目標欄位名稱
# # 替換目標欄位的 label
# data[target_column] = data[target_column].astype(str).map(label_mapping)
# # 確認是否有未映射的值
# unmapped_values = data[target_column][data[target_column].isna()].unique()
# if len(unmapped_values) > 0:
#     print("以下值未找到對應的映射：", unmapped_values)

In [7]:
test_size=0.2
val_size=0.1

target_column = 'Label'  # 替換成目標欄位名稱

In [8]:
label_column = 'Label'

In [9]:
# 資料前處理 (一): 刪除前後n筆資料
n = 1
# 確保依據Label排序
data = data.sort_values(by=label_column).reset_index(drop=True)

# 建立一個空的 DataFrame 用於存放處理後的資料
processed_data = pd.DataFrame(columns=data.columns)

# 針對每個Label群組進行處理
for label, group in data.groupby(label_column):
    # 刪除前n筆和後n筆資料
    if len(group) > 2 * n:  # 確保群組資料足夠
        group = group.iloc[n:-n]
    else:
        group = pd.DataFrame()  # 若資料不足，刪除整個群組
    # 將處理後的群組資料加入
    processed_data = pd.concat([processed_data, group], ignore_index=True)

processed_data


  processed_data = pd.concat([processed_data, group], ignore_index=True)


Unnamed: 0,Label,AP1_Distance (mm),AP3_Distance (mm),AP1_Rssi,AP2_Rssi,AP3_Rssi,AP4_Rssi,AP1_StdDev (mm),AP3_StdDev (mm)
0,1-1,3877.0,11494.0,-51.0,-73.0,-67.0,-55.0,895.0,607.0
1,1-1,4155.0,11807.0,-63.0,-73.0,-67.0,-56.0,447.0,434.0
2,1-1,3682.0,11533.0,-53.0,-73.0,-67.0,-55.0,887.0,626.0
3,1-1,4311.0,11728.0,-58.0,-73.0,-67.0,-56.0,420.0,501.0
4,1-1,4155.0,12354.0,-61.0,-73.0,-65.0,-53.0,584.0,206.0
...,...,...,...,...,...,...,...,...,...
19546,9-11,1733.0,3555.0,-58.0,-69.0,-57.0,-69.0,932.0,252.0
19547,9-11,1694.0,2978.0,-57.0,-69.0,-51.0,-65.0,1247.0,304.0
19548,9-11,1577.0,2979.0,-58.0,-70.0,-52.0,-65.0,955.0,373.0
19549,9-11,1850.0,2861.0,-57.0,-69.0,-51.0,-64.0,982.0,434.0


In [10]:
# Calculate the number of rows with NaN values
nan_rows = processed_data.isnull().any(axis=1).sum()

# Print the result
print(f"Number of rows with NaN values: {nan_rows}")

# 找出包含 NaN 的列
rows_with_nan = processed_data[processed_data.isnull().any(axis=1)]

# 印出這些列
print("Rows with NaN values:")
print(rows_with_nan)

Number of rows with NaN values: 1040
Rows with NaN values:
      Label  AP1_Distance (mm)  AP3_Distance (mm)  AP1_Rssi  AP2_Rssi  \
5       1-1             4116.0            11807.0     -64.0     -73.0   
43      1-1             4311.0                NaN     -63.0     -72.0   
65      1-1             4116.0            11963.0     -62.0     -72.0   
68      1-1                NaN                NaN       NaN       NaN   
79      1-1                NaN            12354.0       NaN       NaN   
...     ...                ...                ...       ...       ...   
19399  9-11                NaN                NaN       NaN     -69.0   
19418  9-11             1538.0                NaN     -58.0     -70.0   
19440  9-11                NaN                NaN       NaN       NaN   
19500  9-11                NaN                NaN       NaN       NaN   
19526  9-11             1577.0             2783.0     -57.0       NaN   

       AP3_Rssi  AP4_Rssi  AP1_StdDev (mm)  AP3_StdDev (mm)  
5 

In [11]:
data_imputed = processed_data.groupby(label_column).apply(
    lambda group: group.fillna(group.mean())
).reset_index()
data_imputed

Unnamed: 0,Label,level_1,AP1_Distance (mm),AP3_Distance (mm),AP1_Rssi,AP2_Rssi,AP3_Rssi,AP4_Rssi,AP1_StdDev (mm),AP3_StdDev (mm)
0,1-1,0,3877.0,11494.0,-51.0,-73.0,-67.0,-55.0,895.0,607.0
1,1-1,1,4155.0,11807.0,-63.0,-73.0,-67.0,-56.0,447.0,434.0
2,1-1,2,3682.0,11533.0,-53.0,-73.0,-67.0,-55.0,887.0,626.0
3,1-1,3,4311.0,11728.0,-58.0,-73.0,-67.0,-56.0,420.0,501.0
4,1-1,4,4155.0,12354.0,-61.0,-73.0,-65.0,-53.0,584.0,206.0
...,...,...,...,...,...,...,...,...,...,...
19546,9-11,19546,1733.0,3555.0,-58.0,-69.0,-57.0,-69.0,932.0,252.0
19547,9-11,19547,1694.0,2978.0,-57.0,-69.0,-51.0,-65.0,1247.0,304.0
19548,9-11,19548,1577.0,2979.0,-58.0,-70.0,-52.0,-65.0,955.0,373.0
19549,9-11,19549,1850.0,2861.0,-57.0,-69.0,-51.0,-64.0,982.0,434.0


In [12]:
# 建立 Label 映射
y = data_imputed[target_column]
# label_mapping = {str(i): label for i, label in enumerate(y.unique())}
reverse_label_mapping = {v: int(k) - 1 for k, v in label_mapping.items()}  # 讓數字標籤 -1
y_numeric = y.map(reverse_label_mapping)

print("Final reverse_label_mapping in DNN:", reverse_label_mapping)
print("y_numeric unique values in DNN:", y_numeric.unique())

Final reverse_label_mapping in DNN: {'1-1': 10, '1-2': 9, '1-3': 8, '1-4': 7, '1-5': 6, '1-6': 5, '1-7': 4, '1-8': 3, '1-9': 2, '1-10': 1, '1-11': 0, '2-1': 11, '2-11': 29, '3-1': 12, '3-11': 28, '4-1': 13, '4-11': 27, '5-1': 14, '5-11': 26, '6-1': 15, '6-2': 16, '6-3': 17, '6-4': 18, '6-5': 19, '6-6': 20, '6-7': 21, '6-8': 22, '6-9': 23, '6-10': 24, '6-11': 25, '7-1': 48, '7-11': 30, '8-1': 47, '8-11': 31, '9-1': 46, '9-11': 32, '10-1': 45, '10-11': 33, '11-1': 44, '11-2': 43, '11-3': 42, '11-4': 41, '11-5': 40, '11-6': 39, '11-7': 38, '11-8': 37, '11-9': 36, '11-10': 35, '11-11': 34}
y_numeric unique values in DNN: [10  1  0  9  8  7  6  5  4  3  2 45 33 44 35 34 43 42 41 40 39 38 37 36
 11 29 12 28 13 27 14 26 15 24 25 16 17 18 19 20 21 22 23 48 30 47 31 46
 32]


In [13]:
# 把label部分拿掉
X = data_imputed.drop(columns=['level_1','Label'])

scaler = StandardScaler()
columns_to_scale = selected_columns.copy()  # 建立副本，避免影響原始變數
columns_to_scale.remove('Label')  # 在副本上移除 'Label'
X_scaled = scaler.fit_transform(X[columns_to_scale])

# 保存標準化器
joblib.dump(scaler, f'scaler_{what_data}.pkl')

print(X_scaled[0])

X

[ 0.46727709  1.76386251 -0.02946558 -0.18632946  2.00996662 -1.05985233
 -1.6741561   0.41595499]


Unnamed: 0,AP1_Distance (mm),AP3_Distance (mm),AP1_Rssi,AP2_Rssi,AP3_Rssi,AP4_Rssi,AP1_StdDev (mm),AP3_StdDev (mm)
0,3877.0,11494.0,-51.0,-73.0,-67.0,-55.0,895.0,607.0
1,4155.0,11807.0,-63.0,-73.0,-67.0,-56.0,447.0,434.0
2,3682.0,11533.0,-53.0,-73.0,-67.0,-55.0,887.0,626.0
3,4311.0,11728.0,-58.0,-73.0,-67.0,-56.0,420.0,501.0
4,4155.0,12354.0,-61.0,-73.0,-65.0,-53.0,584.0,206.0
...,...,...,...,...,...,...,...,...
19546,1733.0,3555.0,-58.0,-69.0,-57.0,-69.0,932.0,252.0
19547,1694.0,2978.0,-57.0,-69.0,-51.0,-65.0,1247.0,304.0
19548,1577.0,2979.0,-58.0,-70.0,-52.0,-65.0,955.0,373.0
19549,1850.0,2861.0,-57.0,-69.0,-51.0,-64.0,982.0,434.0


In [14]:
# 設定 KFold 交叉驗證
kf = KFold(n_splits=5, shuffle=True, random_state=42)
all_y_test, all_y_test_pred = [], []
overall_accuracy, overall_mde = [], []
mde_report, fold_reports = {}, []
fold_index = 1

mde_report_per_fold = {}

# 初始化最佳 MDE 變數
best_mde = float('inf')  # 設為無限大，確保第一個 fold 一定會更新
best_model = None  # 儲存最佳模型

In [15]:
print("X type")
print(X.dtypes)
print("y type")
print(y.dtypes)

X type
AP1_Distance (mm)    float64
AP3_Distance (mm)    float64
AP1_Rssi             float64
AP2_Rssi             float64
AP3_Rssi             float64
AP4_Rssi             float64
AP1_StdDev (mm)      float64
AP3_StdDev (mm)      float64
dtype: object
y type
object


### 5 fold

In [16]:
# 進行 5-Fold 交叉驗證
for train_index, test_index in kf.split(X_scaled):
    #original
    # X_train, X_test = X_scaled[train_index], X_scaled[test_index]
    # y_train, y_test = y_numeric.iloc[train_index], y_numeric.iloc[test_index]

    X_train_full, X_test = X_scaled[train_index], X_scaled[test_index]
    y_train_full, y_test = y_numeric.iloc[train_index], y_numeric.iloc[test_index]

    # # 從 Train Data 再切 10% 給 Validation Set
    # X_train, X_val, y_train, y_val = train_test_split(
    #     X_train_full, y_train_full, test_size=0.1, random_state=42, stratify=y_train_full
    # )
    X_train = X_train_full
    y_train = y_train_full

    

    # 建立 DNN 模型
    model = keras.Sequential([
        keras.layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
        keras.layers.Dense(128, activation='relu'),
        keras.layers.Dense(64, activation='relu'),
        keras.layers.Dense(len(label_mapping), activation='softmax')
    ])

    # 編譯模型
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    # 設定 EarlyStopping 回呼函數
    early_stop = EarlyStopping(monitor='loss', patience=10, restore_best_weights=True)

    # 訓練模型，包含驗證集
    model.fit(X_train, y_train, 
              
              epochs=10000, batch_size=32, verbose=0, callbacks=[early_stop])
    #  validation_data=(X_val, y_val),  # ✅ 這裡新增驗證集
    

    # 進行預測
    y_test_pred_numeric = model.predict(X_test)
    y_pred_classes = np.argmax(y_test_pred_numeric, axis=1)
    y_test_pred_labels = [label_mapping[str(num + 1)] for num in y_pred_classes]  # 補回 +1


    # 計算 MDE
    y_test_pred_coordinates = np.array([label_to_coordinates[label] for label in y_test_pred_labels])
    y_test_coordinates = np.array([label_to_coordinates[label_mapping[str(label + 1)]] for label in y_test])


    distances = np.linalg.norm(y_test_pred_coordinates - y_test_coordinates, axis=1)
    fold_mde = np.mean(distances)
    overall_mde.append(fold_mde)

   



    # 檢查是否為最佳模型
    if fold_mde < best_mde:
        best_mde = fold_mde
        best_model = model  # 更新最佳模型
        print(f"-> New best model found at Fold {fold_index} with MDE: {best_mde:.4f}")

        mde_report_per_fold = {}

         # 記錄每個 RP 在當前 fold 的 MDE
        for true_label, distance in zip(y_test, distances):
            if true_label not in mde_report_per_fold:
                mde_report_per_fold[true_label] = []
            mde_report_per_fold[true_label].append(distance)  # 存所有 fold 的 MDE

        ## mde every RP
        mde_report_per_fold_each = {}
        # 記錄每個 RP 在當前 fold 的 MDE
        for true_label, distance in zip(y_test, distances):
            true_label = int(true_label)  # 確保鍵是 int
            if true_label not in mde_report_per_fold_each:
                mde_report_per_fold_each[true_label] = []
            mde_report_per_fold_each[true_label].append(distance)  # 存所有 fold 的 MDE

        # 計算 5-Fold 平均 MDE，並記錄 MDE 不為 0 的 error 值
        mde_report_avg_each = {}
        
        for label, dist_list in mde_report_per_fold_each.items():
            mean_dist = np.mean(dist_list)
            count = len(dist_list)

            # 如果 mean_dist > 0，則記錄個別的 error 值
            error_dict = {str(idx + 1): float(d) for idx, d in enumerate(dist_list) if d > 0}

            # 建構最終輸出格式
            mde_report_avg_each[int(label)] = {
                "mde": mean_dist,
                "count": count
            }

            # 只有當 MDE 大於 0 時才存儲 error 值
            if error_dict:
                mde_report_avg_each[int(label)]["error"] = error_dict


    

    fold_index += 1
    

# 計算 5-Fold 平均 MDE
# mde_report_avg = {label: {"mde": np.mean(distances), "count": len(distances)} for label, distances in mde_report_per_fold.items()}

mde_report_avg = {int(label): {"mde": np.mean(distances), "count": len(distances)} 
                   for label, distances in mde_report_per_fold.items()}

# 儲存到 JSON 檔案
file_path = "Testing_5fold_mde"
with open(file_path, "w") as f:
    json.dump(mde_report_avg, f, indent=4)

print(f"5-Fold Average MDE report saved to: {file_path}")


# 儲存到 JSON 檔案
file_path = f"Testing_5fold_mde_each.json"
with open(file_path, "w") as f:
    json.dump(mde_report_avg_each, f, indent=4)

print(f"MDE report saved to: {file_path}")

# 儲存最佳模型
if best_model:
    best_model.save(f"DNN_best_model{what_data}.h5")
    print(f"Best DNN model saved with MDE: {best_mde:.4f}")
else:
    print("No best model found!")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 899us/step
-> New best model found at Fold 1 with MDE: 0.0144
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 955us/step
-> New best model found at Fold 2 with MDE: 0.0094
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 951us/step
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 951us/step
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step  
-> New best model found at Fold 5 with MDE: 0.0064




5-Fold Average MDE report saved to: Testing_5fold_mde
MDE report saved to: Testing_5fold_mde_each.json
Best DNN model saved with MDE: 0.0064


In [17]:
# 計算 5-Fold 平均 MDE
mean_mde = np.mean(overall_mde)
print(f"\n5-Fold Mean MDE: {mean_mde:.4f} meters")


5-Fold Mean MDE: 0.0108 meters


# 測試模型

In [18]:
X_testing_selected_columns = ['AP1_Distance (mm)','AP3_Distance (mm)',
                              'AP1_StdDev (mm)','AP3_StdDev (mm)',

                                'AP1_Rssi','AP2_Rssi','AP3_Rssi','AP4_Rssi']  
# 'AP1_Distance (mm)','AP2_Distance (mm)','AP3_Distance (mm)','AP4_Distance (mm)',
# 'AP1_StdDev (mm)','AP2_StdDev (mm)','AP3_StdDev (mm)','AP4_StdDev (mm)',

In [19]:
# 讀取測試資料
test_file_path = "timestamp_allignment_Balanced_2025_02_28_rtt_logs.csv"  # 測試資料的檔案名稱
date_test = "2025_02_28"
modelname = "DNN only distance mcAPs"
test_data = pd.read_csv(test_file_path, usecols=selected_columns)
# test_data

In [20]:
# 資料前處理 (一): 刪除前後n筆資料
n = 20
# 確保依據Label排序
test_data = test_data.sort_values(by=label_column).reset_index(drop=True)

# 建立一個空的 DataFrame 用於存放處理後的資料
test_processed_data = pd.DataFrame(columns=test_data.columns)

# 針對每個Label群組進行處理
for label, group in test_data.groupby(label_column):
    # 刪除前n筆和後n筆資料
    if len(group) > 2 * n:  # 確保群組資料足夠
        group = group.iloc[n:-n]
    else:
        group = pd.DataFrame()  # 若資料不足，刪除整個群組
    # 將處理後的群組資料加入
    test_processed_data = pd.concat([test_processed_data, group], ignore_index=True)

# test_processed_data

  test_processed_data = pd.concat([test_processed_data, group], ignore_index=True)


In [21]:
# Calculate the number of rows with NaN values
nan_rows = test_processed_data.isnull().any(axis=1).sum()

# Print the result
print(f"Number of rows with NaN values: {nan_rows}")

# 找出包含 NaN 的列
rows_with_nan = test_processed_data[test_processed_data.isnull().any(axis=1)]

# # 印出這些列
# print("Rows with NaN values:")
# print(rows_with_nan)

Number of rows with NaN values: 1026


In [22]:
test_data_imputed = test_processed_data.groupby(label_column).apply(
    lambda group: group.fillna(group.mean())
).reset_index()

# Calculate the number of rows with NaN values
nan_rows = test_data_imputed.isnull().any(axis=1).sum()

# Print the result
print(f"Number of rows with NaN values: {nan_rows}")

# 找出包含 NaN 的列
rows_with_nan = test_data_imputed[test_data_imputed.isnull().any(axis=1)]

# test_data_imputed

Number of rows with NaN values: 0


In [23]:
# 建立 Label 映射
y_test = test_data_imputed[target_column]
y_test_numeric = y_test.map(reverse_label_mapping)

print("Final reverse_label_mapping in DNN:", reverse_label_mapping)
print("y_numeric unique values in DNN:", y_test_numeric.unique())

# 把label部分拿掉
X_test = test_data_imputed.drop(columns=['level_1','Label'])
# 確保測試資料的特徵與訓練資料的特徵一致
X_test = X_test[X_testing_selected_columns]  # 選取相同的特徵


# y_test_numeric

Final reverse_label_mapping in DNN: {'1-1': 10, '1-2': 9, '1-3': 8, '1-4': 7, '1-5': 6, '1-6': 5, '1-7': 4, '1-8': 3, '1-9': 2, '1-10': 1, '1-11': 0, '2-1': 11, '2-11': 29, '3-1': 12, '3-11': 28, '4-1': 13, '4-11': 27, '5-1': 14, '5-11': 26, '6-1': 15, '6-2': 16, '6-3': 17, '6-4': 18, '6-5': 19, '6-6': 20, '6-7': 21, '6-8': 22, '6-9': 23, '6-10': 24, '6-11': 25, '7-1': 48, '7-11': 30, '8-1': 47, '8-11': 31, '9-1': 46, '9-11': 32, '10-1': 45, '10-11': 33, '11-1': 44, '11-2': 43, '11-3': 42, '11-4': 41, '11-5': 40, '11-6': 39, '11-7': 38, '11-8': 37, '11-9': 36, '11-10': 35, '11-11': 34}
y_numeric unique values in DNN: [10  1  0  9  8  7  6  5  4  3  2 45 33 44 35 34 43 42 41 40 39 38 37 36
 11 29 12 28 13 27 14 26 15 24 25 16 17 18 19 20 21 22 23 48 30 47 31 46
 32]


In [24]:
# 使用之前訓練時的標準化器 (scaler) 來標準化測試數據
X_test_scaled = scaler.transform(X_test)

print(len(X_test_scaled))

17934


### 預測


In [25]:
best_model.evaluate(X_test_scaled, y_test_numeric)  # 確保模型在測試模式

[1m561/561[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 982us/step - accuracy: 0.0870 - loss: 21.7661


[20.836740493774414, 0.12534849345684052]

In [26]:
# 預測測試資料
y_test_pred_numeric = best_model.predict(X_test_scaled)
y_pred_classes = np.argmax(y_test_pred_numeric, axis=1)

[1m561/561[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 731us/step


In [27]:
# 轉換為原本的 Label
y_test_pred_labels = [label_mapping[str(num + 1)] for num in y_pred_classes]  # 補回 +1
# y_test_pred_labels

In [28]:
# 讀取測試資料的實際 Label
y_test_actual = test_data_imputed[target_column]
# test_data_imputed

In [29]:
# 取得預測與實際座標
y_test_pred_coordinates = np.array([label_to_coordinates[label] for label in y_test_pred_labels])
y_test_actual_coordinates = np.array([label_to_coordinates[label] for label in y_test_actual])

In [30]:
# 計算 MDE (Mean Distance Error)
distances = np.linalg.norm(y_test_pred_coordinates - y_test_actual_coordinates, axis=1)
mean_mde = np.mean(distances)

# 記錄每個 RP 的 MDE
mde_report_test = {}
for true_label, distance in zip(y_test_actual, distances):
    if true_label not in mde_report_test:
        mde_report_test[true_label] = []
    mde_report_test[true_label].append(distance)

# 計算測試資料的 MDE 平均值
mde_report_test_avg = {label: {"mde": np.mean(dists), "count": len(dists)} for label, dists in mde_report_test.items()}

# 儲存 MDE 結果到 JSON 檔案
test_file_path = f"{modelname}_{date_test}.json"
with open(test_file_path, "w") as f:
    json.dump(mde_report_test_avg, f, indent=4)

print(f"Test Data MDE report saved to: {test_file_path}")
print(f"\nTest Data Mean MDE: {mean_mde:.4f} meters")


Test Data MDE report saved to: DNN only distance mcAPs_2025_02_28.json

Test Data Mean MDE: 1.2549 meters
