In [52]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sns
from sklearn.metrics import classification_report
from sklearn.model_selection import KFold
import json
import os 
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.callbacks import EarlyStopping

from sklearn.preprocessing import StandardScaler
import joblib  # 用於保存模型

# 訓練模型

In [None]:
selected_columns = ['Label',
                    'AP1_Distance (mm)','AP3_Distance (mm)',
                    'AP1_StdDev (mm)','AP3_StdDev (mm)',
                                ]  

what_data = "DNN_3Layer_2AP_Best_combine_2FTM_20241214_usingearlystopepchoc10000"
# what_data = "DNN_3Layer_1AP_Best_combine_F_S_R_20241214_usingearlystopepchoc10000"

# 'AP1_Distance (mm)',
# 'AP1_StdDev (mm)',

In [54]:
# 1. 讀取資料
file_path = "timestamp_allignment_Balanced_2024_12_14_rtt_logs.csv"

date = "2024_12_14"

data = pd.read_csv(file_path, usecols=selected_columns)

print(data.head())

  Label  AP2_Distance (mm)  AP4_Distance (mm)  AP2_StdDev (mm)  \
0  1-11             8178.0             4852.0           1502.0   
1  1-11             8139.0             4911.0           1466.0   
2  1-11             8139.0             4647.0             91.0   
3  1-11             8022.0             6219.0           1523.0   
4  1-11             8335.0             4471.0           1195.0   

   AP4_StdDev (mm)  
0            934.0  
1            485.0  
2            600.0  
3           2649.0  
4           1079.0  


In [55]:
label_to_coordinates = {
    "1-1": (0, 0), "1-2": (0.6, 0), "1-3": (1.2, 0), "1-4": (1.8, 0), "1-5": (2.4, 0), "1-6": (3.0, 0),"1-7": (3.6, 0), "1-8": (4.2, 0), "1-9": (4.8, 0), "1-10": (5.4, 0), "1-11": (6.0, 0),
    "2-1": (0, 0.6), "2-11": (6.0, 0.6),
    "3-1": (0, 1.2), "3-11": (6.0, 1.2),
    "4-1": (0, 1.8), "4-11": (6.0, 1.8),
    "5-1": (0, 2.4), "5-11": (6.0, 2.4),
    "6-1": (0, 3.0), "6-2": (0.6, 3.0), "6-3": (1.2, 3.0), "6-4": (1.8, 3.0), "6-5": (2.4, 3.0),"6-6": (3.0, 3.0), "6-7": (3.6, 3.0), "6-8": (4.2, 3.0), "6-9": (4.8, 3.0), "6-10": (5.4, 3.0), "6-11": (6.0, 3.0),
    "7-1": (0, 3.6), "7-11": (6.0, 3.6),
    "8-1": (0, 4.2), "8-11": (6.0, 4.2),
    "9-1": (0, 4.8), "9-11": (6.0, 4.8),
    "10-1": (0, 5.4), "10-11": (6.0, 5.4),
    "11-1": (0, 6.0), "11-2": (0.6, 6.0), "11-3": (1.2, 6.0), "11-4": (1.8, 6.0), "11-5": (2.4, 6.0),"11-6": (3.0, 6.0), "11-7": (3.6, 6.0), "11-8": (4.2, 6.0), "11-9": (4.8, 6.0), "11-10": (5.4, 6.0), "11-11": (6.0, 6.0)
}

In [56]:
label_mapping = {
    '11': '1-1','10': '1-2','9': '1-3','8': '1-4','7': '1-5','6': '1-6','5': '1-7','4': '1-8','3': '1-9','2': '1-10','1': '1-11',
    '12': '2-1','30': '2-11',
    '13': '3-1','29': '3-11',
    '14': '4-1','28': '4-11',
    '15': '5-1','27': '5-11',
    '16': '6-1','17': '6-2','18': '6-3','19': '6-4','20': '6-5','21': '6-6','22': '6-7','23': '6-8','24': '6-9','25': '6-10','26': '6-11',
    '49': '7-1','31': '7-11',
    '48': '8-1','32': '8-11',
    '47': '9-1','33': '9-11',
    '46': '10-1','34': '10-11',
    '45': '11-1','44': '11-2','43': '11-3','42': '11-4','41': '11-5','40': '11-6','39': '11-7','38': '11-8','37': '11-9','36': '11-10','35': '11-11'
}

In [57]:
# target_column = 'Label'  # 替換成目標欄位名稱
# # 替換目標欄位的 label
# data[target_column] = data[target_column].astype(str).map(label_mapping)
# # 確認是否有未映射的值
# unmapped_values = data[target_column][data[target_column].isna()].unique()
# if len(unmapped_values) > 0:
#     print("以下值未找到對應的映射：", unmapped_values)

In [58]:
test_size=0.2
val_size=0.1

target_column = 'Label'  # 替換成目標欄位名稱

In [59]:
label_column = 'Label'

In [60]:
# 資料前處理 (一): 刪除前後n筆資料
n = 1
# 確保依據Label排序
data = data.sort_values(by=label_column).reset_index(drop=True)

# 建立一個空的 DataFrame 用於存放處理後的資料
processed_data = pd.DataFrame(columns=data.columns)

# 針對每個Label群組進行處理
for label, group in data.groupby(label_column):
    # 刪除前n筆和後n筆資料
    if len(group) > 2 * n:  # 確保群組資料足夠
        group = group.iloc[n:-n]
    else:
        group = pd.DataFrame()  # 若資料不足，刪除整個群組
    # 將處理後的群組資料加入
    processed_data = pd.concat([processed_data, group], ignore_index=True)

processed_data


  processed_data = pd.concat([processed_data, group], ignore_index=True)


Unnamed: 0,Label,AP2_Distance (mm),AP4_Distance (mm),AP2_StdDev (mm),AP4_StdDev (mm)
0,1-1,6811.0,-811.0,1239.0,1318.0
1,1-1,6841.0,-850.0,1262.0,1503.0
2,1-1,6841.0,-733.0,1265.0,1350.0
3,1-1,6811.0,-743.0,1275.0,1421.0
4,1-1,6811.0,-889.0,1318.0,1500.0
...,...,...,...,...,...
19546,9-11,6255.0,5145.0,763.0,1005.0
19547,9-11,7866.0,4500.0,910.0,278.0
19548,9-11,8100.0,4998.0,832.0,1038.0
19549,9-11,8061.0,4461.0,878.0,1585.0


In [61]:
# Calculate the number of rows with NaN values
nan_rows = processed_data.isnull().any(axis=1).sum()

# Print the result
print(f"Number of rows with NaN values: {nan_rows}")

# 找出包含 NaN 的列
rows_with_nan = processed_data[processed_data.isnull().any(axis=1)]

# 印出這些列
print("Rows with NaN values:")
print(rows_with_nan)

Number of rows with NaN values: 969
Rows with NaN values:
      Label  AP2_Distance (mm)  AP4_Distance (mm)  AP2_StdDev (mm)  \
5       1-1             6811.0                NaN           1275.0   
43      1-1             6958.0                NaN           1146.0   
65      1-1             6665.0                NaN           1292.0   
68      1-1                NaN             -889.0              NaN   
79      1-1                NaN            -1028.0              NaN   
...     ...                ...                ...              ...   
19399  9-11             5639.0                NaN           1871.0   
19418  9-11             5639.0                NaN           1916.0   
19440  9-11                NaN             4793.0              NaN   
19500  9-11                NaN             4764.0              NaN   
19526  9-11                NaN             4081.0              NaN   

       AP4_StdDev (mm)  
5                  NaN  
43                 NaN  
65                 NaN  
6

In [62]:
data_imputed = processed_data.groupby(label_column).apply(
    lambda group: group.fillna(group.mean())
).reset_index()
data_imputed

Unnamed: 0,Label,level_1,AP2_Distance (mm),AP4_Distance (mm),AP2_StdDev (mm),AP4_StdDev (mm)
0,1-1,0,6811.0,-811.0,1239.0,1318.0
1,1-1,1,6841.0,-850.0,1262.0,1503.0
2,1-1,2,6841.0,-733.0,1265.0,1350.0
3,1-1,3,6811.0,-743.0,1275.0,1421.0
4,1-1,4,6811.0,-889.0,1318.0,1500.0
...,...,...,...,...,...,...
19546,9-11,19546,6255.0,5145.0,763.0,1005.0
19547,9-11,19547,7866.0,4500.0,910.0,278.0
19548,9-11,19548,8100.0,4998.0,832.0,1038.0
19549,9-11,19549,8061.0,4461.0,878.0,1585.0


In [63]:
# 建立 Label 映射
y = data_imputed[target_column]
# label_mapping = {str(i): label for i, label in enumerate(y.unique())}
reverse_label_mapping = {v: int(k) - 1 for k, v in label_mapping.items()}  # 讓數字標籤 -1
y_numeric = y.map(reverse_label_mapping)

print("Final reverse_label_mapping in DNN:", reverse_label_mapping)
print("y_numeric unique values in DNN:", y_numeric.unique())

Final reverse_label_mapping in DNN: {'1-1': 10, '1-2': 9, '1-3': 8, '1-4': 7, '1-5': 6, '1-6': 5, '1-7': 4, '1-8': 3, '1-9': 2, '1-10': 1, '1-11': 0, '2-1': 11, '2-11': 29, '3-1': 12, '3-11': 28, '4-1': 13, '4-11': 27, '5-1': 14, '5-11': 26, '6-1': 15, '6-2': 16, '6-3': 17, '6-4': 18, '6-5': 19, '6-6': 20, '6-7': 21, '6-8': 22, '6-9': 23, '6-10': 24, '6-11': 25, '7-1': 48, '7-11': 30, '8-1': 47, '8-11': 31, '9-1': 46, '9-11': 32, '10-1': 45, '10-11': 33, '11-1': 44, '11-2': 43, '11-3': 42, '11-4': 41, '11-5': 40, '11-6': 39, '11-7': 38, '11-8': 37, '11-9': 36, '11-10': 35, '11-11': 34}
y_numeric unique values in DNN: [10  1  0  9  8  7  6  5  4  3  2 45 33 44 35 34 43 42 41 40 39 38 37 36
 11 29 12 28 13 27 14 26 15 24 25 16 17 18 19 20 21 22 23 48 30 47 31 46
 32]


In [64]:
# 把label部分拿掉
X = data_imputed.drop(columns=['level_1','Label'])

scaler = StandardScaler()
columns_to_scale = selected_columns.copy()  # 建立副本，避免影響原始變數
columns_to_scale.remove('Label')  # 在副本上移除 'Label'
X_scaled = scaler.fit_transform(X[columns_to_scale])

# 保存標準化器
joblib.dump(scaler, f'scaler_{what_data}.pkl')

print(X_scaled[0])

X

[ 0.54172408 -1.44422436  0.73583945  0.77925024]


Unnamed: 0,AP2_Distance (mm),AP4_Distance (mm),AP2_StdDev (mm),AP4_StdDev (mm)
0,6811.0,-811.0,1239.0,1318.0
1,6841.0,-850.0,1262.0,1503.0
2,6841.0,-733.0,1265.0,1350.0
3,6811.0,-743.0,1275.0,1421.0
4,6811.0,-889.0,1318.0,1500.0
...,...,...,...,...
19546,6255.0,5145.0,763.0,1005.0
19547,7866.0,4500.0,910.0,278.0
19548,8100.0,4998.0,832.0,1038.0
19549,8061.0,4461.0,878.0,1585.0


In [65]:
# 設定 KFold 交叉驗證
kf = KFold(n_splits=5, shuffle=True, random_state=42)
all_y_test, all_y_test_pred = [], []
overall_accuracy, overall_mde = [], []
mde_report, fold_reports = {}, []
fold_index = 1

mde_report_per_fold = {}

# 初始化最佳 MDE 變數
best_mde = float('inf')  # 設為無限大，確保第一個 fold 一定會更新
best_model = None  # 儲存最佳模型

In [66]:
print("X type")
print(X.dtypes)
print("y type")
print(y.dtypes)

X type
AP2_Distance (mm)    float64
AP4_Distance (mm)    float64
AP2_StdDev (mm)      float64
AP4_StdDev (mm)      float64
dtype: object
y type
object


### 5 fold

In [None]:
# 進行 5-Fold 交叉驗證
for train_index, test_index in kf.split(X_scaled):
    #original
    # X_train, X_test = X_scaled[train_index], X_scaled[test_index]
    # y_train, y_test = y_numeric.iloc[train_index], y_numeric.iloc[test_index]

    X_train_full, X_test = X_scaled[train_index], X_scaled[test_index]
    y_train_full, y_test = y_numeric.iloc[train_index], y_numeric.iloc[test_index]

    # # 從 Train Data 再切 10% 給 Validation Set
    # X_train, X_val, y_train, y_val = train_test_split(
    #     X_train_full, y_train_full, test_size=0.1, random_state=42, stratify=y_train_full
    # )
    X_train = X_train_full
    y_train = y_train_full

    

    # 建立 DNN 模型
    model = keras.Sequential([
        keras.layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
        keras.layers.Dense(128, activation='relu'),
        keras.layers.Dense(64, activation='relu'),
        keras.layers.Dense(len(label_mapping), activation='softmax')
    ])

    # 編譯模型
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    # 設定 EarlyStopping 回呼函數
    early_stop = EarlyStopping(monitor='loss', patience=10, restore_best_weights=True)

    # 訓練模型，包含驗證集
    model.fit(X_train, y_train, 
              
              epochs=10000, batch_size=32, verbose=0, callbacks=[early_stop])
    #  validation_data=(X_val, y_val),  # ✅ 這裡新增驗證集
    

    # 進行預測
    y_test_pred_numeric = model.predict(X_test)
    y_pred_classes = np.argmax(y_test_pred_numeric, axis=1)
    y_test_pred_labels = [label_mapping[str(num + 1)] for num in y_pred_classes]  # 補回 +1


    # 計算 MDE
    y_test_pred_coordinates = np.array([label_to_coordinates[label] for label in y_test_pred_labels])
    y_test_coordinates = np.array([label_to_coordinates[label_mapping[str(label + 1)]] for label in y_test])


    distances = np.linalg.norm(y_test_pred_coordinates - y_test_coordinates, axis=1)
    fold_mde = np.mean(distances)
    overall_mde.append(fold_mde)

    # 記錄每個 RP 在當前 fold 的 MDE
    for true_label, distance in zip(y_test, distances):
        if true_label not in mde_report_per_fold:
            mde_report_per_fold[true_label] = []
        mde_report_per_fold[true_label].append(distance)  # 存所有 fold 的 MDE


    print(f"Fold {fold_index} - MDE: {fold_mde:.4f}")

    # 檢查是否為最佳模型
    if fold_mde < best_mde:
        best_mde = fold_mde
        best_model = model  # 更新最佳模型
        print(f"-> New best model found at Fold {fold_index} with MDE: {best_mde:.4f}")
    

    fold_index += 1
    

# 計算 5-Fold 平均 MDE
mde_report_avg = {label: {"mde": np.mean(distances), "count": len(distances)} for label, distances in mde_report_per_fold.items()}

# 儲存到 JSON 檔案
file_path = "2mcAP_FTMonly_BEST_2024_12_14_mde"
with open(file_path, "w") as f:
    json.dump(mde_report_avg, f, indent=4)

print(f"5-Fold Average MDE report saved to: {file_path}")

# 儲存最佳模型
if best_model:
    best_model.save(f"DNN_best_model{what_data}.h5")
    print(f"Best DNN model saved with MDE: {best_mde:.4f}")
else:
    print("No best model found!")

Fold 1 - MDE: 0.0434
-> New best model found at Fold 1 with MDE: 0.0434
Fold 2 - MDE: 0.0405
-> New best model found at Fold 2 with MDE: 0.0405
Fold 3 - MDE: 0.0518
Fold 4 - MDE: 0.0598
Fold 5 - MDE: 0.0543
5-Fold Average MDE report saved to: 2mcAP_FTMonly_WORST_2024_12_14_mde
Best DNN model saved with MDE: 0.0405


  saving_api.save_model(


In [68]:
# 計算 5-Fold 平均 MDE
mean_mde = np.mean(overall_mde)
print(f"\n5-Fold Mean MDE: {mean_mde:.4f} meters")


5-Fold Mean MDE: 0.0500 meters


# 測試模型

In [None]:
X_testing_selected_columns = ['AP1_Distance (mm)','AP3_Distance (mm)',
                              'AP1_StdDev (mm)','AP3_StdDev (mm)',

                                'AP1_Rssi','AP2_Rssi','AP3_Rssi','AP4_Rssi']  
# 'AP1_Distance (mm)','AP2_Distance (mm)','AP3_Distance (mm)','AP4_Distance (mm)',
# 'AP1_StdDev (mm)','AP2_StdDev (mm)','AP3_StdDev (mm)','AP4_StdDev (mm)',

In [49]:
# 讀取測試資料
test_file_path = "timestamp_allignment_Balanced_2025_02_28_rtt_logs.csv"  # 測試資料的檔案名稱
date_test = "2025_02_28"
modelname = "DNN only distance mcAPs"
test_data = pd.read_csv(test_file_path, usecols=selected_columns)
# test_data

In [50]:
# 資料前處理 (一): 刪除前後n筆資料
n = 20
# 確保依據Label排序
test_data = test_data.sort_values(by=label_column).reset_index(drop=True)

# 建立一個空的 DataFrame 用於存放處理後的資料
test_processed_data = pd.DataFrame(columns=test_data.columns)

# 針對每個Label群組進行處理
for label, group in test_data.groupby(label_column):
    # 刪除前n筆和後n筆資料
    if len(group) > 2 * n:  # 確保群組資料足夠
        group = group.iloc[n:-n]
    else:
        group = pd.DataFrame()  # 若資料不足，刪除整個群組
    # 將處理後的群組資料加入
    test_processed_data = pd.concat([test_processed_data, group], ignore_index=True)

# test_processed_data

In [51]:
# Calculate the number of rows with NaN values
nan_rows = test_processed_data.isnull().any(axis=1).sum()

# Print the result
print(f"Number of rows with NaN values: {nan_rows}")

# 找出包含 NaN 的列
rows_with_nan = test_processed_data[test_processed_data.isnull().any(axis=1)]

# # 印出這些列
# print("Rows with NaN values:")
# print(rows_with_nan)

Number of rows with NaN values: 1026


In [52]:
test_data_imputed = test_processed_data.groupby(label_column).apply(
    lambda group: group.fillna(group.mean())
).reset_index()

# Calculate the number of rows with NaN values
nan_rows = test_data_imputed.isnull().any(axis=1).sum()

# Print the result
print(f"Number of rows with NaN values: {nan_rows}")

# 找出包含 NaN 的列
rows_with_nan = test_data_imputed[test_data_imputed.isnull().any(axis=1)]

# test_data_imputed

Number of rows with NaN values: 0


In [53]:
# 建立 Label 映射
y_test = test_data_imputed[target_column]
y_test_numeric = y_test.map(reverse_label_mapping)

print("Final reverse_label_mapping in DNN:", reverse_label_mapping)
print("y_numeric unique values in DNN:", y_test_numeric.unique())

# 把label部分拿掉
X_test = test_data_imputed.drop(columns=['level_1','Label'])
# 確保測試資料的特徵與訓練資料的特徵一致
X_test = X_test[X_testing_selected_columns]  # 選取相同的特徵


# y_test_numeric

Final reverse_label_mapping in DNN: {'1-1': 10, '1-2': 9, '1-3': 8, '1-4': 7, '1-5': 6, '1-6': 5, '1-7': 4, '1-8': 3, '1-9': 2, '1-10': 1, '1-11': 0, '2-1': 11, '2-11': 29, '3-1': 12, '3-11': 28, '4-1': 13, '4-11': 27, '5-1': 14, '5-11': 26, '6-1': 15, '6-2': 16, '6-3': 17, '6-4': 18, '6-5': 19, '6-6': 20, '6-7': 21, '6-8': 22, '6-9': 23, '6-10': 24, '6-11': 25, '7-1': 48, '7-11': 30, '8-1': 47, '8-11': 31, '9-1': 46, '9-11': 32, '10-1': 45, '10-11': 33, '11-1': 44, '11-2': 43, '11-3': 42, '11-4': 41, '11-5': 40, '11-6': 39, '11-7': 38, '11-8': 37, '11-9': 36, '11-10': 35, '11-11': 34}
y_numeric unique values in DNN: [10  1  0  9  8  7  6  5  4  3  2 45 33 44 35 34 43 42 41 40 39 38 37 36
 11 29 12 28 13 27 14 26 15 24 25 16 17 18 19 20 21 22 23 48 30 47 31 46
 32]


KeyError: "['AP1_Distance (mm)', 'AP3_Distance (mm)', 'AP1_StdDev (mm)', 'AP3_StdDev (mm)'] not in index"

In [None]:
# 使用之前訓練時的標準化器 (scaler) 來標準化測試數據
X_test_scaled = scaler.transform(X_test)

print(len(X_test_scaled))

17934


### 預測


In [None]:
best_model.evaluate(X_test_scaled, y_test_numeric)  # 確保模型在測試模式



[22.46282958984375, 0.12568305432796478]

In [None]:
# 預測測試資料
y_test_pred_numeric = best_model.predict(X_test_scaled)
y_pred_classes = np.argmax(y_test_pred_numeric, axis=1)

  1/561 [..............................] - ETA: 4s



In [None]:
# 轉換為原本的 Label
y_test_pred_labels = [label_mapping[str(num + 1)] for num in y_pred_classes]  # 補回 +1
# y_test_pred_labels

In [None]:
# 讀取測試資料的實際 Label
y_test_actual = test_data_imputed[target_column]
# test_data_imputed

In [None]:
# 取得預測與實際座標
y_test_pred_coordinates = np.array([label_to_coordinates[label] for label in y_test_pred_labels])
y_test_actual_coordinates = np.array([label_to_coordinates[label] for label in y_test_actual])

In [None]:
# 計算 MDE (Mean Distance Error)
distances = np.linalg.norm(y_test_pred_coordinates - y_test_actual_coordinates, axis=1)
mean_mde = np.mean(distances)

# 記錄每個 RP 的 MDE
mde_report_test = {}
for true_label, distance in zip(y_test_actual, distances):
    if true_label not in mde_report_test:
        mde_report_test[true_label] = []
    mde_report_test[true_label].append(distance)

# 計算測試資料的 MDE 平均值
mde_report_test_avg = {label: {"mde": np.mean(dists), "count": len(dists)} for label, dists in mde_report_test.items()}

# 儲存 MDE 結果到 JSON 檔案
test_file_path = f"{modelname}_{date_test}.json"
with open(test_file_path, "w") as f:
    json.dump(mde_report_test_avg, f, indent=4)

print(f"Test Data MDE report saved to: {test_file_path}")
print(f"\nTest Data Mean MDE: {mean_mde:.4f} meters")


Test Data MDE report saved to: DNN only distance mcAPs_2025_02_28.json

Test Data Mean MDE: 1.2490 meters
