In [31]:
import json
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from tensorflow import keras

In [32]:
# Columns read from the CSV: the label plus, for each of the four access points,
# the distance, the distance standard deviation and the RSSI.
selected_columns = [
    'Label',
    # distance per access point
    'AP1_Distance (mm)', 'AP2_Distance (mm)', 'AP3_Distance (mm)', 'AP4_Distance (mm)',
    # standard deviation of the distance per access point
    'AP1_StdDev (mm)', 'AP2_StdDev (mm)', 'AP3_StdDev (mm)', 'AP4_StdDev (mm)',
    # RSSI per access point
    'AP1_Rssi', 'AP2_Rssi', 'AP3_Rssi', 'AP4_Rssi',
]

In [33]:
# 1. Load the data: read only the selected columns from the CSV log file
#    and print the first rows as a quick sanity check.
file_path = "timestamp_allignment_Balanced_2024_12_14_rtt_logs.csv"
data = pd.read_csv(filepath_or_buffer=file_path, usecols=selected_columns)
print(data.head())

  Label  AP1_Distance (mm)  AP2_Distance (mm)  AP3_Distance (mm)  \
0  1-11              718.0             8178.0             7432.0   
1  1-11              639.0             8139.0             6930.0   
2  1-11              678.0             8139.0             7471.0   
3  1-11              718.0             8022.0             7627.0   
4  1-11              561.0             8335.0             7471.0   

   AP4_Distance (mm)  AP1_Rssi  AP2_Rssi  AP3_Rssi  AP4_Rssi  AP1_StdDev (mm)  \
0             4852.0     -60.0     -68.0     -61.0     -59.0            264.0   
1             4911.0     -62.0     -70.0     -59.0     -59.0            418.0   
2             4647.0     -61.0     -69.0     -62.0     -55.0            311.0   
3             6219.0     -62.0     -69.0     -63.0     -55.0            324.0   
4             4471.0     -61.0     -70.0     -62.0     -55.0            343.0   

   AP2_StdDev (mm)  AP3_StdDev (mm)  AP4_StdDev (mm)  
0           1502.0            325.0            93

In [34]:
# Split fractions (not referenced by any other cell visible in this notebook).
test_size, val_size = 0.2, 0.1

# Name of the target column; replace it with your own target column name.
target_column = "Label"

In [35]:
# Column used to group rows by label during preprocessing and imputation.
label_column = "Label"

In [36]:
# Preprocessing (1): drop the first and last n rows of every label group.
n = 20

# Sort by label with a stable algorithm so rows keep their original relative
# order inside each label. The default quicksort gives no such guarantee, so
# the positional trim below could otherwise remove arbitrary rows.
data = data.sort_values(by=label_column, kind="mergesort").reset_index(drop=True)

# Keep only the groups that have more than 2*n rows and cut n rows from each end;
# smaller groups are dropped entirely.
# The end bound is written as len(group) - n because iloc[n:-n] is empty when n == 0.
trimmed_groups = [
    group.iloc[n:len(group) - n]
    for _, group in data.groupby(label_column)
    if len(group) > 2 * n
]

# Concatenate once instead of growing a frame inside the loop: this is linear
# rather than quadratic, and it avoids concatenating onto an empty frame, which
# pandas warns about and which can affect the resulting column dtypes.
if trimmed_groups:
    processed_data = pd.concat(trimmed_groups, ignore_index=True)
else:
    # No group was large enough: keep the expected columns on an empty frame.
    processed_data = pd.DataFrame(columns=data.columns)

processed_data


  processed_data = pd.concat([processed_data, group], ignore_index=True)


Unnamed: 0,Label,AP1_Distance (mm),AP2_Distance (mm),AP3_Distance (mm),AP4_Distance (mm),AP1_Rssi,AP2_Rssi,AP3_Rssi,AP4_Rssi,AP1_StdDev (mm),AP2_StdDev (mm),AP3_StdDev (mm),AP4_StdDev (mm)
0,1-1,4233.0,6811.0,11650.0,-889.0,-61.0,-73.0,-67.0,-55.0,460.0,1219.0,519.0,1438.0
1,1-1,3838.0,6928.0,11611.0,-889.0,-53.0,-73.0,-67.0,-55.0,864.0,1141.0,601.0,1412.0
2,1-1,4155.0,6460.0,11924.0,-694.0,-62.0,-73.0,-66.0,-49.0,542.0,1451.0,409.0,1309.0
3,1-1,3960.0,6753.0,12041.0,-811.0,-62.0,-72.0,-65.0,-54.0,619.0,1249.0,351.0,1430.0
4,1-1,3960.0,6635.0,11650.0,-928.0,-63.0,-72.0,-67.0,-54.0,607.0,1326.0,481.0,1443.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
17684,9-11,1772.0,8178.0,2861.0,4911.0,-58.0,-70.0,-52.0,-66.0,1108.0,449.0,369.0,813.0
17685,9-11,1733.0,6811.0,2939.0,4735.0,-57.0,-69.0,-52.0,-66.0,1109.0,1395.0,329.0,153.0
17686,9-11,1694.0,8257.0,2900.0,4852.0,-57.0,-69.0,-52.0,-61.0,1174.0,508.0,433.0,469.0
17687,9-11,1616.0,6694.0,2861.0,4930.0,-58.0,-70.0,-53.0,-66.0,1093.0,1579.0,425.0,226.0


In [37]:
# Flag every row that has at least one missing value (computed once and reused).
nan_mask = processed_data.isna().any(axis=1)

# Report how many rows are affected.
nan_rows = nan_mask.sum()
print(f"Number of rows with NaN values: {nan_rows}")

# Show the affected rows themselves.
rows_with_nan = processed_data.loc[nan_mask]
print("Rows with NaN values:")
print(rows_with_nan)

Number of rows with NaN values: 951
Rows with NaN values:
      Label  AP1_Distance (mm)  AP2_Distance (mm)  AP3_Distance (mm)  \
24      1-1             4311.0             6958.0                NaN   
46      1-1             4116.0             6665.0            11963.0   
49      1-1                NaN                NaN                NaN   
60      1-1                NaN                NaN            12354.0   
71      1-1             4194.0             6782.0                NaN   
...     ...                ...                ...                ...   
17556  9-11                NaN             5639.0                NaN   
17575  9-11             1538.0             5639.0                NaN   
17597  9-11                NaN                NaN                NaN   
17657  9-11                NaN                NaN                NaN   
17683  9-11             1577.0                NaN             2783.0   

       AP4_Distance (mm)  AP1_Rssi  AP2_Rssi  AP3_Rssi  AP4_Rssi  \
24       

In [38]:
# Impute missing feature values with the mean of the same column within the
# same label. A column that is entirely NaN inside a label stays NaN.
#
# This uses an explicit per-label transform instead of
# groupby(...).apply(lambda g: g.fillna(g.mean())): taking the mean of a frame
# that still contains the string label column, and the index layout returned by
# groupby.apply, both depend on the installed pandas version.
# NOTE(review): the per-label mean is computed over all rows, including rows
# that later end up in a test fold, and it uses the true label of each row.
feature_columns = [col for col in processed_data.columns if col != label_column]
features = processed_data[feature_columns].astype(float)
group_means = features.groupby(processed_data[label_column]).transform("mean")

data_imputed = features.fillna(group_means)
# Keep the column layout later cells rely on: the label first, then 'level_1'
# holding the row's index in processed_data (a later cell drops both by name).
data_imputed.insert(0, "level_1", processed_data.index)
data_imputed.insert(0, label_column, processed_data[label_column])
# Rows ordered by label, original order preserved within each label.
data_imputed = data_imputed.sort_values(label_column, kind="mergesort").reset_index(drop=True)
data_imputed

Unnamed: 0,Label,level_1,AP1_Distance (mm),AP2_Distance (mm),AP3_Distance (mm),AP4_Distance (mm),AP1_Rssi,AP2_Rssi,AP3_Rssi,AP4_Rssi,AP1_StdDev (mm),AP2_StdDev (mm),AP3_StdDev (mm),AP4_StdDev (mm)
0,1-1,0,4233.0,6811.0,11650.0,-889.0,-61.0,-73.0,-67.0,-55.0,460.0,1219.0,519.0,1438.0
1,1-1,1,3838.0,6928.0,11611.0,-889.0,-53.0,-73.0,-67.0,-55.0,864.0,1141.0,601.0,1412.0
2,1-1,2,4155.0,6460.0,11924.0,-694.0,-62.0,-73.0,-66.0,-49.0,542.0,1451.0,409.0,1309.0
3,1-1,3,3960.0,6753.0,12041.0,-811.0,-62.0,-72.0,-65.0,-54.0,619.0,1249.0,351.0,1430.0
4,1-1,4,3960.0,6635.0,11650.0,-928.0,-63.0,-72.0,-67.0,-54.0,607.0,1326.0,481.0,1443.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17684,9-11,17684,1772.0,8178.0,2861.0,4911.0,-58.0,-70.0,-52.0,-66.0,1108.0,449.0,369.0,813.0
17685,9-11,17685,1733.0,6811.0,2939.0,4735.0,-57.0,-69.0,-52.0,-66.0,1109.0,1395.0,329.0,153.0
17686,9-11,17686,1694.0,8257.0,2900.0,4852.0,-57.0,-69.0,-52.0,-61.0,1174.0,508.0,433.0,469.0
17687,9-11,17687,1616.0,6694.0,2861.0,4930.0,-58.0,-70.0,-53.0,-66.0,1093.0,1579.0,425.0,226.0


In [39]:
# Maps each "row-column" label to an (x, y) coordinate pair.
# Rows 1, 6 and 11 list all eleven columns; every other row only lists
# columns 1 and 11.
label_to_coordinates = {
    # row 1
    "1-1": (0, 0), "1-2": (0.6, 0), "1-3": (1.2, 0), "1-4": (1.8, 0),
    "1-5": (2.4, 0), "1-6": (3.0, 0), "1-7": (3.6, 0), "1-8": (4.2, 0),
    "1-9": (4.8, 0), "1-10": (5.4, 0), "1-11": (6.0, 0),
    # rows 2-5
    "2-1": (0, 0.6), "2-11": (6.0, 0.6),
    "3-1": (0, 1.2), "3-11": (6.0, 1.2),
    "4-1": (0, 1.8), "4-11": (6.0, 1.8),
    "5-1": (0, 2.4), "5-11": (6.0, 2.4),
    # row 6
    "6-1": (0, 3.0), "6-2": (0.6, 3.0), "6-3": (1.2, 3.0), "6-4": (1.8, 3.0),
    "6-5": (2.4, 3.0), "6-6": (3.0, 3.0), "6-7": (3.6, 3.0), "6-8": (4.2, 3.0),
    "6-9": (4.8, 3.0), "6-10": (5.4, 3.0), "6-11": (6.0, 3.0),
    # rows 7-10
    "7-1": (0, 3.6), "7-11": (6.0, 3.6),
    "8-1": (0, 4.2), "8-11": (6.0, 4.2),
    "9-1": (0, 4.8), "9-11": (6.0, 4.8),
    "10-1": (0, 5.4), "10-11": (6.0, 5.4),
    # row 11
    "11-1": (0, 6.0), "11-2": (0.6, 6.0), "11-3": (1.2, 6.0), "11-4": (1.8, 6.0),
    "11-5": (2.4, 6.0), "11-6": (3.0, 6.0), "11-7": (3.6, 6.0), "11-8": (4.2, 6.0),
    "11-9": (4.8, 6.0), "11-10": (5.4, 6.0), "11-11": (6.0, 6.0),
}

In [None]:
# Maps a numeric class id (as a string, '1'..'49') to its "row-column" label.
label_mapping = {
    # row 1
    '11': '1-1', '10': '1-2', '9': '1-3', '8': '1-4', '7': '1-5', '6': '1-6',
    '5': '1-7', '4': '1-8', '3': '1-9', '2': '1-10', '1': '1-11',
    # rows 2-5
    '12': '2-1', '30': '2-11',
    '13': '3-1', '29': '3-11',
    '14': '4-1', '28': '4-11',
    '15': '5-1', '27': '5-11',
    # row 6
    '16': '6-1', '17': '6-2', '18': '6-3', '19': '6-4', '20': '6-5', '21': '6-6',
    '22': '6-7', '23': '6-8', '24': '6-9', '25': '6-10', '26': '6-11',
    # rows 7-10
    '49': '7-1', '31': '7-11',
    '48': '8-1', '32': '8-11',
    '47': '9-1', '33': '9-11',
    '46': '10-1', '34': '10-11',
    # row 11
    '45': '11-1', '44': '11-2', '43': '11-3', '42': '11-4', '41': '11-5', '40': '11-6',
    '39': '11-7', '38': '11-8', '37': '11-9', '36': '11-10', '35': '11-11',
}

In [40]:
# Cross-validation and model settings.
k = 5            # number of folds passed to KFold
n_neighbors = 5  # number of neighbours passed to the KNN classifier

# Report file name.
# NOTE(review): this is a plain string, not an f-string, so "{date}" is kept
# literally; the name is reassigned before any file is opened in later cells.
file_path = "DNN_report_{date}.txt"

In [41]:
# Step 3: set up K-fold evaluation of the KNN model and the containers used to
# aggregate results (including the overall confusion matrix) across folds.
kf = KFold(n_splits=k, shuffle=True, random_state=42)

# Per-fold accuracy, classification reports and mean MDE.
overall_accuracy, fold_reports, overall_mde = [], [], []

# True label -> list of per-sample distances collected over all folds.
mde_report = {}

# True and predicted labels accumulated over all folds.
all_y_test, all_y_test_pred = [], []

In [42]:
# Separate features from the target: drop the label and the leftover index
# column from the feature frame, and take the target column as y.
X = data_imputed.drop(['level_1', 'Label'], axis=1)
y = data_imputed.loc[:, target_column]

# Quick look at both.
print(X.head())
print(y.head())

   AP1_Distance (mm)  AP2_Distance (mm)  AP3_Distance (mm)  AP4_Distance (mm)  \
0             4233.0             6811.0            11650.0             -889.0   
1             3838.0             6928.0            11611.0             -889.0   
2             4155.0             6460.0            11924.0             -694.0   
3             3960.0             6753.0            12041.0             -811.0   
4             3960.0             6635.0            11650.0             -928.0   

   AP1_Rssi  AP2_Rssi  AP3_Rssi  AP4_Rssi  AP1_StdDev (mm)  AP2_StdDev (mm)  \
0     -61.0     -73.0     -67.0     -55.0            460.0           1219.0   
1     -53.0     -73.0     -67.0     -55.0            864.0           1141.0   
2     -62.0     -73.0     -66.0     -49.0            542.0           1451.0   
3     -62.0     -72.0     -65.0     -54.0            619.0           1249.0   
4     -63.0     -72.0     -67.0     -54.0            607.0           1326.0   

   AP3_StdDev (mm)  AP4_StdDev (mm)  


In [44]:
# Standardise every feature column (all selected columns except the label).
# NOTE(review): the scaler is fit on the full dataset before the K-fold split,
# so statistics of the held-out folds influence the training features.
scaler = StandardScaler()

# Work on a copy so selected_columns itself keeps its 'Label' entry.
columns_to_scale = list(selected_columns)
columns_to_scale.remove('Label')

scaled_values = scaler.fit_transform(X[columns_to_scale])
X[columns_to_scale] = scaled_values

print(X.head())

   AP1_Distance (mm)  AP2_Distance (mm)  AP3_Distance (mm)  AP4_Distance (mm)  \
0           0.615778           0.540147           1.822815          -1.476931   
1           0.449047           0.580152           1.808008          -1.476931   
2           0.582854           0.420132           1.926839          -1.396360   
3           0.500544           0.520315           1.971258          -1.444702   
4           0.500544           0.479969           1.822815          -1.493045   

   AP1_Rssi  AP2_Rssi  AP3_Rssi  AP4_Rssi  AP1_StdDev (mm)  AP2_StdDev (mm)  \
0 -0.120783 -1.059321 -1.674492  0.415218        -0.723499         0.706057   
1  1.584552 -1.059321 -1.674492  0.415218        -0.075477         0.586859   
2 -0.333950 -1.059321 -1.484683  1.345117        -0.591970         1.060596   
3 -0.333950 -0.876413 -1.294873  0.570201        -0.468461         0.751902   
4 -0.547117 -0.876413 -1.674492  0.570201        -0.487709         0.869573   

   AP3_StdDev (mm)  AP4_StdDev (mm)  


In [None]:
# Run K-fold cross-validation of the KNN classifier.

# Start from empty accumulators so that re-running this cell does not append a
# second copy of every fold's results to containers created in an earlier cell.
overall_accuracy = []
fold_reports = []
overall_mde = []
mde_report = {}
all_y_test = []
all_y_test_pred = []

# "row-column" label -> integer class id (inverse of label_mapping).
# It does not depend on the fold, so it is built once.
reverse_label_mapping = {label: int(class_id) for class_id, label in label_mapping.items()}

# Fail early with a clear message if the data contains a label that cannot be
# mapped; otherwise the mapped training target would contain NaN.
unmapped_labels = y[~y.isin(list(reverse_label_mapping))].unique()
if len(unmapped_labels) > 0:
    raise ValueError(f"Labels missing from label_mapping: {list(unmapped_labels)}")

# Every label present in the dataset. Passing these explicitly to
# classification_report keeps the report rows identical across folds and avoids
# an error when a class is absent from one fold's test split and predictions.
class_labels = np.unique(y)
class_names = [str(label) for label in class_labels]

for fold_index, (train_index, test_index) in enumerate(kf.split(X), start=1):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Convert the string labels of the training target to integer class ids.
    y_train_numeric = y_train.map(reverse_label_mapping)

    # Fit the KNN model and predict the held-out fold.
    knn = KNeighborsClassifier(n_neighbors=n_neighbors)
    knn.fit(X_train, y_train_numeric)
    y_test_pred_numeric = knn.predict(X_test)

    # Convert predicted class ids back to string labels.
    y_test_pred_labels = [label_mapping[str(num)] for num in y_test_pred_numeric]

    # Accumulate true and predicted labels over all folds.
    all_y_test.extend(y_test)
    all_y_test_pred.extend(y_test_pred_labels)

    # Overall accuracy of this fold.
    accuracy = accuracy_score(y_test, y_test_pred_labels)
    overall_accuracy.append(accuracy)

    # MDE: Euclidean distance between the coordinates of the predicted label
    # and of the true label, averaged over the fold.
    y_test_pred_coordinates = np.array([label_to_coordinates[label] for label in y_test_pred_labels])
    y_test_coordinates = np.array([label_to_coordinates[label] for label in y_test])
    distances = np.linalg.norm(y_test_pred_coordinates - y_test_coordinates, axis=1)

    fold_mde = np.mean(distances)
    overall_mde.append(fold_mde)

    # Collect the per-sample distances by true label across all folds.
    for true_label, distance in zip(y_test, distances):
        mde_report.setdefault(true_label, []).append(distance)

    # Per-class precision / recall / F1 for this fold, as a dict for later averaging.
    report = classification_report(
        y_test, y_test_pred_labels,
        labels=class_labels,
        target_names=class_names,
        zero_division=0,
        output_dict=True,
    )
    fold_reports.append(report)

    print(f"Fold {fold_index} - Accuracy: {accuracy:.4f}")
    print(f"Fold {fold_index} - MDE: {fold_mde:.4f}")
    



unique_labels = sorted(np.unique(all_y_test))  # sorted labels seen in the test folds

# After all folds have finished, build and draw the overall confusion matrix,
# using every label in the dataset for both axes.
class_labels = np.unique(y)
all_cm = confusion_matrix(all_y_test, all_y_test_pred, labels=class_labels)

cm_fig, cm_ax = plt.subplots(figsize=(20, 16))
sns.heatmap(
    all_cm,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=class_labels,
    yticklabels=class_labels,
    ax=cm_ax,
)
cm_ax.set_xlabel("Predicted Label")
cm_ax.set_ylabel("True Label")
cm_ax.set_title("Overall Confusion Matrix Across Folds")
cm_fig.savefig("Overall_Confusion_Matrix.png")
plt.show()

# Average MDE and sample count for every true label, over all folds.
avg_mde_report = {
    true_lbl: {"mde": np.mean(label_distances), "count": len(label_distances)}
    for true_lbl, label_distances in mde_report.items()
}

# Save to a JSON file. Opening with "w" creates the file if it is missing and
# truncates it otherwise, so no separate existence check is needed.
# NOTE(review): `date` is not defined in any cell visible here - confirm it is
# set before this cell runs.
file_path = f"mde_report_{date}.json"
with open(file_path, "w") as f:
    json.dump(avg_mde_report, f, indent=4)
print(f"MDE report saved to: {file_path}")

# Average the per-class metrics over the folds; a fold only contributes to a
# label if its report contains that label.
metric_names = ("precision", "recall", "f1-score")
avg_report = {}
for label in map(str, np.unique(y)):
    label_entries = [fold_report[label] for fold_report in fold_reports if label in fold_report]
    avg_report[label] = {
        metric: np.mean([entry[metric] for entry in label_entries])
        for metric in metric_names
    }


# Save the cross-fold average classification report.
# The previous version dumped `report`, which at this point only holds the
# report of the last fold; `avg_report` is the per-label average over all folds
# and is what the summary printed below is based on.
# NOTE(review): `date` is not defined in any cell visible here - confirm it is
# set before this cell runs.
file_path = f'accuracy_report_{date}.txt'

# Opening with "w" creates the file when it does not exist, so no separate
# existence check is needed.
with open(file_path, "w") as f:
    json.dump(avg_report, f, indent=4)

print(f"Classification report saved to: {file_path}")


# Print the accuracy and MDE averaged over all folds, plus the averaged
# per-label classification metrics.
avg_accuracy, avg_mde = np.mean(overall_accuracy), np.mean(overall_mde)

print(f"\nAverage Accuracy across {k} folds: {avg_accuracy:.4f}")
print("\nAverage Classification Report per Label:")
for label, metrics in avg_report.items():
    precision, recall, f1_score = metrics['precision'], metrics['recall'], metrics['f1-score']
    print(f"Label {label} - Precision: {precision:.4f}, Recall: {recall:.4f}, F1-score: {f1_score:.4f}")
print(f"Average MDE across {k} folds: {avg_mde:.4f}")


In [None]:
# Draw the per-label MDE on an 11 x 11 grid, one coloured square per label.
# NOTE(review): `date` and `ti` are not defined in any cell visible here -
# confirm they are set before this cell runs.
Title = f"{date} {ti} MDE"
file_path = f'mde_report_{date}.json'

# Load the JSON report.
with open(file_path) as file:
    mde_data = json.load(file)

# Grid of MDE values (NaN where there is no label) and the matching label texts.
rows, cols = 11, 11
grid = np.full((rows, cols), np.nan)
labels = np.empty((rows, cols), dtype=object)

# Fill the grid from "row-column" keys. The row index is reversed (rows - r),
# so label row 1 lands in the last grid row, which is drawn at the top because
# the y axis below increases upwards.
for key, value in mde_data.items():
    if '-' in key:
        r, c = map(int, key.split('-'))
        grid[rows - r, c - 1] = value["mde"]
        labels[rows - r, c - 1] = key

# Largest MDE on the grid, used to normalise the colours. It is computed once
# instead of once per cell, and defaults to 0 when the grid has no values.
filled_values = grid[~np.isnan(grid)]
max_mde = filled_values.max() if filled_values.size else 0.0

fig, ax = plt.subplots(figsize=(10, 10))
cmap = plt.cm.Reds

# Draw every cell: white when empty, otherwise shaded by MDE with the label
# and the value written inside.
for i in range(rows):
    for j in range(cols):
        value = grid[i, j]
        label = labels[i, j]
        if np.isnan(value):
            rect_color = 'white'
        else:
            ax.text(j, i + 0.2, f'{label}', ha='center', va='center', color='black', fontsize=10)
            ax.text(j, i - 0.2, f'{value:.4f}', ha='center', va='center', color='blue', fontsize=12)
            # When every MDE is 0 the ratio would be 0/0 (NaN); use the lowest
            # colour of the map in that case.
            rect_color = cmap(value / max_mde) if max_mde > 0 else cmap(0.0)
        ax.add_patch(plt.Rectangle((j - 0.5, i - 0.5), 1, 1, color=rect_color, alpha=0.5))

# Format the chart: cell borders as a minor grid, no ticks or tick labels.
ax.set_xlim(-0.5, cols - 0.5)
ax.set_ylim(-0.5, rows - 0.5)
ax.set_xticks(np.arange(-0.5, cols, 1), minor=True)
ax.set_yticks(np.arange(-0.5, rows, 1), minor=True)
ax.grid(which="minor", color="black", linestyle='-', linewidth=2)
ax.tick_params(left=False, bottom=False, labelleft=False, labelbottom=False)

plt.title(Title)
plt.show()
