In [407]:
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.impute import KNNImputer
import matplotlib.pyplot as plt
import seaborn as sns                               
import json
import os 
from sklearn.preprocessing import StandardScaler

In [408]:
# Data preprocessing (1): drop the first and last n rows of every label
def remove_first_last_n(data, label_column, n=1):
    """Trim ``n`` rows from both ends of every label group.

    Rows are grouped by ``label_column``. Inside each group the rows keep the
    order they have in ``data`` (``groupby`` preserves it), so "first" and
    "last" refer to the original row order. The previous implementation
    pre-sorted with ``sort_values``, whose default algorithm is not stable and
    could therefore shuffle rows inside a label before trimming.

    Parameters
    ----------
    data : pandas.DataFrame
        Input rows.
    label_column : label or list of labels
        Column(s) identifying the group of each row.
    n : int, default 1
        Number of rows to drop at the start and at the end of each group.
        ``n=0`` keeps every row.

    Returns
    -------
    pandas.DataFrame
        Remaining rows, ordered by label, with a fresh ``RangeIndex``.
        Groups with ``2 * n`` rows or fewer are removed entirely. If nothing
        remains, an empty frame with the columns of ``data`` is returned.

    Raises
    ------
    ValueError
        If ``n`` is negative.
    """
    if n < 0:
        raise ValueError("n must be non-negative")

    trimmed_groups = []
    for _, group in data.groupby(label_column, sort=True):
        size = len(group)
        if size > 2 * n:
            # Explicit end position: ``iloc[n:-n]`` would be empty for n == 0.
            trimmed_groups.append(group.iloc[n:size - n])

    if not trimmed_groups:
        return pd.DataFrame(columns=data.columns)
    # Single concat: avoids quadratic growth and concatenating empty frames.
    return pd.concat(trimmed_groups, ignore_index=True)

def fill_nan_with_mean(processed_data, label_column):
    """Replace missing feature values with the mean of their label group.

    The previous ``groupby(...).apply(lambda g: g.fillna(g.mean()))`` only
    worked when the label column was non-numeric (pandas retried without the
    grouping column after a ``TypeError``) and raised on numeric labels. This
    version computes the per-group means with ``transform`` and builds the
    same output layout explicitly.

    Parameters
    ----------
    processed_data : pandas.DataFrame
        Rows containing the label column(s) and the feature columns.
    label_column : label or list of labels
        Column(s) that define the groups.

    Returns
    -------
    pandas.DataFrame
        Columns are: the label column(s), then ``level_<k>`` (``k`` = number
        of label columns, i.e. ``level_1`` for one label) holding each row's
        index in ``processed_data``, then the feature columns with NaNs
        filled. Rows are ordered by label, keeping their relative order
        inside a label, and the result has a fresh ``RangeIndex``. Rows whose
        label is missing are dropped, as ``groupby`` does.
    """
    keys = list(label_column) if isinstance(label_column, (list, tuple)) else [label_column]
    feature_columns = [col for col in processed_data.columns if col not in keys]

    labelled = processed_data.dropna(subset=keys)
    original_index = labelled.index.to_numpy()
    # Work on a unique positional index so the alignment in fillna is safe.
    labelled = labelled.reset_index(drop=True)

    features = labelled[feature_columns]
    if feature_columns and not labelled.empty:
        group_means = labelled.groupby(keys)[feature_columns].transform("mean")
        features = features.fillna(group_means)

    data_imputed = labelled[keys].copy()
    # Callers drop this column by name, so keep the historical ``level_<k>``.
    data_imputed[f"level_{len(keys)}"] = original_index
    data_imputed = pd.concat([data_imputed, features], axis=1)
    return data_imputed.sort_values(by=keys, kind="stable").reset_index(drop=True)

# Train a KNN model
def train_knn_model(X_train, y_train, n_neighbors=5):
    """Fit and return a ``KNeighborsClassifier``.

    Parameters
    ----------
    X_train : training features, passed straight to ``fit``.
    y_train : training labels, passed straight to ``fit``.
    n_neighbors : int, default 5
        Number of neighbours used by the classifier.

    Returns
    -------
    KNeighborsClassifier
        The fitted estimator.
    """
    # ``fit`` returns the estimator itself, so build and fit in one expression.
    return KNeighborsClassifier(n_neighbors=n_neighbors).fit(X_train, y_train)

In [409]:
# Run configuration.
# traindate / testdate are the date tags inserted into the input CSV file names;
# with_data is a free-text tag that only ends up in the report file name.
# NOTE(review): with_data says 'distance' while selected_columns currently also
# lists the Rssi columns; confirm the tag matches the feature set in use.
traindate = "2024_12_14"
testdate = "2025_02_28"
with_data = "distance"

In [410]:
# Input CSV files for the training and the test capture, selected by date tag.
train_path, test_path = (
    f'timestamp_allignment_Balanced_{traindate}_rtt_logs.csv',
    f'timestamp_allignment_Balanced_{testdate}_rtt_logs.csv',
)

# Alternative input files (kept for reference):
# train_path = f'standalized_timestamp_allignment_{traindate}_rtt_logs.csv'
# test_path = f'standalized_timestamp_allignment_{testdate}_rtt_logs.csv'

In [411]:
# Columns read from the CSV files: the label plus the features fed to the model.
# Active feature set: per-AP distance and per-AP RSSI.
selected_columns = [
    'Label',
    'AP1_Distance (mm)',
    'AP2_Distance (mm)',
    'AP3_Distance (mm)',
    'AP4_Distance (mm)',
    'AP1_Rssi',
    'AP2_Rssi',
    'AP3_Rssi',
    'AP4_Rssi',
]

# Alternative feature sets (uncomment exactly one to replace the list above):

# selected_columns = ['Label', 'AP1_Rssi','AP2_Rssi','AP3_Rssi','AP4_Rssi']

# selected_columns = ['Label', 'AP1_Distance (mm)','AP2_Distance (mm)','AP3_Distance (mm)','AP4_Distance (mm)'] 

# selected_columns = ['Label', 'AP1_Corrected Distance (mm)','AP2_Corrected Distance (mm)','AP3_Corrected Distance (mm)','AP4_Corrected Distance (mm)'] 

# selected_columns = ['Label', 'AP1_Rssi','AP2_Rssi','AP3_Rssi','AP4_Rssi',
#                     'AP1_StdDev (mm)','AP2_StdDev (mm)','AP3_StdDev (mm)','AP4_StdDev (mm)'] 

# selected_columns = ['Label', 'AP1_Distance (mm)','AP2_Distance (mm)','AP3_Distance (mm)','AP4_Distance (mm)',
#                     'AP1_Rssi','AP2_Rssi','AP3_Rssi','AP4_Rssi']

# selected_columns = ['Label', 'AP1_Distance (mm)','AP2_Distance (mm)','AP3_Distance (mm)','AP4_Distance (mm)',
#                     'AP1_StdDev (mm)','AP2_StdDev (mm)','AP3_StdDev (mm)','AP4_StdDev (mm)']   

# selected_columns = ['Label', 'AP1_Distance (mm)','AP2_Distance (mm)','AP3_Distance (mm)','AP4_Distance (mm)',
#                     'AP1_StdDev (mm)','AP2_StdDev (mm)','AP3_StdDev (mm)','AP4_StdDev (mm)',
#                     'AP1_Rssi','AP2_Rssi','AP3_Rssi','AP4_Rssi']  

# selected_columns = ['Label', 'AP1_Corrected Distance (mm)','AP2_Corrected Distance (mm)','AP3_Corrected Distance (mm)','AP4_Corrected Distance (mm)',
#                      'AP1_Rssi','AP2_Rssi','AP3_Rssi','AP4_Rssi']

In [412]:
# Load the data: read only the selected columns from the train and test CSVs.
train_data, test_data = (
    pd.read_csv(csv_path, usecols=selected_columns)
    for csv_path in (train_path, test_path)
)

In [413]:
# n_remove: rows trimmed from each end of every label group during cleaning.
# n_neighbors: neighbour count (k) for the KNN classifier.
n_remove, n_neighbors = 20, 5

In [414]:
# Data cleaning: trim the ends of every label group, then fill the remaining
# NaNs with the per-label mean. The same two steps run on train and test.
train_data_p = (
    train_data
    .pipe(remove_first_last_n, label_column=['Label'], n=n_remove)
    .pipe(fill_nan_with_mean, label_column=['Label'])
)
test_data_p = (
    test_data
    .pipe(remove_first_last_n, label_column=['Label'], n=n_remove)
    .pipe(fill_nan_with_mean, label_column=['Label'])
)

# Split features and labels. 'level_1' is the leftover row-index column
# produced by fill_nan_with_mean and is not a feature.
non_feature_columns = ['level_1', 'Label']
y_train = train_data_p['Label']
y_test = test_data_p['Label']
X_train = train_data_p.drop(columns=non_feature_columns)
X_test = test_data_p.drop(columns=non_feature_columns)

print(X_train.head())




   AP1_Distance (mm)  AP2_Distance (mm)  AP3_Distance (mm)  AP4_Distance (mm)  \
0             4233.0             6811.0             -889.0            11650.0   
1             3838.0             6928.0             -889.0            11611.0   
2             4155.0             6460.0             -694.0            11924.0   
3             3960.0             6753.0             -811.0            12041.0   
4             3960.0             6635.0             -928.0            11650.0   

   AP1_Rssi  AP2_Rssi  AP3_Rssi  AP4_Rssi  
0     -61.0     -73.0     -55.0     -67.0  
1     -53.0     -73.0     -55.0     -67.0  
2     -62.0     -73.0     -49.0     -66.0  
3     -62.0     -72.0     -54.0     -65.0  
4     -63.0     -72.0     -54.0     -67.0  


  processed_data = pd.concat([processed_data, group], ignore_index=True)
  processed_data = pd.concat([processed_data, group], ignore_index=True)


In [415]:
# Z-score standardisation.
# Derive the feature list without mutating ``selected_columns``: the previous
# ``selected_columns.remove('Label')`` made this cell fail on a second run and
# silently changed what the data-loading cell reads if that cell was re-run.
feature_columns = [column for column in selected_columns if column != 'Label']

# Fit on the training features only, so test data does not leak into the scaling.
scaler = StandardScaler()
scaler.fit(X_train[feature_columns])

# Apply the training statistics to both splits.
X_train.loc[:, feature_columns] = scaler.transform(X_train[feature_columns])
X_test.loc[:, feature_columns] = scaler.transform(X_test[feature_columns])

print(X_train.head())
print(X_test.head())


   AP1_Distance (mm)  AP2_Distance (mm)  AP3_Distance (mm)  AP4_Distance (mm)  \
0           0.615778           0.540147          -1.476931           1.822815   
1           0.449047           0.580152          -1.476931           1.808008   
2           0.582854           0.420132          -1.396360           1.926839   
3           0.500544           0.520315          -1.444702           1.971258   
4           0.500544           0.479969          -1.493045           1.822815   

   AP1_Rssi  AP2_Rssi  AP3_Rssi  AP4_Rssi  
0 -0.120783 -1.059321  0.415218 -1.674492  
1  1.584552 -1.059321  0.415218 -1.674492  
2 -0.333950 -1.059321  1.345117 -1.484683  
3 -0.333950 -0.876413  0.570201 -1.294873  
4 -0.547117 -0.876413  0.570201 -1.674492  
   AP1_Distance (mm)  AP2_Distance (mm)  AP3_Distance (mm)  AP4_Distance (mm)  \
0           0.517006           0.570236          -1.025320           1.081359   
1           0.484082           0.530231          -1.090190           1.096165   
2     

In [416]:
# Maps each reference-point label to its (x, y) position on the floor grid.
# Labels have the form "<row>-<col>"; in every entry x = 0.6 * (col - 1) and
# y = 0.6 * (row - 1). Only rows 1, 6 and 11 are fully populated; the other
# rows contain just columns 1 and 11 (49 points in total).
# NOTE(review): coordinates are presumably in metres (0.6 m pitch) - confirm.
label_to_coordinates = {
    "1-1": (0, 0), "1-2": (0.6, 0), "1-3": (1.2, 0), "1-4": (1.8, 0), "1-5": (2.4, 0), "1-6": (3.0, 0),"1-7": (3.6, 0), "1-8": (4.2, 0), "1-9": (4.8, 0), "1-10": (5.4, 0), "1-11": (6.0, 0),
    "2-1": (0, 0.6), "2-11": (6.0, 0.6),
    "3-1": (0, 1.2), "3-11": (6.0, 1.2),
    "4-1": (0, 1.8), "4-11": (6.0, 1.8),
    "5-1": (0, 2.4), "5-11": (6.0, 2.4),
    "6-1": (0, 3.0), "6-2": (0.6, 3.0), "6-3": (1.2, 3.0), "6-4": (1.8, 3.0), "6-5": (2.4, 3.0),"6-6": (3.0, 3.0), "6-7": (3.6, 3.0), "6-8": (4.2, 3.0), "6-9": (4.8, 3.0), "6-10": (5.4, 3.0), "6-11": (6.0, 3.0),
    "7-1": (0, 3.6), "7-11": (6.0, 3.6),
    "8-1": (0, 4.2), "8-11": (6.0, 4.2),
    "9-1": (0, 4.8), "9-11": (6.0, 4.8),
    "10-1": (0, 5.4), "10-11": (6.0, 5.4),
    "11-1": (0, 6.0), "11-2": (0.6, 6.0), "11-3": (1.2, 6.0), "11-4": (1.8, 6.0), "11-5": (2.4, 6.0),"11-6": (3.0, 6.0), "11-7": (3.6, 6.0), "11-8": (4.2, 6.0), "11-9": (4.8, 6.0), "11-10": (5.4, 6.0), "11-11": (6.0, 6.0)
}

In [417]:
# Build the KNN model.
# Use the notebook-level ``n_neighbors`` setting instead of a second,
# hard-coded 5 so that changing the configuration cell actually takes effect.
knn = KNeighborsClassifier(n_neighbors=n_neighbors, weights='uniform', metric='euclidean')

knn.fit(X_train, y_train)

In [418]:
# Evaluate the model
report_save_path = f'transfer_report_{traindate}_data_to_train_{testdate}_data_to_test_{with_data}.txt'
y_pred = knn.predict(X_test)

# Accuracy
accuracy = accuracy_score(y_test, y_pred)

# Mean Distance Error (MDE): translate predicted and true labels into grid
# coordinates and measure the Euclidean distance between them.
y_test_pred_coordinates = np.array([label_to_coordinates[label] for label in y_pred])
y_test_coordinates = np.array([label_to_coordinates[label] for label in y_test])

# Per-sample distance between the predicted and the true position
distances = np.linalg.norm(y_test_pred_coordinates - y_test_coordinates, axis=1)
mean_distance_error = float(np.mean(distances))

# MDE per true label. Labels come from the test labels themselves, so every
# label has at least one sample and no "empty" case is needed. Values are
# stored as plain Python numbers so the report can be serialised as JSON.
y_test_labels = np.asarray(y_test)
pointwise_mde = {}
for label in np.unique(y_test_labels):
    point_distances = distances[y_test_labels == label]
    pointwise_mde[label] = {
        "count": int(point_distances.size),
        "MDE": float(point_distances.mean())
    }



In [419]:
print(f"Test Accuracy: {accuracy:.4f}")
print(f"Mean Distance Error (MDE): {mean_distance_error:.4f}")
print("Classification Report:")
# Let scikit-learn derive the class names from y_test and y_pred together; names
# built from y_test alone can disagree with the labels that appear in y_pred.
# zero_division=0 keeps the values the default reports for classes that are
# never predicted, without emitting a warning per metric.
print(classification_report(y_test, y_pred, zero_division=0))

# Build the classification report as a dict and extend it with our own metrics
report = classification_report(y_test, y_pred, output_dict=True, zero_division=0)
report["Mean Distance Error (MDE)"] = mean_distance_error
report["accuracy"] = accuracy  # overall accuracy
report["Pointwise MDE"] = pointwise_mde  # MDE per true label

# Mode "w" creates the file when missing and truncates it otherwise, so no
# separate "create an empty file first" step is required.
with open(report_save_path, "w", encoding="utf-8") as f:
    json.dump(report, f, indent=4)

print(f"Updated report with MDE saved to: {report_save_path}")

Test Accuracy: 0.2080
Mean Distance Error (MDE): 0.9740
Classification Report:


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

         1-1       0.01      0.01      0.01       366
        1-10       0.00      0.00      0.00       366
        1-11       0.41      0.93      0.57       366
         1-2       0.00      0.00      0.00       366
         1-3       0.00      0.00      0.00       366
         1-4       0.00      0.00      0.00       366
         1-5       0.33      0.87      0.48       366
         1-6       0.37      0.99      0.54       366
         1-7       0.91      0.11      0.20       366
         1-8       0.24      0.40      0.30       366
         1-9       0.15      0.08      0.11       366
        10-1       0.99      0.80      0.88       366
       10-11       0.01      0.01      0.01       366
        11-1       0.03      0.07      0.04       366
       11-10       0.80      0.01      0.02       366
       11-11       0.00      0.00      0.00       366
        11-2       0.89      0.13      0.22       366
        11-3       1.00    

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [420]:
# # 計算混淆矩陣
# cm = confusion_matrix(y_test, y_pred)
# num_classes = cm.shape[0]

# plt.figure(figsize=(20, 16))
# sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=np.unique(y_test), yticklabels=np.unique(y_test),
#             cbar=False)

# # 手動修改對角線文字顏色為紅色
# for i in range(num_classes):
#     plt.text(i + 0.5, i + 0.5, cm[i, i], 
#                 color="red", ha="center", va="center", fontsize=12, fontweight="bold")

# plt.xlabel("Predicted Label")
# plt.ylabel("True Label")
# plt.title("Confusion Matrix with Highlighted Diagonal")
# plt.show()

In [421]:
# Title = f"Accuracy of 49 RP use KNN use {traindate} train and {testdate} test with data {with_data} to predict"
# file_path = f'transfer_report_{traindate}_data_to_train_{testdate}_data_to_test_{with_data}.txt'

# # Load the figure.txt JSON file
# with open(file_path) as file:
#     data = json.load(file)

# print(data)

# # Filter out keys that are not class labels (e.g., "accuracy", "macro avg")
# filtered_data = {k: v for k, v in data.items() if '-' in k}

# print(filtered_data)

# # Extract precision values without mapping
# precision_values = {k: v['precision'] for k, v in filtered_data.items()}

# precision_values

# # Grid layout (11x11)
# rows, cols = 11, 11
# grid = np.full((rows, cols), np.nan)  # Initialize grid with NaN
# labels = np.empty((rows, cols), dtype=object)  # Initialize grid for labels

# # Populate the grid with precision values and labels (reverse row index for correct plotting)
# for key, value in precision_values.items():
#     if '-' in key:
#         r, c = map(int, key.split('-'))
#         grid[rows - r, c - 1] = value  # Reverse rows for top-left (1-1) to bottom-left (11-1)
#         labels[rows - r, c - 1] = key

# # Plotting
# fig, ax = plt.subplots(figsize=(10, 10))
# cmap = plt.cm.Reds_r  # Color map

# # 設定自定義顏色範圍
# vmin = 0  # 最小值對應的顏色
# vmax = np.nanmax(grid) * 0.5  # 最大值對應的顏色 (縮小範圍讓顏色對比更明顯)

# log_grid = np.log(grid + 1e-6)  # 避免取 log(0)

# # Draw grid with precision values and labels
# for i in range(rows):
#     for j in range(cols):
#         value = grid[i, j]
#         label = labels[i, j]
#         if not np.isnan(value):
#             ax.text(j, i + 0.2, f'{label}', ha='center', va='center', color='black', fontsize=10)
#             ax.text(j, i - 0.2, f'{value:.4f}', ha='center', va='center', color='red', fontsize=12)
#         rect_color = cmap(value / np.nanmax(grid)) if not np.isnan(value) else 'white'
#         ax.add_patch(plt.Rectangle((j - 0.5, i - 0.5), 1, 1, color=rect_color, alpha=0.5))


# # Format grid
# ax.set_xlim(-0.5, cols - 0.5)
# ax.set_ylim(-0.5, rows - 0.5)
# ax.set_xticks(np.arange(-0.5, cols, 1), minor=True)
# ax.set_yticks(np.arange(-0.5, rows, 1), minor=True)
# ax.grid(which="minor", color="black", linestyle='-', linewidth=2)
# ax.tick_params(left=False, bottom=False, labelleft=False, labelbottom=False)

# plt.title(Title)
# plt.show()

In [422]:
# Title = f"MDE of 49 RP use KNN use {traindate} train and {testdate} test with data {with_data} to predict"
# file_path = f'transfer_report_{traindate}_data_to_train_{testdate}_data_to_test_{with_data}.txt'

# # 載入 JSON 報告
# with open(file_path) as file:
#     mde_data = json.load(file)

# # 提取 Pointwise MDE
# pointwise_mde = mde_data.get("Pointwise MDE", {})

# # 將 MDE 填入網格
# rows, cols = 11, 11
# grid = np.full((rows, cols), np.nan)  # 初始化網格
# labels = np.empty((rows, cols), dtype=object)  # 初始化標籤

# # 填充網格數據
# for key, value in pointwise_mde.items():
#     if '-' in key:
#         r, c = map(int, key.split('-'))
#         grid[rows - r, c - 1] = value["MDE"]  # 反轉行索引以正確對應圖表
#         labels[rows - r, c - 1] = key

# # 繪製圖表
# fig, ax = plt.subplots(figsize=(10, 10))
# cmap = plt.cm.Reds  # 顏色映射

# # 畫出每個網格
# for i in range(rows):
#     for j in range(cols):
#         value = grid[i, j]
#         label = labels[i, j]
#         if not np.isnan(value):
#             ax.text(j, i + 0.2, f'{label}', ha='center', va='center', color='black', fontsize=10)
#             ax.text(j, i - 0.2, f'{value:.4f}', ha='center', va='center', color='blue', fontsize=12)
#         rect_color = cmap(value / np.nanmax(grid)) if not np.isnan(value) else 'white'
#         ax.add_patch(plt.Rectangle((j - 0.5, i - 0.5), 1, 1, color=rect_color, alpha=0.5))

# # 格式化圖表
# ax.set_xlim(-0.5, cols - 0.5)
# ax.set_ylim(-0.5, rows - 0.5)
# ax.set_xticks(np.arange(-0.5, cols, 1), minor=True)
# ax.set_yticks(np.arange(-0.5, rows, 1), minor=True)
# ax.grid(which="minor", color="black", linestyle='-', linewidth=2)
# ax.tick_params(left=False, bottom=False, labelleft=False, labelbottom=False)

# plt.title(Title)
# plt.show()
