In [1]:
import os 
import sys

import sklearn as sk
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt 
import xgboost as xgb
from sklearn.preprocessing import OneHotEncoder

from scipy.stats import kurtosis, entropy
from scipy.fft import fft

In [2]:
sys.path.append(r"/media/mcs/1441ae67-d7cd-43e6-b028-169f78661a2f/kyle/csi_tool") #這行為絕對路徑，如需使用，必須要修改為當前決路徑
import csi_tool
import denoise

In [3]:
base_path = "/media/mcs/1441ae67-d7cd-43e6-b028-169f78661a2f/kyle/csi_tool/csi_dataset/localization_phone/micro/0303/80Mhz/csv"
reference_points = {
    f"{base_path}/reference_point{i}.xlsx": i for i in range(1, 37)
}

print(reference_points)

{'/media/mcs/1441ae67-d7cd-43e6-b028-169f78661a2f/kyle/csi_tool/csi_dataset/localization_phone/micro/0303/80Mhz/csv/reference_point1.xlsx': 1, '/media/mcs/1441ae67-d7cd-43e6-b028-169f78661a2f/kyle/csi_tool/csi_dataset/localization_phone/micro/0303/80Mhz/csv/reference_point2.xlsx': 2, '/media/mcs/1441ae67-d7cd-43e6-b028-169f78661a2f/kyle/csi_tool/csi_dataset/localization_phone/micro/0303/80Mhz/csv/reference_point3.xlsx': 3, '/media/mcs/1441ae67-d7cd-43e6-b028-169f78661a2f/kyle/csi_tool/csi_dataset/localization_phone/micro/0303/80Mhz/csv/reference_point4.xlsx': 4, '/media/mcs/1441ae67-d7cd-43e6-b028-169f78661a2f/kyle/csi_tool/csi_dataset/localization_phone/micro/0303/80Mhz/csv/reference_point5.xlsx': 5, '/media/mcs/1441ae67-d7cd-43e6-b028-169f78661a2f/kyle/csi_tool/csi_dataset/localization_phone/micro/0303/80Mhz/csv/reference_point6.xlsx': 6, '/media/mcs/1441ae67-d7cd-43e6-b028-169f78661a2f/kyle/csi_tool/csi_dataset/localization_phone/micro/0303/80Mhz/csv/reference_point7.xlsx': 7, '/med

In [4]:
def load_data(reference_points):
    data = []          
    rp_labels = []     

    for path, ref_id in reference_points.items():
        df = pd.read_excel(path)
        data.append(df.values)
        rp_labels.extend([ref_id] * len(df))  # 只保留 Reference Point ID

    data = pd.DataFrame(np.vstack(data))
    rp_labels = pd.Series(rp_labels, name="Reference Point ID")  # 轉為 Pandas Series

    return data, rp_labels

In [5]:
data, rp_labels = load_data(reference_points)

In [6]:
amp_data = np.array(data.iloc[:, :234])
phase_data = np.array(data.iloc[:, 234:-2]) 

In [7]:
amp_d = denoise.preprocess_csi_for_fingerprint2(amp_data)

In [8]:
encoder = OneHotEncoder(sparse_output=False)
one_hot_labels = encoder.fit_transform(np.array(rp_labels).reshape(-1, 1))

In [9]:
amp_train, amp_temp, y_train, y_temp = train_test_split(data, one_hot_labels, test_size=0.3, random_state=1)
amp_val, amp_test, y_val, y_test = train_test_split(amp_temp, y_temp, test_size=1/3, random_state=42)

In [10]:
print(y_test.shape)

(717, 36)


# K值測試

In [11]:
from sklearn.model_selection import StratifiedKFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import numpy as np

# 設定 N-Fold 交叉驗證
N_FOLD = 10
skf = StratifiedKFold(n_splits=N_FOLD, shuffle=True, random_state=42)

# 轉換 One-Hot 標籤為標籤索引
y_labels = np.argmax(one_hot_labels, axis=1)

# 測試的 K 值範圍
k_values = range(3, 20, 2)  # 1, 3, 5, ..., 19
best_k = None
best_acc = 0
all_k_accuracies = {}  # 紀錄不同 K 值的準確率

# 測試不同的 K 值
for k in k_values:
    accuracies = []  # 儲存 N-Fold 交叉驗證的準確率
    
    # 執行 N-Fold 交叉驗證
    for train_index, test_index in skf.split(data, y_labels):
        X_train, X_test = amp_data[train_index], amp_data[test_index]
        y_train, y_test = y_labels[train_index], y_labels[test_index]

        # 訓練 KNN 模型
        knn = KNeighborsClassifier(n_neighbors=k)
        knn.fit(X_train, y_train)

        # 進行預測
        y_pred = knn.predict(X_test)

        # 計算準確率
        acc = accuracy_score(y_test, y_pred)
        accuracies.append(acc)

    # 計算 K 值的平均準確率
    mean_acc = np.mean(accuracies)
    all_k_accuracies[k] = mean_acc  # 記錄 K 值的結果

    print(f"K={k} 平均準確率: {mean_acc * 100:.2f}%")

    # 更新最佳 K 值
    if mean_acc > best_acc:
        best_acc = mean_acc
        best_k = k

# 輸出最佳 K 值
print("\n📊 最佳 KNN 模型")
print(f"🎯 最佳 K 值: {best_k}")
print(f"🏆 最佳準確率: {best_acc * 100:.2f}%")


K=3 平均準確率: 97.26%
K=5 平均準確率: 96.83%
K=7 平均準確率: 96.40%
K=9 平均準確率: 95.99%
K=11 平均準確率: 95.69%
K=13 平均準確率: 95.30%
K=15 平均準確率: 94.99%
K=17 平均準確率: 94.53%
K=19 平均準確率: 94.22%

📊 最佳 KNN 模型
🎯 最佳 K 值: 3
🏆 最佳準確率: 97.26%


In [25]:
knn = KNeighborsClassifier(n_neighbors=3, weights='distance', metric='manhattan')

# N-ford 測試

In [34]:
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score
import numpy as np

# 設定 N-Fold 交叉驗證
N_FOLD = 10  # 設定 N 為 10
skf = StratifiedKFold(n_splits=N_FOLD, shuffle=True, random_state=1)

accuracies = []  # 儲存每次 Fold 的準確率

# 轉換 one-hot 標籤為標籤索引
y_labels = np.argmax(one_hot_labels, axis=1)  # 轉換成 (N,)

# 開始 N-Fold 交叉驗證
for train_index, test_index in skf.split(amp_data, y_labels):  # ✅ 使用標籤索引
    X_train, X_test = amp_data[train_index], amp_data[test_index]
    y_train, y_test = y_labels[train_index], y_labels[test_index]  # ✅ 確保 y_train, y_test 是 (N,)

    # 訓練 KNN 模型
    knn.fit(X_train, y_train)

    # 進行預測
    y_pred = knn.predict(X_test)

    # 計算準確率
    acc = accuracy_score(y_test, y_pred)
    accuracies.append(acc)

    print(f"Fold 準確率: {acc:.4f}")

# 計算 N-Fold 平均準確率與標準差
mean_acc = np.mean(accuracies)
std_acc = np.std(accuracies)

print("\n📊 N-FOLD 交叉驗證結果")
print(f"⏱️ 平均準確率: {mean_acc * 100:.2f}%")
print(f"📉 準確率標準差: {std_acc * 100:.2f}%")


Fold 準確率: 0.9749
Fold 準確率: 0.9693
Fold 準確率: 0.9637
Fold 準確率: 0.9637
Fold 準確率: 0.9721
Fold 準確率: 0.9763
Fold 準確率: 0.9721
Fold 準確率: 0.9721
Fold 準確率: 0.9595
Fold 準確率: 0.9777

📊 N-FOLD 交叉驗證結果
⏱️ 平均準確率: 97.01%
📉 準確率標準差: 0.57%


In [33]:
from sklearn.model_selection import StratifiedShuffleSplit, StratifiedKFold, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import numpy as np

# 轉換標籤索引
y_labels = np.argmax(one_hot_labels, axis=1) if one_hot_labels.ndim == 2 else one_hot_labels

# **先分 7:2:1，但捨棄 10% 測試集**
splitter = StratifiedShuffleSplit(n_splits=1, test_size=0.3, random_state=42)  # 30% = 20% 驗證 + 10% 測試
train_val_idx, test_idx = next(splitter.split(amp_data, y_labels))

# ✅ **不使用 10% 測試集，只用 20% Val 作為測試集**
X_train, y_train = amp_data[train_val_idx], y_labels[train_val_idx]  # 70% 訓練
X_test, y_test = amp_data[test_idx], y_labels[test_idx]  # 20% 測試（原本的驗證集）

# **在 70% 訓練集內部做 10-Fold 交叉驗證**
N_FOLD = 10
skf = StratifiedKFold(n_splits=N_FOLD, shuffle=True, random_state=42)

# 設定 KNN 和 GridSearch 參數
knn = KNeighborsClassifier()
param_grid = {
    'n_neighbors': [3, 5, 7, 9],
    'metric': ['euclidean', 'manhattan'],
    'weights': ['uniform', 'distance']
}

grid_search = GridSearchCV(knn, param_grid, cv=skf, scoring='accuracy', n_jobs=-1)
grid_search.fit(X_train, y_train)

# **取得 K-Fold 平均準確率**
mean_cv_acc = grid_search.best_score_

# **最後測試最佳模型在測試集（原本的 Val）上的準確率**
best_knn = grid_search.best_estimator_
y_test_pred = best_knn.predict(X_test)
final_acc = accuracy_score(y_test, y_test_pred)

print("\n📊 KNN (7:2:1, Val 作為測試集) 結果")
print(f"✅ K-Fold 平均準確率: {mean_cv_acc * 100:.2f}%")
print(f"🏆 KNN 最終測試集準確率（使用原 Val）: {final_acc * 100:.2f}%")




📊 KNN (7:2:1, Val 作為測試集) 結果
✅ K-Fold 平均準確率: 97.15%
🏆 KNN 最終測試集準確率（使用原 Val）: 97.81%


In [None]:
import time
import numpy as np

N_FOLD = 10  # 設定重複測試的次數
total_times = []  # 儲存 N 次的總推論時間
sample_times = []  # 儲存 N 次的單筆推論時間

for _ in range(N_FOLD):
    start_time = time.perf_counter()
    y_pred = knn.predict(data)  # 進行 KNN 預測
    end_time = time.perf_counter()

    total_time = end_time - start_time  # 計算 KNN 預測總時間
    total_times.append(total_time)

    avg_time_per_sample = total_time / len(amp_test)  # 計算每筆資料的平均推論時間
    sample_times.append(avg_time_per_sample)

# 計算統計數據
mean_total_time = np.mean(total_times)
mean_sample_time = np.mean(sample_times)
std_sample_time = np.std(sample_times)

# 顯示結果
print(f"📊 N-FOLD 測試結果（共 {N_FOLD} 次）")
print(f"⏳ 平均每次總推論時間: {mean_total_time:.6f} 秒")
print(f"⏱️ 平均每筆資料推論時間: {mean_sample_time * 1000:.6f} 毫秒")
print(f"📉 單筆推論時間標準差: {std_sample_time * 1000:.6f} 毫秒")



ValueError: X has 470 features, but KNeighborsClassifier is expecting 234 features as input.