In [25]:
import os 
import sys

import sklearn as sk
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.ensemble import RandomForestClassifier
from imblearn.over_sampling import SMOTE
from xgboost import XGBClassifier
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt 
import xgboost as xgb

from scipy.stats import kurtosis, entropy
from scipy.fft import fft

sys.path.append(r"c:\Users\keng-tse\Desktop\csi_tool") #這行為絕對路徑，如需使用，必須要修改為當前決路徑
import csi_tool

In [2]:
# Directory holding one "reference_point_<id>.xlsx" workbook per reference point.
# Set the CSI_DATASET_DIR environment variable to point somewhere else; the
# fallback is the original author's local path, so existing setups keep working.
base_path = os.environ.get(
    "CSI_DATASET_DIR",
    r"C:\Users\keng-tse\Desktop\csi_tool\csi_dataset\localization_phone\1123_phone\csv",
)

# Physical (x, y) position of every reference point, laid out on a 0.6 m grid.
spacing = 0.6  # grid pitch in metres
coordinates = {
    # Bottom edge (y = 0): point 1, then 40 down to 31, left to right
    1: (0, 0), 40: (0.6, 0), 39: (1.2, 0), 38: (1.8, 0), 37: (2.4, 0),
    36: (3.0, 0), 35: (3.6, 0), 34: (4.2, 0), 33: (4.8, 0), 32: (5.4, 0), 31: (6.0, 0),

    # Left edge (x = 0): points 2-11, bottom to top
    2: (0, 0.6), 3: (0, 1.2), 4: (0, 1.8), 5: (0, 2.4),
    6: (0, 3.0), 7: (0, 3.6), 8: (0, 4.2), 9: (0, 4.8), 10: (0, 5.4), 11: (0, 6.0),

    # Top edge (y = 6.0): points 12-21, left to right
    12: (0.6, 6.0), 13: (1.2, 6.0), 14: (1.8, 6.0), 15: (2.4, 6.0),
    16: (3.0, 6.0), 17: (3.6, 6.0), 18: (4.2, 6.0), 19: (4.8, 6.0),
    20: (5.4, 6.0), 21: (6.0, 6.0),

    # Right edge (x = 6.0): points 22-30, top to bottom
    22: (6.0, 5.4), 23: (6.0, 4.8), 24: (6.0, 4.2), 25: (6.0, 3.6),
    26: (6.0, 3.0), 27: (6.0, 2.4), 28: (6.0, 1.8), 29: (6.0, 1.2), 30: (6.0, 0.6),

    # Centre column (x = 3.0): points 41-49, bottom to top
    41: (3.0, 0.6), 42: (3.0, 1.2), 43: (3.0, 1.8),
    44: (3.0, 2.4), 45: (3.0, 3.0), 46: (3.0, 3.6),
    47: (3.0, 4.2), 48: (3.0, 4.8), 49: (3.0, 5.4)
}

# Map each workbook path to its (reference point ID, (x, y)) pair.
reference_points = {
    os.path.join(base_path, f"reference_point_{ref_id}.xlsx"): (ref_id, coord)
    for ref_id, coord in coordinates.items()
}

# Print the resulting mapping.
for path, (ref_id, coord) in reference_points.items():
    print(f"Path: {path}, ID: {ref_id}, Coordinates: {coord}")



Path: C:\Users\keng-tse\Desktop\csi_tool\csi_dataset\localization_phone\1123_phone\csv\reference_point_1.xlsx, ID: 1, Coordinates: (0, 0)
Path: C:\Users\keng-tse\Desktop\csi_tool\csi_dataset\localization_phone\1123_phone\csv\reference_point_40.xlsx, ID: 40, Coordinates: (0.6, 0)
Path: C:\Users\keng-tse\Desktop\csi_tool\csi_dataset\localization_phone\1123_phone\csv\reference_point_39.xlsx, ID: 39, Coordinates: (1.2, 0)
Path: C:\Users\keng-tse\Desktop\csi_tool\csi_dataset\localization_phone\1123_phone\csv\reference_point_38.xlsx, ID: 38, Coordinates: (1.8, 0)
Path: C:\Users\keng-tse\Desktop\csi_tool\csi_dataset\localization_phone\1123_phone\csv\reference_point_37.xlsx, ID: 37, Coordinates: (2.4, 0)
Path: C:\Users\keng-tse\Desktop\csi_tool\csi_dataset\localization_phone\1123_phone\csv\reference_point_36.xlsx, ID: 36, Coordinates: (3.0, 0)
Path: C:\Users\keng-tse\Desktop\csi_tool\csi_dataset\localization_phone\1123_phone\csv\reference_point_35.xlsx, ID: 35, Coordinates: (3.6, 0)
Path: C:\U

In [3]:
def load_data(reference_points):
    """Read every reference point workbook and stack the rows into one table.

    Parameters
    ----------
    reference_points : dict
        Maps a workbook path to a ``(ref_id, (x, y))`` tuple.

    Returns
    -------
    data : pandas.DataFrame
        Rows of all workbooks stacked vertically, in dictionary order.
    rp_labels : pandas.Series
        Reference point ID of each row, named "Reference Point ID".
    coord_labels : pandas.DataFrame
        (x, y) coordinate of each row, in columns "X" and "Y".
    """
    matrices = []      # one 2-D value array per workbook
    point_ids = []     # reference point ID, repeated once per row
    positions = []     # (x, y) tuple, repeated once per row

    for xlsx_path, (point_id, position) in reference_points.items():
        sheet = pd.read_excel(xlsx_path)
        n_rows = len(sheet)
        matrices.append(sheet.values)
        point_ids += [point_id] * n_rows
        positions += [position] * n_rows

    data = pd.DataFrame(np.vstack(matrices))
    rp_labels = pd.Series(point_ids, name="Reference Point ID")
    coord_labels = pd.DataFrame(positions, columns=["X", "Y"])
    return data, rp_labels, coord_labels

In [4]:
# Read every reference point workbook into one feature table plus its
# reference point ID labels and (x, y) coordinate labels.
data, rp_labels, coord_labels = load_data(reference_points)

In [5]:
# Display the combined feature table.
data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,86,87,88,89,90,91,92,93,94,95
0,248.364651,256.124969,258.520792,253.314034,246.876487,256.204996,277.584221,327.519465,365.492818,397.498428,...,-35.213569,-36.152671,-37.077824,-37.996585,-38.933101,-40.842190,-41.843853,-42.856956,-43.895749,-44.971859
1,408.274418,398.898483,393.794363,392.475477,405.080239,448.487458,472.366383,504.602814,534.059922,571.423661,...,-0.674907,-0.828299,-0.995178,-1.181195,-1.373580,-1.797259,-2.035996,-2.299030,-2.606457,-2.968883
2,309.079278,307.071653,288.716124,299.808272,287.758579,350.570963,386.462159,460.739623,490.929730,562.616210,...,-14.295734,-14.775544,-15.301622,-15.811853,-16.313733,-17.392656,-17.952935,-18.538355,-19.145090,-19.785700
3,307.102589,308.311531,289.613536,304.095380,302.735858,358.611210,398.339052,420.685155,427.566369,443.878362,...,1.274214,1.095487,0.911706,0.742449,0.587687,0.229569,0.033436,-0.191633,-0.485279,-0.828462
4,260.762344,257.650927,246.237690,245.601303,254.420911,307.722277,341.422026,404.754246,447.115198,493.725632,...,-19.952661,-20.604677,-21.228961,-21.879303,-22.517103,-23.837371,-24.523261,-25.244138,-25.959707,-26.682238
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21196,345.861244,345.897384,333.654012,323.185705,309.594897,322.131961,317.327906,322.913301,321.790304,307.675153,...,-0.618815,-0.809990,-1.062486,-1.289537,-1.523995,-2.035031,-2.324250,-2.644683,-3.005345,-3.417424
21197,247.745838,250.139961,241.745734,228.186327,231.387122,236.596703,241.478778,267.867505,277.908258,289.006920,...,0.328276,0.120954,-0.105349,-0.334395,-0.546340,-1.022160,-1.306454,-1.607319,-1.931079,-2.298385
21198,361.459541,355.827205,342.803151,330.541979,320.053121,324.024690,331.670017,329.645264,319.075226,311.236245,...,-2.602122,-2.773536,-2.917864,-3.072894,-3.236217,-3.594053,-3.806214,-4.042126,-4.313407,-4.639725
21199,225.621364,219.692968,218.586367,205.975727,209.313163,199.902476,215.176672,236.592477,253.639508,246.594809,...,-2.036794,-2.292204,-2.523448,-2.727684,-2.976168,-3.516047,-3.814645,-4.182563,-4.521496,-4.891739


In [13]:
# Split the feature table: the first 48 columns are amplitude, the rest are phase.
amplitude_data, phase_data = data.iloc[:, :48], data.iloc[:, 48:]

# Feature matrix: amplitude columns followed by phase columns.
X = pd.concat([amplitude_data, phase_data], axis=1)

# Classification target (reference point ID) and regression target ((x, y) coordinates).
y_class, y_reg = rp_labels, coord_labels


In [39]:
def preprocess_data(data, labels, test_size=0.2, random_state=42):
    """Split the amplitude features into train/test sets and standardise them.

    Only the first 48 columns of ``data`` (the amplitude columns) are used as
    features; the remaining phase columns are ignored.

    The split is performed *before* scaling and the scaler is fitted on the
    training rows only. Fitting it on the full table, as the previous version
    did, lets test-set statistics leak into the training features.

    Parameters
    ----------
    data : pandas.DataFrame
        Feature table whose first 48 columns are amplitude values.
    labels : array-like
        One label per row of ``data``.
    test_size : float, optional
        Fraction of rows held out for testing (default 0.2).
    random_state : int, optional
        Seed passed to ``train_test_split`` (default 42).

    Returns
    -------
    X_train, X_test : numpy.ndarray
        Standardised amplitude features of the train and test rows.
    y_train, y_test
        Labels of the train and test rows, as returned by ``train_test_split``.
    """
    amplitude_data = data.iloc[:, :48]

    # Split first so the scaler never sees the held-out rows.
    amp_train, amp_test, y_train, y_test = train_test_split(
        amplitude_data, labels, test_size=test_size, random_state=random_state
    )

    # Learn mean/variance from the training rows, then apply them to both splits.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(amp_train)
    X_test = scaler.transform(amp_test)
    return X_train, X_test, y_train, y_test


In [42]:
# Build the train/test split of the preprocessed features.
X_train, X_test, y_train, y_test = preprocess_data(data, rp_labels)

# Fit a 3-nearest-neighbour classifier on the training split.
knn = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)

# Predict reference point IDs for the held-out split.
y_pred = knn.predict(X_test)

# Report the confusion matrix and per-class metrics.
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))


[[67  0  0 ...  0  0  0]
 [ 0 96  0 ...  0  0  0]
 [ 0  0 90 ...  0  0  0]
 ...
 [ 0  0  0 ... 67  0  0]
 [ 0  0  0 ...  1 87  0]
 [ 0  0  0 ...  0  0 78]]
              precision    recall  f1-score   support

           1       0.99      0.96      0.97        70
           2       0.94      0.97      0.96        99
           3       0.92      0.99      0.95        91
           4       0.92      1.00      0.96        98
           5       0.94      0.99      0.97        86
           6       0.85      0.87      0.86        95
           7       0.84      0.92      0.88        83
           8       0.85      0.97      0.91        79
           9       0.80      0.81      0.81       106
          10       0.83      0.93      0.88        92
          11       0.88      0.83      0.85        84
          12       0.88      0.93      0.91        98
          13       0.98      0.93      0.95        85
          14       0.79      0.92      0.85        89
          15       0.90      0.76

In [34]:
from sklearn.preprocessing import LabelEncoder

xgb_model = xgb.XGBClassifier()

# Encode the original labels in y_train before handing them to XGBoost.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y_train)  # encoded y_train

# Train the model on the encoded labels.
xgb_model.fit(X_train, y_encoded)

# Predict, then map the encoded predictions back to the original labels.
# NOTE(review): this rebinds y_pred, which the KNN classification cell also
# sets and the distance cell reads, so that metric depends on which cell ran last.
y_pred_encoded = xgb_model.predict(X_test)
y_pred = label_encoder.inverse_transform(y_pred_encoded)  # back to original labels

# Compute the accuracy.
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Show the classification report and confusion matrix.
print("Classification Report:")
print(classification_report(y_test, y_pred))
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))


Accuracy: 89.29%
Classification Report:
              precision    recall  f1-score   support

           1       1.00      0.93      0.96        70
           2       0.94      0.97      0.96        99
           3       0.94      0.97      0.95        91
           4       0.95      0.98      0.96        98
           5       0.93      0.95      0.94        86
           6       0.91      0.82      0.86        95
           7       0.91      0.90      0.91        83
           8       0.91      0.99      0.95        79
           9       0.86      0.81      0.83       106
          10       0.96      0.93      0.95        92
          11       0.90      0.86      0.88        84
          12       0.87      0.92      0.89        98
          13       0.95      0.98      0.97        85
          14       0.83      0.91      0.87        89
          15       0.86      0.74      0.79        80
          16       0.93      0.89      0.91        88
          17       0.95      0.99      0.

In [48]:
def calculate_distance(predicted_label, true_label, coords=None):
    """Return the Euclidean distance between two reference points.

    Parameters
    ----------
    predicted_label, true_label : hashable
        Reference point IDs, looked up as keys of ``coords``.
    coords : mapping, optional
        Maps a reference point ID to its ``(x, y)`` position. Defaults to the
        notebook-level ``coordinates`` dictionary.

    Returns
    -------
    float
        Distance between the two positions, in the units of ``coords``.

    Raises
    ------
    KeyError
        If either label is missing from ``coords``. The previous version
        silently substituted (0, 0), which is itself the position of a real
        reference point, so an unknown label yielded a plausible but wrong
        distance instead of an error.
    """
    if coords is None:
        coords = coordinates
    try:
        pred_x, pred_y = coords[predicted_label]
        true_x, true_y = coords[true_label]
    except KeyError as exc:
        raise KeyError(f"Unknown reference point label: {exc.args[0]!r}") from None

    return np.hypot(pred_x - true_x, pred_y - true_y)

In [43]:


# Localisation error of every test sample: distance between the predicted and
# the true reference point (uses the y_pred / y_test currently in scope).
distances = [
    calculate_distance(pred_label, true_label)
    for pred_label, true_label in zip(y_pred, y_test)
]

# Mean error over all predictions.
average_distance = np.mean(distances)
print(f"Average distance between predicted and true locations: {average_distance:.2f} m")

Average distance between predicted and true locations: 0.42 m


In [46]:
# Check the column names of the coordinate label frame.
print(coord_labels.columns)

Index(['X', 'Y'], dtype='object')


In [51]:
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error

# Regression targets: the 'X' and 'Y' columns of the coordinate labels.
y_reg_x = coord_labels['X']
y_reg_y = coord_labels['Y']

# One shared split so the features and both targets stay row-aligned.
# NOTE: this rebinds X_train / X_test, which the classification cells also set.
(X_train, X_test,
 y_train_x, y_test_x,
 y_train_y, y_test_y) = train_test_split(X, y_reg_x, y_reg_y, test_size=0.2, random_state=42)

# Fit one 5-nearest-neighbour regressor per coordinate axis.
knn_reg_x = KNeighborsRegressor(n_neighbors=5).fit(X_train, y_train_x)
knn_reg_y = KNeighborsRegressor(n_neighbors=5).fit(X_train, y_train_y)

# Predict both coordinates for the held-out rows.
y_pred_x = knn_reg_x.predict(X_test)
y_pred_y = knn_reg_y.predict(X_test)

# Per-axis root mean squared error.
rmse_x = np.sqrt(mean_squared_error(y_test_x, y_pred_x))
rmse_y = np.sqrt(mean_squared_error(y_test_y, y_pred_y))

print(f"RMSE for x-coordinate: {rmse_x}")
print(f"RMSE for y-coordinate: {rmse_y}")

# Overall error: distance between each predicted point and its true point.
distance_error = np.sqrt((y_pred_x - y_test_x) ** 2 + (y_pred_y - y_test_y) ** 2)
mean_distance_error = np.mean(distance_error)

print(f"Mean distance error: {mean_distance_error}")

RMSE for x-coordinate: 0.9202517519465109
RMSE for y-coordinate: 0.8415225195488264
Mean distance error: 0.534238505830063


In [54]:
# Print the predicted x / y coordinates, then the true test-set targets.
print(y_pred_x, y_pred_y)
print(y_test_x, y_test_y)

[1.68 4.8  6.   ... 3.   3.   6.  ] [4.2  2.28 0.6  ... 5.4  5.4  5.28]
6224     0.0
16205    6.0
17099    6.0
9892     1.8
8487     0.0
        ... 
8337     0.0
16539    6.0
20901    3.0
21036    3.0
13420    6.0
Name: X, Length: 4241, dtype: float64 6224     3.0
16205    1.8
17099    0.6
9892     6.0
8487     6.0
        ... 
8337     5.4
16539    1.8
20901    5.4
21036    5.4
13420    5.4
Name: Y, Length: 4241, dtype: float64
