In [56]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression, PassiveAggressiveRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import SGDRegressor
import joblib
import json
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Input
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

loop = 1

all_mde = []
all_accuracy = []

best_mde = float('inf')  # 初始化最佳 MDE


# 1. 先 load 原始 CSV，拿出特徵以外的「Label＋真實座標」
coords_df = pd.read_csv(
    "ESP32C3_processed_for_trainging.csv",
    usecols=["Label","X Position (meters)","Y Position (meters)"]
)

df_reg = pd.read_csv("ESP32C3_processed_for_trainging.csv")

print(df_reg.head())


### 1. 訓練 regressor 
df_reg = pd.read_csv("ESP32C3_processed_for_trainging.csv")

# 篩選有有效 distance 的資料
ap1_data = df_reg[['AP1_Rssi', 'AP1_Distance (mm)']].dropna().rename(
    columns={'AP1_Rssi': 'Rssi', 'AP1_Distance (mm)': 'Distance'}
)

ap2_data = df_reg[['AP2_Rssi', 'AP2_Distance (mm)']].dropna().rename(
    columns={'AP2_Rssi': 'Rssi', 'AP2_Distance (mm)': 'Distance'}
)

ap3_data = df_reg[['AP3_Rssi', 'AP3_Distance (mm)']].dropna().rename(
    columns={'AP3_Rssi': 'Rssi', 'AP3_Distance (mm)': 'Distance'}
)

ap4_data = df_reg[['AP4_Rssi', 'AP4_Distance (mm)']].dropna().rename(
    columns={'AP4_Rssi': 'Rssi', 'AP4_Distance (mm)': 'Distance'}
)

train_data_reg = pd.concat([ap1_data,ap2_data,ap3_data,ap4_data], ignore_index=True)
X_train_reg = train_data_reg[['Rssi']]
y_train_reg = train_data_reg['Distance']

### 訓練 regressor
# PassiveAggressiveRegressor 
model_reg = SGDRegressor(tol=1e-4, penalty="l2", max_iter=5000,alpha= 0.0001, eta0= 0.1, learning_rate= 'optimal', loss= 'huber')
model_reg.fit(X_train_reg, y_train_reg)
print("Regressor trained. Coefficient:", model_reg.coef_, "Intercept:", model_reg.intercept_)

   Label  X Position (meters)  Y Position (meters)  AP1_Distance (mm)  \
0      0                -0.41                15.69           46255.65   
1      0                -0.41                15.69           46255.20   
2      0                -0.41                15.69           46254.60   
3      0                -0.41                15.69           46254.15   
4      0                -0.41                15.69           46253.70   

   AP2_Distance (mm)  AP3_Distance (mm)  AP4_Distance (mm)  AP5_Distance (mm)  \
0           20295.90           46323.00           42110.25           49340.85   
1           20296.05           46324.80           42109.80           49344.00   
2           20296.35           46326.75           42109.20           49347.30   
3           20296.50           46328.40           42108.75           49350.45   
4           20296.65           46330.20           42108.30           49353.60   

   AP6_Distance (mm)  AP7_Distance (mm)  AP8_Distance (mm)  AP1_Rssi  \
0 

In [57]:
### 2. 讀取 DNN 需要的資料
# DNN 使用的原始欄位
selected_columns = ['Label',
                    'AP1_Distance (mm)','AP2_Distance (mm)','AP3_Distance (mm)','AP4_Distance (mm)',
                    'AP1_Rssi', 'AP2_Rssi', 'AP3_Rssi', 'AP4_Rssi','AP5_Rssi', 'AP6_Rssi', 'AP7_Rssi', 'AP8_Rssi']

file_path = "ESP32C3_processed_for_trainging.csv"
data = pd.read_csv(file_path, usecols=selected_columns)
print("原始資料預覽：")
print(data.head())

label_column = 'Label'
# 這邊簡單用全部資料當作 processed_data，你也可以根據需要刪除前後 n 筆資料等
processed_data = data.copy()

data_imputed = processed_data

### 3. 利用 regressor 擴充 AP 2 3 的 Distance 預測值
# 建立新欄位，初值設定為 NaN
data_imputed['AP5_Distance_predicted'] = np.nan
data_imputed['AP6_Distance_predicted'] = np.nan
data_imputed['AP7_Distance_predicted'] = np.nan
data_imputed['AP8_Distance_predicted'] = np.nan

# # 利用 AP1_Rssi 預測 AP1_Distance_predicted
mask_ap5 = data_imputed['AP5_Rssi'].notna()
data_imputed.loc[mask_ap5, 'AP5_Distance_predicted'] = model_reg.predict(
    data_imputed.loc[mask_ap5, ['AP5_Rssi']].rename(columns={'AP5_Rssi': 'Rssi'})
)

# 利用 AP2_Rssi 預測 AP2_Distance_predicted
mask_ap6 = data_imputed['AP6_Rssi'].notna()
data_imputed.loc[mask_ap6, 'AP6_Distance_predicted'] = model_reg.predict(
    data_imputed.loc[mask_ap6, ['AP6_Rssi']].rename(columns={'AP6_Rssi': 'Rssi'})
)

# 利用 AP3_Rssi 預測 AP3_Distance_predicted
mask_ap7 = data_imputed['AP7_Rssi'].notna()
data_imputed.loc[mask_ap7, 'AP7_Distance_predicted'] = model_reg.predict(
    data_imputed.loc[mask_ap7, ['AP7_Rssi']].rename(columns={'AP7_Rssi': 'Rssi'})
)

# # 利用 AP4_Rssi 預測 AP4_Distance_predicted
mask_ap8 = data_imputed['AP8_Rssi'].notna()
data_imputed.loc[mask_ap8, 'AP8_Distance_predicted'] = model_reg.predict(
    data_imputed.loc[mask_ap8, ['AP8_Rssi']].rename(columns={'AP8_Rssi': 'Rssi'})
)

# 更新 DNN 模型用的特徵欄位，將 regressor 預測值加入
selected_columns_dnn = selected_columns + ['AP5_Distance_predicted', 'AP6_Distance_predicted' ,'AP7_Distance_predicted','AP8_Distance_predicted']

### 4. 後續 DNN 資料準備與訓練
print("每個 RP 的資料筆數: " + str(len(data_imputed)/42))

# 以下部分依照原有 DNN code 執行
target_column = 'Label'
y = data_imputed[target_column]

原始資料預覽：
   Label  AP1_Distance (mm)  AP2_Distance (mm)  AP3_Distance (mm)  \
0      0           46255.65           20295.90           46323.00   
1      0           46255.20           20296.05           46324.80   
2      0           46254.60           20296.35           46326.75   
3      0           46254.15           20296.50           46328.40   
4      0           46253.70           20296.65           46330.20   

   AP4_Distance (mm)  AP1_Rssi  AP2_Rssi  AP3_Rssi  AP4_Rssi  AP5_Rssi  \
0           42110.25       -67       -36       -69       -71       -75   
1           42109.80       -67       -36       -69       -71       -75   
2           42109.20       -67       -36       -69       -71       -75   
3           42108.75       -67       -36       -69       -71       -75   
4           42108.30       -67       -36       -69       -71       -75   

   AP6_Rssi  AP7_Rssi  AP8_Rssi  
0       -60       -55       -71  
1       -60       -55       -72  
2       -60       -55       -7

In [58]:
# 取出作為模型輸入的特徵，這邊不含 Label 欄位
X = data_imputed[selected_columns_dnn].drop(columns=['Label'])
print("擴充後的特徵資料預覽：")
print(X.head())

print("*******************所有特徵欄位*******************：")
print(list(X.columns))

# 標準化 and Save
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

print("標準化後的特徵陣列：")
print(X_scaled)

擴充後的特徵資料預覽：
   AP1_Distance (mm)  AP2_Distance (mm)  AP3_Distance (mm)  AP4_Distance (mm)  \
0           46255.65           20295.90           46323.00           42110.25   
1           46255.20           20296.05           46324.80           42109.80   
2           46254.60           20296.35           46326.75           42109.20   
3           46254.15           20296.50           46328.40           42108.75   
4           46253.70           20296.65           46330.20           42108.30   

   AP1_Rssi  AP2_Rssi  AP3_Rssi  AP4_Rssi  AP5_Rssi  AP6_Rssi  AP7_Rssi  \
0       -67       -36       -69       -71       -75       -60       -55   
1       -67       -36       -69       -71       -75       -60       -55   
2       -67       -36       -69       -71       -75       -60       -55   
3       -67       -36       -69       -71       -75       -60       -55   
4       -67       -36       -69       -71       -75       -60       -55   

   AP8_Rssi  AP5_Distance_predicted  AP6_Distance_

In [59]:
from sklearn.model_selection import train_test_split

print(selected_columns_dnn)


aaa = selected_columns_dnn.copy()
aaa.remove('Label')

# 加上 label 欄位
data = pd.DataFrame(X_scaled, columns=aaa)
data['label'] = y

# 對每個 label 做 7:1:2 的分割
train_parts = []
val_parts = []
test_parts = []

data


['Label', 'AP1_Distance (mm)', 'AP2_Distance (mm)', 'AP3_Distance (mm)', 'AP4_Distance (mm)', 'AP1_Rssi', 'AP2_Rssi', 'AP3_Rssi', 'AP4_Rssi', 'AP5_Rssi', 'AP6_Rssi', 'AP7_Rssi', 'AP8_Rssi', 'AP5_Distance_predicted', 'AP6_Distance_predicted', 'AP7_Distance_predicted', 'AP8_Distance_predicted']


Unnamed: 0,AP1_Distance (mm),AP2_Distance (mm),AP3_Distance (mm),AP4_Distance (mm),AP1_Rssi,AP2_Rssi,AP3_Rssi,AP4_Rssi,AP5_Rssi,AP6_Rssi,AP7_Rssi,AP8_Rssi,AP5_Distance_predicted,AP6_Distance_predicted,AP7_Distance_predicted,AP8_Distance_predicted,label
0,0.855585,-1.742775,0.798716,0.051351,-0.394237,1.961396,-0.566925,-0.120176,-1.298892,-0.096082,0.464286,-1.021286,1.298892,0.096082,-0.464286,1.021286,0
1,0.855541,-1.742761,0.798877,0.051302,-0.394237,1.961396,-0.566925,-0.120176,-1.298892,-0.096082,0.464286,-1.092205,1.298892,0.096082,-0.464286,1.092205,0
2,0.855483,-1.742732,0.799052,0.051236,-0.394237,1.961396,-0.566925,-0.120176,-1.298892,-0.096082,0.464286,-1.092205,1.298892,0.096082,-0.464286,1.092205,0
3,0.855439,-1.742718,0.799200,0.051187,-0.394237,1.961396,-0.566925,-0.120176,-1.298892,-0.096082,0.464286,-1.021286,1.298892,0.096082,-0.464286,1.021286,0
4,0.855396,-1.742704,0.799361,0.051138,-0.394237,1.961396,-0.566925,-0.120176,-1.298892,-0.096082,0.464286,-1.092205,1.298892,0.096082,-0.464286,1.092205,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34737,1.642830,-0.682084,-1.080480,0.846711,-1.653926,0.586978,0.983744,-1.348670,-1.298892,2.204814,-0.780153,0.042502,1.298892,-2.204814,0.780153,-0.042502,41
34738,1.643034,-0.682154,-1.080413,0.846892,-1.653926,0.586978,0.983744,-1.348670,-1.298892,2.204814,-0.780153,0.042502,1.298892,-2.204814,0.780153,-0.042502,41
34739,1.643238,-0.682252,-1.080332,0.847073,-1.653926,0.586978,0.983744,-1.348670,-1.298892,2.204814,-0.845649,0.042502,1.298892,-2.204814,0.845649,-0.042502,41
34740,1.643340,-0.682281,-1.080305,0.847171,-1.653926,0.586978,0.983744,-1.348670,-1.298892,2.204814,-0.780153,0.042502,1.298892,-2.204814,0.780153,-0.042502,41


In [60]:
for label, group in data.groupby('label'):
    train_val, test = train_test_split(group, test_size=0.2, random_state=42)
    train, val = train_test_split(train_val, test_size=0.125, random_state=42)
    train_parts.append(train)
    val_parts.append(val)
    test_parts.append(test)

# 合併所有 label 的資料
train_data = pd.concat(train_parts, ignore_index=True)
val_data = pd.concat(val_parts, ignore_index=True)
test_data = pd.concat(test_parts, ignore_index=True)

# 將特徵欄位加入欄名（selected_columns 去除 Label）
feature_names = selected_columns_dnn.copy()
feature_names.remove("Label")

print("Feature names")
print(feature_names)




Feature names
['AP1_Distance (mm)', 'AP2_Distance (mm)', 'AP3_Distance (mm)', 'AP4_Distance (mm)', 'AP1_Rssi', 'AP2_Rssi', 'AP3_Rssi', 'AP4_Rssi', 'AP5_Rssi', 'AP6_Rssi', 'AP7_Rssi', 'AP8_Rssi', 'AP5_Distance_predicted', 'AP6_Distance_predicted', 'AP7_Distance_predicted', 'AP8_Distance_predicted']


In [61]:
# 建立有欄名的 DataFrame 並補上 label
train_data_named = train_data[feature_names].copy()
train_data_named['label'] = train_data['label'].values

val_data_named = val_data[feature_names].copy()
val_data_named['label'] = val_data['label'].values

test_data_named = test_data[feature_names].copy()
test_data_named['label'] = test_data['label'].values

X_train, y_train = train_data[feature_names].values, train_data['label'].values
X_val, y_val = val_data[feature_names].values, val_data['label'].values
X_test, y_test = test_data[feature_names].values, test_data['label'].values

print("train data")
print(X_train)

# 計算每個 Set 內各 Label 的資料數量
train_label_counts = pd.Series(y_train).value_counts().sort_index()
val_label_counts = pd.Series(y_val).value_counts().sort_index()
test_label_counts = pd.Series(y_test).value_counts().sort_index()

# 確保所有 Labels 都有出現在三個 Set 裡
all_labels = sorted(set(train_label_counts.index) | set(val_label_counts.index) | set(test_label_counts.index))
label_distribution = pd.DataFrame(index=all_labels)
label_distribution["Training Set"] = train_label_counts
label_distribution["Validation Set"] = val_label_counts
label_distribution["Test Set"] = test_label_counts
label_distribution = label_distribution.fillna(0).astype(int)

from IPython.display import display
display(label_distribution)

train data
[[ 1.06108771 -1.65675249  1.06389224 ...  0.31874932 -0.52978248
   1.09220514]
 [ 0.75439356 -1.7148417   1.01472025 ...  0.39297176 -0.52978248
   1.09220514]
 [ 0.95419664 -1.70704582  1.1063834  ...  0.31874932 -0.52978248
   1.16312434]
 ...
 [ 1.55624424 -0.64852188 -1.11256653 ... -2.27903625  0.84564947
   0.02841708]
 [ 0.9471415  -0.30850016 -1.12280958 ... -2.3532587   0.84564947
  -0.04250212]
 [ 1.64151804 -0.68157701 -1.08097709 ... -2.20481381  0.84564947
  -0.04250212]]


Unnamed: 0,Training Set,Validation Set,Test Set
0,449,65,129
1,1144,164,328
2,954,137,273
3,529,76,152
4,529,76,152
5,475,68,136
6,493,71,142
7,524,75,150
8,486,70,139
9,591,85,170


In [72]:
print(len(X_train[0]))
print(y_train)

16
[ 0  0  0 ... 41 41 41]


In [62]:
import time
# 記錄開始時間
start_time = time.time()

from tensorflow import keras
from tensorflow.keras.callbacks import EarlyStopping

# 假設 X_train, y_train, X_val, y_val, X_test, y_test 已經切分好
# 建立 DNN 模型 (包含 BatchNormalization 與 Dropout)
model_dnn = keras.Sequential([
    Input(shape=(X_train.shape[1],)),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),
    Dense(256, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),
    Dense(42, activation='softmax')
])

# 可顯式設定學習率
optimizer = keras.optimizers.Adam(learning_rate=0.001)
model_dnn.compile(optimizer=optimizer,
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy'])

# 設定 EarlyStopping 以在驗證集不再改善時提前停止
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
# 如有需要也可以加入 ModelCheckpoint 保存最佳模型權重：
# checkpoint = ModelCheckpoint('best_dnn_model.h5', monitor='val_loss', save_best_only=True)

# 訓練模型 (設置較大 epoch 數並依 EarlyStopping 停止)
history = model_dnn.fit(X_train, y_train,
                    validation_data=(X_val, y_val),
                    epochs=10000,
                    batch_size=32,
                    verbose=1,
                    callbacks=[early_stop])

# 記錄結束時間
end_time = time.time()



training_time = end_time - start_time
print(f"訓練時間：{training_time:.2f} 秒")
mde_report_per_fold = {}

Epoch 1/10000
[1m759/759[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.7147 - loss: 1.0863 - val_accuracy: 0.9971 - val_loss: 0.0113
Epoch 2/10000
[1m759/759[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9809 - loss: 0.0788 - val_accuracy: 1.0000 - val_loss: 0.0018
Epoch 3/10000
[1m759/759[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9872 - loss: 0.0478 - val_accuracy: 0.9971 - val_loss: 0.0067
Epoch 4/10000
[1m759/759[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9903 - loss: 0.0345 - val_accuracy: 0.9954 - val_loss: 0.0142
Epoch 5/10000
[1m759/759[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9885 - loss: 0.0372 - val_accuracy: 1.0000 - val_loss: 6.3641e-04
Epoch 6/10000
[1m759/759[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9904 - loss: 0.0313 - val_accuracy: 0.9977 - val_loss: 0.0071
Epoch 

In [63]:
coords_test = coords_df.loc[
    test_data.index, 
    ["X Position (meters)", "Y Position (meters)"]
].values

In [64]:
# 5. 先用 DNN 預測出每筆的類別
y_logits       = model_dnn.predict(X_test)             # shape=(N, num_classes)
y_pred_classes = np.argmax(y_logits, axis=1)     

[1m218/218[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step  


In [70]:
# 直接用 DNN 模型評估 Test Set
loss, test_accuracy = model_dnn.evaluate(X_test, y_test, verbose=0)
print(f"整體 Test Accuracy: {test_accuracy:.4f}")


整體 Test Accuracy: 0.9994


In [65]:


label_center = (
    coords_df
    .groupby("Label")[["X Position (meters)","Y Position (meters)"]]
    .first()
    .to_dict("index")
)
# label_center[5] = {"X Position (meters)": ..., "Y Position (meters)": ...}

# 7. 把預測的 label 映到中心點座標上
y_pred_coords = np.array([
    ( label_center[l]["X Position (meters)"],
      label_center[l]["Y Position (meters)"] )
    for l in y_pred_classes
])

# 8. 計算每筆距離，取平均即 MDE
distances = np.linalg.norm(y_pred_coords - coords_test, axis=1)
avg_mde   = distances.mean()
print(f"MDE: {avg_mde:.4f} m")

MDE: 11.9685 m
