In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, concatenate, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import LeakyReLU 


# Input CSV locations
train_path = '/content/drive/MyDrive/Toa_sort.csv'
test_path = '/content/drive/MyDrive/Toa_test_sort.csv'

# Targets: the two coordinate columns
output_cols = ['re_x', 're_y']

# Input features: CSI_1 .. CSI_128
csi_cols = [f'CSI_{k}' for k in range(1, 128 + 1)]

# Column layout of the header-less CSVs: coordinates, anchor id, TOA, then the CSI values
columns = output_cols + ['anchor_id', 'TOA'] + csi_cols

# Read both files with the same layout (train first, then test)
df_train, df_test = (
    pd.read_csv(csv_path, header=None, names=columns)
    for csv_path in (train_path, test_path)
)

# --- Grouping: every 8 consecutive rows become one sample (non-overlapping, label = last row) ---
group_size = 8
step = 8  # non-overlapping

# ---------------------------------------------------------
# 1. Preprocessing (CSI + TOA) - the anchor ID column is not used
# Trailing rows that do not fill a whole group of `step` rows are dropped
# ---------------------------------------------------------

# Training data: number of complete groups and the row count they cover
n_groups = len(df_train) // step
n_full = n_groups * step

# Raw per-row arrays
Xv_csi = df_train[csi_cols].values
Xv_toa = df_train['TOA'].values
yv = df_train[output_cols].values

# Keep only the rows that belong to a complete group
Xv_csi_cut, Xv_toa_cut, yv_cut = Xv_csi[:n_full], Xv_toa[:n_full], yv[:n_full]

# CSI: (n_groups, step, n_csi, 1) - the trailing axis is the channel axis for Conv2D
X_groups_csi = Xv_csi_cut.reshape(n_groups, step, len(csi_cols), 1)

# TOA: (n_groups, step)
X_groups_toa = Xv_toa_cut.reshape(n_groups, step)

# Label: coordinates of the last row of every group (rows step-1, 2*step-1, ...)
y_group_labels = yv_cut[step - 1::step]

# Test data: same grouping as the training data
n_groups_test = len(df_test) // step
n_full_test = n_groups_test * step

# Raw per-row arrays
Xv_test_csi = df_test[csi_cols].values
Xv_test_toa = df_test['TOA'].values
yv_test = df_test[output_cols].values

# Drop the trailing rows that do not fill a complete group
Xv_test_csi_cut = Xv_test_csi[:n_full_test]
Xv_test_toa_cut = Xv_test_toa[:n_full_test]
yv_test_cut = yv_test[:n_full_test]

# CSI -> (n_groups_test, step, n_csi, 1); TOA -> (n_groups_test, step)
X_groups_test_csi = Xv_test_csi_cut.reshape(n_groups_test, step, len(csi_cols), 1)
X_groups_test_toa = Xv_test_toa_cut.reshape(n_groups_test, step)

# Label: coordinates of the last row of every group
y_groups_test = yv_test_cut[step - 1::step]

# ---------------------------------------------------------
# 2. Normalization (scaling)
# ---------------------------------------------------------

def _standardize_rows(values):
    """Z-score every row of a 2-D array using that row's own mean and std.

    Args:
        values: 2-D array; the statistics are taken along axis 1.

    Returns:
        Tuple ``(scaled, mean, std)`` where ``mean`` and ``std`` have shape
        ``(n_rows, 1)``. Rows with zero spread get ``std = 1.0`` so they scale
        to all zeros instead of dividing by zero.
    """
    mean = np.mean(values, axis=1, keepdims=True)
    std = np.std(values, axis=1, keepdims=True)
    std[std == 0] = 1.0
    return (values - mean) / std, mean, std


# Alias the grouped arrays under the train/test names used from here on
X_train_csi = X_groups_csi
X_test_csi = X_groups_test_csi
X_train_toa = X_groups_toa
X_test_toa = X_groups_test_toa

# CSI: log1p compression, then division by the largest training magnitude
X_train_csi = np.log1p(X_train_csi)
X_test_csi = np.log1p(X_test_csi)

max_val = np.max(np.abs(X_train_csi))

# log1p returns NaN for inputs below -1 and -inf at exactly -1, and NaNs already
# present in the CSV pass straight through. A NaN/inf maximum would slip past
# the zero guard below and turn every scaled value into NaN, so fail loudly here.
if not np.isfinite(max_val):
    raise ValueError(
        "Training CSI contains NaN/inf after log1p "
        "(raw CSI values must be finite and greater than -1)."
    )
if not np.all(np.isfinite(X_test_csi)):
    raise ValueError(
        "Test CSI contains NaN/inf after log1p "
        "(raw CSI values must be finite and greater than -1)."
    )

# All-zero training CSI: avoid dividing by zero
if max_val == 0:
    max_val = 1.0

# The test set is scaled with the training maximum
X_train_csi_scaled = X_train_csi / max_val
X_test_csi_scaled = X_test_csi / max_val

# TOA: each group is standardized across its own `step` values (axis=1), so only
# the relative TOA pattern inside a group is kept.
X_train_toa_scaled, toa_mean_tr, toa_std_tr = _standardize_rows(X_train_toa)
X_test_toa_scaled, toa_mean_te, toa_std_te = _standardize_rows(X_test_toa)

# Labels: standardize with statistics fitted on the training labels only
scaler_y = StandardScaler()
y_train_scaled = scaler_y.fit_transform(y_group_labels)
y_test_scaled = scaler_y.transform(y_groups_test)

# ---------------------------------------------------------
# 3. Multi-input model (CSI + TOA)
# ---------------------------------------------------------

def _leaky(tensor):
    """Apply a LeakyReLU activation with negative slope 0.1."""
    return LeakyReLU(alpha=0.1)(tensor)


def _conv_stage(tensor, filters, downsample=True):
    """3x3 'same' convolution -> LeakyReLU -> BatchNorm, optionally followed by
    2x2 max-pooling and Dropout(0.3)."""
    tensor = Conv2D(filters, (3, 3), padding='same')(tensor)
    tensor = _leaky(tensor)
    tensor = BatchNormalization()(tensor)
    if downsample:
        tensor = MaxPooling2D((2, 2))(tensor)
        tensor = Dropout(0.3)(tensor)
    return tensor


# Branch 1: CSI (CNN) on an (8, 128, 1) input
input_csi = Input(shape=(8, 128, 1), name='input_csi')

x = _conv_stage(input_csi, 256)            # step 1: expand to 256 channels, pooled to 4 x 64
x = _conv_stage(x, 128)                    # step 2: reduce to 128 channels, pooled to 2 x 32
x = _conv_stage(x, 64, downsample=False)   # step 3: reduce to 64 channels, no pooling/dropout

x = Flatten()(x)
x = Dense(128)(x)
x = _leaky(x)

# Branch 2: TOA (dense) - meant to capture distance/delay information
input_toa = Input(shape=(8,), name='input_toa')
y = Dense(32)(input_toa)
y = _leaky(y)
y = BatchNormalization()(y)
y = Dense(16)(y)
y = _leaky(y)

# Merge the CSI and TOA embeddings
combined = concatenate([x, y])

# Joint processing
w = Dense(128)(combined)
w = _leaky(w)
w = Dropout(0.3)(w)

# Linear output layer (no activation) because the targets are coordinates
output = Dense(2, name='output')(w)

model = Model(inputs=[input_csi, input_toa], outputs=output)

model.compile(optimizer=Adam(learning_rate=0.001), loss='mse', metrics=['mae'])
model.summary()

# ---------------------------------------------------------
# 4. Model training
# ---------------------------------------------------------
# Keras' `validation_split` takes the validation set from the LAST rows of the
# input arrays, before any shuffling. The samples here are built from consecutive
# rows of a file that appears to be sorted (Toa_sort.csv - TODO confirm the sort
# key), so that tail slice would be one contiguous chunk of the data rather than a
# representative sample. Draw a seeded random hold-out of the same size instead.
VAL_FRACTION = 0.2
SPLIT_SEED = 42

# NOTE: train_test_split returns shuffled copies, so the CSI tensor is held in
# memory a second time while these arrays are alive.
(csi_fit, csi_val,
 toa_fit, toa_val,
 y_fit, y_val) = train_test_split(
    X_train_csi_scaled,
    X_train_toa_scaled,
    y_train_scaled,
    test_size=VAL_FRACTION,
    random_state=SPLIT_SEED,
    shuffle=True,
)

history = model.fit(
    [csi_fit, toa_fit],
    y_fit,
    epochs=40,
    batch_size=128,
    validation_data=([csi_val, toa_val], y_val),
    verbose=1,
)

# ---------------------------------------------------------
# 5. Evaluation and prediction
# ---------------------------------------------------------
from sklearn.metrics import mean_squared_error

test_inputs = [X_test_csi_scaled, X_test_toa_scaled]

# Loss (MSE) and MAE are measured against the standardized labels,
# so they are in scaled units, not coordinate units.
loss, mae = model.evaluate(test_inputs, y_test_scaled, verbose=0)
print(f'Test Loss: {loss:.4f}, Test MAE: {mae:.4f}')

# Predict, then map predictions and targets back to the original coordinate scale
predictions = model.predict(test_inputs)
predictions_original = scaler_y.inverse_transform(predictions)
actual_original = scaler_y.inverse_transform(y_test_scaled)

# RMSE in original units. mean_squared_error averages over both coordinates by
# default, so this is a per-coordinate RMSE rather than a Euclidean-distance RMSE.
mse_original = mean_squared_error(actual_original, predictions_original)
rmse = np.sqrt(mse_original)
print(f'RMSE: {rmse:.4f}')

# Save the trained model
model.save('Final_model.h5')
print("모델이 'Final_model.h5'로 저장되었습니다.")

Loading Data...
Total Train Rows: 890216
Total Test Rows:  79768
Total Train Rows: 890216
Total Test Rows:  79768

[Processing Complete]
Train Samples (Groups): 111277 (= 890216 rows)
Test Samples (Groups):  9971 (= 79768 rows)
Applying Advanced CSI Correction (Smoothing + Noise Removal)...

[Processing Complete]
Train Samples (Groups): 111277 (= 890216 rows)
Test Samples (Groups):  9971 (= 79768 rows)
Applying Advanced CSI Correction (Smoothing + Noise Removal)...


In [None]:
# ============================================
# 10. CDF of Positioning Errors
# ============================================
import numpy as np
import matplotlib.pyplot as plt

# Per-sample positioning error: Euclidean distance between target and prediction
diff = actual_original - predictions_original
errors = np.sqrt(diff[:, 0]**2 + diff[:, 1]**2)

# Empirical CDF: the i-th smallest error maps to probability i / n
sorted_errors = np.sort(errors)
n_errors = len(sorted_errors)
cdf = np.arange(1, n_errors + 1) / n_errors

# Plot
fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(sorted_errors, cdf, label='CDF of Errors')
ax.set_xlabel('Positioning Error (m)')
ax.set_ylabel('Cumulative Probability')
ax.set_title('CDF of Positioning Errors')
ax.grid(True)
ax.legend()
plt.show()

# Summary statistics
print(f"Mean Error: {np.mean(errors):.4f} m")
print(f"Median Error: {np.median(errors):.4f} m")
print(f"90th Percentile Error: {np.percentile(errors, 90):.4f} m")