In [63]:
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler
import json
import os

# --- 參數設定 ---
TRAIN_DATE = '2024_12_14'
TEST_DATES = ["2024_12_21", "2024_12_27", "2025_01_03", "2025_01_10", "2025_02_28"]
N_REMOVE = 1
N_TRAIN = 300
N_NEIGHBORS = 5

FEATURE_COLS = [
    'AP1_Rssi', 'AP2_Rssi', 'AP3_Rssi', 'AP4_Rssi'
]
SELECTED_COLUMNS = ['Label'] + FEATURE_COLS
LABEL_COL = 'Label'

LABEL_TO_COORDINATES = {
    "1-1": (0, 0), "1-2": (0.6, 0), "1-3": (1.2, 0), "1-4": (1.8, 0), "1-5": (2.4, 0), "1-6": (3.0, 0),
    "1-7": (3.6, 0), "1-8": (4.2, 0), "1-9": (4.8, 0), "1-10": (5.4, 0), "1-11": (6.0, 0),
    "2-1": (0, 0.6), "2-11": (6.0, 0.6),
    "3-1": (0, 1.2), "3-11": (6.0, 1.2),
    "4-1": (0, 1.8), "4-11": (6.0, 1.8),
    "5-1": (0, 2.4), "5-11": (6.0, 2.4),
    "6-1": (0, 3.0), "6-2": (0.6, 3.0), "6-3": (1.2, 3.0), "6-4": (1.8, 3.0), "6-5": (2.4, 3.0),
    "6-6": (3.0, 3.0), "6-7": (3.6, 3.0), "6-8": (4.2, 3.0), "6-9": (4.8, 3.0), "6-10": (5.4, 3.0), "6-11": (6.0, 3.0),
    "7-1": (0, 3.6), "7-11": (6.0, 3.6),
    "8-1": (0, 4.2), "8-11": (6.0, 4.2),
    "9-1": (0, 4.8), "9-11": (6.0, 4.8),
    "10-1": (0, 5.4), "10-11": (6.0, 5.4),
    "11-1": (0, 6.0), "11-2": (0.6, 6.0), "11-3": (1.2, 6.0), "11-4": (1.8, 6.0), "11-5": (2.4, 6.0),
    "11-6": (3.0, 6.0), "11-7": (3.6, 6.0), "11-8": (4.2, 6.0), "11-9": (4.8, 6.0), "11-10": (5.4, 6.0), "11-11": (6.0, 6.0)
}

def preprocess_data(df, label_col, n_remove=1):
    df = df.sort_values(by=label_col).reset_index(drop=True)
    processed = []
    for label, group in df.groupby(label_col):
        if len(group) > 2 * n_remove:
            group = group.iloc[n_remove:-n_remove]
            processed.append(group)
    if processed:
        df = pd.concat(processed, ignore_index=True)
        df = df.groupby(label_col).apply(lambda group: group.fillna(group.mean()))
        df = df.reset_index(level=0)
        df = df.reset_index(drop=True)
    else:
        df = pd.DataFrame(columns=df.columns)
    return df

def get_coords(labels):
    return np.array([LABEL_TO_COORDINATES[l] for l in labels])

def pointwise_mde_func(y_true, dists):
    pointwise_mde = {}
    for label in np.unique(y_true):
        idx = np.where(np.array(y_true) == label)[0]
        if len(idx) > 0:
            pointwise_mde[label] = {
                "count": len(idx),
                "MDE": float(np.mean(dists[idx]))
            }
        else:
            pointwise_mde[label] = {
                "count": 0,
                "MDE": None
            }
    return pointwise_mde

# --- 載入並處理 base 週資料 ---
base_df = pd.read_csv(f'timestamp_allignment_Balanced_{TRAIN_DATE}_rtt_logs.csv', usecols=SELECTED_COLUMNS)
base_df = preprocess_data(base_df, LABEL_COL, n_remove=N_REMOVE)

# --- Balanced train (每類 N_TRAIN 筆) ---
train = base_df.groupby(LABEL_COL, group_keys=False).sample(n=N_TRAIN, replace=False, random_state=42)
X_train = train[FEATURE_COLS].copy()
y_train = train[LABEL_COL].copy()

# --- 其餘資料作為 base test ---
remaining = base_df.drop(train.index)
X_base_test = remaining[FEATURE_COLS].copy()
print(len(X_base_test))
y_base_test = remaining[LABEL_COL].copy()

# --- 用 base train fit scaler、KNN ---
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_base_test_scaled = scaler.transform(X_base_test)

knn = KNeighborsClassifier(n_neighbors=N_NEIGHBORS,weights='uniform',metric='euclidean')
knn.fit(X_train_scaled, y_train)

# --- Base test evaluation ---
y_base_pred = knn.predict(X_base_test_scaled)
base_acc = accuracy_score(y_base_test, y_base_pred)
base_y_true_coords = get_coords(y_base_test)
base_y_pred_coords = get_coords(y_base_pred)
base_dists = np.linalg.norm(base_y_true_coords - base_y_pred_coords, axis=1)
base_mde = np.mean(base_dists)
base_pointwise_mde = pointwise_mde_func(y_base_test, base_dists)

print("\n=== Base Model (Same week held-out test) ===")
print(f"Base Test Accuracy: {base_acc:.4f}")
print(f"Base Test MDE: {base_mde:.4f}")

base_report = classification_report(y_base_test, y_base_pred, output_dict=True)
base_report["Mean Distance Error (MDE)"] = float(base_mde)
base_report["accuracy"] = float(base_acc)
base_report["Pointwise MDE"] = base_pointwise_mde
with open(f'basemodel_{TRAIN_DATE}_base_test_{N_TRAIN}_each.json', "w") as f:
    json.dump(base_report, f, indent=4, ensure_ascii=False)

# --- Loop cross-week test ---
for test_date in TEST_DATES:
    cross_df = pd.read_csv(f'timestamp_allignment_Balanced_{test_date}_rtt_logs.csv', usecols=SELECTED_COLUMNS)
    cross_df = preprocess_data(cross_df, LABEL_COL, n_remove=N_REMOVE)
    X_cross_test = cross_df[FEATURE_COLS].copy()
    y_cross_test = cross_df[LABEL_COL].copy()
    X_cross_test_scaled = scaler.transform(X_cross_test)
    y_cross_pred = knn.predict(X_cross_test_scaled)

    cross_acc = accuracy_score(y_cross_test, y_cross_pred)
    cross_y_true_coords = get_coords(y_cross_test)
    cross_y_pred_coords = get_coords(y_cross_pred)
    cross_dists = np.linalg.norm(cross_y_true_coords - cross_y_pred_coords, axis=1)
    cross_mde = np.mean(cross_dists)
    cross_pointwise_mde = pointwise_mde_func(y_cross_test, cross_dists)

    print(f"\n=== Cross-Week Test Result ({test_date}) ===")
    print(f"Cross Test Accuracy: {cross_acc:.4f}")
    print(f"Cross Test MDE: {cross_mde:.4f}")

    cross_report = classification_report(y_cross_test, y_cross_pred, output_dict=True)
    cross_report["Mean Distance Error (MDE)"] = float(cross_mde)
    cross_report["accuracy"] = float(cross_acc)
    cross_report["Pointwise MDE"] = cross_pointwise_mde
    with open(f'transfer_report_{TRAIN_DATE}_train_{test_date}_test_{N_TRAIN}_each.json', "w") as f:
        json.dump(cross_report, f, indent=4, ensure_ascii=False)


4851

=== Base Model (Same week held-out test) ===
Base Test Accuracy: 0.9161
Base Test MDE: 0.2258

=== Cross-Week Test Result (2024_12_21) ===
Cross Test Accuracy: 0.2117
Cross Test MDE: 1.9980

=== Cross-Week Test Result (2024_12_27) ===
Cross Test Accuracy: 0.1512
Cross Test MDE: 2.2101

=== Cross-Week Test Result (2025_01_03) ===
Cross Test Accuracy: 0.1135
Cross Test MDE: 2.0307

=== Cross-Week Test Result (2025_01_10) ===
Cross Test Accuracy: 0.1142
Cross Test MDE: 2.1273

=== Cross-Week Test Result (2025_02_28) ===
Cross Test Accuracy: 0.0939
Cross Test MDE: 2.1813


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
