In [1]:
import time 
import pickle

import pandas as pd
import numpy as np
import seaborn as sns

from sklearn.preprocessing import LabelEncoder

import matplotlib.pyplot as plt
from collections import defaultdict

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import RobustScaler

from sklearn.model_selection import GridSearchCV
from sklearn.multioutput import MultiOutputRegressor

import optuna
from xgboost import XGBClassifier, XGBRegressor
from optuna.integration import XGBoostPruningCallback
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, mean_squared_error


from mpl_toolkits.mplot3d import Axes3D
from matplotlib.colors import Normalize


import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from tqdm import tqdm

from torch.utils.data import Dataset, DataLoader, TensorDataset, random_split

np.random.seed(0)
torch.manual_seed(0)

  from .autonotebook import tqdm as notebook_tqdm


<torch._C.Generator at 0x204a342e690>

In [2]:
path = 'data_preprocessed.csv'  
df = pd.read_csv(path)
print(df.shape)

(1984, 12)


In [3]:
df.sample(5)

Unnamed: 0,time,reference_point,iteration,ssid,bssid,channel,xr,yr,rssi_ap_digilab,rssi_ap_dosen,relative_position,time_numeric
1321,2024-10-23 07:24:12.080672200+00:00,41,3,DTE Staff,2,1,2454,1037,-200.0,-64.0,1,07:24:12.080672
76,2024-10-17 09:00:17.716274900+00:00,2,18,DTE Student,4,11,300,1567,-63.0,-200.0,0,09:00:17.716274
530,2024-10-17 10:31:15.907614500+00:00,30,6,DTE Staff,3,11,989,1202,-42.0,-200.0,1,10:31:15.907614
629,2024-10-17 10:43:18.522535900+00:00,37,11,DTE Student,4,11,1908,1037,-53.0,-200.0,1,10:43:18.522535
1503,2024-10-23 07:47:25.337724200+00:00,39,10,DTE Staff,3,11,2224,1037,-61.0,-200.0,1,07:47:25.337724


In [4]:
# Define the mapping dictionary for converting numeric BSSID back to normal
numeric_to_bssid = {
    1: '70:a7:41:dc:8e:55',  # AP Dosen (DTE Staff)
    2: '72:a7:41:9c:8e:55',  # AP Dosen (DTE Student)
    3: '70:a7:41:dc:77:cd',  # AP Digilab (DTE Staff)
    4: '72:a7:41:9c:77:cd'   # AP Digilab (DTE Student)
}

# Replace numeric BSSID identifiers in the 'bssid' column with their original format
df['bssid'] = df['bssid'].map(numeric_to_bssid)

# Print the updated DataFrame to check the conversion
df.sample(5)


Unnamed: 0,time,reference_point,iteration,ssid,bssid,channel,xr,yr,rssi_ap_digilab,rssi_ap_dosen,relative_position,time_numeric
1535,2024-10-23 07:51:33.349010300+00:00,39,18,DTE Staff,70:a7:41:dc:8e:55,1,2224,1037,-200.0,-70.0,1,07:51:33.349010
1118,2024-10-23 06:49:29.038629800+00:00,38,11,DTE Staff,72:a7:41:9c:8e:55,1,1908,1202,-200.0,-81.0,1,06:49:29.038629
727,2024-10-23 02:54:11.383374+00:00,11,19,DTE Student,72:a7:41:9c:77:cd,11,759,1368,-62.0,-200.0,0,02:54:11.383374
1969,2024-10-23 09:32:11.607256700+00:00,31,17,DTE Staff,70:a7:41:dc:77:cd,11,1219,1037,-43.0,-200.0,1,09:32:11.607256
1801,2024-10-23 09:11:44.641679+00:00,34,15,DTE Staff,70:a7:41:dc:77:cd,11,1449,1202,-48.0,-200.0,1,09:11:44.641679


In [5]:
ap_coordinates = {
    '70:a7:41:dc:77:cd': (870, 1122),  # AP Digilab (DTE Staff)
    '72:a7:41:9c:77:cd': (870, 1122),  # AP Digilab (DTE Student)
    '70:a7:41:dc:8e:55': (2314, 468),  # AP Dosen (DTE Staff)
    '72:a7:41:9c:8e:55': (2314, 468),  # AP Dosen (DTE Student)
}

# Map each row to the corresponding AP coordinates
def get_ap_coordinates(bssid):
    return ap_coordinates.get(bssid, (np.nan, np.nan))  # Default to NaN if BSSID not found

df['ap_x'], df['ap_y'] = zip(*df['bssid'].map(get_ap_coordinates))

# Step 2: Calculate the distance to the corresponding AP
df['distance'] = np.sqrt((df['xr'] - df['ap_x'])**2 + (df['yr'] - df['ap_y'])**2)

# Display the updated DataFrame
df.sample(5)

Unnamed: 0,time,reference_point,iteration,ssid,bssid,channel,xr,yr,rssi_ap_digilab,rssi_ap_dosen,relative_position,time_numeric,ap_x,ap_y,distance
1666,2024-10-23 08:55:36.647695400+00:00,33,1,DTE Staff,70:a7:41:dc:77:cd,11,1449,1037,-48.0,-200.0,1,08:55:36.647695,870,1122,585.205947
1827,2024-10-23 09:15:36.985659900+00:00,32,1,DTE Staff,70:a7:41:dc:8e:55,1,1219,1202,-200.0,-94.0,1,09:15:36.985659,2314,468,1318.249218
1212,2024-10-23 07:02:28.818008600+00:00,36,15,DTE Staff,72:a7:41:9c:8e:55,1,1678,1202,-200.0,-86.0,1,07:02:28.818008,2314,468,971.211614
1235,2024-10-23 07:09:51.262252+00:00,42,1,DTE Staff,70:a7:41:dc:8e:55,1,2454,1202,-200.0,-70.0,1,07:09:51.262252,2314,468,747.232226
1377,2024-10-23 07:30:17.199164300+00:00,41,18,DTE Staff,70:a7:41:dc:77:cd,11,2454,1037,-62.0,-200.0,1,07:30:17.199164,870,1122,1586.278979


In [6]:
# Function to compute d0 and RSSI_d0 for a given AP
def compute_reference_values(data, rssi_column):
    # Find the row with the minimum distance, but only for the data specific to the AP
    reference_values = data.loc[data['distance'].idxmin()]
    return {
        'ap': reference_values['ap'],  # Add AP information for clarity
        'bssid': reference_values['bssid'],
        'd0': reference_values['distance'],
        'RSSI_d0': reference_values[rssi_column]
    }

# Ensure the DataFrame has a column for 'ap' indicating the AP it belongs to
df['ap'] = np.where(df['rssi_ap_digilab'].notnull(), 'digilab', 
                    np.where(df['rssi_ap_dosen'].notnull(), 'dosen', np.nan))

# Separate DataFrames for each AP
df_digilab = df[df['rssi_ap_digilab'].notnull()].copy()
df_dosen = df[df['rssi_ap_dosen'].notnull()].copy()

# Compute for each AP and specify the correct column for RSSI
digilab_values = compute_reference_values(df_digilab, 'rssi_ap_digilab')
dosen_values = compute_reference_values(df_dosen, 'rssi_ap_dosen')

# Combine results into a DataFrame
reference_values = pd.DataFrame([digilab_values, dosen_values])

# Display the reference values
print(reference_values)


        ap              bssid          d0  RSSI_d0
0  digilab  72:a7:41:9c:77:cd  143.391074    -46.0
1  digilab  72:a7:41:9c:77:cd  143.391074   -200.0


In [7]:
# Filter the DataFrame to only include rows for AP Dosen
# Replace 'bssid1' and 'bssid2' with the actual BSSIDs for AP Dosen
df_dosen = df[df['bssid'].isin(['70:a7:41:dc:8e:55', '72:a7:41:9c:8e:55'])].copy()

# Function to compute d0 and RSSI_d0 for a given AP
def compute_reference_values(data, rssi_column):
    # Find the row with the minimum distance
    reference_values = data.loc[data['distance'].idxmin()]
    return {
        'bssid': reference_values['bssid'],
        'd0': reference_values['distance'],
        'RSSI_d0': reference_values[rssi_column]
    }

# Compute the reference values for AP Dosen
dosen_values = compute_reference_values(df_dosen, 'rssi_ap_dosen')

# Display the reference values for AP Dosen
print("Reference values for AP Dosen:")
print(dosen_values)


Reference values for AP Dosen:
{'bssid': '72:a7:41:9c:8e:55', 'd0': 576.0737799969722, 'RSSI_d0': -72.0}


In [8]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score

# Function to calculate path loss given a distance and parameters for log-distance path loss model
def path_loss_model_log_distance(distance, d0, RSSI_d0, n=2.7):
    path_loss = RSSI_d0 - 10 * n * np.log10(distance / d0)
    return path_loss

# Function to calculate path loss given a distance and parameters for the Hata path loss model
def path_loss_model_hata(distance, d0, RSSI_d0, n=3.5):
    path_loss = RSSI_d0 - 10 * n * np.log10(distance / d0)
    return path_loss

# Function to calculate error metrics (MSE, RMSE, MAE, and R²)
def calculate_error_metrics(actual, predicted):
    mse = np.mean((actual - predicted)**2)
    rmse = np.sqrt(mse)
    mae = np.mean(np.abs(actual - predicted))
    r2 = r2_score(actual, predicted)
    return mse, rmse, mae, r2

# Generate a grid for visualization (e.g., 100x100 points)
x_vals = np.linspace(0, 3000, 100)
y_vals = np.linspace(0, 1500, 100)
X, Y = np.meshgrid(x_vals, y_vals)

# Define the AP coordinates and d0, RSSI_d0 for each AP
ap_coordinates = {
    'AP Digilab': (870, 1122, 1586.278979, -62.0),  # (x, y, d0, RSSI_d0)
    'AP Dosen': (2314, 468, 971.211614, -86.0)      # (x, y, d0, RSSI_d0)
}

# Generate synthetic actual RSSI data for error calculation (e.g., random values for demonstration)
# In practice, these should come from actual measurements or simulations

# actual_rssi_digilab = -50 + np.random.normal(0, 5, X.shape)  # Synthetic actual data for Digilab
# actual_rssi_dosen = -75 + np.random.normal(0, 5, X.shape)   # Synthetic actual data for Dosen

# Ensure actual RSSI data is a NumPy array for compatibility
actual_rssi_digilab = np.array(df_digilab['rssi_ap_digilab'])
actual_rssi_dosen = np.array(df_dosen['rssi_ap_dosen'])

# Flatten the 2D path loss grid to compare with actual data (assuming both are aligned)
path_loss_flat_digilab_log = path_loss_model_log_distance(distance.ravel(), d0, RSSI_d0).flatten()
path_loss_flat_digilab_hata = path_loss_model_hata(distance.ravel(), d0, RSSI_d0).flatten()

# Ensure data lengths match (you may need to adjust this part based on alignment between RSSI points)
n_samples = min(len(actual_rssi_digilab), len(path_loss_flat_digilab_log))

actual_rssi_digilab = actual_rssi_digilab[:n_samples]
path_loss_flat_digilab_log = path_loss_flat_digilab_log[:n_samples]
path_loss_flat_digilab_hata = path_loss_flat_digilab_hata[:n_samples]

# Compute error metrics for Digilab
mse_digilab_log, rmse_digilab_log, mae_digilab_log, r2_digilab_log = calculate_error_metrics(
    actual_rssi_digilab, path_loss_flat_digilab_log
)
mse_digilab_hata, rmse_digilab_hata, mae_digilab_hata, r2_digilab_hata = calculate_error_metrics(
    actual_rssi_digilab, path_loss_flat_digilab_hata
)

# Flatten the 2D path loss grid to compare with actual data (assuming both are aligned)
path_loss_flat_digilab_log = path_loss_model_log_distance(distance.ravel(), d0, RSSI_d0).flatten()
path_loss_flat_digilab_hata = path_loss_model_hata(distance.ravel(), d0, RSSI_d0).flatten()

# Ensure data lengths match (you may need to adjust this part based on alignment between RSSI points)
n_samples = min(len(actual_rssi_digilab), len(path_loss_flat_digilab_log))

actual_rssi_digilab = actual_rssi_digilab[:n_samples]
path_loss_flat_digilab_log = path_loss_flat_digilab_log[:n_samples]
path_loss_flat_digilab_hata = path_loss_flat_digilab_hata[:n_samples]

# Compute error metrics for Digilab
mse_digilab_log, rmse_digilab_log, mae_digilab_log, r2_digilab_log = calculate_error_metrics(
    actual_rssi_digilab, path_loss_flat_digilab_log
)
mse_digilab_hata, rmse_digilab_hata, mae_digilab_hata, r2_digilab_hata = calculate_error_metrics(
    actual_rssi_digilab, path_loss_flat_digilab_hata

# Initialize empty grids for path loss
path_loss_grid_digilab_log = np.zeros(X.shape)
path_loss_grid_digilab_hata = np.zeros(X.shape)
path_loss_grid_dosen_log = np.zeros(X.shape)
path_loss_grid_dosen_hata = np.zeros(X.shape)

# Compute path loss for each AP and model
for ap, (ap_x, ap_y, d0, RSSI_d0) in ap_coordinates.items():
    # Calculate distance from each grid point to the AP
    distance = np.sqrt((X - ap_x)**2 + (Y - ap_y)**2)
    # Avoid division by zero for distance (set to 1e-6)
    distance[distance == 0] = 1e-6

    # Log-Distance Model for Digilab
    if ap == 'AP Digilab':
        path_loss_grid_digilab_log = path_loss_model_log_distance(distance, d0, RSSI_d0)
        mse_digilab_log, rmse_digilab_log, mae_digilab_log, r2_digilab_log = calculate_error_metrics(actual_rssi_digilab, path_loss_grid_digilab_log)

    # Hata Model for Digilab
    if ap == 'AP Digilab':
        path_loss_grid_digilab_hata = path_loss_model_hata(distance, d0, RSSI_d0)
        mse_digilab_hata, rmse_digilab_hata, mae_digilab_hata, r2_digilab_hata = calculate_error_metrics(actual_rssi_digilab, path_loss_grid_digilab_hata)

    # Log-Distance Model for Dosen
    if ap == 'AP Dosen':
        path_loss_grid_dosen_log = path_loss_model_log_distance(distance, d0, RSSI_d0)
        mse_dosen_log, rmse_dosen_log, mae_dosen_log, r2_dosen_log = calculate_error_metrics(actual_rssi_dosen, path_loss_grid_dosen_log)

    # Hata Model for Dosen
    if ap == 'AP Dosen':
        path_loss_grid_dosen_hata = path_loss_model_hata(distance, d0, RSSI_d0)
        mse_dosen_hata, rmse_dosen_hata, mae_dosen_hata, r2_dosen_hata = calculate_error_metrics(actual_rssi_dosen, path_loss_grid_dosen_hata)

# Print error metrics for each model
print("Error Metrics for AP Digilab (Log-Distance Model):")
print(f"Mean Squared Error (MSE): {mse_digilab_log:.2f}")
print(f"Root Mean Squared Error (RMSE): {rmse_digilab_log:.2f}")
print(f"Mean Absolute Error (MAE): {mae_digilab_log:.2f}")
print(f"R-squared (R²): {r2_digilab_log:.2f}")

print("\nError Metrics for AP Digilab (Hata Model):")
print(f"Mean Squared Error (MSE): {mse_digilab_hata:.2f}")
print(f"Root Mean Squared Error (RMSE): {rmse_digilab_hata:.2f}")
print(f"Mean Absolute Error (MAE): {mae_digilab_hata:.2f}")
print(f"R-squared (R²): {r2_digilab_hata:.2f}")

print("\nError Metrics for AP Dosen (Log-Distance Model):")
print(f"Mean Squared Error (MSE): {mse_dosen_log:.2f}")
print(f"Root Mean Squared Error (RMSE): {rmse_dosen_log:.2f}")
print(f"Mean Absolute Error (MAE): {mae_dosen_log:.2f}")
print(f"R-squared (R²): {r2_dosen_log:.2f}")

print("\nError Metrics for AP Dosen (Hata Model):")
print(f"Mean Squared Error (MSE): {mse_dosen_hata:.2f}")
print(f"Root Mean Squared Error (RMSE): {rmse_dosen_hata:.2f}")
print(f"Mean Absolute Error (MAE): {mae_dosen_hata:.2f}")
print(f"R-squared (R²): {r2_dosen_hata:.2f}")

# Plot the path loss visualizations as heatmaps for all models
fig, ax = plt.subplots(2, 2, figsize=(18, 16))

# Plot Digilab Log-Distance Model
ax[0, 0].imshow(path_loss_grid_digilab_log, extent=(0, 3000, 0, 1500), origin='lower', cmap='viridis')
ax[0, 0].scatter(*ap_coordinates['AP Digilab'][:2], color='red', label='AP Digilab', marker='o')
ax[0, 0].set_title('Path Loss Visualization (Digilab - Log-Distance Model)')
ax[0, 0].set_xlabel('X Coordinate (m)')
ax[0, 0].set_ylabel('Y Coordinate (m)')
ax[0, 0].legend()
fig.colorbar(ax[0, 0].images[0], ax=ax[0, 0], label='Path Loss (dB)')

# Plot Digilab Hata Model
ax[0, 1].imshow(path_loss_grid_digilab_hata, extent=(0, 3000, 0, 1500), origin='lower', cmap='plasma')
ax[0, 1].scatter(*ap_coordinates['AP Digilab'][:2], color='red', label='AP Digilab', marker='o')
ax[0, 1].set_title('Path Loss Visualization (Digilab - Hata Model)')
ax[0, 1].set_xlabel('X Coordinate (m)')
ax[0, 1].set_ylabel('Y Coordinate (m)')
ax[0, 1].legend()
fig.colorbar(ax[0, 1].images[0], ax=ax[0, 1], label='Path Loss (dB)')

# Plot Dosen Log-Distance Model
ax[1, 0].imshow(path_loss_grid_dosen_log, extent=(0, 3000, 0, 1500), origin='lower', cmap='viridis')
ax[1, 0].scatter(*ap_coordinates['AP Dosen'][:2], color='blue', label='AP Dosen', marker='x')
ax[1, 0].set_title('Path Loss Visualization (Dosen - Log-Distance Model)')
ax[1, 0].set_xlabel('X Coordinate (m)')
ax[1, 0].set_ylabel('Y Coordinate (m)')
ax[1, 0].legend()
fig.colorbar(ax[1, 0].images[0], ax=ax[1, 0], label='Path Loss (dB)')

# Plot Dosen Hata Model
ax[1, 1].imshow(path_loss_grid_dosen_hata, extent=(0, 3000, 0, 1500), origin='lower', cmap='plasma')
ax[1, 1].scatter(*ap_coordinates['AP Dosen'][:2], color='blue', label='AP Dosen', marker='x')
ax[1, 1].set_title('Path Loss Visualization (Dosen - Hata Model)')
ax[1, 1].set_xlabel('X Coordinate (m)')
ax[1, 1].set_ylabel('Y Coordinate (m)')
ax[1, 1].legend()
fig.colorbar(ax[1, 1].images[0], ax=ax[1, 1], label='Path Loss (dB)')

plt.tight_layout()
plt.show()


ValueError: operands could not be broadcast together with shapes (1984,) (100,100) 