# Flexible AP Selection with Optimized Parameters

This notebook allows you to run the AP selection system with the best parameters found from benchmarking.

**Features:**
- Uses best QUBO parameters (alpha, penalty) from Phase 1; k is chosen by the user (see below)
- Uses best OpenJij parameters (num_sweeps, num_reads, beta, gamma) from Phase 2
- **Flexible k**: Choose any number of APs you want to select
- Complete evaluation with 3D error metrics and floor accuracy
- Saves results to Excel for easy analysis

## Setup: Imports and Configuration

In [1]:
# Add project root to Python path
import sys
from pathlib import Path

# Resolve the repository root and put it first on the import path so that the
# `scripts.*` packages used by later cells can be imported.
# NOTE(review): assumes the kernel's working directory sits two levels below
# the repository root — confirm if Jupyter is launched from elsewhere.
project_root = Path.cwd().parent.parent
sys.path.insert(0, f"{project_root}")

print(f"✓ Project root: {project_root}")

✓ Project root: C:\Users\Mohamed Khalil\Desktop\Quantum-Optimization-In-AP-Selection


In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
import openjij as oj
import time
from datetime import datetime

# Import custom modules
from scripts.data.data_loaders import (
    load_preprocessed_data,
    load_all_precomputed_data
)

from scripts.optimization.QUBO import formulate_qubo
from scripts.ml.ML_post_processing import train_regressor
from scripts.evaluation.Analysis import calculate_comprehensive_metrics

# Apply seaborn's 'whitegrid' style to the figures created later in the notebook.
sns.set_style('whitegrid')

# Confirmation printed once every import above has succeeded.
print("✓ All libraries imported successfully")

✓ All libraries imported successfully


## Load Data and System Parameters

In [3]:
# Building whose preprocessed train/validation split is used throughout the notebook.
building_id = 1

# The loader hands back RSSI features and coordinate targets for both splits,
# plus the list of AP column names.
(
    rssi_train,
    coords_train,
    rssi_val,
    coords_val,
    ap_columns,
) = load_preprocessed_data(building_id=building_id, use_pickle=True)

# Min-max scaler fitted on the training coordinates only.
# NOTE(review): scaler_coords is not referenced by any later cell shown here —
# confirm it is still needed.
scaler_coords = MinMaxScaler().fit(coords_train)

print(
    f"✓ Loaded Building {building_id} data:",
    f"  Training samples: {rssi_train.shape[0]}",
    f"  Validation samples: {rssi_val.shape[0]}",
    f"  Number of APs: {len(ap_columns)}",
    sep="\n",
)

[OK] Loaded preprocessed data from pickle: C:\Users\Mohamed Khalil\Desktop\Quantum-Optimization-In-AP-Selection\data\output_data\preprocessed_data\preprocessed_building_1.pkl
  Training samples: 5196
  Validation samples: 307
  Number of APs: 520
✓ Loaded Building 1 data:
  Training samples: 5196
  Validation samples: 307
  Number of APs: 520


In [4]:
# Fetch every pre-computed importance ranking together with the AP redundancy matrix.
importance_dicts, redundancy_matrix = load_all_precomputed_data()

# Only the entropy-based ranking is used from here on
# (noted as the best performer in Phase 1).
importance_entropy = importance_dicts['entropy']

print(
    f"✓ Loaded entropy importance scores: {len(importance_entropy)} APs",
    f"✓ Loaded redundancy matrix: {redundancy_matrix.shape}",
    sep="\n",
)

Loading pre-computed importance scores and redundancy matrix

Loading importance scores...
[OK] Loaded 520 APs for entropy importance
[OK] Loaded 520 APs for average importance
[OK] Loaded 520 APs for median importance
[OK] Loaded 520 APs for max importance
[OK] Loaded 520 APs for variance importance
[OK] Loaded 520 APs for mutual_info importance

Loading redundancy matrix...
[OK] Loaded redundancy matrix with shape: (520, 520)

[OK] All data loaded successfully!
✓ Loaded entropy importance scores: 520 APs
✓ Loaded redundancy matrix: (520, 520)


In [5]:
# Read the Parameter/Value table that describes the deployment geometry.
system_params_path = Path('../../data', 'system_input', 'system_parameters.csv')
system_params_df = pd.read_csv(system_params_path)

# Build a name -> value lookup from the two CSV columns.
system_params_dict = {
    name: value
    for name, value in zip(system_params_df['Parameter'], system_params_df['Value'])
}

# Constants passed to the evaluation step further down; a missing row
# surfaces immediately as a KeyError.
LON_MIN, LON_MAX, LAT_MIN, LAT_MAX, FLOOR_HEIGHT = (
    system_params_dict[key]
    for key in ('LON_MIN', 'LON_MAX', 'LAT_MIN', 'LAT_MAX', 'FLOOR_HEIGHT')
)

print("✓ System parameters loaded")

✓ System parameters loaded


## Load Best Parameters from Benchmark Results

In [6]:
# Load Phase 1 results (best QUBO parameters).
# NOTE(review): the file name says "openjij" although this phase supplies the
# QUBO parameters — confirm this is the intended results file.
phase1_file = Path('../../data/results/phase1_openjij_parameters.xlsx')

# Fallback QUBO parameters, used when no usable Phase 1 results are available.
best_alpha = 0.9
best_penalty = 1.5

if not phase1_file.exists():
    print("⚠ Phase 1 results not found. Using default parameters.")
else:
    phase1_df = pd.read_excel(phase1_file)
    # Lowest mean 3D error first; rows with a missing error sort to the end.
    phase1_df_sorted = phase1_df.sort_values('mean_3d_error_m')

    if phase1_df_sorted.empty:
        # An empty sheet would otherwise raise IndexError on iloc[0].
        print("⚠ Phase 1 results file is empty. Using default parameters.")
    else:
        best_phase1 = phase1_df_sorted.iloc[0]

        # Extract best QUBO parameters (k is overridden by the user later on).
        best_alpha = float(best_phase1['alpha'])
        best_penalty = float(best_phase1['penalty'])

        print("="*80)
        print("BEST QUBO PARAMETERS (from Phase 1)")
        print("="*80)
        print(f"alpha = {best_alpha}")
        print(f"penalty = {best_penalty}")
        print("(k will be set by user below)")
        print("="*80)

BEST QUBO PARAMETERS (from Phase 1)
alpha = 0.9
penalty = 1.5
(k will be set by user below)


In [7]:
# Load Phase 2 results (best OpenJij parameters).
phase2_file = Path('../../data/results/phase2_openjij_parameters.xlsx')

# Fallback OpenJij parameters, defined once and overridden only when a valid
# Phase 2 row is found.
best_num_sweeps = 1000
best_num_reads = 100
best_beta = 5.0
best_gamma = 1.0

if not phase2_file.exists():
    print("⚠ Phase 2 results not found. Using default OpenJij parameters.")
else:
    phase2_df = pd.read_excel(phase2_file)

    # Keep only rows with a finite time-to-solution: this drops infinite TTS
    # as well as missing/unparseable values, which `!= inf` would let through.
    tts_values = pd.to_numeric(phase2_df['tts_s'], errors='coerce')
    phase2_finite = phase2_df[np.isfinite(tts_values)]
    phase2_sorted = phase2_finite.sort_values('tts_s')

    if phase2_sorted.empty:
        print("⚠ No valid Phase 2 results. Using default OpenJij parameters.")
    else:
        # Fastest configuration first.
        best_phase2 = phase2_sorted.iloc[0]

        # Excel may store the counts as floats; the sampler needs integers.
        best_num_sweeps = int(best_phase2['num_sweeps'])
        best_num_reads = int(best_phase2['num_reads'])
        best_beta = float(best_phase2['beta'])
        best_gamma = float(best_phase2['gamma'])

        print("\n" + "="*80)
        print("BEST OPENJIJ PARAMETERS (from Phase 2)")
        print("="*80)
        print(f"num_sweeps = {best_num_sweeps}")
        print(f"num_reads = {best_num_reads}")
        print(f"beta = {best_beta}")
        print(f"gamma = {best_gamma}")
        print(f"TTS = {best_phase2['tts_s']:.2f}s")
        print("="*80)


BEST OPENJIJ PARAMETERS (from Phase 2)
num_sweeps = 100
num_reads = 10
beta = 10.0
gamma = 1.0
TTS = 0.15s


## User Configuration: Select Number of APs

**Modify the `k_values` list below to test different AP budgets:**

In [16]:
# ============================================================================
# USER CONFIGURATION: AP budgets (k) to evaluate
# ============================================================================
# Each entry in `k_values` triggers one QUBO solve followed by model training
# and evaluation in the run loop below.
#
# Alternatives:
#   k_values = [20]                      # a single small budget
#   k_values = list(range(10, 51, 5))    # sweep: 10, 15, 20, ..., 50
k_values = [100]

print(
    "="*80,
    "AP SELECTION CONFIGURATION",
    "="*80,
    f"Number of AP budgets to test: {len(k_values)}",
    f"k values: {k_values}",
    "="*80,
    sep="\n",
)

AP SELECTION CONFIGURATION
Number of AP budgets to test: 1
k values: [100]


## Define Custom OpenJij Solver

In [17]:
def solve_qubo_with_best_params(Q, num_sweeps, num_reads, beta, gamma):
    """
    Solve a QUBO with OpenJij's simulated quantum annealing (SQA) sampler.

    Args:
        Q: QUBO coefficients, passed unchanged to ``SQASampler.sample_qubo``.
        num_sweeps: Number of sweeps, forwarded to the sampler.
        num_reads: Number of reads (samples), forwarded to the sampler.
        beta: ``beta`` argument forwarded to the sampler.
        gamma: ``gamma`` argument forwarded to the sampler.

    Returns:
        selected_indices: Variable labels set to 1 in ``response.first.sample``.
        duration: Elapsed seconds for sampler construction plus sampling.
    """
    # perf_counter is monotonic, so the measured interval cannot be distorted
    # by system clock adjustments the way time.time() can.
    start_time = time.perf_counter()

    # Create SQA sampler
    sampler = oj.SQASampler()

    # Sample with best parameters
    response = sampler.sample_qubo(
        Q,
        num_sweeps=num_sweeps,
        num_reads=num_reads,
        beta=beta,
        gamma=gamma
    )

    duration = time.perf_counter() - start_time

    # Keep the labels of the variables switched on in the best sample,
    # in the order the sample reports them.
    best_sample = response.first.sample
    selected_indices = [i for i, val in best_sample.items() if val == 1]

    return selected_indices, duration

print("✓ Custom solver function defined")

✓ Custom solver function defined


## Run AP Selection for All k Values

In [18]:
# One result record (dict) per successfully evaluated AP budget.
results = []

print("\n" + "="*80)
print("RUNNING AP SELECTION WITH BEST PARAMETERS")
print("="*80)

for idx, k in enumerate(k_values, 1):
    print(f"\n[{idx}/{len(k_values)}] Testing k={k} APs")
    print("-" * 60)

    try:
        # 1. Formulate QUBO
        print(f"  Formulating QUBO with k={k}, alpha={best_alpha}, penalty={best_penalty}...")
        Q, relevant_aps, offset = formulate_qubo(
            importance_entropy,
            redundancy_matrix,
            k,
            best_alpha,
            best_penalty
        )
        print(f"  ✓ QUBO formulated with {len(relevant_aps)} relevant APs")

        # 2. Solve with best OpenJij parameters
        print(f"  Solving with OpenJij (sweeps={best_num_sweeps}, reads={best_num_reads}, beta={best_beta}, gamma={best_gamma})...")
        selected_indices, qubo_time = solve_qubo_with_best_params(
            Q, best_num_sweeps, best_num_reads, best_beta, best_gamma
        )
        print(f"  ✓ QUBO solved in {qubo_time:.3f}s, selected {len(selected_indices)} APs")

        if len(selected_indices) == 0:
            print("  ✗ No APs selected, skipping...")
            continue

        # NOTE(review): assumes the QUBO variable labels are positions into
        # relevant_aps — confirm against formulate_qubo.
        selected_aps = [relevant_aps[i] for i in selected_indices]

        # 3. Train ML model
        print(f"  Training Random Forest with {len(selected_aps)} APs...")
        models, predictions = train_regressor(
            rssi_train, coords_train, rssi_val, coords_val, selected_aps
        )
        preds = predictions['rf_val']
        print("  ✓ Model trained")

        # 4. Evaluate
        print("  Evaluating positioning accuracy...")
        _, _, metrics = calculate_comprehensive_metrics(
            coords_val, preds, LON_MIN, LON_MAX, LAT_MIN, LAT_MAX, FLOOR_HEIGHT
        )

        # 5. Store results (metrics plus the exact settings that produced them)
        result = {
            'k': k,
            'num_aps_selected': len(selected_aps),
            'selected_aps': ', '.join(selected_aps),
            'qubo_time_s': qubo_time,
            'mean_3d_error_m': metrics['real_mean_m'],
            'median_3d_error_m': metrics['real_median_m'],
            'min_3d_error_m': metrics['real_min_m'],
            'max_3d_error_m': metrics['real_max_m'],
            'floor_accuracy_0': metrics['floor_accuracy_0'],
            'floor_accuracy_1': metrics['floor_accuracy_1'],
            'floor_accuracy_2': metrics['floor_accuracy_2'],
            'alpha': best_alpha,
            'penalty': best_penalty,
            'num_sweeps': best_num_sweeps,
            'num_reads': best_num_reads,
            'beta': best_beta,
            'gamma': best_gamma
        }
        results.append(result)

        print("  ✓ Results:")
        print(f"    Mean 3D Error: {metrics['real_mean_m']:.2f}m")
        print(f"    Median 3D Error: {metrics['real_median_m']:.2f}m")
        print(f"    Floor Accuracy (exact): {metrics['floor_accuracy_0']:.2%}")
        print(f"    Floor Accuracy (±1): {metrics['floor_accuracy_1']:.2%}")

    except Exception as e:
        # Best-effort sweep: one failing budget must not abort the remaining
        # ones. The exception type is reported because the bare message is
        # often ambiguous (e.g. a KeyError prints only the missing key).
        print(f"  ✗ Error: {type(e).__name__}: {e}")

print("\n" + "="*80)
print(f"✓ Completed: {len(results)}/{len(k_values)} successful runs")
print("="*80)


RUNNING AP SELECTION WITH BEST PARAMETERS

[1/1] Testing k=100 APs
------------------------------------------------------------
  Formulating QUBO with k=100, alpha=0.9, penalty=1.5...
Formulating enhanced QUBO for k=100 APs selection...
Done
  ✓ QUBO formulated with 207 relevant APs
  Solving with OpenJij (sweeps=100, reads=10, beta=10.0, gamma=1.0)...
  ✓ QUBO solved in 0.152s, selected 100 APs
  Training Random Forest with 100 APs...
Training random forest regressor...
✓ Enhanced Random Forest trained
   Average OOB Score: 0.9644
  ✓ Model trained
  Evaluating positioning accuracy...
  ✓ Results:
    Mean 3D Error: 13.37m
    Median 3D Error: 10.25m
    Floor Accuracy (exact): 71.99%
    Floor Accuracy (±1): 98.05%

✓ Completed: 1/1 successful runs


## Results Summary

In [13]:
# Collect the per-budget result records into a DataFrame.
results_df = pd.DataFrame(results)

# Display summary
print("\n" + "="*80)
print("RESULTS SUMMARY")
print("="*80)
print()
if results_df.empty:
    # With no successful run the frame has no columns, so selecting the
    # summary columns below would raise KeyError.
    print("⚠ No successful runs to summarize.")
else:
    print(results_df[['k', 'num_aps_selected', 'mean_3d_error_m', 'median_3d_error_m',
                       'floor_accuracy_0', 'floor_accuracy_1', 'qubo_time_s']].to_string(index=False))
print()


RESULTS SUMMARY

 k  num_aps_selected  mean_3d_error_m  median_3d_error_m  floor_accuracy_0  floor_accuracy_1  qubo_time_s
50                50        16.019402          11.264689          0.618893          0.980456     0.179379



## Visualize Results

In [14]:
# Trend plots only make sense with at least two AP budgets.
if len(results_df) > 1:
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))

    # Plot 1: Mean 3D Error vs k
    axes[0, 0].plot(results_df['k'], results_df['mean_3d_error_m'], marker='o', linewidth=2, markersize=8)
    axes[0, 0].set_xlabel('Number of Selected APs (k)', fontsize=12)
    axes[0, 0].set_ylabel('Mean 3D Error (m)', fontsize=12)
    axes[0, 0].set_title('Mean 3D Positioning Error vs AP Budget', fontsize=13, fontweight='bold')
    axes[0, 0].grid(True, alpha=0.3)

    # Plot 2: Floor Accuracy vs k (fractions converted to percent)
    axes[0, 1].plot(results_df['k'], results_df['floor_accuracy_0']*100, marker='s', linewidth=2, markersize=8, label='Exact Floor')
    axes[0, 1].plot(results_df['k'], results_df['floor_accuracy_1']*100, marker='^', linewidth=2, markersize=8, label='±1 Floor')
    axes[0, 1].set_xlabel('Number of Selected APs (k)', fontsize=12)
    axes[0, 1].set_ylabel('Floor Accuracy (%)', fontsize=12)
    axes[0, 1].set_title('Floor Classification Accuracy vs AP Budget', fontsize=13, fontweight='bold')
    axes[0, 1].legend(fontsize=10)
    axes[0, 1].grid(True, alpha=0.3)

    # Plot 3: QUBO Time vs k
    axes[1, 0].plot(results_df['k'], results_df['qubo_time_s'], marker='d', linewidth=2, markersize=8, color='green')
    axes[1, 0].set_xlabel('Number of Selected APs (k)', fontsize=12)
    axes[1, 0].set_ylabel('QUBO Solution Time (s)', fontsize=12)
    axes[1, 0].set_title('Computational Time vs AP Budget', fontsize=13, fontweight='bold')
    axes[1, 0].grid(True, alpha=0.3)

    # Plot 4: Median error as bars with the mean overlaid as a line
    labels = [f"k={int(k_val)}" for k_val in results_df['k']]

    axes[1, 1].bar(range(len(results_df)), results_df['median_3d_error_m'], alpha=0.7, color='skyblue', label='Median')
    axes[1, 1].plot(range(len(results_df)), results_df['mean_3d_error_m'], marker='o', color='red', linewidth=2, label='Mean')
    axes[1, 1].set_xticks(range(len(results_df)))
    axes[1, 1].set_xticklabels(labels, rotation=45)
    axes[1, 1].set_ylabel('3D Error (m)', fontsize=12)
    axes[1, 1].set_title('Mean vs Median Error Comparison', fontsize=13, fontweight='bold')
    axes[1, 1].legend(fontsize=10)
    axes[1, 1].grid(True, alpha=0.3, axis='y')

    plt.tight_layout()

    # The results directory is only created by the Excel cell further down,
    # so make sure it exists before writing the figure.
    plot_path = Path('../../data/results/flexible_ap_selection_results.png')
    plot_path.parent.mkdir(parents=True, exist_ok=True)
    plt.savefig(plot_path, dpi=150, bbox_inches='tight')
    plt.show()

    print("✓ Visualizations saved to: data/results/flexible_ap_selection_results.png")
else:
    print("⚠ Not enough data points to create plots (need at least 2 k values)")

⚠ Not enough data points to create plots (need at least 2 k values)


## Save Results to Excel

In [15]:
# Persist this run to a timestamped Excel workbook under data/results.
output_dir = Path('../../data/results')
output_dir.mkdir(parents=True, exist_ok=True)

# A second-resolution timestamp keeps successive runs from overwriting each other.
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
output_file = output_dir / f'flexible_ap_selection_{timestamp}.xlsx'

# Settings shared by every run, stored as (Parameter, Value) rows.
params_df = pd.DataFrame(
    [
        ('alpha', best_alpha),
        ('penalty', best_penalty),
        ('num_sweeps', best_num_sweeps),
        ('num_reads', best_num_reads),
        ('beta', best_beta),
        ('gamma', best_gamma),
        ('importance_metric', 'entropy'),
        ('building_id', building_id),
    ],
    columns=['Parameter', 'Value'],
)

# Two sheets: the per-budget results and the parameters that produced them.
with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
    results_df.to_excel(writer, sheet_name='Results', index=False)
    params_df.to_excel(writer, sheet_name='Parameters', index=False)

print(f"\n✓ Results saved to: {output_file}")
print("\n" + "="*80)
print("EXPERIMENT COMPLETE!")
print("="*80)


✓ Results saved to: ..\..\data\results\flexible_ap_selection_20251117_234514.xlsx

EXPERIMENT COMPLETE!


## Find Best k Value

In [None]:
# Report the winning AP budget under three criteria; with no successful run
# there is nothing to report and the cell stays silent.
if len(results_df) >= 1:
    # Pick the row that is best for each criterion.
    best_mean_error = results_df.loc[results_df['mean_3d_error_m'].idxmin()]
    best_floor_acc = results_df.loc[results_df['floor_accuracy_0'].idxmax()]
    best_time = results_df.loc[results_df['qubo_time_s'].idxmin()]

    # One print call with newline separators emits the report line by line.
    print(
        "\n" + "="*80,
        "BEST CONFIGURATIONS BY DIFFERENT CRITERIA",
        "="*80,
        "\n1. Best Mean 3D Error:",
        f"   k = {int(best_mean_error['k'])} APs",
        f"   Mean 3D Error: {best_mean_error['mean_3d_error_m']:.2f}m",
        f"   Floor Accuracy: {best_mean_error['floor_accuracy_0']:.2%}",
        "\n2. Best Floor Accuracy:",
        f"   k = {int(best_floor_acc['k'])} APs",
        f"   Floor Accuracy: {best_floor_acc['floor_accuracy_0']:.2%}",
        f"   Mean 3D Error: {best_floor_acc['mean_3d_error_m']:.2f}m",
        "\n3. Fastest QUBO Solution:",
        f"   k = {int(best_time['k'])} APs",
        f"   QUBO Time: {best_time['qubo_time_s']:.3f}s",
        f"   Mean 3D Error: {best_time['mean_3d_error_m']:.2f}m",
        "\n" + "="*80,
        sep="\n",
    )