# 12. Hybrid Query Routing (Additive)

This notebook builds a workload router that, for each query, selects the output of one existing synthetic result set (`pertable` or `mst`) using a fixed query-type rule.

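It is additive: routed outputs and the evaluation CSV are written under `data/experiments_additive/hybrid_routing`, and existing methods and baseline outputs are not modified.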

In [1]:
from pathlib import Path
import sys
import pandas as pd
from IPython.display import display, Markdown

# Locate the project root: start from the resolved working directory and step
# up one level when the kernel was launched from inside `notebooks/`.
ROOT = Path.cwd().resolve()
if Path.cwd().name == 'notebooks':
    ROOT = ROOT.parent

# Register the root on the import path (once) so the `src.*` imports resolve.
if sys.path.count(str(ROOT)) == 0:
    sys.path.append(str(ROOT))

from src.experiments.hybrid_router import DEFAULT_ROUTING_RULE, build_routed_results, summarize_routing
from src.eval.compare import evaluate_all, results_to_dataframe

# Directory of real-query results; passed to `evaluate_all` together with the
# routed outputs later in the notebook.
REAL_RESULTS_DIR = ROOT.joinpath('data', 'results', 'real')

# Existing synthetic result directories the router selects from, keyed by the
# method name.
SOURCE_RESULT_DIRS = {
    'pertable': ROOT.joinpath('data', 'results', 'synth_pertable'),
    'mst': ROOT.joinpath('data', 'results', 'synth_mst'),
}

# Everything this notebook writes goes under its own output directory.
OUT_DIR = ROOT.joinpath('data', 'experiments_additive', 'hybrid_routing')
ROUTED_RESULTS_DIR = OUT_DIR.joinpath('query_results')

# Show the detected root so a wrong working directory is obvious at a glance.
display(
    Markdown(f"Project root: `{ROOT}`")
)

Project root: `/Users/enscribe/Repositories/School/dsc180-q2`

In [2]:
# Route every query to one of the source result sets using the default rule.
# NOTE(review): presumably this also writes the chosen outputs into
# ROUTED_RESULTS_DIR; confirm in src.experiments.hybrid_router.
routed_from = build_routed_results(
    rule=DEFAULT_ROUTING_RULE,
    output_dir=ROUTED_RESULTS_DIR,
    source_result_dirs=SOURCE_RESULT_DIRS,
)

# Tabular view of the routing decisions.
routing_df = summarize_routing(routed_from)
display(Markdown('## Routing map'), routing_df)

# Number of queries served by each source method.
display(
    Markdown('## Query counts by source method'),
    (
        routing_df['method']
        .value_counts()
        .rename_axis('method')
        .reset_index(name='n_queries')
    ),
)

## Routing map

Unnamed: 0,query,method,type
5,display_devices_vendors_percentage,mst,distribution
11,popular_browsers_by_count_usage_percentage,mst,distribution
14,top_10_applications_by_app_type_ranked_by_foca...,mst,ranking_numeric
15,top_10_applications_by_app_type_ranked_by_syst...,mst,ranking_numeric
16,top_10_applications_by_app_type_ranked_by_tota...,mst,ranking_numeric
18,userwait_top_10_wait_processes,mst,ranking_numeric
19,userwait_top_10_wait_processes_wait_type_ac_dc,mst,ranking_numeric
20,userwait_top_20_wait_processes_compare_ac_dc_u...,mst,ranking_numeric
13,server_exploration_1,mst,row_level
0,Xeon_network_consumption,pertable,aggregate


## Query counts by source method

Unnamed: 0,method,n_queries
0,pertable,12
1,mst,9


In [3]:
# Evaluate the routed outputs against the real results and persist the table.
hybrid_results = evaluate_all(REAL_RESULTS_DIR, ROUTED_RESULTS_DIR)
hybrid_eval = results_to_dataframe(hybrid_results)

OUT_DIR.mkdir(parents=True, exist_ok=True)
hybrid_eval.to_csv(OUT_DIR.joinpath('evaluation.csv'), index=False)

# Only rows with at least one metric count toward the summary; a missing
# `passed` flag is treated as a failure.
ev = hybrid_eval.loc[hybrid_eval['n_metrics'] > 0]
if len(ev):
    passed = int(ev['passed'].fillna(False).sum())
    avg_score = float(ev['score'].mean())
else:
    # Nothing was scored: report zeros instead of dividing by zero / NaN.
    passed = 0
    avg_score = 0.0

display(Markdown('## Hybrid evaluation summary'))
display(pd.DataFrame({
    'queries_evaluated': [len(ev)],
    'queries_passed': [passed],
    'pass_rate': [passed / len(ev) if len(ev) else 0.0],
    'avg_score': [avg_score],
    'evaluation_csv': [str(OUT_DIR.joinpath('evaluation.csv'))],
}))

## Hybrid evaluation summary

Unnamed: 0,queries_evaluated,queries_passed,pass_rate,avg_score,evaluation_csv
0,21,8,0.380952,0.404014,/Users/enscribe/Repositories/School/dsc180-q2/...


In [4]:
# Load the previously saved evaluation tables of the two baseline methods
# (read-only; nothing here writes back to them).
baseline_pertable = pd.read_csv(ROOT.joinpath('data', 'results', 'evaluation_pertable.csv'))
baseline_mst = pd.read_csv(ROOT.joinpath('data', 'results', 'evaluation_mst.csv'))

def summarize_eval(df, name):
    """Condense one evaluation table into a single summary row.

    Only rows with ``n_metrics > 0`` are counted. A missing ``passed`` value
    is treated as not passed.

    Args:
        df: Evaluation DataFrame with ``n_metrics``, ``passed`` and ``score``
            columns.
        name: Label stored under the ``run`` key.

    Returns:
        dict with keys ``run``, ``queries_evaluated``, ``queries_passed``,
        ``pass_rate`` and ``avg_score``. All numeric fields are zero when no
        row has any metric.
    """
    scored = df.loc[df['n_metrics'] > 0]
    n_scored = len(scored)

    # Guard: with nothing scored, skip the division and the mean entirely.
    if n_scored == 0:
        return {
            'run': name,
            'queries_evaluated': n_scored,
            'queries_passed': 0,
            'pass_rate': 0.0,
            'avg_score': 0.0,
        }

    n_passed = int(scored['passed'].fillna(False).sum())
    return {
        'run': name,
        'queries_evaluated': n_scored,
        'queries_passed': n_passed,
        'pass_rate': n_passed / n_scored,
        'avg_score': float(scored['score'].mean()),
    }

# One summary row per run, best run first (most passes, then highest score).
comparison = pd.DataFrame([
    summarize_eval(frame, run_name)
    for run_name, frame in (
        ('pertable_baseline', baseline_pertable),
        ('mst_baseline', baseline_mst),
        ('hybrid_routed', hybrid_eval),
    )
]).sort_values(by=['queries_passed', 'avg_score'], ascending=[False, False])

display(Markdown('## Baseline vs hybrid'), comparison)

## Baseline vs hybrid

Unnamed: 0,run,queries_evaluated,queries_passed,pass_rate,avg_score
2,hybrid_routed,21,8,0.380952,0.404014
1,mst_baseline,21,6,0.285714,0.328326
0,pertable_baseline,21,6,0.285714,0.303107
