# Confronto Cross-Mode (Normal vs Streaming Endpoint)

Confronta, per ogni algoritmo/configurazione, i risultati one-shot (modalità «normal») con quelli
streaming valutati all'endpoint, cioè sull'ultimo elemento dello stream (`element_index == sample_size`).


In [None]:
from pathlib import Path
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Options handed to every fig.show(config=...) call in this notebook.
PLOT_CONFIG = dict(scrollZoom=True, displaylogo=False)

def resolve_results_root() -> Path:
    """Locate the ``results`` directory relative to the current working directory.

    Tries ``results``, ``../results`` and ``../../results`` in that order and
    returns the first candidate that is an existing directory.

    Returns:
        The absolute, resolved path of the results directory.

    Raises:
        FileNotFoundError: If none of the candidates is a directory.
    """
    candidates = (Path('results'), Path('../results'), Path('../../results'))
    for candidate in candidates:
        # is_dir() instead of exists(): a stray regular file named "results"
        # must not shadow the real directory one or two levels up.
        if candidate.is_dir():
            return candidate.resolve()
    raise FileNotFoundError('Cartella results non trovata. Esegui dalla root progetto o da notebooks/.')

# Resolve the results directory once and echo it so the reader can see which tree is in use.
RESULTS_ROOT = resolve_results_root()
print(f'RESULTS_ROOT = {RESULTS_ROOT}')


In [None]:
# Gather every per-run CSV found two directory levels below RESULTS_ROOT.
oneshot_files = sorted(RESULTS_ROOT.glob('*/*/results_oneshot.csv'))
stream_files = sorted(RESULTS_ROOT.glob('*/*/results_streaming.csv'))

# pd.concat([]) fails with an opaque "No objects to concatenate"; say which
# input is missing instead.
if not oneshot_files:
    raise FileNotFoundError(f'Nessun file results_oneshot.csv trovato in {RESULTS_ROOT}')
if not stream_files:
    raise FileNotFoundError(f'Nessun file results_streaming.csv trovato in {RESULTS_ROOT}')

on = pd.concat([pd.read_csv(f) for f in oneshot_files], ignore_index=True)
st = pd.concat([pd.read_csv(f) for f in stream_files], ignore_index=True)

# Columns forced to numeric in both tables; unparsable cells become NaN.
num_cols = [
    'runs', 'sample_size', 'element_index', 'distinct_count', 'seed',
    'f0_mean', 'f0_hat_mean', 'mean', 'variance', 'stddev',
    'rse_theoretical', 'rse_observed', 'bias', 'difference',
    'bias_relative', 'mean_relative_error', 'rmse', 'mae'
]
for c in num_cols:
    on[c] = pd.to_numeric(on[c], errors='coerce')
    st[c] = pd.to_numeric(st[c], errors='coerce')

# Streaming endpoint: keep only the rows where element_index equals sample_size.
st_last = st[st['element_index'] == st['sample_size']].copy()

join_cols = ['algorithm', 'params', 'sample_size', 'distinct_count', 'seed']
metrics = ['f0_hat_mean', 'bias', 'rmse', 'mae', 'mean_relative_error', 'rse_observed']

# Inner join: one row per configuration present in both modes, with each metric
# duplicated as <metric>_normal (one-shot) and <metric>_stream (endpoint).
m = on[join_cols + metrics].merge(
    st_last[join_cols + metrics],
    on=join_cols,
    suffixes=('_normal', '_stream')
)
m['d_over_n'] = m['distinct_count'] / m['sample_size']
m['d_over_n_label'] = (100.0 * m['d_over_n']).round(2).astype(str) + '%'
# A missing `params` cell is read as NaN and would turn the whole label into
# NaN, so those rows would drop out of later groupby calls and plot legends.
m['algorithm_label'] = m['algorithm'] + ' [' + m['params'].fillna('').astype(str) + ']'

print('merged rows:', len(m))
m.head(5)


In [None]:
# One scatter per metric: one-shot value on x, streaming-endpoint value on y.
# Points on the dashed y = x line are configurations where both modes agree.
for metric in metrics:
    x_col = f'{metric}_normal'
    y_col = f'{metric}_stream'
    fig = px.scatter(
        m,
        x=x_col,
        y=y_col,
        color='algorithm_label',
        symbol='d_over_n_label',
        hover_data=['sample_size', 'distinct_count', 'seed'],
        title=f'Normal vs Streaming endpoint: {metric}'
    )
    # Extent of the reference line. Both reductions skip NaN; the builtin
    # min()/max() on two column extrema is order-dependent when one of them is
    # NaN and could make the line disappear.
    mn = float(m[[x_col, y_col]].min().min())
    mx = float(m[[x_col, y_col]].max().max())
    fig.add_trace(go.Scatter(x=[mn, mx], y=[mn, mx], mode='lines', name='y=x', line=dict(color='black', dash='dash')))
    fig.update_layout(template='plotly_white', dragmode='zoom', legend=dict(itemclick='toggle', itemdoubleclick='toggleothers'))
    fig.update_xaxes(fixedrange=False)
    fig.update_yaxes(fixedrange=False)
    fig.show(config=PLOT_CONFIG)


In [None]:
# Long-format table of per-configuration gaps between the two modes:
# one row per (merged row, metric) with the absolute and relative difference.
id_cols = ['algorithm_label', 'sample_size', 'd_over_n_label']
delta_rows = []
for metric in metrics:
    normal_vals = m[f'{metric}_normal']
    stream_vals = m[f'{metric}_stream']
    abs_gap = (normal_vals - stream_vals).abs()

    frame = m[id_cols].copy()
    frame['metric'] = metric
    frame['delta_abs'] = abs_gap
    # A zero one-shot value is mapped to NaN so the ratio yields NaN, not inf.
    frame['delta_rel'] = abs_gap / normal_vals.abs().replace(0.0, np.nan)
    delta_rows.append(frame)

delta = pd.concat(delta_rows, ignore_index=True)
delta.head(10)


In [None]:
# Box plot of the absolute deltas: one facet per metric (three per row),
# one box per algorithm/configuration, with every point drawn.
fig = (
    px.box(
        delta,
        title='Delta assoluto tra normal e streaming endpoint',
        x='algorithm_label',
        y='delta_abs',
        color='algorithm_label',
        facet_col='metric',
        facet_col_wrap=3,
        points='all',
    )
    # The colour repeats the x axis, so the legend is switched off.
    .update_layout(template='plotly_white', dragmode='zoom', showlegend=False)
    .update_yaxes(fixedrange=False)
)
fig.show(config=PLOT_CONFIG)


In [None]:
# Mean |delta| of mean_relative_error per algorithm, sample size and d/n ratio.
heat = (
    delta[delta['metric'] == 'mean_relative_error']
    .groupby(['algorithm_label', 'sample_size', 'd_over_n_label'], as_index=False)['delta_abs']
    .mean()
)

fig = px.density_heatmap(
    heat,
    x='sample_size', y='d_over_n_label', z='delta_abs',
    # With z given, the default aggregation is a sum. sample_size is binned
    # automatically, so several sample sizes can share a cell and their means
    # would be added together. Averaging keeps the colour scale in |delta MRE|
    # units and is identical to the sum when a cell holds a single value.
    histfunc='avg',
    facet_col='algorithm_label',
    color_continuous_scale='Viridis',
    title='|Delta MRE| tra normal e streaming endpoint'
)
fig.update_layout(template='plotly_white', dragmode='zoom')
fig.update_xaxes(type='log', fixedrange=False)
fig.update_yaxes(fixedrange=False)
fig.show(config=PLOT_CONFIG)


In [None]:
# Per algorithm/configuration and metric: mean and 95th percentile of the
# absolute delta, sorted by metric and then by increasing mean delta.
summary = (
    delta.groupby(['algorithm_label', 'metric'], as_index=False)
         .agg(
             delta_abs_mean=('delta_abs', 'mean'),
             # Series.quantile skips NaN just like 'mean'; np.quantile would
             # return NaN for any group holding a single missing delta.
             delta_abs_p95=('delta_abs', lambda s: s.quantile(0.95)),
         )
         .sort_values(['metric', 'delta_abs_mean'])
)
summary
