# SATP Results - Modalità Normal

Grafici inline delle metriche aggregate per run (`mode=normal`).

In [None]:
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Best-effort switch to an interactive matplotlib backend so figures get a
# zoom/pan toolbar. 'widget' is tried first, then 'notebook'. When IPython is
# not importable, no shell is active, or neither backend can be activated,
# the failure is ignored and the current backend stays in place.
try:
    from IPython import get_ipython

    ip = get_ipython()
    if ip is not None:
        for backend in ('widget', 'notebook'):
            try:
                ip.run_line_magic('matplotlib', backend)
            except Exception:
                # This backend could not be activated; try the next one.
                continue
            break
except Exception:
    pass

plt.style.use('seaborn-v0_8-whitegrid')

# The results file is looked up in the parent of the current working directory.
RESULTS_PATH = Path('..').resolve().joinpath('results.csv')

def load_mode_rows(path: Path, mode: str, usecols, chunksize: int = 250_000) -> pd.DataFrame:
    """Load only the rows of a CSV file whose ``mode`` column equals *mode*.

    The file is read in chunks of *chunksize* rows and each chunk is filtered
    before the matching rows are concatenated.

    Args:
        path: Location of the CSV file.
        mode: Value the ``mode`` column must equal for a row to be kept.
        usecols: Columns to read from the file; must include ``'mode'``.
        chunksize: Number of rows parsed per chunk.

    Returns:
        A DataFrame with a fresh 0..n-1 index holding the matching rows, or
        an empty DataFrame with *usecols* as its columns when nothing matches.
    """
    reader = pd.read_csv(path, usecols=usecols, chunksize=chunksize)
    filtered = (piece.loc[piece['mode'] == mode] for piece in reader)
    kept = [frame.copy() for frame in filtered if not frame.empty]
    if kept:
        return pd.concat(kept, ignore_index=True)
    return pd.DataFrame(columns=usecols)

# Metric columns read for every result row.
metric_cols = [
    'f0_mean', 'f0_hat_mean', 'mean', 'variance', 'stddev',
    'rse_theoretical', 'rse_observed',
    'bias', 'difference', 'bias_relative', 'mean_relative_error',
    'rmse', 'mae',
]
# Only these columns are parsed from the CSV: identifiers first, then metrics.
usecols = [
    'algorithm', 'params', 'mode', 'runs', 'sample_size',
    'element_index', 'distinct_count', 'seed',
] + metric_cols
normal = load_mode_rows(RESULTS_PATH, 'normal', usecols)

# Every loaded column except the three textual ones is coerced to a numeric
# dtype; values that cannot be parsed become NaN (errors='coerce').
num_cols = [name for name in usecols if name not in ('algorithm', 'params', 'mode')]
for col in num_cols:
    if col not in normal.columns:
        continue
    normal[col] = pd.to_numeric(normal[col], errors='coerce')

# Quick summary of what was loaded.
sample_sizes = normal['sample_size'].dropna().unique().tolist()
algorithm_names = normal['algorithm'].dropna().unique().tolist()
print('results:', RESULTS_PATH)
print('rows normal:', len(normal))
print('sample sizes:', sorted(sample_sizes))
print('algorithms:', sorted(algorithm_names))


In [None]:
# One figure per metric: the metric is plotted against sample_size (log x
# axis) with one line per algorithm.
metrics = [
    'f0_hat_mean','bias','difference','variance','stddev','rmse','mae',
    'mean_relative_error','bias_relative','rse_observed','rse_theoretical'
]
# Missing algorithm labels are dropped before sorting: unique() keeps NaN,
# and sorting NaN together with strings raises TypeError. Rows with a missing
# label never match `== algo` below, so no plotted data is lost.
algos = sorted(normal['algorithm'].dropna().unique())

for metric in metrics:
    fig, ax = plt.subplots(figsize=(8.5, 4.8))
    for algo in algos:
        # Sort by sample_size so the line is drawn left to right.
        g = normal[normal['algorithm'] == algo].sort_values('sample_size')
        ax.plot(g['sample_size'], g[metric], marker='o', label=algo)
    ax.set_xscale('log')
    ax.set_xlabel('sample_size')
    ax.set_ylabel(metric)
    ax.set_title(f'Normal mode: {metric} vs sample_size')
    ax.legend()
    plt.tight_layout()
    plt.show()


In [None]:
# Observed vs theoretical RSE: for every algorithm, plot both curves against
# sample_size (log x axis). Rows without a finite theoretical RSE are excluded.
rse_df = normal[np.isfinite(normal['rse_theoretical'])].copy()
fig, ax = plt.subplots(figsize=(8.5, 4.8))
# Missing algorithm labels are dropped before sorting: unique() keeps NaN,
# and sorting NaN together with strings raises TypeError. Rows with a missing
# label never match `== algo` below, so no plotted data is lost.
for algo in sorted(rse_df['algorithm'].dropna().unique()):
    # Sort by sample_size so both lines are drawn left to right.
    g = rse_df[rse_df['algorithm'] == algo].sort_values('sample_size')
    ax.plot(g['sample_size'], g['rse_observed'], marker='o', label=f'{algo} observed')
    ax.plot(g['sample_size'], g['rse_theoretical'], marker='x', linestyle='--', label=f'{algo} theory')
ax.set_xscale('log')
ax.set_xlabel('sample_size')
ax.set_ylabel('RSE')
ax.set_title('Normal mode: RSE osservata vs teorica')
ax.legend(ncol=2, fontsize=8)
plt.tight_layout()
plt.show()


In [None]:
# Calibration plot: f0_hat_mean against f0_mean on log-log axes, one scatter
# series per algorithm, with a dashed y=x reference line.
fig, ax = plt.subplots(figsize=(6.5, 6.0))
# Missing algorithm labels are dropped before sorting: unique() keeps NaN,
# and sorting NaN together with strings raises TypeError. Rows with a missing
# label never match `== algo` below, so no plotted data is lost.
for algo in sorted(normal['algorithm'].dropna().unique()):
    g = normal[normal['algorithm'] == algo]
    ax.scatter(g['f0_mean'], g['f0_hat_mean'], label=algo)
# The reference diagonal spans the joint value range of both columns.
mn = min(normal['f0_mean'].min(), normal['f0_hat_mean'].min())
mx = max(normal['f0_mean'].max(), normal['f0_hat_mean'].max())
ax.plot([mn, mx], [mn, mx], 'k--', linewidth=1, label='y=x')
ax.set_xscale('log')
ax.set_yscale('log')
ax.set_xlabel('f0_mean')
ax.set_ylabel('f0_hat_mean')
ax.set_title('Calibration: f0_hat_mean vs f0_mean')
ax.legend()
plt.tight_layout()
plt.show()
