# Hybrid Optimizer (Fixed Copy)

Diese Notebook‑Kopie ist JSON‑valide und enthält nur die robusten Abschluss‑Zellen:
- Optimierer mit `OPTIMIZE`‑Schalter und feinem tau/Ω‑Raster.
- Finale Auswertung (Konfusionsmatrix, P/R/F1, Vergleich).
- Visuelle Vergleiche (Balkendiagramme Baseline vs. Final) und Streudiagramm richtig/falsch.

Voraussetzungen (im selben Kernel ausführen wie das Tuning‑Notebook):
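- `data`: DataFrame mit Spalte `Bike Type` und Soft‑Voting Ergebnissen (bevorzugt `Pred_DS_soft_tuned`, sonst `Pred_DS_soft_best` oder `Pred_DS_soft`). Für den Baseline‑Vergleich wird zusätzlich `Pred_DS` (oder ein vorhandenes `baseline_metrics`) benötigt, für das Streudiagramm die Spalten `Distance` und `Elevation Gain`.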
- `masses`: Liste der Baseline‑MassFunctions (eine pro Zeile in `data`).
- `omega`: Schlüsselname für Ω in den MassFunctions (z. B. `'omega'`).

Ablauf:
1) Optimierer‑Zelle ausführen (setzt `data['Pred_DS_final']`).
2) Finale Auswertung, dann Visual‑Zellen ausführen.

In [None]:
import numpy as np, pandas as pd, seaborn as sns, matplotlib.pyplot as plt

# Target metric for the grid search: 'accuracy' or 'macro_f1'.
OPTIMIZE = 'macro_f1'  # 'accuracy' | 'macro_f1'

# Threshold grids 0.46, 0.47, ..., 0.52 (stop value 0.53 excluded).
# They are built from integers and rounded because np.arange with a fractional
# step has an unreliable length/endpoint and yields values such as
# 0.47000000000000003, which would distort the >= / <= threshold tests and the
# heatmap axis labels.
tau_list = np.round(np.arange(46, 53) / 100.0, 2)
omega_list = np.round(np.arange(46, 53) / 100.0, 2)

# Singleton keys used in the mass functions and the label each one maps to.
label_map = {'r':'race bike','m':'mtb','t':'trecking bike'}
classes = ['r','m','t']

# Check prerequisites: these objects must already exist in the running kernel.
missing = [name for name in ('data', 'masses', 'omega') if name not in globals()]
if missing:
    raise RuntimeError(f'Fehlende Objekte im Kernel: {missing}. Bitte erst das Tuning‑Notebook bis inkl. Hybrid ausführen und diesen Kernel übernehmen.')

# Fallback prediction column: the first soft-voting column present in `data`,
# in order of preference.
fb = next(
    (c for c in ('Pred_DS_soft_tuned', 'Pred_DS_soft_best', 'Pred_DS_soft') if c in data.columns),
    None,
)
if fb is None:
    raise RuntimeError('Bitte Soft‑Voting zuerst ausführen (Pred_DS_soft_tuned oder Pred_DS_soft).')

def evaluate_hybrid(tau_singleton, omega_max):
    """Score the hybrid decision rule for one pair of thresholds.

    For every row of the global ``data`` the singleton class (one of the
    global ``classes``) with the largest mass is accepted when its mass is at
    least ``tau_singleton`` and the mass stored under the global ``omega`` key
    is at most ``omega_max``. Otherwise the row keeps the prediction from the
    fallback column ``fb``. Rows without a mass function (a ``None`` entry, or
    ``masses`` shorter than ``data``) always use the fallback.

    Args:
        tau_singleton: Minimum mass the best singleton needs to be accepted.
        omega_max: Maximum mass allowed on ``omega`` for the singleton to be
            accepted.

    Returns:
        Tuple ``(acc, macro_f1, preds)``: accuracy and macro-averaged F1 over
        the rows with a non-empty ``Bike Type``, and the list of predictions
        for all rows of ``data`` in row order.
    """
    labels = ['race bike', 'mtb', 'trecking bike']
    n_masses = len(masses)
    preds = []
    # `masses` holds one entry per row of `data`, so rows and masses are paired
    # by position. Using the index label as a list position breaks as soon as
    # `data` does not carry a default 0..n-1 index.
    for pos, (_, row) in enumerate(data.iterrows()):
        fallback = row.get(fb, None)
        m = masses[pos] if pos < n_masses else None
        if m is None:
            preds.append(fallback)
            continue
        singles = {c: (m[c] if c in m else 0.0) for c in classes}
        # On ties the first class in `classes` order wins.
        best_c, best_v = max(singles.items(), key=lambda kv: kv[1])
        om_v = float(m[omega]) if omega in m else 0.0
        accept = best_v >= tau_singleton and om_v <= omega_max
        preds.append(label_map[best_c] if accept else fallback)

    # Evaluate only on rows that carry a ground-truth label.
    df_eval = data[data['Bike Type'].notna() & (data['Bike Type'] != '')].copy()
    y_true = df_eval['Bike Type']
    y_pred = pd.Series(preds, index=data.index).loc[df_eval.index]
    cm = pd.crosstab(y_true, y_pred).reindex(index=labels, columns=labels, fill_value=0)

    diag = np.diag(cm.values)
    support = cm.sum(axis=1).values   # true count per class
    pred_sum = cm.sum(axis=0).values  # predicted count per class
    # Classes with a zero denominator get a score of 0 instead of NaN.
    rec = np.divide(diag, support, out=np.zeros_like(diag, float), where=support > 0)
    prec = np.divide(diag, pred_sum, out=np.zeros_like(diag, float), where=pred_sum > 0)
    f1 = np.divide(2 * prec * rec, prec + rec, out=np.zeros_like(diag, float), where=(prec + rec) > 0)

    macro_f1 = float(np.nanmean(f1))
    acc = float((y_pred == y_true).mean())
    return acc, macro_f1, preds

# Scan the full tau x omega grid and remember the best-scoring combination.
records = []
best = (-1.0, -1.0, None, None)  # (acc, macro_f1, tau, omega_max) of the current winner
best_preds = None
for t in tau_list:
    for o in omega_list:
        acc, mf1, preds = evaluate_hybrid(t, o)
        records.append({'tau': t, 'omega': o, 'acc': acc, 'macro_f1': mf1})
        # The selected metric decides; the other metric only breaks exact ties.
        if OPTIMIZE == 'macro_f1':
            better = (mf1 > best[1] or (mf1 == best[1] and acc > best[0]))
        else:
            better = (acc > best[0] or (acc == best[0] and mf1 > best[1]))
        if better:
            best = (acc, mf1, t, o)
            best_preds = preds

# Report the winner and store its predictions on the DataFrame.
df_res = pd.DataFrame.from_records(records)
acc, mf1, t, o = best
print(f"Best Optimizer -> acc={acc:.3f} | macroF1={mf1:.3f} | tau={t} | Ω_max={o} | OPT={OPTIMIZE}")
data['Pred_DS_final'] = best_preds

# Heatmap of the optimised metric over the grid (rows: tau, columns: omega).
if OPTIMIZE == 'macro_f1':
    metric = 'macro_f1'
else:
    metric = 'acc'
pivot = df_res.pivot(index='tau', columns='omega', values=metric)
plt.figure(figsize=(6, 4))
sns.heatmap(pivot.sort_index(), annot=True, fmt='.3f', cmap='viridis')
plt.title(f'Hybrid Grid {OPTIMIZE}')
plt.tight_layout()
plt.show()


In [None]:
# Final evaluation (confusion matrix + per-class P/R/F1 + baseline vs. final comparison)
import numpy as np, pandas as pd, seaborn as sns, matplotlib.pyplot as plt

if 'Pred_DS_final' not in data.columns:
    raise RuntimeError('Bitte zuerst den Optimierer ausführen (Pred_DS_final fehlt).')

# Only rows with a non-empty ground-truth label are evaluated.
df_eval = data[data['Bike Type'].notna() & (data['Bike Type']!='')].copy()

# Confusion matrix of the final predictions (rows: truth, columns: prediction).
cm_f = pd.crosstab(df_eval['Bike Type'], df_eval['Pred_DS_final']).reindex(
    index=['race bike','mtb','trecking bike'],
    columns=['race bike','mtb','trecking bike'],
    fill_value=0,
)
diag_f = np.diag(cm_f.values)
support_f = cm_f.sum(axis=1).values
pred_sum_f = cm_f.sum(axis=0).values
# Classes with a zero denominator get a score of 0 instead of NaN.
rec_f = np.divide(diag_f, support_f, out=np.zeros_like(diag_f, float), where=support_f>0)
prec_f = np.divide(diag_f, pred_sum_f, out=np.zeros_like(diag_f, float), where=pred_sum_f>0)
f1_f = np.divide(2*prec_f*rec_f, prec_f+rec_f, out=np.zeros_like(diag_f, float), where=(prec_f+rec_f)>0)
macro_f1_f = float(np.nanmean(f1_f))
acc_f = float((df_eval['Pred_DS_final'] == df_eval['Bike Type']).mean())

# Row-normalised heatmap; rows without any samples are shown as zeros.
plt.figure(figsize=(6,4))
sns.heatmap(cm_f.div(cm_f.sum(axis=1).replace(0,np.nan), axis=0).fillna(0), annot=True, fmt='.2f', cmap='Greens')
plt.title('Konfusionsmatrix Final (zeilennormiert)')
plt.xlabel('Vorhersage (final)')
plt.ylabel('Wahr')
plt.tight_layout()
plt.show()

print('Per‑Klasse (Final) P/R/F1:')
for cls, p,r,f,s in zip(cm_f.index, prec_f, rec_f, f1_f, support_f):
    print(f'{cls:14s}  P={p:.3f}  R={r:.3f}  F1={f:.3f}  (n={int(s)})')

# Baseline comparison: use baseline_metrics when the kernel has it, otherwise
# compute the baseline on the fly from the Pred_DS column.
if 'baseline_metrics' in globals():
    acc_b = float(baseline_metrics['accuracy'])
    f1_b = float(np.mean([v['f1'] for v in baseline_metrics['per_class'].values()]))
else:
    # Fail with an actionable message instead of a bare KeyError.
    if 'Pred_DS' not in df_eval.columns:
        raise RuntimeError('Baseline-Vergleich nicht möglich: weder baseline_metrics noch Spalte Pred_DS vorhanden.')
    cm_b = pd.crosstab(df_eval['Bike Type'], df_eval['Pred_DS']).reindex(
        index=['race bike','mtb','trecking bike'],
        columns=['race bike','mtb','trecking bike'],
        fill_value=0,
    )
    diag_b = np.diag(cm_b.values)
    support_b = cm_b.sum(axis=1).values
    pred_sum_b = cm_b.sum(axis=0).values
    rec_b = np.divide(diag_b, support_b, out=np.zeros_like(diag_b, float), where=support_b>0)
    prec_b = np.divide(diag_b, pred_sum_b, out=np.zeros_like(diag_b, float), where=pred_sum_b>0)
    f1_bv = np.divide(2*prec_b*rec_b, prec_b+rec_b, out=np.zeros_like(diag_b, float), where=(prec_b+rec_b)>0)
    f1_b = float(np.nanmean(f1_bv))
    acc_b = float((df_eval['Pred_DS'] == df_eval['Bike Type']).mean())

summary = pd.DataFrame({'Accuracy':[acc_b, acc_f],'Macro_F1':[f1_b, macro_f1_f]}, index=['Baseline','Final'])
display(summary.round(3))


In [None]:
# Visual: per-class bar comparison of baseline vs. final (precision / recall / F1)
import numpy as np, pandas as pd, matplotlib.pyplot as plt, seaborn as sns

# Only rows with a non-empty ground-truth label are evaluated.
df_eval = data[data['Bike Type'].notna() & (data['Bike Type']!='')].copy()

# Deliberately NOT named `classes`: the optimizer cell's evaluate_hybrid reads
# the global `classes` (the singleton keys 'r', 'm', 't') at call time, and
# rebinding it to the full label names would silently break later calls.
class_labels = ['race bike','mtb','trecking bike']

# Baseline: use baseline_metrics when the kernel has it, otherwise compute the
# metrics on the fly from the Pred_DS column.
if 'baseline_metrics' in globals():
    base_prec = [baseline_metrics['per_class'][c]['precision'] for c in class_labels]
    base_rec  = [baseline_metrics['per_class'][c]['recall']    for c in class_labels]
    base_f1   = [baseline_metrics['per_class'][c]['f1']       for c in class_labels]
else:
    # Fail with an actionable message instead of a bare KeyError.
    if 'Pred_DS' not in df_eval.columns:
        raise RuntimeError('Baseline-Vergleich nicht möglich: weder baseline_metrics noch Spalte Pred_DS vorhanden.')
    cm_b = pd.crosstab(df_eval['Bike Type'], df_eval['Pred_DS']).reindex(index=class_labels, columns=class_labels, fill_value=0)
    diag_b = np.diag(cm_b.values)
    support_b = cm_b.sum(axis=1).values
    pred_sum_b = cm_b.sum(axis=0).values
    # Classes with a zero denominator get a score of 0 instead of NaN.
    base_rec = np.divide(diag_b, support_b, out=np.zeros_like(diag_b, float), where=support_b>0)
    base_prec = np.divide(diag_b, pred_sum_b, out=np.zeros_like(diag_b, float), where=pred_sum_b>0)
    base_f1 = np.divide(2*base_prec*base_rec, base_prec+base_rec, out=np.zeros_like(diag_b, float), where=(base_prec+base_rec)>0)

# Final: metrics of the optimizer's predictions.
cm_f = pd.crosstab(df_eval['Bike Type'], df_eval['Pred_DS_final']).reindex(index=class_labels, columns=class_labels, fill_value=0)
diag_f = np.diag(cm_f.values)
support_f = cm_f.sum(axis=1).values
pred_sum_f = cm_f.sum(axis=0).values
fin_rec = np.divide(diag_f, support_f, out=np.zeros_like(diag_f, float), where=support_f>0)
fin_prec = np.divide(diag_f, pred_sum_f, out=np.zeros_like(diag_f, float), where=pred_sum_f>0)
fin_f1 = np.divide(2*fin_prec*fin_rec, fin_prec+fin_rec, out=np.zeros_like(diag_f, float), where=(fin_prec+fin_rec)>0)

# One panel per metric, with baseline and final bars side by side per class.
fig, axes = plt.subplots(1, 3, figsize=(12,4), sharey=False)
metrics = [('Precision', base_prec, fin_prec), ('Recall', base_rec, fin_rec), ('F1', base_f1, fin_f1)]
x = np.arange(len(class_labels))
width = 0.38
for ax, (title, base_vals, fin_vals) in zip(axes, metrics):
    ax.bar(x - width/2, base_vals, width, label='Baseline')
    ax.bar(x + width/2, fin_vals,  width, label='Final')
    ax.set_title(title)
    ax.set_xticks(x)
    ax.set_xticklabels(class_labels, rotation=15)
    ax.set_ylim(0,1)
    ax.grid(True, axis='y', alpha=0.3)
axes[0].set_ylabel('Wert')
axes[-1].legend(loc='lower right')
plt.tight_layout()
plt.show()


In [None]:
# Visual: scatter of Distance vs. Elevation Gain
# (green = correct, red = wrong; marker shape = true class)
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D

# Only rows with a non-empty ground-truth label are plotted.
df_eval = data[data['Bike Type'].notna() & (data['Bike Type']!='')].copy()
df_eval['CorrectFinal'] = (df_eval['Pred_DS_final'] == df_eval['Bike Type'])
markers = {'race bike':'o','mtb':'s','trecking bike':'^'}

plt.figure(figsize=(7,5))
legend_handles = []
for truth, sub in df_eval.groupby('Bike Type'):
    marker = markers.get(truth,'o')
    plt.scatter(sub['Distance'], sub['Elevation Gain'],
                c=sub['CorrectFinal'].map({True:'tab:green',False:'tab:red'}),
                marker=marker, alpha=0.8, edgecolor='k', linewidths=0.2)
    # Each scatter collection mixes green and red points, so a legend entry
    # derived from it would show one arbitrary point colour. A neutral proxy
    # handle makes the legend encode only the marker shape (the true class).
    legend_handles.append(
        Line2D([], [], linestyle='None', marker=marker,
               markerfacecolor='lightgray', markeredgecolor='k', label=truth)
    )
plt.xlabel('Distance')
plt.ylabel('Elevation Gain')
plt.title('Final: richtig (grün) vs. falsch (rot)')
plt.grid(True, alpha=0.3)
plt.legend(handles=legend_handles, title='Wahre Klasse')
plt.tight_layout()
plt.show()
