# 05 — Ensemble & final submission

This notebook:
- loads two (or more) submission files from `artifacts/submissions/`
- blends probabilities
- writes final submission

(Blending often improves logloss slightly and is a nice final touch.)

In [None]:
from pathlib import Path
import pandas as pd
import numpy as np

from src.config import Paths, ID_COL

paths = Paths()
subs_dir = paths.submissions
print("Looking in:", subs_dir)

# Candidate submission files (adjust names if needed).
# The LAMA submission may have been saved under either the "a" or "b" name.
sub_custom = subs_dir / "submission_custom_best.csv"
sub_lama = subs_dir / "submission_lama_a.csv"
sub_lama_alt = subs_dir / "submission_lama_b.csv"

# Both inputs must exist before anything is read.
assert sub_custom.exists(), f"Missing: {sub_custom} (run notebook 04 first)"
assert sub_lama.exists() or sub_lama_alt.exists(), "Run notebook 03 first (LAMA submission)."

# Prefer the "a" file; fall back to "b" when "a" is absent.
sub_lama = sub_lama if sub_lama.exists() else sub_lama_alt

s1, s2 = pd.read_csv(sub_lama), pd.read_csv(sub_custom)

# Every column except the ID column is blended; both files must list
# the same columns in the same order.
labels = [col for col in s1.columns if col != ID_COL]
custom_labels = [col for col in s2.columns if col != ID_COL]
assert labels == custom_labels, "Submission columns mismatch"

print("Blend labels:", labels)

In [None]:
# Simple weighted blend: blend = w_lama * LAMA + w_custom * custom
w_custom = 0.65
w_lama = 1.0 - w_custom

# The weighted sum below is positional, so row i of both submissions must
# describe the same ID. When the ID columns already match row-for-row the
# frames are used as they are; otherwise the custom submission is realigned
# to the LAMA row order by ID, so predictions of different samples are never
# mixed silently.
lama_probs = s1[labels].values
if np.array_equal(s1[ID_COL].values, s2[ID_COL].values):
    custom_probs = s2[labels].values
else:
    assert s1[ID_COL].is_unique and s2[ID_COL].is_unique, (
        "Submission IDs are not aligned and contain duplicates; cannot realign by ID"
    )
    assert len(s1) == len(s2) and set(s1[ID_COL]) == set(s2[ID_COL]), (
        "Submissions cover different sets of IDs"
    )
    # Reorder the custom rows to follow the LAMA ID order.
    custom_probs = s2.set_index(ID_COL).loc[s1[ID_COL].values, labels].values

blend = pd.DataFrame({ID_COL: s1[ID_COL].values})
blend[labels] = w_lama * lama_probs + w_custom * custom_probs

# Normalize to sum=1 (safety). A non-positive row sum would turn into
# NaN/inf after the division, so fail loudly instead of writing it out.
row_sums = blend[labels].sum(axis=1)
assert (row_sums > 0).all(), "Blended row has non-positive probability mass"
blend[labels] = blend[labels].div(row_sums, axis=0)

# The custom-model weight is encoded in the file name.
out_path = paths.submissions / f"submission_blend_{w_custom:.2f}_custom.csv"
blend.to_csv(out_path, index=False)
print("Saved:", out_path)
blend.head()