# CSIRO Biomass – Inference Notebook

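This notebook blends four precomputed pillar submissions (SigLIP, DINO, MVP, Dinov2) into the final `submission.csv`. Each pillar's `target` predictions are aligned on `sample_id` to `sample_submission.csv` and combined as a weighted average.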

**Usage**
1. Attach the dataset that contains the four pillar CSVs.
2. Update the file paths in the next cell if your dataset uses different names.
3. Run all cells – no training is performed here, so scoring should complete quickly.

In [None]:
from pathlib import Path
import numpy as np
import pandas as pd

# === Configure pillar submission paths ===
# Directory of the attached dataset that holds the four pillar CSVs. If your
# dataset is mounted under a different name, this is the only line to edit;
# adjust the individual file names below only if they differ as well.
PILLAR_DIR = Path('/kaggle/input/csiro-pillars-run5')
SIGLIP_PATH = PILLAR_DIR / 'submission_siglip.csv'
DINO_PATH   = PILLAR_DIR / 'submission_dino.csv'
MVP_PATH    = PILLAR_DIR / 'submission_mvp.csv'
DINOV2_PATH = PILLAR_DIR / 'submission_dinov2.csv'
# Template whose `sample_id` column defines the rows of the final submission.
SAMPLE_SUB_PATH = Path('/kaggle/input/csiro-biomass/sample_submission.csv')

# Default SigLIP-heavy weights, in the order SigLIP / DINO / MVP / Dinov2.
# They do not need to sum to 1: the blending cell normalizes them first.
WEIGHTS = np.array([0.60, 0.20, 0.10, 0.10], dtype=np.float64)

# Optional clipping of the blended target predictions.
# Set a bound to None to leave that side unclipped.
CLIP_MIN = 0.0
CLIP_MAX = None

print('Configured weights (unnormalized):', WEIGHTS)


In [None]:
# Columns every pillar submission must provide; any others are dropped.
SAMPLE_COLS = ('sample_id', 'target')


def load_submission(path: Path) -> pd.DataFrame:
    """Load one pillar submission CSV in a canonical, merge-ready form.

    The returned frame contains exactly the ``SAMPLE_COLS`` columns, is
    sorted by ``sample_id`` and has a fresh ``0..n-1`` index.

    Args:
        path: Location of the submission CSV.

    Returns:
        A new DataFrame with columns ``sample_id`` and ``target``.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        ValueError: If a required column is absent, or if any ``sample_id``
            occurs more than once.
    """
    if not path.exists():
        raise FileNotFoundError(f"Missing submission file: {path}")
    df = pd.read_csv(path)
    missing = [c for c in SAMPLE_COLS if c not in df.columns]
    if missing:
        raise ValueError(f"{path} missing column(s): {missing}")
    df = df[list(SAMPLE_COLS)]

    # Fail fast on duplicate ids: otherwise they only surface later as a
    # generic one-to-one merge error that names neither the file nor the ids.
    dup_mask = df['sample_id'].duplicated()
    if dup_mask.any():
        dup_ids = df.loc[dup_mask, 'sample_id'].drop_duplicates().head().tolist()
        raise ValueError(f"{path} contains duplicate sample_id(s): {dup_ids}")

    # sort_values returns a new frame, so the caller never gets a view of `df`.
    return df.sort_values('sample_id', ignore_index=True)

# === Blend the pillar submissions into submission.csv ===
# Uses WEIGHTS, the *_PATH constants, CLIP_MIN / CLIP_MAX and
# load_submission() from the cells above.

# Pillar labels, in the same order as the entries of WEIGHTS.
pillar_names = ('SigLIP', 'DINO', 'MVP', 'Dinov2')

# np.array always copies, so the in-place normalization below never mutates
# WEIGHTS; it also accepts a plain list or tuple.
weights = np.array(WEIGHTS, dtype=np.float64)
if weights.shape != (len(pillar_names),):
    raise ValueError(
        f"Expected {len(pillar_names)} weights (one per pillar: {', '.join(pillar_names)}), "
        f"got shape {weights.shape}."
    )
# NaN slips through both the `< 0` and the `sum() <= 0` checks and would turn
# every blended prediction into NaN, so reject non-finite weights explicitly.
if not np.all(np.isfinite(weights)):
    raise ValueError('Weights must be finite (no NaN or inf).')
if np.any(weights < 0):
    raise ValueError('Weights must be non-negative.')
if weights.sum() <= 0:
    raise ValueError('Weights sum to zero; provide positive weights.')
weights /= weights.sum()
print('Normalized weights:', weights.round(4))

siglip_df = load_submission(SIGLIP_PATH)
dino_df = load_submission(DINO_PATH)
mvp_df = load_submission(MVP_PATH)
dinov2_df = load_submission(DINOV2_PATH)

# The template's ids define which rows the final submission contains.
sample_template = pd.read_csv(SAMPLE_SUB_PATH)[['sample_id']].copy()
# NOTE(review): sorting changes the row order relative to sample_submission.csv;
# confirm the scorer matches rows on sample_id rather than on position.
sample_template.sort_values('sample_id', inplace=True, ignore_index=True)
if sample_template['sample_id'].duplicated().any():
    raise ValueError('sample_submission contains duplicate sample_ids.')

pillar_arrays = []
for name, pillar_df in zip(pillar_names, (siglip_df, dino_df, mvp_df, dinov2_df)):
    # A left merge keeps exactly the template's rows, in template order;
    # pillar ids that are not in the template are dropped.
    try:
        aligned = sample_template.merge(pillar_df, on='sample_id', how='left', validate='one_to_one')
    except pd.errors.MergeError as err:
        # MergeError subclasses ValueError; re-raise with the pillar name attached.
        raise ValueError(
            f"{name} submission could not be aligned one-to-one on sample_id: {err}"
        ) from err
    # NaN here means the id is absent from the pillar file or its target is empty.
    if aligned['target'].isna().any():
        missing_ids = aligned.loc[aligned['target'].isna(), 'sample_id'].head().tolist()
        raise ValueError(f"{name} submission missing predictions for ids: {missing_ids}")
    pillar_arrays.append(aligned['target'].to_numpy(dtype=np.float64))

# Shape (n_pillars, n_samples); contracting axis 0 with the weights gives the
# per-sample weighted average.
stacked = np.stack(pillar_arrays, axis=0)

blended = np.tensordot(weights, stacked, axes=(0, 0))
if CLIP_MIN is not None or CLIP_MAX is not None:
    # A bound left as None is not applied on that side.
    blended = np.clip(blended, CLIP_MIN, CLIP_MAX)

submission = sample_template.copy()
submission['target'] = blended.astype(np.float32)
submission.to_csv('submission.csv', index=False)
print(f"Saved blended submission with {len(submission):,} rows to submission.csv")
print(submission.head())
