# CSIRO Biomass – Inference Notebook

This notebook blends precomputed pillar submissions (SigLIP, DINO, MVP, Dinov2) into the final `submission.csv`.

**Usage**
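1. Attach the dataset that contains the four pillar CSVs. Each CSV must provide `sample_id` and `target` columns, with rows in the same `sample_id` order across all four files.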
2. Update the file paths in the next cell if your dataset uses different names.
3. Run all cells – no training is performed here, so scoring should complete quickly.

In [5]:
from pathlib import Path
import numpy as np
import pandas as pd

# --- Pillar submission CSVs -------------------------------------------------
# One precomputed submission file per model. Edit the literals below if the
# attached dataset uses a different folder or different file names.
SIGLIP_PATH, DINO_PATH, MVP_PATH, DINOV2_PATH = map(Path, (
    '/kaggle/input/csiro-pillars-run5/submission_siglip.csv',
    '/kaggle/input/csiro-pillars-run5/submission_dino.csv',
    '/kaggle/input/csiro-pillars-run5/submission_mvp.csv',
    '/kaggle/input/csiro-pillars-run5/submission_dinov2.csv',
))

# --- Blend weights, ordered SigLIP / DINO / MVP / Dinov2 --------------------
# SigLIP-heavy by default. The values are rescaled to sum to 1 before blending.
WEIGHTS = np.array((0.60, 0.20, 0.10, 0.10), dtype=np.float64)

# --- Optional clipping bounds for the blended target ------------------------
# A bound set to None is not applied.
CLIP_MIN, CLIP_MAX = 0.0, None

print('Configured weights (unnormalized):', WEIGHTS)


Configured weights (unnormalized): [0.6 0.2 0.1 0.1]


In [6]:
# Columns every pillar submission must contain; all others are discarded.
REQUIRED_COLS = ('sample_id', 'target')


def load_submission(path: Path) -> pd.DataFrame:
    """Read one pillar submission CSV and keep only the required columns.

    Parameters
    ----------
    path : Path
        Location of the CSV file to read.

    Returns
    -------
    pd.DataFrame
        A copy holding exactly the ``REQUIRED_COLS`` columns, in that order.

    Raises
    ------
    FileNotFoundError
        If ``path`` does not exist.
    ValueError
        If the CSV lacks one or more of ``REQUIRED_COLS``.
    """
    if not path.exists():
        raise FileNotFoundError(f'Missing submission file: {path}')

    frame = pd.read_csv(path)

    # Collect every absent column so the error reports them all at once.
    absent = []
    for column in REQUIRED_COLS:
        if column not in frame.columns:
            absent.append(column)
    if absent:
        raise ValueError(f'{path} missing column(s): {absent}')

    wanted = list(REQUIRED_COLS)
    return frame.loc[:, wanted].copy()

# --- Validate and normalise the blend weights -------------------------------
# Pillar order here must match the order of WEIGHTS (SigLIP / DINO / MVP / Dinov2).
pillar_names = ('SigLIP', 'DINO', 'MVP', 'Dinov2')

# np.array(..., dtype=float64) always copies, so the in-place division below
# never mutates the WEIGHTS constant, and WEIGHTS may also be a plain list/tuple.
weights = np.array(WEIGHTS, dtype=np.float64)
if weights.shape != (len(pillar_names),):
    # Fail before any file is read instead of with an opaque tensordot shape error.
    raise ValueError(
        f'Expected {len(pillar_names)} weights ({" / ".join(pillar_names)}), '
        f'got shape {weights.shape}.'
    )
if not np.all(np.isfinite(weights)):
    # NaN compares False against both `< 0` and `<= 0`, so without this check a
    # NaN weight would pass validation and produce an all-NaN submission.
    raise ValueError('Weights must be finite numbers.')
if np.any(weights < 0):
    raise ValueError('Weights must be non-negative.')
total = weights.sum()
if total <= 0:
    raise ValueError('Weights sum to zero; provide positive weights.')
weights /= total
print('Normalized weights:', weights.round(4))

# --- Load the four pillar submissions ---------------------------------------
siglip_df = load_submission(SIGLIP_PATH)
dino_df = load_submission(DINO_PATH)
mvp_df = load_submission(MVP_PATH)
dinov2_df = load_submission(DINOV2_PATH)
pillar_frames = (siglip_df, dino_df, mvp_df, dinov2_df)

# --- Check that every pillar lists the same sample_ids in the same order ----
# Blending is positional, so SigLIP's sample_id column is the reference.
id_column = siglip_df['sample_id']
for name, pillar_df in zip(pillar_names[1:], pillar_frames[1:]):
    if not pillar_df['sample_id'].equals(id_column):
        raise ValueError(f'{name} submission sample_id ordering does not match SigLIP.')

# --- Stack predictions: one row per pillar, one column per sample -----------
stacked = np.stack(
    [frame['target'].to_numpy(dtype=np.float64) for frame in pillar_frames],
    axis=0,
)

# NaN/inf predictions would otherwise pass through the weighted sum and
# np.clip unchanged and be written to the submission file unnoticed.
nonfinite_counts = np.count_nonzero(~np.isfinite(stacked), axis=1)
if nonfinite_counts.any():
    offenders = ', '.join(
        f'{pillar} ({count})'
        for pillar, count in zip(pillar_names, nonfinite_counts)
        if count
    )
    raise ValueError(f'Non-finite target predictions found in: {offenders}')

# --- Weighted average across pillars, then optional clipping ----------------
blended = np.tensordot(weights, stacked, axes=(0, 0))
if CLIP_MIN is not None or CLIP_MAX is not None:
    blended = np.clip(blended, CLIP_MIN, CLIP_MAX)

# --- Write the final submission ---------------------------------------------
submission = pd.DataFrame({'sample_id': id_column, 'target': blended})
submission.to_csv('submission.csv', index=False)
print(f'Saved blended submission with {len(submission):,} rows to submission.csv')
submission.head()


Normalized weights: [0.6 0.2 0.1 0.1]
Saved blended submission with 5 rows to submission.csv


Unnamed: 0,sample_id,target
0,ID1001187975__Dry_Clover_g,3.5572
1,ID1001187975__Dry_Dead_g,25.919768
2,ID1001187975__Dry_Green_g,26.357856
3,ID1001187975__Dry_Total_g,55.834824
4,ID1001187975__GDM_g,29.915056
