# CSIRO Image2Biomass - V6: Ensemble (V4 Baseline + V5 Depth Fusion)

Combines two diverse models:
- **V4**: EfficientNetV2-M baseline (scored 0.50)
- **V5**: RGB+Depth Fusion with external data (scored 0.57)

Optimized for speed: the depth model is loaded once, and images and depth maps are pre-computed for every TTA view before the fold models run.

## Setup
1. Add the model dataset (`ensemble-v4-v5`) and the competition data as notebook inputs
2. **Set Internet to OFF**
3. Run all cells

In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from pathlib import Path
from tqdm import tqdm
import timm
import albumentations as A
from albumentations.pytorch import ToTensorV2
from transformers import AutoModelForDepthEstimation
import gc

# Report the PyTorch build and whether a CUDA device is visible to it.
print(f"PyTorch: {torch.__version__}\nCUDA: {torch.cuda.is_available()}")

In [None]:
# Targets predicted for every image, and the number of fold checkpoints per model
TARGET_NAMES = ['Dry_Clover_g', 'Dry_Dead_g', 'Dry_Green_g', 'Dry_Total_g', 'GDM_g']
N_FOLDS = 5

# Competition inputs
TRAIN_CSV = '/kaggle/input/csiro-biomass/train.csv'
TEST_CSV = '/kaggle/input/csiro-biomass/test.csv'
TEST_IMG_DIR = '/kaggle/input/csiro-biomass/test'

# Attached model dataset: one sub-directory per ensemble member plus the depth estimator
MODEL_BASE = '/kaggle/input/ensemble-v4-v5/pytorch/default/1'
V4_PATH = MODEL_BASE + '/v4_baseline'
V5_PATH = MODEL_BASE + '/v5_depth_fusion'
DEPTH_MODEL_PATH = MODEL_BASE + '/depth_anything_v2_small'

# Blend weights (tunable); the depth-fusion model gets the larger share
# because it scored higher on its own
V5_WEIGHT = 0.6  # Depth fusion
V4_WEIGHT = 0.4  # Baseline

# Inference settings
IMAGE_SIZE = 384  # Use 384 for depth model compatibility
BATCH_SIZE = 4
USE_TTA = True
TTA_ROTATIONS = False  # Rotations are skipped to save time

# Per-channel statistics passed to the Normalize transform
IMAGENET_STD = [0.229, 0.224, 0.225]
IMAGENET_MEAN = [0.485, 0.456, 0.406]

# Run on the GPU when one is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'
print(f"Device: {DEVICE}")
print(f"Ensemble weights: V4={V4_WEIGHT}, V5={V5_WEIGHT}")

## Model Definitions

In [None]:
class V4BaselineModel(nn.Module):
    """V4 regressor: a timm image backbone feeding one small MLP head per target.

    Args:
        backbone: timm architecture name. The network is created without
            pretrained weights and without a classifier, so it returns
            average-pooled features.
        dropout: Dropout probability used inside every head.
        head_hidden_dim: Width of the hidden layer of every head.
    """

    def __init__(self, backbone='tf_efficientnetv2_m', dropout=0.5, head_hidden_dim=512):
        super().__init__()
        self.backbone = timm.create_model(
            backbone,
            pretrained=False,
            num_classes=0,
            global_pool='avg',
        )
        feature_dim = self.backbone.num_features

        # One independent head per target. The layer order inside each head
        # determines the state-dict keys (heads.<target>.0 / .3), so it must
        # stay as is for saved checkpoints to load.
        head_modules = {}
        for target in TARGET_NAMES:
            head_modules[target] = nn.Sequential(
                nn.Linear(feature_dim, head_hidden_dim),
                nn.ReLU(),
                nn.Dropout(dropout),
                nn.Linear(head_hidden_dim, 1),
            )
        self.heads = nn.ModuleDict(head_modules)

    def forward(self, x):
        """Return ``{target_name: tensor}`` with the trailing size-1 dim squeezed."""
        pooled = self.backbone(x)
        outputs = {}
        for target in TARGET_NAMES:
            outputs[target] = self.heads[target](pooled).squeeze(-1)
        return outputs


class SharedDepthEstimator(nn.Module):
    """Frozen depth-estimation network that returns per-image min-max scaled depth.

    Args:
        model_path: Location handed to
            ``AutoModelForDepthEstimation.from_pretrained``.
    """

    def __init__(self, model_path):
        super().__init__()
        self.model = AutoModelForDepthEstimation.from_pretrained(model_path)
        # Inference only: switch to eval mode and freeze every weight.
        self.model.eval()
        for weight in self.model.parameters():
            weight.requires_grad = False

    @torch.no_grad()
    def forward(self, images):
        """Estimate depth for a 4-D image batch.

        The model's ``predicted_depth`` is given a channel dimension,
        bilinearly resized to the input's height and width, and then rescaled
        per sample with its own minimum and maximum.

        Returns:
            Tensor of shape ``(B, 1, H, W)`` where ``B, H, W`` come from
            ``images.shape``.
        """
        _, _, height, width = images.shape
        raw_depth = self.model(images).predicted_depth
        depth = F.interpolate(
            raw_depth.unsqueeze(1),
            size=(height, width),
            mode='bilinear',
            align_corners=False,
        )

        # Per-sample extrema, kept broadcastable against (B, 1, H, W).
        lowest = depth.amin(dim=(1, 2, 3), keepdim=True)
        highest = depth.amax(dim=(1, 2, 3), keepdim=True)
        # The epsilon avoids a division by zero on a constant depth map.
        return (depth - lowest) / (highest - lowest + 1e-8)


class V5DepthFusionModel(nn.Module):
    """V5 regressor: fuses RGB backbone features with features of a depth map.

    The depth map is an input to ``forward``; this module contains no depth
    estimator of its own.

    Args:
        rgb_backbone: timm architecture name for the RGB encoder (created
            without pretrained weights, returning average-pooled features).
        dropout: Dropout probability used in the depth encoder and the heads.
    """

    def __init__(self, rgb_backbone='efficientnetv2_rw_m', dropout=0.3):
        super().__init__()
        self.rgb_encoder = timm.create_model(
            rgb_backbone,
            pretrained=False,
            num_classes=0,
            global_pool='avg',
        )
        rgb_dim = self.rgb_encoder.num_features
        depth_dim = 256

        # Small CNN over the single-channel depth map. The position of each
        # layer in this list fixes its state-dict key (depth_encoder.<index>).
        depth_layers = [
            nn.Conv2d(1, 32, kernel_size=7, stride=2, padding=3),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
            nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.AdaptiveAvgPool2d(1),
            nn.Flatten(),
            nn.Linear(128, depth_dim),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout),
        ]
        self.depth_encoder = nn.Sequential(*depth_layers)

        # One three-layer head per target on top of the concatenated features.
        fused_dim = rgb_dim + depth_dim
        self.heads = nn.ModuleDict()
        for target in TARGET_NAMES:
            self.heads[target] = nn.Sequential(
                nn.Linear(fused_dim, 256),
                nn.ReLU(inplace=True),
                nn.Dropout(dropout),
                nn.Linear(256, 64),
                nn.ReLU(inplace=True),
                nn.Linear(64, 1),
            )

    def forward(self, images, depth_maps):
        """Return ``{target_name: tensor}`` with the trailing size-1 dim squeezed."""
        fused = torch.cat(
            (self.rgb_encoder(images), self.depth_encoder(depth_maps)),
            dim=1,
        )
        outputs = {}
        for target in TARGET_NAMES:
            outputs[target] = self.heads[target](fused).squeeze(-1)
        return outputs

print("Models defined")

## Dataset and Transforms

In [None]:
class BiomassTestDataset(Dataset):
    """Test-set dataset that yields one item per unique image.

    The CSV has one row per ``sample_id``; the part of ``sample_id`` before
    ``'__'`` is taken as the image id, and the first ``image_path`` seen for
    each image id is used to locate the file.

    Args:
        csv_path: CSV with at least ``sample_id`` and ``image_path`` columns.
        img_dir: Directory containing the image files.
        transform: Optional callable invoked as ``transform(image=array)``
            that returns a mapping with an ``'image'`` entry.
    """

    def __init__(self, csv_path, img_dir, transform=None):
        self.img_dir = Path(img_dir)
        self.transform = transform
        self.df = pd.read_csv(csv_path)
        self.df['image_id'] = self.df['sample_id'].str.split('__').str[0]
        # unique() keeps first-appearance order, which defines the item order.
        self.image_ids = self.df['image_id'].unique()
        self.image_paths = self.df.groupby('image_id')['image_path'].first().to_dict()

    def __len__(self):
        """Number of unique images."""
        return len(self.image_ids)

    def __getitem__(self, idx):
        """Load image ``idx`` as RGB and return ``{'image': ..., 'image_id': ...}``."""
        image_id = self.image_ids[idx]
        # Only the file name from the CSV path is used; the directory comes from img_dir.
        image_path = self.img_dir / Path(self.image_paths[image_id]).name
        # The context manager closes the file handle deterministically instead
        # of leaving it to the garbage collector.
        with Image.open(image_path) as img:
            image = np.array(img.convert('RGB'))
        # Compare against None explicitly: a transform object may define
        # __len__ and therefore be falsy even though it was supplied.
        if self.transform is not None:
            image = self.transform(image=image)['image']
        return {'image': image, 'image_id': image_id}

def get_tta_transforms(image_size, include_rotations=False):
    """Build the list of test-time-augmentation pipelines.

    One pipeline is created per (flip setting, rotation angle) pair. Every
    pipeline resizes to ``image_size`` x ``image_size``, applies its fixed
    flips/rotation, normalises with ``IMAGENET_MEAN``/``IMAGENET_STD`` and
    converts to a tensor.

    Args:
        image_size: Side length of the square resize.
        include_rotations: When true, each flip setting is also emitted with
            a 90-degree rotation, doubling the number of pipelines.

    Returns:
        List of ``A.Compose`` objects; the first is the un-augmented view.
    """
    angles = [0, 90] if include_rotations else [0]
    flip_settings = [(False, False), (True, False), (False, True), (True, True)]

    pipelines = []
    for use_hflip, use_vflip in flip_settings:
        for degrees in angles:
            steps = [A.Resize(image_size, image_size)]
            # p=1.0 makes each augmentation deterministic for its view.
            if use_hflip:
                steps.append(A.HorizontalFlip(p=1.0))
            if use_vflip:
                steps.append(A.VerticalFlip(p=1.0))
            if degrees != 0:
                steps.append(A.Rotate(limit=(degrees, degrees), p=1.0, border_mode=0))
            steps.append(A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD))
            steps.append(ToTensorV2())
            pipelines.append(A.Compose(steps))
    return pipelines

tta_transforms = get_tta_transforms(IMAGE_SIZE, include_rotations=TTA_ROTATIONS)
print(f"Using {len(tta_transforms)} TTA transforms")

## Load Target Statistics

In [None]:
# Reshape the long-format training table (one row per image/target pair)
# into one row per image with one column per target.
train_df = pd.read_csv(TRAIN_CSV)
train_df['image_id'] = train_df['sample_id'].str.split('__').str[0]
train_wide = train_df.pivot_table(index='image_id', columns='target_name', values='target', aggfunc='first')

# Per-target mean/std used to map model outputs back to grams.
# NOTE(review): presumably these must match the statistics used to normalise
# targets at training time — confirm against the training notebook.
target_stats = {}
for name in TARGET_NAMES:
    # pivot_table leaves NaN where an image has no row for this target;
    # np.mean/np.std would propagate that NaN into every prediction.
    values = train_wide[name].dropna().values
    if values.size == 0:
        raise ValueError(f"No training values found for target '{name}' in {TRAIN_CSV}")
    # np.std uses its default (population) definition; the epsilon keeps the
    # scale strictly positive for a constant target.
    target_stats[name] = {'mean': float(np.mean(values)), 'std': float(np.std(values)) + 1e-8}

def denormalize(pred_dict, stats):
    """Undo standardisation: ``value * std + mean`` for every entry.

    Args:
        pred_dict: Mapping of target name to a standardised prediction.
        stats: Mapping of target name to a dict with ``'mean'`` and ``'std'``.

    Returns:
        New dict with the same keys as ``pred_dict`` and rescaled values.
    """
    restored = {}
    for target, scaled in pred_dict.items():
        target_stat = stats[target]
        restored[target] = (scaled * target_stat['std']) + target_stat['mean']
    return restored

print("Target stats loaded")

## Pre-compute Data for All TTA Transforms

In [None]:
# Load the depth model a single time; both ensemble members reuse its output.
print("Loading shared depth model...")
depth_model = SharedDepthEstimator(DEPTH_MODEL_PATH).to(DEVICE)

# For every TTA view, cache the transformed image and its depth map on the CPU
# so the fold models can be run later without the depth model in memory.
print("\nPre-computing images and depth maps...")
all_data = {}  # {tta_idx: {image_id: {'image': tensor, 'depth': tensor}}}

for tta_idx, transform in enumerate(tta_transforms):
    print(f"  TTA {tta_idx + 1}/{len(tta_transforms)}...")

    dataset = BiomassTestDataset(TEST_CSV, TEST_IMG_DIR, transform=transform)
    loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

    tta_data = {}
    with torch.no_grad():
        for batch in loader:
            images = batch['image'].to(DEVICE)
            image_ids = batch['image_id']
            depth_maps = depth_model(images)

            # One device-to-host copy per batch instead of one per image.
            images_cpu = images.cpu()
            depth_cpu = depth_maps.cpu()

            # Keep a leading batch dimension of 1 so each entry can be fed
            # straight to a model.
            for i, img_id in enumerate(image_ids):
                tta_data[img_id] = {
                    'image': images_cpu[i:i+1],
                    'depth': depth_cpu[i:i+1]
                }

    all_data[tta_idx] = tta_data

# Free the depth model. Every reference to device tensors has to be dropped
# and collected first; empty_cache() can only return memory that is no longer
# referenced.
del depth_model
images = depth_maps = None
gc.collect()
torch.cuda.empty_cache()

# Insertion order of the first TTA view defines the image order used downstream.
image_ids_order = list(all_data[0].keys())
print(f"\nPre-computed data for {len(image_ids_order)} images x {len(tta_transforms)} TTA")

## Generate V4 Predictions (Baseline)

In [None]:
print("="*60)
print("V4: Baseline EfficientNetV2-M")
print("="*60)

# Running sum of denormalised predictions per target and image; divided by
# the number of (fold, TTA view) pairs at the end.
v4_predictions = {name: {img_id: 0.0 for img_id in image_ids_order} for name in TARGET_NAMES}

for fold_idx in range(N_FOLDS):
    print(f"\nFold {fold_idx + 1}/{N_FOLDS}...")

    # Load model
    model = V4BaselineModel().to(DEVICE)
    # Deserialise onto the CPU: load_state_dict copies the tensors into the
    # model's own (device) parameters, so nothing else in the checkpoint
    # needs to occupy GPU memory.
    # NOTE: weights_only=False unpickles arbitrary objects; acceptable only
    # because the checkpoint comes from the notebook's own model dataset.
    checkpoint = torch.load(f'{V4_PATH}/fold_{fold_idx}/best_model.pth', map_location='cpu', weights_only=False)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()
    print(f"  Val Loss: {checkpoint['best_val_loss']:.4f}")
    # The checkpoint is no longer needed once the weights are copied.
    del checkpoint

    # Generate predictions
    with torch.no_grad():
        for tta_idx in range(len(tta_transforms)):
            for img_id in image_ids_order:
                images = all_data[tta_idx][img_id]['image'].to(DEVICE)
                pred = model(images)
                # Each cached entry holds a single image, hence index 0.
                pred_denorm = denormalize({n: pred[n][0].item() for n in TARGET_NAMES}, target_stats)

                for name in TARGET_NAMES:
                    v4_predictions[name][img_id] += pred_denorm[name]

    del model
    torch.cuda.empty_cache()

# Average across folds and TTA
n_total = N_FOLDS * len(tta_transforms)
for name in TARGET_NAMES:
    for img_id in image_ids_order:
        v4_predictions[name][img_id] /= n_total

print("\nV4 predictions complete")

## Generate V5 Predictions (Depth Fusion)

In [None]:
print("="*60)
print("V5: RGB+Depth Fusion")
print("="*60)

# Running sum of denormalised predictions per target and image; divided by
# the number of (fold, TTA view) pairs at the end.
v5_predictions = {name: {img_id: 0.0 for img_id in image_ids_order} for name in TARGET_NAMES}

for fold_idx in range(N_FOLDS):
    print(f"\nFold {fold_idx + 1}/{N_FOLDS}...")

    # Load model
    model = V5DepthFusionModel().to(DEVICE)
    # Deserialise onto the CPU; load_state_dict copies into the device parameters.
    # NOTE: weights_only=False unpickles arbitrary objects; acceptable only
    # because the checkpoint comes from the notebook's own model dataset.
    checkpoint = torch.load(f'{V5_PATH}/fold_{fold_idx}/best_model.pth', map_location='cpu', weights_only=False)

    # Load only the checkpoint entries this lightweight model has a slot for
    # (the original note says the extra entries are the depth_estimator).
    state_dict = checkpoint['model_state_dict']
    expected_keys = set(model.state_dict())
    compatible = {key: value for key, value in state_dict.items() if key in expected_keys}
    load_result = model.load_state_dict(compatible, strict=False)
    # A parameter absent from the checkpoint stays at its random
    # initialisation; surface that instead of predicting silently with it.
    if load_result.missing_keys:
        print(f"  WARNING: {len(load_result.missing_keys)} model entries not found in checkpoint "
              f"(left at initial values), e.g. {load_result.missing_keys[:3]}")
    model.eval()
    print(f"  Val Loss: {checkpoint['best_val_loss']:.4f}")
    # The checkpoint is no longer needed once the weights are copied.
    del checkpoint, state_dict, compatible

    # Generate predictions
    with torch.no_grad():
        for tta_idx in range(len(tta_transforms)):
            for img_id in image_ids_order:
                images = all_data[tta_idx][img_id]['image'].to(DEVICE)
                depth_maps = all_data[tta_idx][img_id]['depth'].to(DEVICE)
                pred = model(images, depth_maps)
                # Each cached entry holds a single image, hence index 0.
                pred_denorm = denormalize({n: pred[n][0].item() for n in TARGET_NAMES}, target_stats)

                for name in TARGET_NAMES:
                    v5_predictions[name][img_id] += pred_denorm[name]

    del model
    torch.cuda.empty_cache()

# Average across folds and TTA (computed here so this cell does not depend on
# a variable left behind by the V4 cell)
n_total = N_FOLDS * len(tta_transforms)
for name in TARGET_NAMES:
    for img_id in image_ids_order:
        v5_predictions[name][img_id] /= n_total

print("\nV5 predictions complete")

## Ensemble Predictions

In [None]:
print("="*60)
print(f"Ensembling: V4 x {V4_WEIGHT} + V5 x {V5_WEIGHT}")
print("="*60)

# Normalise the blend weights so that tuned values which do not sum to 1
# still produce a weighted average rather than rescaled predictions.
weight_sum = V4_WEIGHT + V5_WEIGHT
if weight_sum <= 0:
    raise ValueError(f"Ensemble weights must have a positive sum, got {V4_WEIGHT} + {V5_WEIGHT}")
v4_share = V4_WEIGHT / weight_sum
v5_share = V5_WEIGHT / weight_sum

ensemble_predictions = {name: {} for name in TARGET_NAMES}

for img_id in image_ids_order:
    for name in TARGET_NAMES:
        v4_pred = v4_predictions[name][img_id]
        v5_pred = v5_predictions[name][img_id]
        ensemble_predictions[name][img_id] = v4_share * v4_pred + v5_share * v5_pred

# Apply biological constraints
print("\nApplying biological constraints...")
for img_id in image_ids_order:
    # Clip negatives
    for name in TARGET_NAMES:
        ensemble_predictions[name][img_id] = max(0.0, ensemble_predictions[name][img_id])

    clover = ensemble_predictions['Dry_Clover_g'][img_id]
    dead = ensemble_predictions['Dry_Dead_g'][img_id]
    green = ensemble_predictions['Dry_Green_g'][img_id]
    gdm = ensemble_predictions['GDM_g'][img_id]
    total = ensemble_predictions['Dry_Total_g'][img_id]

    # GDM = Green + Clover: meet halfway between the predicted GDM and the
    # sum of its predicted components, then rescale the components so they
    # add up to that value while keeping their ratio.
    gdm_calc = green + clover
    adj_gdm = (gdm + gdm_calc) / 2
    if gdm_calc > 0:
        scale = adj_gdm / gdm_calc
        ensemble_predictions['Dry_Green_g'][img_id] = green * scale
        ensemble_predictions['Dry_Clover_g'][img_id] = clover * scale
    # NOTE(review): when green + clover is 0 but the predicted GDM is not,
    # GDM is set to gdm / 2 while both components stay 0, so the identity
    # does not hold for that row — confirm whether this is intended.
    ensemble_predictions['GDM_g'][img_id] = adj_gdm

    # Total = GDM + Dead: same halfway rule; Dead absorbs the difference and
    # is floored at 0, in which case Total collapses to GDM.
    total_calc = adj_gdm + dead
    adj_total = (total + total_calc) / 2
    if adj_total > adj_gdm:
        ensemble_predictions['Dry_Dead_g'][img_id] = adj_total - adj_gdm
    else:
        ensemble_predictions['Dry_Dead_g'][img_id] = 0.0
        adj_total = adj_gdm
    ensemble_predictions['Dry_Total_g'][img_id] = adj_total

print("\nEnsemble predictions summary:")
for name in TARGET_NAMES:
    vals = list(ensemble_predictions[name].values())
    print(f"  {name:<15} mean: {np.mean(vals):>8.2f}")

print("\nIndividual model predictions (for comparison):")
for name in TARGET_NAMES:
    v4_vals = list(v4_predictions[name].values())
    v5_vals = list(v5_predictions[name].values())
    ens_vals = list(ensemble_predictions[name].values())
    print(f"  {name:<15} V4: {np.mean(v4_vals):>7.2f}  V5: {np.mean(v5_vals):>7.2f}  Ens: {np.mean(ens_vals):>7.2f}")

## Create Submission

In [None]:
# One row per (image, target) pair, keyed "<image_id>__<target_name>",
# images in pre-computation order and targets in TARGET_NAMES order.
submission_rows = [
    {
        'sample_id': f"{img_id}__{name}",
        'target': ensemble_predictions[name][img_id],
    }
    for img_id in image_ids_order
    for name in TARGET_NAMES
]

submission_df = pd.DataFrame(submission_rows)
submission_df.to_csv('submission.csv', index=False)

print(
    "="*60,
    "Submission created: submission.csv",
    f"V6 Ensemble: V4 ({V4_WEIGHT}) + V5 ({V5_WEIGHT})",
    "="*60,
    sep="\n",
)
print(submission_df)