# CSIRO Image2Biomass - V7: Foundation Model Ensemble

This notebook generates predictions using foundation model backbones (SigLIP or DINOv2):
- **Backbone**: Vision Transformer with pretrained foundation weights
- **Training**: Differential learning rates, warmup, gradient clipping
- **Ensemble**: Predictions averaged across the 5 cross-validation fold models

## Setup Instructions
1. Add the model checkpoint dataset for the selected `MODEL_TYPE` (its path is `checkpoint_base` in the Configuration cell)
2. Add the competition data
3. **Set Internet to OFF** (required for submission)
4. Run all cells to generate submission

## 1. Imports

In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from pathlib import Path
from tqdm import tqdm
import timm
import albumentations as A
from albumentations.pytorch import ToTensorV2
import gc

# Log the library versions and whether CUDA is available.
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"timm version: {timm.__version__}")

## 2. Configuration

In [None]:
# Choose which model to use
MODEL_TYPE = 'siglip'  # Options: 'siglip', 'dinov2'

# Model configurations, keyed by MODEL_TYPE. Each entry holds:
#   backbone        - timm model name handed to the regressor
#   image_size      - side length (pixels) the test images are resized to
#   features        - input width of each regression head
#   checkpoint_base - directory containing fold_<k>/best_model.pth per fold
MODEL_CONFIGS = {
    'siglip': {
        'backbone': 'vit_base_patch16_siglip_384',
        'image_size': 384,
        'features': 768,
        'checkpoint_base': '/kaggle/input/image2biomass-siglip-v2/pytorch/default/1/checkpoints_siglip'
    },
    'dinov2': {
        'backbone': 'vit_base_patch14_dinov2',
        'image_size': 518,
        'features': 768,
        'checkpoint_base': '/kaggle/input/image2biomass-dinov2-v2/pytorch/default/1/checkpoints_dinov2'
    }
}

# Active configuration; raises KeyError if MODEL_TYPE is not a key above.
CONFIG = MODEL_CONFIGS[MODEL_TYPE]

# Paths
TEST_CSV = '/kaggle/input/csiro-biomass/test.csv'
TEST_IMG_DIR = '/kaggle/input/csiro-biomass/test'
TRAIN_CSV = '/kaggle/input/csiro-biomass/train.csv'

# Number of fold checkpoints that are loaded and averaged.
N_FOLDS = 5

# Target names (one regression head and one prediction per name)
TARGET_NAMES = ['Dry_Clover_g', 'Dry_Dead_g', 'Dry_Green_g', 'Dry_Total_g', 'GDM_g']

# ImageNet normalization
# NOTE(review): these statistics are applied for both model types. They must
# match whatever normalization the checkpoints were trained with; SigLIP's
# timm pretrained config presumably uses mean/std of 0.5 - confirm against
# the training notebook.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Using device: {DEVICE}")
print(f"Model type: {MODEL_TYPE}")
print(f"Backbone: {CONFIG['backbone']}")
print(f"Image size: {CONFIG['image_size']}")

## 3. Model Architecture (Foundation Model)

In [None]:
class FoundationModelRegressor(nn.Module):
    """Regressor with one small MLP head per target on a shared timm backbone.

    Args:
        backbone_name: timm model name. Names containing ``'dinov2'`` are
            created with timm's default pooling; all others request
            ``global_pool='avg'``.
        num_features: Input width of every head; presumably the backbone's
            pooled feature size - verify against the chosen backbone.
        dropout: Dropout probability used inside each head.
    """

    def __init__(self, backbone_name, num_features=768, dropout=0.3):
        super().__init__()

        self.target_names = TARGET_NAMES

        # Weights come from the fold checkpoints, so the backbone is built
        # without pretrained weights; num_classes=0 removes the classifier.
        backbone_kwargs = {'pretrained': False, 'num_classes': 0}
        if 'dinov2' not in backbone_name:
            backbone_kwargs['global_pool'] = 'avg'
        self.backbone = timm.create_model(backbone_name, **backbone_kwargs)

        # One independent regression head per target name.
        self.heads = nn.ModuleDict({
            name: self._build_head(num_features, dropout)
            for name in self.target_names
        })

    @staticmethod
    def _build_head(num_features, dropout):
        """Return the num_features -> 256 -> 64 -> 1 MLP used for one target."""
        layers = [
            nn.Linear(num_features, 256),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(256, 64),
            nn.GELU(),
            nn.Linear(64, 1),
        ]
        return nn.Sequential(*layers)

    def forward(self, x):
        """Run the backbone once and return ``{target_name: prediction}``.

        Each head output has its trailing size-1 dimension squeezed away.
        """
        shared = self.backbone(x)
        outputs = {}
        for name in self.target_names:
            outputs[name] = self.heads[name](shared).squeeze(-1)
        return outputs

print("Foundation model architecture defined")

## 4. Dataset Class

In [None]:
class BiomassTestDataset(Dataset):
    """Image-level test dataset built from the long-format test CSV.

    The CSV has one row per (image, target); this dataset yields each
    distinct image once. The image id is the part of ``sample_id`` before
    the first ``'__'``.

    Args:
        csv_path: CSV with ``sample_id`` and ``image_path`` columns.
        img_dir: Directory that contains the image files.
        transform: Optional callable invoked as ``transform(image=array)``
            that returns a mapping with an ``'image'`` entry.
    """

    def __init__(self, csv_path, img_dir, transform=None):
        self.img_dir = Path(img_dir)
        self.transform = transform

        frame = pd.read_csv(csv_path)
        frame['image_id'] = frame['sample_id'].str.split('__').str[0]
        self.df = frame

        # Distinct ids in order of first appearance, plus id -> image path.
        self.image_ids = frame['image_id'].unique()
        self.image_paths = frame.groupby('image_id')['image_path'].first().to_dict()

    def __len__(self):
        """Return the number of distinct images."""
        return len(self.image_ids)

    def __getitem__(self, idx):
        """Return ``{'image': ..., 'image_id': ...}`` for the idx-th image."""
        image_id = self.image_ids[idx]

        # Only the file name from the CSV path is used; it is resolved
        # against img_dir.
        image_path = self.img_dir / Path(self.image_paths[image_id]).name
        image = np.array(Image.open(image_path).convert('RGB'))

        if self.transform is not None:
            image = self.transform(image=image)['image']

        return {'image': image, 'image_id': image_id}

print("Dataset class defined")

## 5. Get Target Statistics

In [None]:
# Rebuild the per-target mean/std from the training CSV; the fold models
# output standardized values that are mapped back with these statistics.
# NOTE(review): presumably training normalized with the same full-dataset
# statistics (population std, ddof=0) - confirm against the training code.
train_df = pd.read_csv(TRAIN_CSV)
train_df['image_id'] = train_df['sample_id'].str.split('__').str[0]

# Long -> wide: one row per image, one column per target.
train_wide = train_df.pivot_table(
    index='image_id',
    columns='target_name',
    values='target',
    aggfunc='first'
)

# The small epsilon keeps the std strictly positive.
target_stats = {
    name: {
        'mean': float(np.mean(train_wide[name].values)),
        'std': float(np.std(train_wide[name].values)) + 1e-8
    }
    for name in TARGET_NAMES
}

print("Target normalization statistics:")
for target_name, stats in target_stats.items():
    print(f"  {target_name:<20} mean: {stats['mean']:>8.2f}  std: {stats['std']:>8.2f}")

## 6. Create Test DataLoader

In [None]:
# Inference-time preprocessing: resize to the model's square input size,
# normalize, then convert to a tensor (no augmentation).
preprocessing_steps = [
    A.Resize(CONFIG['image_size'], CONFIG['image_size']),
    A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    ToTensorV2(),
]
val_transform = A.Compose(preprocessing_steps)

# One dataset item per distinct test image.
test_dataset = BiomassTestDataset(TEST_CSV, TEST_IMG_DIR, transform=val_transform)

# Fixed order and in-process loading.
test_loader = DataLoader(
    test_dataset, batch_size=8, shuffle=False, num_workers=0, pin_memory=False
)

print(f"Test dataset: {len(test_dataset)} samples")

## 7. Load Models and Generate Predictions

In [None]:
def denormalize_predictions(pred_dict, target_stats):
    """Map standardized predictions back to the original target scale.

    Args:
        pred_dict: Mapping of target name -> normalized prediction.
        target_stats: Mapping of target name -> ``{'mean': ..., 'std': ...}``;
            must contain every key of ``pred_dict``.

    Returns:
        New dict with ``value * std + mean`` for each target.
    """
    return {
        name: (normalized * target_stats[name]['std']) + target_stats[name]['mean']
        for name, normalized in pred_dict.items()
    }

# Generate predictions from each fold.
# For every fold: build the model, load that fold's best checkpoint, predict
# every test image, denormalize, and store {image_id: {target: value}}.
print(f"Loading {N_FOLDS} fold models and generating predictions...")
all_fold_predictions = []

for fold_idx in range(N_FOLDS):
    checkpoint_path = Path(CONFIG['checkpoint_base']) / f'fold_{fold_idx}' / 'best_model.pth'

    print(f"\nFold {fold_idx + 1}/{N_FOLDS}:")
    print(f"  Loading from: {checkpoint_path}")

    # Create model
    model = FoundationModelRegressor(
        backbone_name=CONFIG['backbone'],
        num_features=CONFIG['features'],
        dropout=0.3
    )
    model = model.to(DEVICE)

    # Load the checkpoint onto the CPU: load_state_dict copies the values
    # into the parameters already on DEVICE, so a second copy of the weights
    # never has to sit on the GPU.
    # NOTE(security): weights_only=False unpickles arbitrary Python objects;
    # only load checkpoints from a trusted source.
    checkpoint = torch.load(checkpoint_path, map_location='cpu', weights_only=False)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()

    print(f"  Val Loss: {checkpoint['best_val_loss']:.4f}")

    # The checkpoint is no longer needed once the weights are in the model.
    del checkpoint

    # Generate predictions
    fold_preds = {}
    with torch.no_grad():
        for batch in tqdm(test_loader, desc=f"Fold {fold_idx + 1}"):
            images = batch['image'].to(DEVICE)
            image_ids = batch['image_id']

            pred = model(images)

            # One device-to-host transfer per target for the whole batch,
            # instead of one synchronizing .item() call per scalar.
            batch_values = {
                target_name: pred[target_name].cpu().tolist()
                for target_name in TARGET_NAMES
            }

            for i, image_id in enumerate(image_ids):
                pred_dict = {
                    target_name: batch_values[target_name][i]
                    for target_name in TARGET_NAMES
                }
                fold_preds[image_id] = denormalize_predictions(pred_dict, target_stats)

    all_fold_predictions.append(fold_preds)
    print(f"  Generated {len(fold_preds)} predictions")

    # Free memory: drop the model, collect unreachable objects first, and
    # only then ask the CUDA allocator to release its cached blocks.
    del model
    gc.collect()
    torch.cuda.empty_cache()

print(f"\nAll {N_FOLDS} folds processed")

## 8. Ensemble and Apply Constraints

In [None]:
def enforce_constraint(predictions, method='average'):
    """Clip predictions at zero and reconcile Dry_Total with its components.

    The targeted relationship is
    ``Dry_Total_g = Dry_Clover_g + Dry_Dead_g + Dry_Green_g``.

    Values are clipped to be non-negative *before* the reconciliation.
    Clipping afterwards (as an earlier version did) changes the component
    sum and silently breaks the relationship whenever a component was
    predicted negative.

    Args:
        predictions: Mapping of image id -> ``{target name: value}``. Each
            inner dict must contain the four ``Dry_*`` keys when
            ``method='average'``; any other key (e.g. ``GDM_g``) is only
            clipped at zero.
        method: ``'average'`` moves the total and the component sum to their
            midpoint and rescales the components proportionally. Any other
            value skips the reconciliation and only clips at zero.

    Returns:
        A new dict with the same image ids; the input is not modified.
    """
    component_keys = ('Dry_Clover_g', 'Dry_Dead_g', 'Dry_Green_g')
    enforced = {}

    for image_id, pred_dict in predictions.items():
        # Ensure non-negative first, so the scaling below works on the values
        # that will actually be reported.
        pred = {key: max(0, value) for key, value in pred_dict.items()}

        if method == 'average':
            component_sum = (
                pred['Dry_Clover_g'] + pred['Dry_Dead_g'] + pred['Dry_Green_g']
            )

            if component_sum > 0:
                # Meet halfway between the predicted total and the component
                # sum, keeping the components' relative proportions.
                new_total = (pred['Dry_Total_g'] + component_sum) / 2
                scale = new_total / component_sum
                for key in component_keys:
                    pred[key] = pred[key] * scale
                pred['Dry_Total_g'] = new_total
            # Otherwise every component is zero and there are no proportions
            # to scale; the (clipped) total is left as predicted.

        enforced[image_id] = pred

    return enforced

# Ensemble: for every image and target, take the mean of the fold predictions.
print("Averaging predictions across folds...")

# The first fold's keys define which images are ensembled.
all_image_ids = list(all_fold_predictions[0])

ensemble_predictions = {
    image_id: {
        target_name: np.mean(
            [fold_preds[image_id][target_name] for fold_preds in all_fold_predictions]
        )
        for target_name in TARGET_NAMES
    }
    for image_id in all_image_ids
}

print(f"Generated ensemble predictions for {len(ensemble_predictions)} images")

# Reconcile Dry_Total with its components and clip negatives.
print("\nApplying constraint enforcement...")
ensemble_predictions = enforce_constraint(ensemble_predictions, method='average')

# Report how far each image still is from Total == Clover + Dead + Green.
violations = [
    abs(
        pred['Dry_Total_g']
        - (pred['Dry_Clover_g'] + pred['Dry_Dead_g'] + pred['Dry_Green_g'])
    )
    for pred in ensemble_predictions.values()
]

print(f"Constraint violations:")
print(f"  Mean: {np.mean(violations):.6f}g")
print(f"  Max: {np.max(violations):.6f}g")

## 9. Create Submission

In [None]:
# Re-read test.csv so the submission keeps its sample_id order.
test_df = pd.read_csv(TEST_CSV)

# One output row per test row: the image id is the sample_id prefix before
# '__', and target_name selects which ensembled value to emit.
submission_rows = [
    {
        'sample_id': sample_id,
        'target': ensemble_predictions[sample_id.split('__')[0]][target_name]
    }
    for sample_id, target_name in zip(test_df['sample_id'], test_df['target_name'])
]

# Create DataFrame and save
submission_df = pd.DataFrame(submission_rows)
submission_df.to_csv('submission.csv', index=False)

print("Submission file created!")
print(f"Shape: {submission_df.shape}")
print("\nFirst few predictions:")
print(submission_df.head(10))
print("\nSummary statistics:")
print(submission_df['target'].describe())

## 10. Summary

In [None]:
# Per-target mean / min / max of the final ensembled predictions.
print("\nEnsemble predictions by target:")
for target_name in TARGET_NAMES:
    values = [pred[target_name] for pred in ensemble_predictions.values()]
    mean_value = np.mean(values)
    min_value = np.min(values)
    max_value = np.max(values)
    print(f"  {target_name:<20} mean: {mean_value:>8.2f}  min: {min_value:>8.2f}  max: {max_value:>8.2f}")

# Closing banner with the run's configuration.
separator = "=" * 70
print("\n" + separator)
print(f"Model: {MODEL_TYPE.upper()} Foundation Model")
print(f"Backbone: {CONFIG['backbone']}")
print(f"Image size: {CONFIG['image_size']}")
print(f"Ensemble: {N_FOLDS}-fold")
print("Submission file ready: submission.csv")
print(separator)