# CSIRO Image2Biomass - EfficientNetV2-M with TTA

This notebook generates predictions using an optimized EfficientNetV2-M model trained with:
- Huber loss (delta=2.0)
- Conservative augmentation
- RMSprop optimizer
- Test-Time Augmentation (4 views: original, horizontal flip, vertical flip, and both flips combined)
- Validation loss: 0.3147 (28% improvement over ResNet50)

## Setup Instructions
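1. Upload the model checkpoint to Kaggle (as a Dataset or Model) named 'biomass-efficientnetv2-final', attach it to this notebook, and make sure `CHECKPOINT_PATH` in the Configuration cell matches the path where it is mounted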
2. Add the competition data
3. Run all cells to generate submission

## 1. Imports

In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from pathlib import Path
import albumentations as A
from albumentations.pytorch import ToTensorV2
import timm
from tqdm import tqdm

# Report the runtime environment: installed PyTorch build, then GPU visibility.
torch_version = torch.__version__
print(f"PyTorch version: {torch_version}")
cuda_ok = torch.cuda.is_available()
print(f"CUDA available: {cuda_ok}")

## 2. Configuration

In [None]:
# Competition inputs: CSV files and the image folders they refer to.
TEST_CSV = '/kaggle/input/csiro-biomass/test.csv'
TEST_IMG_DIR = '/kaggle/input/csiro-biomass/test'
TRAIN_CSV = '/kaggle/input/csiro-biomass/train.csv'
TRAIN_IMG_DIR = '/kaggle/input/csiro-biomass/train'

# Model checkpoint - UPDATE THIS PATH after uploading checkpoint
CHECKPOINT_PATH = '/kaggle/input/biomass-efficientnetv2-final/pytorch/default/1/best_model.pth'

# Target names
# One prediction head is built per name, and the same names are used as
# column keys when computing normalization statistics from train.csv.
TARGET_NAMES = ['Dry_Clover_g', 'Dry_Dead_g', 'Dry_Green_g', 'Dry_Total_g', 'GDM_g']

# Model config (from Optuna optimization - Trial 41)
# NOTE: 'pretrained' is not read by the model-loading cell in this notebook,
# which passes pretrained=False explicitly because trained weights are loaded
# from CHECKPOINT_PATH.
CONFIG = {
    'backbone': 'tf_efficientnetv2_m',
    'pretrained': True,
    'dropout': 0.5,
    'head_hidden_dim': 512,
    'image_size': 512,
    'batch_size': 8,  # Reduced for CPU/memory constraints
    'num_workers': 2,
    'use_tta': True  # Test-Time Augmentation
}

# ImageNet normalization
# Per-channel mean/std handed to A.Normalize in every inference transform.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Run on the GPU when one is visible to PyTorch, otherwise fall back to CPU.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Using device: {DEVICE}")

## 3. Model Architecture

In [None]:
class MultiTaskModel(nn.Module):
    """Shared timm backbone feeding one small regression head per target.

    Args:
        backbone: Model name passed to ``timm.create_model``.
        num_targets: Accepted for backward compatibility only. It does not
            size the model; one head is built per entry of ``target_names``.
        pretrained: Forwarded to ``timm.create_model``.
        dropout: Dropout probability used inside every head.
        head_hidden_dim: Width of the hidden layer inside every head.
        target_names: Names of the heads to build, in output order. When
            omitted, the module-level ``TARGET_NAMES`` is used.
    """

    def __init__(self, backbone='tf_efficientnetv2_m', num_targets=5, pretrained=True,
                 dropout=0.5, head_hidden_dim=512, target_names=None):
        super().__init__()

        # Fall back to the notebook-wide target list so existing callers
        # (and existing checkpoints keyed by those names) keep working.
        if target_names is None:
            target_names = TARGET_NAMES
        self.target_names = list(target_names)

        # num_classes=0: the backbone output is fed straight to the heads below.
        self.backbone = timm.create_model(backbone, pretrained=pretrained, num_classes=0)
        backbone_features = self.backbone.num_features

        # One independent head per target, keyed by target name.
        self.heads = nn.ModuleDict({
            target_name: self._make_head(backbone_features, head_hidden_dim, dropout)
            for target_name in self.target_names
        })

    def _make_head(self, in_features, hidden_dim, dropout):
        """Build a Linear -> ReLU -> Dropout -> Linear(1) prediction head."""
        return nn.Sequential(
            nn.Linear(in_features, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, 1)
        )

    def forward(self, x):
        """Return a dict mapping each target name to its predictions.

        The trailing size-1 dimension produced by each head is squeezed away.
        """
        features = self.backbone(x)
        # Iterate the heads this instance owns rather than a module global,
        # so the outputs always match the heads that were actually built.
        return {
            target_name: head(features).squeeze(-1)
            for target_name, head in self.heads.items()
        }

print("Model architecture defined")

## 4. Dataset Class

In [None]:
class BiomassTestDataset(Dataset):
    """Image-level test dataset built from a long-format CSV.

    The CSV is expected to have a ``sample_id`` column whose value starts
    with the image id followed by ``__``, and an ``image_path`` column. Each
    unique image id yields one item.

    Args:
        csv_path: Path of the CSV to read.
        img_dir: Directory containing the image files.
        transform: Optional callable invoked as ``transform(image=array)``
            and expected to return a mapping with an ``'image'`` entry.
        target_stats: Stored on the instance; not used by this class.
    """

    def __init__(self, csv_path, img_dir, transform=None, target_stats=None):
        self.csv_path = csv_path
        self.img_dir = Path(img_dir)
        self.transform = transform
        self.target_stats = target_stats

        # Load CSV and derive the image id from the part before '__'.
        self.df = pd.read_csv(csv_path)
        self.df['image_id'] = self.df['sample_id'].str.split('__').str[0]
        # Unique ids in order of first appearance define the item order.
        self.image_ids = self.df['image_id'].unique()
        # Rows of one image share a path; keep the first one seen per image.
        self.image_paths = self.df.groupby('image_id')['image_path'].first().to_dict()

    def __len__(self):
        """Return the number of unique images."""
        return len(self.image_ids)

    def __getitem__(self, idx):
        """Return ``{'image': ..., 'image_id': ...}`` for the idx-th image."""
        image_id = self.image_ids[idx]
        # Only the file name from the CSV path is used; it is resolved
        # against img_dir.
        image_filename = Path(self.image_paths[image_id]).name
        image_path = self.img_dir / image_filename

        # Close the file handle deterministically instead of leaving it to
        # garbage collection; the converted copy holds the pixel data.
        with Image.open(image_path) as img:
            image = np.array(img.convert('RGB'))

        # Apply transforms
        if self.transform is not None:
            transformed = self.transform(image=image)
            image = transformed['image']

        return {
            'image': image,
            'image_id': image_id
        }

print("Dataset class defined")

## 5. Get Target Statistics from Training Data

In [None]:
# Rebuild the per-target mean/std used to map model outputs back to grams.
# NOTE(review): these are computed over every image in train.csv; they must
# match the statistics used when the checkpoint was trained - confirm.
train_df = pd.read_csv(TRAIN_CSV)
train_df['image_id'] = train_df['sample_id'].str.split('__').str[0]

# Long -> wide: one row per image, one column per target name.
train_wide = train_df.pivot_table(
    index='image_id', columns='target_name', values='target', aggfunc='first'
)

# Population statistics (np.std default); the epsilon keeps std non-zero.
target_stats = {
    name: {
        'mean': float(np.mean(train_wide[name].values)),
        'std': float(np.std(train_wide[name].values)) + 1e-8,
    }
    for name in TARGET_NAMES
}

print("Target normalization statistics:")
for name, entry in target_stats.items():
    print(f"  {name:<20} mean: {entry['mean']:>8.2f}  std: {entry['std']:>8.2f}")

## 6. TTA Transforms

In [None]:
def get_tta_transforms(image_size=512):
    """Build the four test-time augmentation pipelines.

    Every pipeline resizes to ``image_size`` x ``image_size``, applies its
    flips (if any) with probability 1, normalizes with the ImageNet
    statistics and converts to a tensor. The returned order is: no flip,
    horizontal flip, vertical flip, horizontal + vertical flip.
    """
    # Flip steps inserted between the resize and the normalization.
    flip_variants = [
        [],
        [A.HorizontalFlip(p=1.0)],
        [A.VerticalFlip(p=1.0)],
        [A.HorizontalFlip(p=1.0), A.VerticalFlip(p=1.0)],
    ]

    return [
        A.Compose([
            A.Resize(image_size, image_size),
            *flips,
            A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
            ToTensorV2(),
        ])
        for flips in flip_variants
    ]

print("TTA transforms defined")

## 7. Load Model and Checkpoint

In [None]:
# Create model
# The architecture arguments must match the ones used to produce the
# checkpoint, otherwise load_state_dict below will reject the weights.
print(f"Creating model: {CONFIG['backbone']}...")
model = MultiTaskModel(
    backbone=CONFIG['backbone'],
    num_targets=len(TARGET_NAMES),
    pretrained=False,  # We'll load trained weights
    dropout=CONFIG['dropout'],
    head_hidden_dim=CONFIG['head_hidden_dim']
)
model = model.to(DEVICE)

# Load checkpoint
# The checkpoint is read as a dict with at least the keys
# 'model_state_dict', 'epoch' and 'best_val_loss'.
# SECURITY: weights_only=False lets torch.load unpickle arbitrary Python
# objects, so only point CHECKPOINT_PATH at a file you trust.
print(f"Loading checkpoint from {CHECKPOINT_PATH}...")
checkpoint = torch.load(CHECKPOINT_PATH, map_location=DEVICE, weights_only=False)
model.load_state_dict(checkpoint['model_state_dict'])
# Switch to inference mode (disables the dropout layers in the heads).
model.eval()

print(f"Checkpoint loaded successfully!")
print(f"  Epoch: {checkpoint['epoch']}")
print(f"  Best Val Loss: {checkpoint['best_val_loss']:.4f}")

# Count every parameter tensor element in the model for a quick sanity check.
n_params = sum(p.numel() for p in model.parameters())
print(f"Total parameters: {n_params:,}")

## 8. Helper Functions

In [None]:
def denormalize_predictions(pred_dict, target_stats):
    """Map normalized predictions back to the original target scale.

    Args:
        pred_dict: Mapping of target name to normalized value.
        target_stats: Mapping of target name to a dict with ``'mean'`` and
            ``'std'`` entries.

    Returns:
        A new dict with ``value * std + mean`` for every target in
        ``pred_dict``.
    """
    return {
        name: (normalized * target_stats[name]['std']) + target_stats[name]['mean']
        for name, normalized in pred_dict.items()
    }

def enforce_constraint(predictions, method='average'):
    """Enforce Dry_Total_g = Dry_Clover_g + Dry_Dead_g + Dry_Green_g.

    Args:
        predictions: Mapping of image id to a dict of per-target values. The
            input dicts are copied, never modified in place.
        method: ``'average'`` reconciles the predicted total with the sum of
            the components (see below). Any other value returns unmodified
            copies of the input dicts.

    Returns:
        A new mapping of image id to the adjusted prediction dict. Targets
        other than the four named above (for example ``GDM_g``) are passed
        through unchanged.

    With ``method='average'`` the new total is the mean of the predicted
    total and the component sum, floored at 0, and the three components are
    rescaled proportionally so they add up to it. If the component sum is
    not positive there is nothing to scale, so the components and the total
    are all set to 0; this keeps the constraint satisfied instead of
    leaving a zero total next to non-zero components.
    """
    enforced = {}

    for image_id, pred_dict in predictions.items():
        pred = pred_dict.copy()

        clover = pred['Dry_Clover_g']
        dead = pred['Dry_Dead_g']
        green = pred['Dry_Green_g']
        total = pred['Dry_Total_g']

        component_sum = clover + dead + green

        if method == 'average':
            # Floor at zero: a negative target total would give a negative
            # scale factor and flip the sign of every component.
            new_total = max((total + component_sum) / 2, 0.0)

            if component_sum > 0:
                # Distribute the discrepancy proportionally.
                scale = new_total / component_sum
                pred['Dry_Clover_g'] = clover * scale
                pred['Dry_Dead_g'] = dead * scale
                pred['Dry_Green_g'] = green * scale
                pred['Dry_Total_g'] = new_total
            else:
                # No positive mass to redistribute: zero everything so the
                # components still sum exactly to the total.
                pred['Dry_Clover_g'] = 0.0
                pred['Dry_Dead_g'] = 0.0
                pred['Dry_Green_g'] = 0.0
                pred['Dry_Total_g'] = 0.0

        enforced[image_id] = pred

    return enforced

print("Helper functions defined")

## 9. Generate Predictions with TTA

In [None]:
# Run the model over the test images and fill `predictions`, a dict mapping
# image_id -> {target_name: denormalized value}. With TTA enabled every image
# is predicted once per transform and the raw model outputs are averaged
# before denormalization; otherwise a single resize+normalize pass is used.
if CONFIG['use_tta']:
    print("Using Test-Time Augmentation (4 transforms)")
    tta_transforms = get_tta_transforms(CONFIG['image_size'])
    
    # Store predictions from all TTA iterations
    # Layout: all_predictions[target_name][image_id] -> list of raw outputs,
    # one entry per TTA pass.
    all_predictions = {target: {} for target in TARGET_NAMES}
    
    with torch.no_grad():
        for tta_idx, tta_transform in enumerate(tta_transforms):
            print(f"  TTA {tta_idx + 1}/{len(tta_transforms)}...")
            
            # Create dataset with this TTA transform
            # (the dataset only stores target_stats; denormalization happens
            # after averaging, further below)
            test_dataset = BiomassTestDataset(
                csv_path=TEST_CSV,
                img_dir=TEST_IMG_DIR,
                transform=tta_transform,
                target_stats=target_stats
            )
            
            test_loader = DataLoader(
                test_dataset,
                batch_size=CONFIG['batch_size'],
                shuffle=False,
                num_workers=CONFIG['num_workers']
            )
            
            for batch in test_loader:
                images = batch['image'].to(DEVICE)
                image_ids = batch['image_id']
                
                # Get predictions
                # `pred` is a dict of per-target tensors indexed by position
                # within the batch.
                pred = model(images)
                
                # Store predictions
                for i, image_id in enumerate(image_ids):
                    for target_name in TARGET_NAMES:
                        if image_id not in all_predictions[target_name]:
                            all_predictions[target_name][image_id] = []
                        all_predictions[target_name][image_id].append(
                            pred[target_name][i].cpu().item()
                        )
    
    # Average predictions
    # Image ids are taken from the first target's dict; every target received
    # the same ids in the loop above.
    predictions = {}
    for image_id in all_predictions[TARGET_NAMES[0]].keys():
        pred_dict = {
            target_name: np.mean(all_predictions[target_name][image_id])
            for target_name in TARGET_NAMES
        }
        # Denormalize
        pred_dict = denormalize_predictions(pred_dict, target_stats)
        predictions[image_id] = pred_dict
    
    print(f"Generated TTA predictions for {len(predictions)} images")

else:
    print("Using single inference (no TTA)")
    # Standard inference without TTA
    # Same resize/normalize/tensor steps as the TTA pipelines, minus flips.
    val_transform = A.Compose([
        A.Resize(CONFIG['image_size'], CONFIG['image_size']),
        A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
        ToTensorV2()
    ])
    
    test_dataset = BiomassTestDataset(
        csv_path=TEST_CSV,
        img_dir=TEST_IMG_DIR,
        transform=val_transform,
        target_stats=target_stats
    )
    
    test_loader = DataLoader(
        test_dataset,
        batch_size=CONFIG['batch_size'],
        shuffle=False,
        num_workers=CONFIG['num_workers']
    )
    
    predictions = {}
    
    with torch.no_grad():
        for batch in tqdm(test_loader, desc="Generating predictions"):
            images = batch['image'].to(DEVICE)
            image_ids = batch['image_id']
            
            pred = model(images)
            
            # Pull each image's scalar outputs off the device and convert
            # them to the original target scale right away.
            for i, image_id in enumerate(image_ids):
                pred_dict = {
                    target_name: pred[target_name][i].cpu().item()
                    for target_name in TARGET_NAMES
                }
                pred_dict = denormalize_predictions(pred_dict, target_stats)
                predictions[image_id] = pred_dict
    
    print(f"Generated predictions for {len(predictions)} images")

# Reconcile the predicted total with the sum of its components.
print("Applying constraint enforcement...")
predictions = enforce_constraint(predictions, method='average')

# Absolute gap between the total and the component sum, one value per image.
violations = [
    abs(p['Dry_Total_g'] - (p['Dry_Clover_g'] + p['Dry_Dead_g'] + p['Dry_Green_g']))
    for p in predictions.values()
]

print(f"Constraint violations:")
print(f"  Mean: {np.mean(violations):.6f}g")
print(f"  Max: {np.max(violations):.6f}g")
print(f"  All exact: {all(v < 1e-6 for v in violations)}")

## 10. Create Submission File

In [None]:
# Re-read test.csv so the submission rows follow its sample_id order.
test_df = pd.read_csv(TEST_CSV)

# One output row per (sample_id, target_name) pair; the image id is the part
# of sample_id before '__' and selects the per-image prediction dict.
submission_rows = [
    {
        'sample_id': sample_id,
        'target': predictions[sample_id.split('__')[0]][target_name],
    }
    for sample_id, target_name in zip(test_df['sample_id'], test_df['target_name'])
]

# Write the two-column submission file.
submission_df = pd.DataFrame(submission_rows)
submission_df.to_csv('submission.csv', index=False)

print("Submission file created!")
print(f"Shape: {submission_df.shape}")
print("\nFirst few predictions:")
print(submission_df.head(10))
print("\nSummary statistics:")
print(submission_df['target'].describe())

## 11. Display Predictions Summary

In [None]:
# Per-target mean / min / max over all predicted images.
print("\nPredictions by target:")
for name in TARGET_NAMES:
    column = [p[name] for p in predictions.values()]
    avg = np.mean(column)
    lowest = np.min(column)
    highest = np.max(column)
    print(f"  {name:<20} mean: {avg:>8.2f}  "
          f"min: {lowest:>8.2f}  max: {highest:>8.2f}")

# Closing banner.
banner = "="*70
print("\n" + banner)
print("Submission file ready: submission.csv")
print("Model: EfficientNetV2-M with TTA (Val Loss: 0.3147)")
print(banner)