# A.4a Synthetic Depth Only with Reset BN

**Experiment:** A.4a - Synthetic Depth
**Input:** Synthetic depth maps (3-channel, generated by Depth-Anything-V2)
**Objective:** Test whether synthetic depth can replace a real depth sensor
**Classes:** 1 (fresh_fruit_bunch)

## Workflow
1. Environment setup with auto-detection (Kaggle vs Local)
2. Install dependencies
3. Dataset preparation and verification
4. Training with 5 seeds (42, 123, 456, 789, 101)
5. Reset BatchNorm statistics for synthetic depth domain adaptation (done inside the training loop, before each seed's training run)
6. Evaluation on test set
7. Results summary with mean ± standard deviation

## Prerequisites
- Run `generate_synthetic_depth.ipynb` first
- Upload output as Kaggle dataset: `ffb-synthetic-depth`

## Uniform Augmentation (All Experiments)
- translate: 0.1
- scale: 0.5
- fliplr: 0.5
- hsv_h: 0.0 (disabled for uniformity)
- hsv_s: 0.0 (disabled for uniformity)
- hsv_v: 0.0 (disabled for uniformity)
- erasing: 0.0
- mosaic: 0.0
- mixup: 0.0
- degrees: 0.0
- copy_paste: 0.0

In [None]:
# =============================================================================
# Cell 1: Environment Setup & Auto-Detection
# =============================================================================
import os
import sys
import torch
import torch.nn as nn
import numpy as np
import shutil
import gc
import time
from pathlib import Path
from datetime import datetime

# Decide once whether the notebook runs inside a Kaggle kernel or on a local
# machine; every path below is derived from this single flag.
IS_KAGGLE = any(os.path.exists(marker) for marker in ('/kaggle/input', '/kaggle'))

# Root directory for everything this notebook writes.
BASE_PATH = (
    Path('/kaggle/working')
    if IS_KAGGLE
    else Path('D:/Work/Assisten Dosen/Anylabel/Experiments')
)
# Location of the synthetic-depth dataset for the detected environment.
DATASET_PATH = (
    Path('/kaggle/input/ffb-synthetic-depth')
    if IS_KAGGLE
    else BASE_PATH / 'datasets' / 'ffb_localization_depth_synthetic'
)

RUNS_PATH = BASE_PATH.joinpath('runs', 'detect')
KAGGLE_OUTPUT = BASE_PATH.joinpath('kaggleoutput')
# Create the output directory up front so later cells can write into it.
KAGGLE_OUTPUT.mkdir(parents=True, exist_ok=True)

_rule = "=" * 60
_env_label = 'Kaggle' if IS_KAGGLE else 'Local'
_has_cuda = torch.cuda.is_available()

print(_rule)
print("A.4a SYNTHETIC DEPTH ONLY - ENVIRONMENT SETUP")
print(_rule)
print(f"Running on: {_env_label}")
print(f"Base Path: {BASE_PATH}")
print(f"Dataset Path: {DATASET_PATH}")
print(f"CUDA Available: {_has_cuda}")

if _has_cuda:
    # Report the first GPU and release any cached CUDA memory.
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"CUDA Version: {torch.version.cuda}")
    torch.cuda.empty_cache()

_python_version = sys.version.split()[0]
print(f"Python: {_python_version}")
print(f"PyTorch: {torch.__version__}")
print(_rule)

In [None]:
# =============================================================================
# Cell 2: Install and Imports
# =============================================================================
!pip install -q ultralytics

# Import after installation
from ultralytics import YOLO
from ultralytics.data import build_dataloader
import pandas as pd
import json

# Confirm that the Ultralytics import above succeeded (check mark was mis-encoded).
print("\n✓ Ultralytics installed successfully")

In [None]:
# =============================================================================
# Cell 3: Configuration with AUGMENT_PARAMS
# =============================================================================
# Augmentation settings shared by every experiment so that results stay
# comparable. All colour-space (HSV) jitter is switched off for uniformity.
AUGMENT_PARAMS = dict(
    translate=0.1,
    scale=0.5,
    fliplr=0.5,
    hsv_h=0.0,
    hsv_s=0.0,
    hsv_v=0.0,
    erasing=0.0,
    mosaic=0.0,
    mixup=0.0,
    degrees=0.0,
    copy_paste=0.0,
)

# Run-level settings: one training run per seed.
SEEDS = [42, 123, 456, 789, 101]
EXP_PREFIX = 'exp_a4a_synthetic_v2'
EPOCHS, PATIENCE = 100, 30
IMGSZ, BATCH_SIZE = 640, 16
# GPU index 0 when CUDA is present, otherwise the CPU.
DEVICE = 0 if torch.cuda.is_available() else 'cpu'

# Assemble the whole configuration report first, then emit it in one go.
_rule = "=" * 60
_report = [
    _rule,
    "TRAINING CONFIGURATION",
    _rule,
    "Experiment: A.4a Synthetic Depth Only (V2)",
    "Model: YOLOv11n",
    f"Seeds: {SEEDS} ({len(SEEDS)} runs)",
    f"Epochs: {EPOCHS} (patience: {PATIENCE})",
    f"Image Size: {IMGSZ}",
    f"Batch Size: {BATCH_SIZE}",
    f"Device: {DEVICE}",
    "",
    "Uniform Augmentation:",
]
_report.extend(f"  {name}: {setting}" for name, setting in AUGMENT_PARAMS.items())
_report += [
    "",
    "Special: Reset BatchNorm for synthetic depth domain adaptation",
    _rule,
]
print("\n".join(_report))

In [None]:
# =============================================================================
# Cell 4: Reset BN Helper Function (same as A.2)
# =============================================================================
def reset_bn_stats(model, train_loader, num_batches=100, device='cuda'):
    """Re-estimate BatchNorm running statistics from training batches.

    Meant to be called after loading pretrained weights so the running
    mean/variance of every ``nn.BatchNorm2d`` layer reflect the new input
    domain instead of the data the weights were pretrained on.

    Args:
        model: ``nn.Module`` whose BatchNorm2d layers are reset. It is
            expected to already live on ``device``; this function does not
            move it.
        train_loader: Iterable of batches. A batch is either a dict with an
            ``'img'`` tensor or a sequence whose first element is the image
            tensor.
        num_batches: Maximum number of batches to forward (default 100).
        device: Device each image batch is moved to before the forward pass.

    Returns:
        The same ``model`` object, left in training mode.
    """
    # BatchNorm only updates its running statistics in training mode.
    model.train()

    # Discard the pretrained running statistics.
    for module in model.modules():
        if isinstance(module, nn.BatchNorm2d):
            module.reset_running_stats()
            module.momentum = 0.1  # Higher momentum for faster adaptation

    # Forward passes only: no gradients are needed to refresh the statistics.
    with torch.no_grad():
        for i, batch in enumerate(train_loader):
            if i >= num_batches:
                break
            imgs = batch['img'] if isinstance(batch, dict) else batch[0]
            imgs = imgs.to(device)
            if not imgs.is_floating_point():
                # Integer (e.g. uint8) image batches cannot be fed to the
                # float convolution weights. Cast and scale to [0, 1], the
                # same scaling the Ultralytics trainer applies to its batches.
                imgs = imgs.float() / 255
            model(imgs)

    return model

print("✓ reset_bn_stats function defined")

In [None]:
# =============================================================================
# Cell 5: Dataset Verification and YAML
# =============================================================================
# Report how many images and labels each split contains, so a missing or
# wrongly mounted dataset is noticed before any training starts.
print("="*60)
print("DATASET VERIFICATION")
print("="*60)

for split in ['train', 'val', 'test']:
    img_dir = DATASET_PATH / 'images' / split
    lbl_dir = DATASET_PATH / 'labels' / split

    # A missing directory simply counts as zero files.
    imgs = len(list(img_dir.glob('*.png'))) if img_dir.exists() else 0
    lbls = len(list(lbl_dir.glob('*.txt'))) if lbl_dir.exists() else 0

    # A split is flagged OK only when it has both images and labels.
    status = "✓" if imgs > 0 and lbls > 0 else "✗"
    print(f"{status} {split:6}: {imgs:4} images, {lbls:4} labels")

# Dataset description passed to the trainer and validator via `data=`.
yaml_content = f"""
# A.4a Synthetic Depth Only Dataset Configuration
path: {DATASET_PATH}
train: images/train
val: images/val
test: images/test

nc: 1
names: ['fresh_fruit_bunch']
"""

config_path = BASE_PATH / 'dataset_synthetic_v2.yaml'
# Explicit UTF-8 so the dataset path is written the same way on every
# platform, regardless of the locale's default encoding.
with open(config_path, 'w', encoding='utf-8') as f:
    f.write(yaml_content)

print(f"\n✓ YAML config created: {config_path}")
print("\nConfig contents:")
print("-"*40)
print(yaml_content)

In [None]:
# =============================================================================
# Cell 6: Training Loop with inline BatchNorm reset
# =============================================================================
# Trains one model per seed. Per-seed validation metrics (or the error text
# of a failed run) are collected in `results_all`; wall-clock time per
# successful seed is collected in `training_times`.
results_all = {}
training_times = {}

print("\n" + "="*60)
print("STARTING TRAINING LOOP")
print("="*60)

for idx, seed in enumerate(SEEDS, 1):
    start_time = time.time()

    print(f"\n{'='*60}")
    print(f"TRAINING A.4a SYNTHETIC DEPTH - Seed {seed} ({idx}/{len(SEEDS)})")
    print(f"{'='*60}")

    # Seed torch and NumPy so the BN-reset forward passes below are
    # reproducible; the trainer additionally receives the seed via `seed=`.
    torch.manual_seed(seed)
    np.random.seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

    # Bind the name before the try block: if loading the weights fails, the
    # cleanup below must not hit an unbound (or previously deleted) `model`,
    # which would raise NameError and abort the remaining seeds.
    model = None

    try:
        # Load pretrained weights
        model = YOLO('yolo11n.pt')

        # Move the model to the target device first
        model.model.to(DEVICE)

        # NOTE(review): the running statistics are re-estimated from Gaussian
        # noise, not from the synthetic depth images, and the
        # reset_bn_stats() helper defined earlier is not used here. Confirm
        # this is the intended "domain adaptation" step before relying on it.
        print("Resetting BatchNorm statistics...")
        model.model.train()

        # Reset running stats for all BN layers
        for module in model.model.modules():
            if isinstance(module, nn.BatchNorm2d):
                module.reset_running_stats()
                module.momentum = 0.1

        # Forward passes with dummy images to repopulate the BN statistics
        dummy_input = torch.randn(BATCH_SIZE, 3, IMGSZ, IMGSZ).to(DEVICE)
        with torch.no_grad():
            for _ in range(10):
                _ = model.model(dummy_input)

        print("✓ BN reset complete")

        # Train
        results = model.train(
            data=str(config_path),
            epochs=EPOCHS,
            patience=PATIENCE,
            seed=seed,
            name=f"{EXP_PREFIX}_seed{seed}",
            project=str(RUNS_PATH),
            exist_ok=True,
            imgsz=IMGSZ,
            batch=BATCH_SIZE,
            device=DEVICE,
            **AUGMENT_PARAMS,
        )

        elapsed = time.time() - start_time
        training_times[seed] = elapsed

        # Early stopping (patience) can end a run before EPOCHS. Report the
        # epoch the trainer actually reached (its counter is zero-based) and
        # fall back to the configured maximum if it is not exposed.
        trainer = getattr(model, 'trainer', None)
        last_epoch = getattr(trainer, 'epoch', None)
        epochs_trained = last_epoch + 1 if isinstance(last_epoch, int) else EPOCHS

        # Validation metrics of the finished run; 0 when none were returned.
        has_box = hasattr(results, 'box')
        results_all[seed] = {
            'model_path': str(RUNS_PATH / f"{EXP_PREFIX}_seed{seed}" / "weights" / "best.pt"),
            'epochs_trained': epochs_trained,
            'mAP50_val': float(results.box.map50) if has_box else 0,
            'mAP50_95_val': float(results.box.map) if has_box else 0,
        }

        print(f"\n✓ Seed {seed} completed!")
        print(f"  Epochs: {epochs_trained}")
        print(f"  Val mAP50: {results_all[seed]['mAP50_val']:.4f}")
        print(f"  Val mAP50-95: {results_all[seed]['mAP50_95_val']:.4f}")
        print(f"  Time: {elapsed/60:.1f} minutes")

    except Exception as e:
        # Top-level boundary: record the failure and continue with the next
        # seed instead of aborting the whole experiment.
        print(f"\n✗ Seed {seed} failed: {e}")
        import traceback
        traceback.print_exc()
        results_all[seed] = {'error': str(e)}

    # Memory cleanup: drop the model reference, then free host/GPU memory
    model = None
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

print("\n" + "="*60)
print("TRAINING LOOP COMPLETED")
print("="*60)

# Print summary
print("\n📊 RESULTS SUMMARY:")
for seed, res in results_all.items():
    if 'error' not in res:
        print(f"  Seed {seed}: mAP50={res['mAP50_val']:.4f}, mAP50-95={res['mAP50_95_val']:.4f}")
    else:
        print(f"  Seed {seed}: FAILED - {res['error'][:50]}...")

In [None]:
# =============================================================================
# Cell 7: Evaluation
# =============================================================================
# Evaluate each seed's best checkpoint on the held-out test split.
# `results_dict` maps seed -> {'mAP50', 'mAP50-95', 'Precision', 'Recall'};
# seeds without a checkpoint or with a failed evaluation are left out.
results_dict = {}

print("\n" + "="*60)
print("EVALUATION ON TEST SET")
print("="*60)

for seed in SEEDS:
    model_path = RUNS_PATH / f"{EXP_PREFIX}_seed{seed}" / 'weights' / 'best.pt'

    if not model_path.exists():
        print(f"✗ Model not found: {model_path}")
        continue

    print(f"\nSeed {seed}:")

    # Bound before the try block so the cleanup in `finally` is always valid.
    model = None
    try:
        model = YOLO(str(model_path))
        metrics = model.val(data=str(config_path), split='test', device=DEVICE)

        # Store plain Python floats so the values stay JSON-serialisable
        # whatever scalar type the metrics object returns.
        seed_metrics = {
            'mAP50': float(metrics.box.map50),
            'mAP50-95': float(metrics.box.map),
            'Precision': float(metrics.box.mp),
            'Recall': float(metrics.box.mr),
        }
        results_dict[seed] = seed_metrics

        print(f"  mAP50:     {seed_metrics['mAP50']:.4f}")
        print(f"  mAP50-95:  {seed_metrics['mAP50-95']:.4f}")
        print(f"  Precision: {seed_metrics['Precision']:.4f}")
        print(f"  Recall:    {seed_metrics['Recall']:.4f}")

    except Exception as e:
        # Keep evaluating the remaining seeds when one of them fails.
        print(f"  ✗ Evaluation failed: {e}")

    finally:
        # Release the model on success and on failure alike.
        model = None
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

print("\n" + "="*60)
print("EVALUATION COMPLETED")
print("="*60)

In [None]:
# =============================================================================
# Cell 8: Summary
# =============================================================================
# Tabulate the per-seed test metrics and print aggregate statistics.
# `df`, `avg`, `std` and `best_seed` are only defined when at least one seed
# was evaluated; the save cell reuses them under the same condition.
if results_dict:
    # One row per seed, one column per metric.
    df = pd.DataFrame(results_dict).T
    df.index.name = 'Seed'

    # Calculate statistics (std is NaN when only one seed is available,
    # because pandas uses the sample standard deviation).
    avg = df.mean()
    std = df.std()
    min_vals = df.min()
    max_vals = df.max()

    print("\n" + "="*60)
    print("A.4a SYNTHETIC DEPTH ONLY (V2) - FINAL RESULTS")
    print("="*60 + "\n")
    print(df.to_string(float_format=lambda x: f"{x:.4f}"))

    print("\n" + "-"*60)
    print("STATISTICAL SUMMARY")
    print("-"*60)
    print(f"{'Metric':<15} {'Mean':>10} {'Std':>10} {'Min':>10} {'Max':>10}")
    print("-"*60)
    for col in df.columns:
        print(f"{col:<15} {avg[col]:>10.4f} {std[col]:>10.4f} {min_vals[col]:>10.4f} {max_vals[col]:>10.4f}")

    # Best seed = highest test mAP50
    best_seed = df['mAP50'].idxmax()
    print(f"\n✓ Best Seed: {best_seed} (mAP50: {df.loc[best_seed, 'mAP50']:.4f})")

    print("="*60)
else:
    print("No results to display.")

In [None]:
# =============================================================================
# Cell 9: Save Results
# =============================================================================
# Human-readable report of the configuration and the test-set results.
output_file = KAGGLE_OUTPUT / 'a4a_synthetic_v2_results.txt'

# Explicit UTF-8: the report contains "±", which must not depend on the
# platform's default text encoding.
with open(output_file, 'w', encoding='utf-8') as f:
    f.write("="*60 + "\n")
    f.write("A.4a Synthetic Depth Only (V2) Results\n")
    f.write(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
    f.write(f"Environment: {'Kaggle' if IS_KAGGLE else 'Local'}\n")
    f.write("="*60 + "\n\n")

    f.write("Configuration:\n")
    f.write("  Model: YOLOv11n\n")
    f.write("  Depth Source: Depth-Anything-V2 (Synthetic)\n")
    f.write(f"  Epochs: {EPOCHS} (patience: {PATIENCE})\n")
    f.write(f"  Image Size: {IMGSZ}\n")
    f.write(f"  Batch Size: {BATCH_SIZE}\n")
    f.write(f"  Seeds: {SEEDS}\n")
    f.write("\nUniform Augmentation:\n")
    for key, value in AUGMENT_PARAMS.items():
        f.write(f"  {key}: {value}\n")
    f.write("\nSpecial Features:\n")
    f.write("  - Reset BatchNorm statistics for synthetic depth domain adaptation\n")

    # `df`, `avg`, `std` and `best_seed` come from the summary cell and only
    # exist when at least one seed was evaluated.
    if results_dict:
        f.write("\n" + "="*60 + "\n")
        f.write("Per-Seed Results:\n")
        f.write("="*60 + "\n")
        f.write(df.to_string(float_format=lambda x: f"{x:.4f}"))

        f.write("\n\n" + "-"*60 + "\n")
        f.write("Summary (Mean ± Std):\n")
        f.write("-"*60 + "\n")
        for col in df.columns:
            f.write(f"  {col}: {avg[col]:.4f} ± {std[col]:.4f}\n")

        f.write(f"\nBest Seed: {best_seed}\n")

print(f"\n✓ Results saved: {output_file}")


def _json_number(value):
    """Return ``value`` as a plain float, or ``None`` when it is NaN.

    ``json.dump`` would otherwise write the bare token ``NaN``, which strict
    JSON parsers reject. The standard deviation is NaN whenever only a single
    seed produced results.
    """
    number = float(value)
    return None if pd.isna(number) else number


# Aggregate statistics exist only when at least one seed was evaluated.
if results_dict:
    summary = {
        'mean': {k: _json_number(v) for k, v in avg.items()},
        'std': {k: _json_number(v) for k, v in std.items()},
        'best_seed': int(best_seed),
    }
else:
    summary = None

# Also save as JSON for programmatic access
json_output = {
    'experiment': 'A.4a',
    'variant': 'V2',
    'seeds': SEEDS,
    'config': {
        'model': 'yolo11n',
        'depth_source': 'synthetic',
        'epochs': EPOCHS,
        'patience': PATIENCE,
        'imgsz': IMGSZ,
        'batch': BATCH_SIZE,
        'augmentation': AUGMENT_PARAMS,
        'reset_bn': True,
    },
    # JSON object keys must be strings, so seeds are stringified.
    'results': {
        str(seed): {name: _json_number(v) for name, v in seed_metrics.items()}
        for seed, seed_metrics in results_dict.items()
    },
    'summary': summary,
}

json_file = KAGGLE_OUTPUT / 'a4a_synthetic_v2_results.json'
with open(json_file, 'w', encoding='utf-8') as f:
    json.dump(json_output, f, indent=2)

print(f"✓ JSON saved: {json_file}")

In [None]:
# =============================================================================
# Cell 10: Create Archives
# =============================================================================
# Bundle the training runs and the result files into two zip archives placed
# directly under BASE_PATH, so they can be downloaded as single files.
print("\n" + "="*60)
print("CREATING ARCHIVES")
print("="*60 + "\n")

# Archive training runs (skipped when no run directory was produced)
if RUNS_PATH.exists():
    runs_zip = BASE_PATH / 'a4a_synthetic_v2_runs.zip'
    # make_archive appends ".zip" itself, so pass the name without suffix.
    shutil.make_archive(str(runs_zip.with_suffix('')), 'zip', RUNS_PATH)
    size_mb = runs_zip.stat().st_size / 1024 / 1024
    print(f"✓ a4a_synthetic_v2_runs.zip: {size_mb:.1f} MB")
else:
    print("✗ No runs directory found")

# Archive outputs (result text and JSON files)
output_zip = BASE_PATH / 'a4a_synthetic_v2_output.zip'
shutil.make_archive(str(output_zip.with_suffix('')), 'zip', KAGGLE_OUTPUT)
size_mb = output_zip.stat().st_size / 1024 / 1024
print(f"✓ a4a_synthetic_v2_output.zip: {size_mb:.1f} MB")

print("\n" + "="*60)
print("ALL DONE!")
print("="*60)
print("\nDownload from Output tab:")
print("  - a4a_synthetic_v2_runs.zip (training runs)")
print("  - a4a_synthetic_v2_output.zip (results)")