# A.3 RGB+Depth 4-Channel with Reset BN

**Experiment:** A.3 - RGB+Depth Fusion (V2)
**Input:** RGB + Real Depth (4-channel RGBD)
**Objective:** Test fusion of RGB with real depth sensor data
**Classes:** 1 (fresh_fruit_bunch)

## Key Features
1. **4-Channel Input**: RGB (3) + Depth (1) = 4 channels
2. **Custom Trainer**: RGBD4ChTrainer with 4-channel support
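3. **First Conv Adaptation**: The first convolution layer is converted from 3 to 4 input channels (RGB weights are kept; the depth channel is initialised with the mean of the RGB weights)
4. **Reset BN**: BatchNorm running statistics are reset before training (domain adaptation for RGBD input)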

## Prerequisites
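- Upload the RGB-D dataset (folder `ffb_localization_rgbd` containing `rgb/`, `depth/` and `labels/`, each split into `train`/`val`/`test`) as the Kaggle dataset `ffb-localization-rgbd-dataset`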
- Or use local dataset at: `datasets/ffb_localization_rgbd/`

## Uniform Augmentation (All Experiments)
- translate: 0.1
- scale: 0.5
- fliplr: 0.5
- hsv_h: 0.0 (disabled for uniformity)
- hsv_s: 0.0 (disabled for uniformity)
- hsv_v: 0.0 (disabled for uniformity)
- erasing: 0.0
- mosaic: 0.0
- mixup: 0.0
- degrees: 0.0
- copy_paste: 0.0

In [None]:
# =============================================================================
# Cell 1: Environment Setup & Imports
# =============================================================================
!pip install -q ultralytics

import os
import sys
import torch
import torch.nn as nn
import numpy as np
import cv2
import shutil
import gc
import time
import json
import pandas as pd
from pathlib import Path
from datetime import datetime
from tqdm.auto import tqdm

# Presumably turns off Weights & Biases logging for the training runs — TODO confirm
os.environ["WANDB_DISABLED"] = "true"

# Auto-detect environment: Kaggle is recognised by the presence of /kaggle
IS_KAGGLE = os.path.exists('/kaggle/input') or os.path.exists('/kaggle')

# BASE_PATH is where all outputs go; DATASET_DIR is the read-only source dataset
# (expected to contain rgb/, depth/ and labels/ sub-folders, see Cell 4).
if IS_KAGGLE:
    BASE_PATH = Path('/kaggle/working')
    DATASET_DIR = Path('/kaggle/input/ffb-localization-rgbd-dataset/ffb_localization_rgbd')
else:
    BASE_PATH = Path('D:/Work/Assisten Dosen/Anylabel/Experiments')
    DATASET_DIR = BASE_PATH / 'datasets' / 'ffb_localization_rgbd'

# RUNS_PATH: per-seed training run folders (used as the trainer `project`)
# WORK_DIR: generated 4-channel dataset and its YAML config
# KAGGLE_OUTPUT: text/JSON result files (created immediately)
RUNS_PATH = BASE_PATH / 'runs' / 'detect'
WORK_DIR = BASE_PATH / 'working' / 'rgbd_4ch'
KAGGLE_OUTPUT = BASE_PATH / 'kaggleoutput'
KAGGLE_OUTPUT.mkdir(parents=True, exist_ok=True)

# Report the detected environment, library versions and resolved paths
print("="*60)
print("A.3 RGB+DEPTH (4-CH) - ENVIRONMENT SETUP")
print("="*60)
print(f"Running on: {'Kaggle' if IS_KAGGLE else 'Local'}")
print(f"PyTorch: {torch.__version__}")
print(f"CUDA: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    # Release cached GPU memory left over from earlier cells/runs
    torch.cuda.empty_cache()
print(f"Dataset Dir: {DATASET_DIR}")
print(f"Work Dir: {WORK_DIR}")
print("="*60)

In [None]:
# =============================================================================
# Cell 2: Configuration - AUGMENT_PARAMS
# =============================================================================
# Uniform augmentation parameters (consistent across all experiments)
# Augmentation settings; the whole dict is unpacked into the trainer overrides
# in the training cell and echoed into the result files.
AUGMENT_PARAMS = {
    'translate': 0.1,
    'scale': 0.5,
    'fliplr': 0.5,
    'hsv_h': 0.0,  # Disabled for uniformity
    'hsv_s': 0.0,  # Disabled for uniformity
    'hsv_v': 0.0,  # Disabled for uniformity
    'erasing': 0.0,
    'mosaic': 0.0,
    'mixup': 0.0,
    'degrees': 0.0,
    'copy_paste': 0.0,
}

# Training configuration
SEEDS = [42, 123, 456, 789, 101]  # one independent training run per seed
EXP_PREFIX = 'exp_a3_rgbd_v2'     # run folders are named f"{EXP_PREFIX}_seed{seed}"
EPOCHS = 100
PATIENCE = 30                     # passed to the trainer as `patience`
IMGSZ = 640
BATCH_SIZE = 16
DEVICE = 0 if torch.cuda.is_available() else 'cpu'  # GPU index 0 when CUDA is available, else CPU

# Echo the configuration so it is captured in the notebook output
print("="*60)
print("TRAINING CONFIGURATION")
print("="*60)
print(f"Experiment: A.3 RGB+Depth (4-CH) (V2)")
print(f"Model: YOLOv11n")
print(f"Seeds: {SEEDS} ({len(SEEDS)} runs)")
print(f"Epochs: {EPOCHS} (patience: {PATIENCE})")
print(f"Image Size: {IMGSZ}")
print(f"Batch Size: {BATCH_SIZE}")
print(f"Device: {DEVICE}")
print(f"EXP_PREFIX: {EXP_PREFIX}")
print("\nUniform Augmentation:")
for key, value in AUGMENT_PARAMS.items():
    print(f"  {key}: {value}")
print("\nSpecial Features:")
print("  - 4-Channel input (RGB+Depth)")
print("  - Custom 4-channel trainer")
print("  - Reset BatchNorm for domain adaptation")
print("="*60)

In [None]:
# =============================================================================
# Cell 3: Helper Functions - convert_conv_to_4ch() and reset_bn_stats()
# =============================================================================
def convert_conv_to_4ch(conv_layer):
    """
    Return a 4-input-channel version of a 3-input-channel conv layer.

    The RGB kernels are copied unchanged and the additional (depth) input
    channel is initialised with the mean of the RGB kernels. Stride, padding,
    dilation, padding mode, bias, device and dtype are taken from the source
    layer so the replacement is a drop-in substitute.

    Args:
        conv_layer: ``nn.Conv2d`` to adapt. A layer that already has 4 input
            channels is returned as-is.

    Returns:
        The original layer (if already 4-channel) or a new ``nn.Conv2d`` with
        ``in_channels=4``.

    Raises:
        ValueError: If the layer is not a plain (``groups == 1``) 3-channel
            convolution, because the RGB weights could not be mapped
            one-to-one onto the new layer.
    """
    if conv_layer.in_channels == 4:
        return conv_layer

    if conv_layer.in_channels != 3 or conv_layer.groups != 1:
        raise ValueError(
            "convert_conv_to_4ch expects a 3-channel conv with groups=1, got "
            f"in_channels={conv_layer.in_channels}, groups={conv_layer.groups}"
        )

    new_conv = nn.Conv2d(
        in_channels=4,
        out_channels=conv_layer.out_channels,
        kernel_size=conv_layer.kernel_size,
        stride=conv_layer.stride,
        padding=conv_layer.padding,
        dilation=conv_layer.dilation,
        padding_mode=conv_layer.padding_mode,
        bias=conv_layer.bias is not None,
    )
    # Keep the replacement on the same device / precision as the layer it
    # replaces; otherwise an already-moved model ends up with a CPU/float32 stem.
    new_conv = new_conv.to(device=conv_layer.weight.device, dtype=conv_layer.weight.dtype)

    with torch.no_grad():
        new_conv.weight[:, :3, :, :] = conv_layer.weight
        # Depth channel starts as the average RGB filter
        new_conv.weight[:, 3:4, :, :] = conv_layer.weight.mean(dim=1, keepdim=True)
        if conv_layer.bias is not None:
            new_conv.bias.copy_(conv_layer.bias)

    return new_conv


def reset_bn_stats(model, train_loader, num_batches=100, device='cuda'):
    """
    Reset BatchNorm running statistics and re-estimate them from training data.

    Every ``nn.BatchNorm2d`` in ``model`` has its running mean/variance reset
    and its momentum set to 0.1; then up to ``num_batches`` batches from
    ``train_loader`` are passed through the model in train mode (without
    gradients) so the statistics are rebuilt from the new input distribution.

    Args:
        model: Module whose BatchNorm layers are re-calibrated. Left in train
            mode on return.
        train_loader: Iterable of batches; each batch is either a dict with an
            ``'img'`` tensor or a sequence whose first element is the image
            tensor.
        num_batches: Maximum number of batches to forward.
        device: Device the image tensors are moved to before the forward pass.

    Returns:
        The same ``model`` object.
    """
    model.train()

    for module in model.modules():
        if isinstance(module, nn.BatchNorm2d):
            module.reset_running_stats()
            module.momentum = 0.1

    with torch.no_grad():
        for i, batch in enumerate(train_loader):
            if i >= num_batches:
                break
            imgs = batch['img'] if isinstance(batch, dict) else batch[0]
            imgs = imgs.to(device)
            # Integer image batches (e.g. raw uint8 images, which the
            # Ultralytics trainer scales in preprocess_batch) cannot be fed to
            # a float conv; scale them to [0, 1] the same way.
            if not imgs.is_floating_point():
                imgs = imgs.float() / 255
            model(imgs)

    return model


def ensure_model_4ch(model):
    """
    Make sure the model's first conv layer accepts 4 input channels.

    Two layouts are handled: a wrapper whose layer list lives at
    ``model.model.model`` and a model whose layer list lives at
    ``model.model``. In both cases the first layer's ``conv`` is replaced when
    it has exactly 3 input channels.

    Returns:
        True if a conversion was performed, False otherwise (already
        converted, unknown layout, or an error, which is printed as a warning).
    """
    converted = False
    try:
        if hasattr(model, 'model'):
            inner = model.model
            if hasattr(inner, 'model'):
                stem = inner.model[0]
            elif hasattr(inner[0], 'conv'):
                stem = inner[0]
            else:
                stem = None

            if stem is not None and stem.conv.in_channels == 3:
                print("[4ch] Converting model...")
                stem.conv = convert_conv_to_4ch(stem.conv)
                converted = True
    except Exception as exc:
        print(f"[4ch] Warning: {exc}")
    return converted

print("âœ“ Helper functions defined: convert_conv_to_4ch(), reset_bn_stats(), ensure_model_4ch()")

In [None]:
# =============================================================================
# Cell 4: Create 4-Channel RGBD Dataset
# =============================================================================
def create_rgbd_dataset(root: Path, work_root: Path) -> None:
    """
    Create a 4-channel RGBD dataset by combining RGB (3ch) + Depth (1ch).

    For each split (train/val/test) the samples present in all three of
    ``root/rgb/<split>`` (``*.png``), ``root/depth/<split>`` (``*.png``) and
    ``root/labels/<split>`` (``*.txt``, matched by file stem) are processed:
    the depth image is read as single-channel grayscale, resized to the RGB
    resolution if needed, stacked as a 4th channel and written as a
    4-channel PNG to ``work_root/images/<split>``. The label file is copied
    to ``work_root/labels/<split>``. Files that already exist in the output
    are not rewritten, so the function can be re-run safely.

    Args:
        root: Source dataset directory containing ``rgb/``, ``depth/`` and
            ``labels/``.
        work_root: Output directory for the generated dataset.
    """
    splits = ("train", "val", "test")

    for split in splits:
        rgb_dir = root / "rgb" / split
        depth_dir = root / "depth" / split
        label_dir = root / "labels" / split

        if not (rgb_dir.exists() and depth_dir.exists() and label_dir.exists()):
            print(f"Warning: Missing folders for {split}")
            continue

        rgb_files = {p.name for p in rgb_dir.glob("*.png")}
        depth_files = {p.name for p in depth_dir.glob("*.png")}
        label_files = {p.with_suffix(".png").name for p in label_dir.glob("*.txt")}

        # Only samples that have RGB, depth AND a label; sorted for a
        # reproducible processing order.
        keep = sorted(rgb_files & depth_files & label_files)
        print(f"{split}: {len(keep)} samples")

        images_dir = work_root / "images" / split
        images_dir.mkdir(parents=True, exist_ok=True)

        labels_out_dir = work_root / "labels" / split
        labels_out_dir.mkdir(parents=True, exist_ok=True)

        if not keep:
            print(f"Warning: No matching RGB/depth/label samples for {split}")
            continue

        for fname in tqdm(keep, desc=f"Creating 4ch ({split})"):
            dst_rgbd = images_dir / fname

            if not dst_rgbd.exists():
                rgb = cv2.imread(str(rgb_dir / fname), cv2.IMREAD_COLOR)
                depth = cv2.imread(str(depth_dir / fname), cv2.IMREAD_GRAYSCALE)

                if rgb is None or depth is None:
                    print(f"Warning: Could not read RGB/depth for {fname}; skipped")
                    continue

                if depth.shape[:2] != rgb.shape[:2]:
                    depth = cv2.resize(depth, (rgb.shape[1], rgb.shape[0]))

                rgbd = np.dstack([rgb, depth])
                if not cv2.imwrite(str(dst_rgbd), rgbd):
                    print(f"Warning: Could not write {dst_rgbd}; skipped")
                    continue

            # with_suffix only touches the extension, unlike str.replace which
            # would also rewrite ".png" occurring inside the stem.
            label_name = Path(fname).with_suffix(".txt").name
            src_label = label_dir / label_name
            dst_label = labels_out_dir / label_name
            if not dst_label.exists():
                shutil.copy2(src_label, dst_label)

        # Verify one written image really has 4 channels
        sample = next(images_dir.glob("*.png"), None)
        if sample is None:
            print(f"  -> Warning: no images were written for {split}")
            continue
        test_img = cv2.imread(str(sample), cv2.IMREAD_UNCHANGED)
        if test_img is None:
            print(f"  -> Warning: could not read back {sample}")
        else:
            print(f"  -> Verified shape: {test_img.shape}")

# Create dataset
print("="*60)
print("CREATING 4-CHANNEL RGBD DATASET")
print("="*60)
create_rgbd_dataset(DATASET_DIR, WORK_DIR)

# Clear cache files left under the work directory (presumably label caches
# from a previous run — TODO confirm). Removal is best-effort: a file that
# cannot be deleted is reported instead of being silently ignored, and
# non-filesystem errors (e.g. KeyboardInterrupt) are no longer swallowed.
for p in WORK_DIR.rglob("*.cache"):
    try:
        p.unlink()
    except OSError as e:
        print(f"Warning: could not remove cache file {p}: {e}")
print("\nâœ“ Dataset preparation complete")

In [None]:
# =============================================================================
# Cell 5: Custom RGBD4ChTrainer and RGBD4ChValidator Classes
# =============================================================================
from ultralytics import YOLO
from ultralytics.models.yolo.detect import DetectionTrainer, DetectionValidator
from ultralytics.data import build_dataloader

class RGBD4ChValidator(DetectionValidator):
    """
    Validator that converts the model to 4-channel input before validation.
    """
    # NOTE(review): this relies on the base validator defining and calling
    # setup_model() — confirm against the installed Ultralytics version.
    def setup_model(self):
        super().setup_model()
        if self.model is None:
            return
        ensure_model_4ch(self.model)
        print("[Validator] Model 4ch ready")


class RGBD4ChTrainer(DetectionTrainer):
    """
    Detection trainer that converts the model's first conv to 4 input channels.
    """
    def setup_model(self):
        """
        Build the model via the base trainer, then adapt the stem to 4 channels.

        Returns:
            Whatever the base ``setup_model()`` returned (the loaded checkpoint
            or None), so the base trainer's resume logic keeps working.
        """
        ckpt = super().setup_model()

        first_conv = self.model.model[0].conv

        if first_conv.in_channels == 4:
            print("[Trainer] Model sudah 4-channel")
            return ckpt

        print(f"[Trainer] Converting to 4-channel...")
        self.model.model[0].conv = convert_conv_to_4ch(first_conv)
        print(f"[Trainer] âœ… Converted! Shape: {self.model.model[0].conv.weight.shape}")
        return ckpt

    def get_validator(self):
        """Return the 4-channel-aware validator used during training."""
        self.loss_names = "box_loss", "cls_loss", "dfl_loss"
        return RGBD4ChValidator(
            self.test_loader,
            save_dir=self.save_dir,
            args=self.args,
            _callbacks=self.callbacks
        )

print("âœ… Custom RGBD4ChTrainer and RGBD4ChValidator defined")

In [None]:
# =============================================================================
# Cell 6: Create Dataset YAML
# =============================================================================
# Dataset description consumed by the trainer/validator via `data=`.
yaml_content = f"""
# A.3 RGB+Depth 4-Channel Dataset Configuration
path: {WORK_DIR}
train: images/train
val: images/val
test: images/test

nc: 1
channels: 4

names:
  0: fresh_fruit_bunch
"""

# WORK_DIR only exists if the dataset cell created at least one split;
# create it here so writing the config cannot fail on a missing folder.
WORK_DIR.mkdir(parents=True, exist_ok=True)

config_path = WORK_DIR / "dataset_rgbd_v2.yaml"
# Write as UTF-8 explicitly so paths with non-ASCII characters do not depend
# on the platform's default encoding.
with open(config_path, 'w', encoding='utf-8') as f:
    f.write(yaml_content)

print(f"âœ… Config saved: {config_path}")
print("\nConfig contents:")
print("-"*40)
print(yaml_content)

In [None]:
# =============================================================================
# Cell 7: Training Loop with 4-Channel Conversion and BN Reset (Fixed)
# =============================================================================
# Per-seed bookkeeping: run status and wall-clock training time (seconds)
results_all = {}
training_times = {}

print("\n" + "="*60)
print("STARTING TRAINING LOOP")
print("="*60)

for idx, seed in enumerate(SEEDS, 1):
    start_time = time.time()

    print(f"\n{'='*60}")
    print(f"TRAINING A.3 RGBD - Seed {seed} ({idx}/{len(SEEDS)})")
    print(f"{'='*60}")

    # Set seeds
    torch.manual_seed(seed)
    np.random.seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

    try:
        # Create trainer
        trainer = RGBD4ChTrainer(overrides={
            'model': 'yolo11n.pt',
            'data': str(config_path),
            'imgsz': IMGSZ,
            'epochs': EPOCHS,
            'batch': BATCH_SIZE,
            'device': DEVICE,
            'seed': seed,
            'name': f"{EXP_PREFIX}_seed{seed}",
            'project': str(RUNS_PATH),
            'exist_ok': True,
            'pretrained': True,
            'patience': PATIENCE,
            'val': True,
            **AUGMENT_PARAMS,
        })

        # Right after construction the trainer only holds the weights path;
        # the network is built by setup_model(), which train() would call
        # later. Build it now so the BatchNorm layers can be reset first.
        if not isinstance(trainer.model, nn.Module):
            trainer.setup_model()
        net = trainer.model

        # Move the model to the target device first
        net.to(DEVICE)

        # Reset BN stats using dummy input (compatible with all Ultralytics versions)
        print("Resetting BatchNorm statistics...")
        net.train()

        # Reset running stats for all BN layers
        for module in net.modules():
            if isinstance(module, nn.BatchNorm2d):
                module.reset_running_stats()
                module.momentum = 0.1

        # Forward passes with dummy 4-channel images to update the BN stats.
        # The full model is called (not its inner layer list) because layers
        # with several inputs rely on the model's own forward routing.
        # NOTE(review): the statistics come from Gaussian noise, not real
        # RGBD images — reset_bn_stats() with a real loader may be preferable.
        dummy_input = torch.randn(BATCH_SIZE, 4, IMGSZ, IMGSZ).to(DEVICE)
        with torch.no_grad():
            for _ in range(10):
                _ = net(dummy_input)

        print("âœ“ BN reset complete")

        # Train
        trainer.train()

        elapsed = time.time() - start_time
        training_times[seed] = elapsed

        # Store the result record; with early stopping fewer than EPOCHS
        # epochs may have run, so prefer the trainer's last epoch index
        # (assumed 0-based — TODO confirm) and fall back to EPOCHS.
        results_all[seed] = {
            'model_path': str(RUNS_PATH / f"{EXP_PREFIX}_seed{seed}" / "weights" / "best.pt"),
            'epochs_trained': getattr(trainer, 'epoch', EPOCHS - 1) + 1,
            'completed': True,
        }

        print(f"\nâœ“ Seed {seed} completed!")
        print(f"  Time: {elapsed/60:.1f} minutes")

    except Exception as e:
        # Keep going with the next seed; record the failure for the summary
        print(f"\nâœ— Seed {seed} failed: {e}")
        import traceback
        traceback.print_exc()
        results_all[seed] = {'error': str(e), 'completed': False}

    # Cleanup: drop references so GPU memory is freed between seeds
    if 'net' in locals():
        del net
    if 'trainer' in locals():
        del trainer
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

print("\n" + "="*60)
print("TRAINING LOOP COMPLETED")
print("="*60)

# Print summary
print("\nðŸ“Š RESULTS SUMMARY:")
successful = sum(1 for r in results_all.values() if r.get('completed', False))
print(f"Successful: {successful}/{len(SEEDS)}")
for seed, res in results_all.items():
    if res.get('completed', False):
        print(f"  Seed {seed}: âœ“ Completed")
    else:
        print(f"  Seed {seed}: âœ— FAILED - {res.get('error', 'Unknown')[:50]}...")

In [None]:
# =============================================================================
# Cell 8: Evaluation
# =============================================================================
# seed -> {'mAP50', 'mAP50-95', 'Precision', 'Recall'} on the test split
results_dict = {}

print("\n" + "="*60)
print("EVALUATION ON TEST SET")
print("="*60)

for seed in SEEDS:
    model_path = RUNS_PATH / f"{EXP_PREFIX}_seed{seed}" / "weights" / "best.pt"

    if not model_path.exists():
        print(f"Model not found: {model_path}")
        continue

    print(f"\nSeed {seed}:")

    model = None
    try:
        model = YOLO(str(model_path))

        # Convert to 4ch if needed
        first_conv = model.model.model[0].conv
        if first_conv.in_channels == 3:
            print("  Converting to 4ch...")
            model.model.model[0].conv = convert_conv_to_4ch(first_conv)

        metrics = model.val(
            data=str(config_path),
            split="test",
            device=DEVICE,
            name=f"test_{EXP_PREFIX}_seed{seed}",
            exist_ok=True,
        )

        # Store plain Python floats so the values stay JSON-serialisable
        # regardless of the scalar type the metrics object returns.
        results_dict[seed] = {
            'mAP50': float(metrics.box.map50),
            'mAP50-95': float(metrics.box.map),
            'Precision': float(metrics.box.mp),
            'Recall': float(metrics.box.mr),
        }

        print(f"  mAP50: {metrics.box.map50:.4f}, mAP50-95: {metrics.box.map:.4f}")

    except Exception as e:
        print(f"  Evaluation failed: {e}")
    finally:
        # Release the model and cached GPU memory even when evaluation failed
        del model
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

print("\n" + "="*60)
print("EVALUATION COMPLETED")
print("="*60)

In [None]:
# =============================================================================
# Cell 9: Summary
# =============================================================================
if not results_dict:
    print("No results to display.")
else:
    # One row per seed, one column per metric
    df = pd.DataFrame(results_dict).T
    df.index.name = 'Seed'

    # Column-wise statistics across seeds
    avg, std = df.mean(), df.std()
    min_vals, max_vals = df.min(), df.max()

    print("\n" + "="*60)
    print("A.3 RGB+DEPTH (4-CH) (V2) - FINAL RESULTS")
    print("="*60 + "\n")
    print(df.to_string(float_format=lambda x: f"{x:.4f}"))

    print("\n" + "-"*60)
    print("STATISTICAL SUMMARY")
    print("-"*60)
    print(f"{'Metric':<15} {'Mean':>10} {'Std':>10} {'Min':>10} {'Max':>10}")
    print("-"*60)
    for col in df.columns:
        stats = (avg[col], std[col], min_vals[col], max_vals[col])
        print(f"{col:<15} " + " ".join(f"{value:>10.4f}" for value in stats))

    # Seed with the highest test mAP50
    best_seed = df['mAP50'].idxmax()
    print(f"\nâœ“ Best Seed: {best_seed} (mAP50: {df.loc[best_seed, 'mAP50']:.4f})")

    print("="*60)

In [None]:
# =============================================================================
# Cell 10: Save Results
# =============================================================================
output_file = KAGGLE_OUTPUT / 'a3_rgbd_v2_results.txt'

# Recompute the summary from results_dict here instead of relying on
# variables left behind by the summary cell: this cell then works (and is
# never stale) even if that cell was skipped or evaluation was re-run.
if results_dict:
    df = pd.DataFrame(results_dict).T
    df.index.name = 'Seed'
    avg = df.mean()
    std = df.std()
    best_seed = df['mAP50'].idxmax()


def _json_number(value):
    """Return value as a float, or None when it is NaN/inf (invalid in JSON)."""
    value = float(value)
    return value if np.isfinite(value) else None


# Human-readable report
with open(output_file, 'w', encoding='utf-8') as f:
    f.write("="*60 + "\n")
    f.write("A.3 RGB+Depth (4-CH) (V2) Results\n")
    f.write(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
    f.write(f"Environment: {'Kaggle' if IS_KAGGLE else 'Local'}\n")
    f.write("="*60 + "\n\n")

    f.write("Configuration:\n")
    f.write(f"  Model: YOLOv11n\n")
    f.write(f"  Input: 4-Channel (RGB+Depth)\n")
    f.write(f"  Epochs: {EPOCHS} (patience: {PATIENCE})\n")
    f.write(f"  Image Size: {IMGSZ}\n")
    f.write(f"  Batch Size: {BATCH_SIZE}\n")
    f.write(f"  Seeds: {SEEDS}\n")
    f.write("\nUniform Augmentation:\n")
    for key, value in AUGMENT_PARAMS.items():
        f.write(f"  {key}: {value}\n")
    f.write("\nSpecial Features:\n")
    f.write("  - 4-Channel input (RGB+Depth)\n")
    f.write("  - Custom RGBD4ChTrainer\n")
    f.write("  - Reset BatchNorm for domain adaptation\n")

    if results_dict:
        f.write("\n" + "="*60 + "\n")
        f.write("Per-Seed Results:\n")
        f.write("="*60 + "\n")
        f.write(df.to_string(float_format=lambda x: f"{x:.4f}"))

        f.write("\n\n" + "-"*60 + "\n")
        f.write("Summary (Mean Â± Std):\n")
        f.write("-"*60 + "\n")
        for col in df.columns:
            f.write(f"  {col}: {avg[col]:.4f} Â± {std[col]:.4f}\n")

        f.write(f"\nBest Seed: {best_seed}\n")

print(f"\nâœ“ Results saved: {output_file}")

# JSON output
json_output = {
    'experiment': 'A.3',
    'variant': 'V2',
    'seeds': SEEDS,
    'config': {
        'model': 'yolo11n',
        'input_channels': 4,
        'epochs': EPOCHS,
        'patience': PATIENCE,
        'imgsz': IMGSZ,
        'batch': BATCH_SIZE,
        'augmentation': AUGMENT_PARAMS,
        'reset_bn': True,
    },
    'results': {
        str(k): {metric: _json_number(x) for metric, x in v.items()}
        for k, v in results_dict.items()
    },
    # The std of a single run is NaN; it is written as null so the file
    # remains valid JSON.
    'summary': {
        'mean': {k: _json_number(v) for k, v in avg.items()},
        'std': {k: _json_number(v) for k, v in std.items()},
        'best_seed': int(best_seed),
    } if results_dict else None,
}

json_file = KAGGLE_OUTPUT / 'a3_rgbd_v2_results.json'
with open(json_file, 'w', encoding='utf-8') as f:
    json.dump(json_output, f, indent=2)

print(f"âœ“ JSON saved: {json_file}")

In [None]:
# =============================================================================
# Cell 11: Create Archives
# =============================================================================
print("\n" + "="*60)
print("CREATING ARCHIVES")
print("="*60 + "\n")

# Zip the training runs folder (only if any run directory exists).
# make_archive appends ".zip" itself, hence the suffix is stripped from the
# base name passed to it.
if RUNS_PATH.exists():
    runs_zip = BASE_PATH / 'a3_rgbd_v2_runs.zip'
    shutil.make_archive(str(runs_zip.with_suffix('')), 'zip', RUNS_PATH)
    size_mb = runs_zip.stat().st_size / 1024 / 1024
    print(f"âœ“ a3_rgbd_v2_runs.zip: {size_mb:.1f} MB")

# Zip the result files (text + JSON) written to KAGGLE_OUTPUT
output_zip = BASE_PATH / 'a3_rgbd_v2_output.zip'
shutil.make_archive(str(output_zip.with_suffix('')), 'zip', KAGGLE_OUTPUT)
size_mb = output_zip.stat().st_size / 1024 / 1024
print(f"âœ“ a3_rgbd_v2_output.zip: {size_mb:.1f} MB")

print("\n" + "="*60)
print("ALL DONE!")
print("="*60)
print("\nDownload from Output tab:")
print("  - a3_rgbd_v2_runs.zip (training runs)")
print("  - a3_rgbd_v2_output.zip (results)")