# A.3 - RGB+Depth (Real) 4-Channel Training - FIXED

**Experiment:** A.3  
**Input:** RGB+Depth (4-channel RGBD)  
**Objective:** Test fusion of RGB and real depth data  
**Classes:** 1 (fresh_fruit_bunch)

## Fix Applied
- Custom RGBD Dataset loader (load RGB + Depth, stack to 4-channel)
- Custom Trainer & Validator with 4-channel support
- Model architecture modification (first conv layer: 3→4 channels)
- Synchronized dataset structure validation

## Cell 1: Setup & Imports

In [None]:
# Install Ultralytics
!pip install -q ultralytics

import os
import sys
import shutil
import cv2
import numpy as np
import torch
import pandas as pd
from pathlib import Path
from datetime import datetime

from ultralytics import YOLO
from ultralytics.data.dataset import YOLODataset
from ultralytics.models.yolo.detect import DetectionTrainer, DetectionValidator

# Switch off wandb logging through its environment flag.
os.environ.update(WANDB_DISABLED="true")

# The presence of /kaggle/input is used as the marker for a Kaggle session.
IS_KAGGLE = os.path.exists('/kaggle/input')
_platform = 'Kaggle' if IS_KAGGLE else 'Local'
_has_cuda = torch.cuda.is_available()

print(f"Running on: {_platform}")
print(f"PyTorch: {torch.__version__}")
print(f"CUDA: {_has_cuda}")
if _has_cuda:
    # Only the first device is reported.
    print(f"GPU: {torch.cuda.get_device_name(0)}")

## Cell 2: Dataset Path Setup

In [None]:
# Resolve the dataset, working and output locations for the current platform.
if not IS_KAGGLE:
    # Local paths - adjust as needed
    BASE_PATH = Path(r"D:/Work/Assisten Dosen/Anylabel/Experiments")
    DATASET_DIR = BASE_PATH / "datasets" / "ffb_localization_rgbd"
    WORK_DIR = BASE_PATH / "working" / "ffb_localization_rgbd"
else:
    BASE_PATH = Path("/kaggle/working")
    DATASET_DIR = Path("/kaggle/input/ffb-localization-rgbd-dataset/ffb_localization_rgbd")
    WORK_DIR = Path("/kaggle/working/ffb_localization_rgbd")

# Both derived folders hang off BASE_PATH; only the output folder is created here.
RUNS_PATH = BASE_PATH / 'runs' / 'detect'
KAGGLE_OUTPUT = BASE_PATH / 'kaggleoutput'
KAGGLE_OUTPUT.mkdir(parents=True, exist_ok=True)

print(f"Dataset Dir: {DATASET_DIR}")
print(f"Work Dir: {WORK_DIR}")

# Report how many files each <folder>/<split> directory of the source holds.
for folder in ['rgb', 'depth', 'labels']:
    # Labels are text files, everything else is counted as PNG.
    pattern = '*.txt' if folder == 'labels' else '*.png'
    for split in ['train', 'val', 'test']:
        split_dir = DATASET_DIR / folder / split
        if not split_dir.exists():
            print(f"  ⚠️ Missing: {split_dir}")
            continue
        n_files = sum(1 for _ in split_dir.glob(pattern))
        print(f"  {folder}/{split}: {n_files} files")

## Cell 3: Sync Dataset & Create Images Folder

In [None]:
# Sync RGBD dataset - keep only paired files
def sync_rgbd_dataset(root: Path, work_root: Path) -> None:
    """Copy the complete RGB/depth/label triplets of every split to ``work_root``.

    For each of the ``train``, ``val`` and ``test`` splits, the PNG files in
    ``root/rgb/<split>`` and ``root/depth/<split>`` are matched by file name
    with the ``.txt`` files in ``root/labels/<split>``. Only samples present
    in all three folders are copied, into this layout::

        work_root/images/<split>/<name>.png   (copy of the RGB image)
        work_root/depth/<split>/<name>.png
        work_root/labels/<split>/<name>.txt

    Files that already exist at the destination are left untouched, so the
    function can be re-run. A split with a missing source folder is reported
    and skipped without creating anything for it.

    Args:
        root: Dataset root containing ``rgb/``, ``depth/`` and ``labels/``.
        work_root: Destination root; sub-folders are created on demand.
    """
    for split in ("train", "val", "test"):
        rgb_dir = root / "rgb" / split
        depth_dir = root / "depth" / split
        label_dir = root / "labels" / split

        if not (rgb_dir.exists() and depth_dir.exists() and label_dir.exists()):
            print(f"⚠️ Missing folders for {split}")
            continue

        rgb_files = {p.name for p in rgb_dir.glob("*.png")}
        depth_files = {p.name for p in depth_dir.glob("*.png")}
        # Labels are expressed as the PNG name they belong to so that the
        # three sets can be intersected directly.
        label_files = {p.with_suffix(".png").name for p in label_dir.glob("*.txt")}

        keep = rgb_files & depth_files & label_files

        print(f"{split}: RGB={len(rgb_files)}, Depth={len(depth_files)}, Labels={len(label_files)} -> Keep={len(keep)}")

        # The RGB images are copied into an "images" folder next to "labels"
        # and "depth" in the working tree.
        images_dir = work_root / "images" / split
        depth_out_dir = work_root / "depth" / split
        labels_out_dir = work_root / "labels" / split
        for out_dir in (images_dir, depth_out_dir, labels_out_dir):
            out_dir.mkdir(parents=True, exist_ok=True)

        # Sorted only to make the copy order reproducible.
        for fname in sorted(keep):
            # Swap just the final suffix: a plain str.replace(".png", ".txt")
            # would also rewrite a ".png" that occurs earlier in the name and
            # point at a label file that does not exist.
            label_name = Path(fname).with_suffix(".txt").name
            transfers = (
                (rgb_dir / fname, images_dir / fname),
                (depth_dir / fname, depth_out_dir / fname),
                (label_dir / label_name, labels_out_dir / label_name),
            )
            for src, dst in transfers:
                if not dst.exists():
                    shutil.copy2(src, dst)

# Build the paired working copy under WORK_DIR from the source dataset.
sync_rgbd_dataset(root=DATASET_DIR, work_root=WORK_DIR)
print(f"\n✓ Dataset synced to: {WORK_DIR}")

## Cell 4: Create YAML Config

In [None]:
# Create YAML config
# The image/label locations are given relative to `path`; `channels: 4`
# records that every sample is RGB + depth.
yaml_content = f"""
# A.3 RGB+Depth Dataset Config
path: {WORK_DIR.as_posix()}
train: images/train
val: images/val
test: images/test
nc: 1
names: ['fresh_fruit_bunch']
channels: 4
"""

# WORK_DIR only exists if the sync step copied at least one split, so make
# sure it is there before writing into it.
WORK_DIR.mkdir(parents=True, exist_ok=True)
config_path = WORK_DIR / 'dataset_rgbd.yaml'
config_path.write_text(yaml_content, encoding="utf-8")

print(f"Config saved: {config_path}")
print("\nConfig content:")
print(yaml_content)

## Cell 5: Custom RGBD Dataset Class

In [None]:
def normalize_depth_to_uint8(depth: np.ndarray) -> np.ndarray:
    """Return ``depth`` as ``uint8``, min-max scaling it when necessary.

    An array that is already ``uint8`` is handed back unchanged (the same
    object, no rescaling). Any other dtype is converted to ``float32``,
    stretched to the 0-255 range with ``cv2.NORM_MINMAX`` and cast to
    ``uint8``; the scaling therefore depends on the minimum and maximum of
    the array passed in.
    """
    if depth.dtype != np.uint8:
        as_float = depth.astype(np.float32)
        scaled = cv2.normalize(as_float, None, 0, 255, cv2.NORM_MINMAX)
        return scaled.astype(np.uint8)
    return depth


class RGBDDataset(YOLODataset):
    """YOLO dataset that yields 4-channel images (3 colour channels + depth).

    The colour image is read from the path stored in ``self.im_files``; the
    matching depth map is found by swapping the ``images`` (or ``rgb``)
    directory component of that path for ``depth``.
    """

    def __init__(self, *args, **kwargs):
        """Build the base dataset, then make sure ``self.buffer`` is populated.

        ``load_image`` in this class never appends to ``self.buffer``, so an
        empty buffer is seeded here with *every* image index rather than only
        a leading subset.
        NOTE(review): the mosaic augmentation is presumed to draw its extra
        images from ``self.buffer``; confirm against the installed
        Ultralytics version.
        """
        super().__init__(*args, **kwargs)

        if not getattr(self, "buffer", None):
            self.buffer = list(range(len(getattr(self, "im_files", []))))

    @staticmethod
    def img2label_paths(img_paths):
        """Return the label ``.txt`` path that corresponds to each image path.

        The ``images`` directory component (or, failing that, ``rgb``) is
        replaced by ``labels`` and the file extension by ``.txt``.
        NOTE(review): nothing in this class calls this method; verify that
        the base class actually uses it.
        """
        images_part = os.sep + "images" + os.sep
        rgb_part = os.sep + "rgb" + os.sep
        labels_part = os.sep + "labels" + os.sep

        label_paths = []
        for p in img_paths:
            p = str(p)
            if images_part in p:
                p = p.replace(images_part, labels_part)
            elif rgb_part in p:
                p = p.replace(rgb_part, labels_part)
            label_paths.append(os.path.splitext(p)[0] + ".txt")
        return label_paths

    @staticmethod
    def _depth_path(img_path):
        """Derive the depth-map path by swapping ``images``/``rgb`` for ``depth``."""
        depth_part = os.sep + "depth" + os.sep
        images_part = os.sep + "images" + os.sep
        if images_part in img_path:
            return img_path.replace(images_part, depth_part)
        return img_path.replace(os.sep + "rgb" + os.sep, depth_part)

    def load_image(self, i):
        """Load sample ``i`` as an ``(H, W, 4)`` array: colour channels + depth.

        Args:
            i: Index into ``self.im_files``.

        Returns:
            tuple: ``(img, (h0, w0), img.shape[:2])`` - the 4-channel image
            resized so that its longer side equals ``self.imgsz``, the
            original height/width, and the resized height/width.

        Raises:
            FileNotFoundError: If the colour or the depth file cannot be read,
                or if no depth path can be derived from the image path.
        """
        f = self.im_files[i]

        # Colour image (3 channels, as returned by cv2.imread).
        rgb = cv2.imread(f)
        if rgb is None:
            raise FileNotFoundError(f"RGB not found: {f}")
        h0, w0 = rgb.shape[:2]

        depth_path = self._depth_path(f)
        if depth_path == f:
            # Without this guard the colour file itself would be read back
            # and its first channel silently used as "depth".
            raise FileNotFoundError(f"Depth not found: no 'images' or 'rgb' folder in {f}")

        # IMREAD_UNCHANGED keeps the stored bit depth of the depth map.
        depth = cv2.imread(depth_path, cv2.IMREAD_UNCHANGED)
        if depth is None:
            raise FileNotFoundError(f"Depth not found: {depth_path}")

        # A multi-channel depth file contributes only its first channel.
        if depth.ndim == 3:
            depth = depth[:, :, 0]

        # Bring the depth map to the colour resolution; nearest-neighbour
        # avoids inventing intermediate depth values.
        if depth.shape[:2] != (h0, w0):
            depth = cv2.resize(depth, (w0, h0), interpolation=cv2.INTER_NEAREST)

        depth = normalize_depth_to_uint8(depth)[:, :, None]

        # Colour channels first, depth as the 4th channel.
        img = np.concatenate([rgb, depth], axis=2)

        # Scale so that the longer side becomes self.imgsz.
        r = self.imgsz / max(h0, w0)
        if r != 1:
            interp = cv2.INTER_LINEAR if (self.augment or r > 1) else cv2.INTER_AREA
            # Round instead of truncating so floating-point error cannot
            # shrink the long side to imgsz - 1; clamp to [1, imgsz].
            new_w = max(1, min(round(w0 * r), self.imgsz))
            new_h = max(1, min(round(h0 * r), self.imgsz))
            img = cv2.resize(img, (new_w, new_h), interpolation=interp)

        return img, (h0, w0), img.shape[:2]

print("✓ RGBDDataset class defined")

## Cell 6: Custom Trainer & Validator

In [None]:
class RGBDTrainer(DetectionTrainer):
    """Detection trainer whose datasets are :class:`RGBDDataset` instances."""

    def build_dataset(self, img_path, mode="train", batch=None):
        """Create the RGBD dataset for ``img_path``.

        Args:
            img_path: Location of the images of the split to load.
            mode: ``"train"`` turns augmentation on; ``"val"`` turns
                rectangular batches on.
            batch: Batch size forwarded to the dataset.

        Returns:
            RGBDDataset: Dataset configured from ``self.args`` and ``self.data``.
        """
        # The model stride may be a tensor (take its maximum) or a plain number.
        stride_t = self.model.stride
        stride = int(stride_t.max()) if hasattr(stride_t, "max") else int(stride_t)

        return RGBDDataset(
            data=self.data,
            task=self.args.task,
            img_path=img_path,
            imgsz=self.args.imgsz,
            batch_size=batch,
            augment=mode == "train",
            hyp=self.args,
            rect=mode == "val",
            cache=self.args.cache,
            # Follow the training arguments instead of forcing False; the
            # value is still False whenever the option is unset.
            single_cls=bool(getattr(self.args, "single_cls", False)),
            stride=stride,
            pad=0.0,
            prefix=f"{mode}: ",
        )


class RGBDValidator(DetectionValidator):
    """Detection validator whose dataset is an :class:`RGBDDataset`."""

    def build_dataset(self, img_path, mode="val", batch=None):
        """Create the RGBD dataset used for validation.

        Augmentation is always off and rectangular batches are always on.

        Args:
            img_path: Location of the images of the split to load.
            mode: Only used for the log prefix.
            batch: Batch size forwarded to the dataset.

        Returns:
            RGBDDataset: Dataset configured from ``self.args`` and ``self.data``.
        """
        # Fall back to a stride of 32 when the validator has none set; the
        # attribute may be a tensor (take its maximum) or a plain number.
        stride_t = getattr(self, "stride", 32)
        stride = int(stride_t.max()) if hasattr(stride_t, "max") else int(stride_t)

        return RGBDDataset(
            data=self.data,
            task=self.args.task,
            img_path=img_path,
            imgsz=self.args.imgsz,
            batch_size=batch,
            augment=False,
            hyp=self.args,
            rect=True,
            cache=self.args.cache,
            # Follow the validation arguments instead of forcing False; the
            # value is still False whenever the option is unset.
            single_cls=bool(getattr(self.args, "single_cls", False)),
            stride=stride,
            pad=0.0,
            prefix=f"{mode}: ",
        )

print("✓ RGBDTrainer and RGBDValidator defined")

## Cell 7: Model Modification (4-Channel Input)

In [None]:
def adapt_first_conv_to_4ch(det_model):
    """Widen the first convolution of a detection model to 4 input channels.

    The existing input-channel weights are copied unchanged and every added
    channel (the depth channel) starts from the mean of the existing channel
    weights. The model is modified in place.

    Args:
        det_model: Object whose ``model`` attribute is an indexable layer
            container (this notebook passes ``YOLO(...).model``). Layer 0 is
            either a ``torch.nn.Conv2d`` or a wrapper exposing one as
            ``.conv``. If ``det_model`` carries a ``yaml`` dict, its ``"ch"``
            entry is set to 4.

    Raises:
        ValueError: If the first convolution is grouped or already has more
            than 4 input channels.
    """
    layers = det_model.model
    first = layers[0]
    is_wrapped = hasattr(first, "conv")
    conv = first.conv if is_wrapped else first

    if conv.in_channels == 4:
        print("Model already has 4 input channels")
        return
    if conv.in_channels > 4 or conv.groups != 1:
        raise ValueError(
            f"Cannot adapt first conv with in_channels={conv.in_channels}, "
            f"groups={conv.groups} to 4 input channels"
        )

    print(f"Modifying first conv: {conv.in_channels} -> 4 channels")

    # Mirror every hyper-parameter of the old layer and keep the new one on
    # the same device/dtype so it can be swapped in wherever the model lives.
    new_conv = torch.nn.Conv2d(
        in_channels=4,
        out_channels=conv.out_channels,
        kernel_size=conv.kernel_size,
        stride=conv.stride,
        padding=conv.padding,
        dilation=conv.dilation,
        padding_mode=conv.padding_mode,
        bias=(conv.bias is not None),
    ).to(device=conv.weight.device, dtype=conv.weight.dtype)

    with torch.no_grad():
        n_old = conv.in_channels
        # Existing channels keep their weights.
        new_conv.weight[:, :n_old] = conv.weight
        # Added channel(s): mean over the existing input channels.
        new_conv.weight[:, n_old:] = conv.weight.mean(dim=1, keepdim=True)
        if conv.bias is not None:
            new_conv.bias.copy_(conv.bias)

    # Preserve the trainable/frozen state of the replaced parameters.
    new_conv.weight.requires_grad_(conv.weight.requires_grad)
    if conv.bias is not None:
        new_conv.bias.requires_grad_(conv.bias.requires_grad)

    if is_wrapped:
        first.conv = new_conv
    else:
        layers[0] = new_conv

    # Record the new channel count in the config dict. It is looked up on
    # det_model first; looking it up only on the layer container raises
    # AttributeError when that container has no `yaml` attribute.
    updated = False
    for owner in (det_model, layers):
        cfg = getattr(owner, "yaml", None)
        if isinstance(cfg, dict):
            cfg["ch"] = 4
            updated = True
    if not updated:
        print("⚠️ No yaml config found on the model; 'ch' was not updated")

    print("✓ Model modified for 4-channel input")

print("✓ adapt_first_conv_to_4ch function defined")

## Cell 8: Verify Dataset Count

In [None]:
# Verify dataset counts
train_dir = WORK_DIR / "images" / "train"
val_dir = WORK_DIR / "images" / "val"
test_dir = WORK_DIR / "images" / "test"

print("Dataset counts:")
print(f"  Train: {len(list(train_dir.glob('*.png')))} images")
print(f"  Val:   {len(list(val_dir.glob('*.png')))} images")
print(f"  Test:  {len(list(test_dir.glob('*.png')))} images")

# Clear old caches. Removal stays best-effort, but a file that cannot be
# deleted is reported instead of being ignored silently.
for cache_file in list(WORK_DIR.rglob("*.cache")):
    try:
        cache_file.unlink()
    except OSError as err:
        print(f"⚠️ Could not remove cache {cache_file}: {err}")
print("✓ Cache cleared")

## Cell 9: Training Configuration

In [None]:
# Settings shared by every training and evaluation run in this notebook.
SEEDS = [42, 123, 456, 789, 101]
EXP_PREFIX = 'exp_a3_rgbd'

IMGSZ, EPOCHS, BATCH, PATIENCE = 640, 100, 16, 30
# First CUDA device when available, otherwise the CPU.
DEVICE = "0" if torch.cuda.is_available() else "cpu"

print("Training Config:")
for _label, _value in (
    ("Seeds", SEEDS),
    ("Epochs", EPOCHS),
    ("Batch", BATCH),
    ("Image Size", IMGSZ),
    ("Device", DEVICE),
    ("Patience", PATIENCE),
):
    print(f"  {_label}: {_value}")

## Cell 10: Training Loop

In [None]:
# Training loop: one full training run per seed in SEEDS.
for run_idx, seed in enumerate(SEEDS, start=1):
    print(f"\n{'='*60}")
    print(f"TRAINING A.3 RGBD (4-CH) - Seed {seed} ({run_idx}/{len(SEEDS)})")
    print(f"{'='*60}\n")

    exp_name = f"{EXP_PREFIX}_seed{seed}"

    # Seed torch and numpy directly as well as passing `seed` to train().
    torch.manual_seed(seed)
    np.random.seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)

    # Start every run from the same pretrained checkpoint, widened to 4 channels.
    model = YOLO("yolo11n.pt")
    adapt_first_conv_to_4ch(model.model)

    # Train with custom trainer
    results = model.train(
        data=str(config_path),
        imgsz=IMGSZ,
        epochs=EPOCHS,
        batch=BATCH,
        seed=seed,
        device=DEVICE,
        # Save under RUNS_PATH explicitly: the evaluation cell loads
        # RUNS_PATH/<exp_name>/weights/best.pt, which only matches the
        # library's default run folder for particular working directories.
        project=str(RUNS_PATH),
        name=exp_name,
        exist_ok=True,
        patience=PATIENCE,
        hsv_h=0.0,  # Disable HSV for fair RGBD comparison
        hsv_s=0.0,
        hsv_v=0.0,
        trainer=RGBDTrainer,
    )

    print(f"\nSeed {seed} training complete!")
    # Tolerate a missing result object instead of failing after a finished run.
    final_metrics = getattr(results, "results_dict", None) or {}
    print(f"  mAP50: {final_metrics.get('metrics/mAP50(B)', 0):.3f}")

## Cell 11: Evaluation on Test Set

In [None]:
# Evaluation on test set: score the best checkpoint of every seed.
results_dict = {}

print("\n" + "="*60)
print("EVALUATION ON TEST SET")
print("="*60)

for seed in SEEDS:
    model_path = RUNS_PATH / f"{EXP_PREFIX}_seed{seed}" / "weights" / "best.pt"

    # A seed without a checkpoint is reported and left out of the summary.
    if not model_path.exists():
        print(f"\n⚠️ Model not found: {model_path}")
        continue

    print(f"\n{'='*40}")
    print(f"Seed {seed}:")
    print(f"{'='*40}")

    # Load model
    model = YOLO(str(model_path))

    # Validate on test set with custom validator
    metrics = model.val(
        data=str(config_path),
        split="test",
        device=DEVICE,
        # Keep the evaluation output inside RUNS_PATH, the folder that is
        # archived at the end of the notebook.
        project=str(RUNS_PATH),
        name=f"test_{EXP_PREFIX}_seed{seed}",
        exist_ok=True,
        validator=RGBDValidator,
    )

    # Convert once to plain floats and reuse them for storing and printing.
    seed_scores = {
        'mAP50': float(metrics.box.map50),
        'mAP50-95': float(metrics.box.map),
        'Precision': float(metrics.box.mp),
        'Recall': float(metrics.box.mr),
    }
    results_dict[seed] = seed_scores

    print(f"  mAP50:     {seed_scores['mAP50']:.3f}")
    print(f"  mAP50-95:  {seed_scores['mAP50-95']:.3f}")
    print(f"  Precision: {seed_scores['Precision']:.3f}")
    print(f"  Recall:    {seed_scores['Recall']:.3f}")

## Cell 12: Results Summary

In [None]:
# One row per seed, one column per metric.
df = pd.DataFrame.from_dict(results_dict, orient="index")
df.index.name = 'Seed'

# Column-wise mean and standard deviation across the seeds.
avg, std = df.mean(), df.std()

_rule_heavy = "=" * 60
_rule_light = "-" * 60

print("\n" + _rule_heavy)
print("A.3 RGB+DEPTH (REAL) - FINAL RESULTS")
print(_rule_heavy + "\n")
print(df.to_string(float_format=lambda x: f"{x:.3f}"))

print("\n" + _rule_light)
print("SUMMARY (Mean ± Std)")
print(_rule_light)
for col in df.columns:
    print(f"  {col}: {avg[col]:.3f} ± {std[col]:.3f}")

# Kept together so the table and its statistics travel as one object.
summary_results = dict(df=df, avg=avg, std=std)

## Cell 13: Save Results

In [None]:
# Save results to file
output_file = KAGGLE_OUTPUT / 'a3_rgbd_results.txt'

# UTF-8 is requested explicitly because the report contains "±", which the
# platform default encoding is not guaranteed to support.
with open(output_file, 'w', encoding='utf-8') as f:
    f.write("="*60 + "\n")
    f.write("A.3 RGB+Depth (Real) 4-Channel Results\n")
    f.write(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
    # Taken from the training constants so the header cannot drift from the
    # settings that were actually used.
    f.write(f"Training: epochs={EPOCHS}, patience={PATIENCE}, HSV=disabled\n")
    f.write(f"Seeds: {SEEDS}\n")
    f.write("="*60 + "\n\n")

    f.write("Per-Seed Results:\n")
    f.write(df.to_string(float_format=lambda x: f"{x:.3f}"))
    f.write("\n\n" + "-"*60 + "\n")
    f.write("Summary (Mean ± Std):\n")
    for col in df.columns:
        f.write(f"  {col}: {avg[col]:.3f} ± {std[col]:.3f}\n")

print(f"✓ Results saved: {output_file}")

## Cell 14: Create Archives

In [None]:
# Bundle the run folder (when present) and the results folder as zip files
# placed directly under BASE_PATH.
if RUNS_PATH.exists():
    _runs_zip = shutil.make_archive(str(BASE_PATH / 'a3_runs'), 'zip', RUNS_PATH)
    # Bytes -> MiB for the size report.
    zip_size = os.path.getsize(_runs_zip) / 1024 / 1024
    print(f"✓ a3_runs.zip: {zip_size:.1f} MB")

shutil.make_archive(str(BASE_PATH / 'a3_output'), 'zip', KAGGLE_OUTPUT)
print("✓ a3_output.zip created")

for _line in (
    "\n" + "="*60,
    "TRAINING COMPLETE!",
    "="*60,
    "\nDownload files from the Output tab:",
    "  - a3_runs.zip (training runs)",
    "  - a3_output.zip (results)",
):
    print(_line)