# 🌊 YOLO-UDD v2.0 - Kaggle Training

**Simple 6-Cell Training - Just Run All!** ⚡

## 📋 Before You Start:
1. **Enable GPU**: Settings → Accelerator → **GPU T4 x2** → Save
2. **Dataset**: Google Drive link already configured (automatic download)
3. **Run**: Click **"Run All"** and wait ~10 hours

## ⏱️ Training Info:
- **Time**: ~10 hours (100 epochs)
- **Expected mAP**: 70-72%
- **No restarts needed!** ✅

---

In [None]:
# ======================================================================
# 📦 CELL 1: Environment Setup
# ======================================================================
import os
import sys

print("="*70)
print("🔧 CELL 1: Environment Setup")
print("="*70)

# Check and fix NumPy version FIRST
print("\n[Step 1/3] Checking NumPy version...")
try:
    import numpy as np
    numpy_ver = np.__version__
    
    if numpy_ver.startswith('2.'):
        print(f"  ⚠️  NumPy {numpy_ver} detected (will cause crashes)")
        print("  🔧 Installing NumPy 1.26.4...")
        !pip uninstall -y numpy > /dev/null 2>&1
        !pip install -q numpy==1.26.4
        print("  ✅ NumPy 1.26.4 installed")
    else:
        print(f"  ✅ NumPy {numpy_ver} OK")
except Exception as e:
    print(f"  ⚠️  NumPy check issue: {e}")
    !pip install -q numpy==1.26.4

# Set working directory
print("\n[Step 2/3] Setting up workspace...")
WORK_DIR = '/kaggle/working'
REPO_DIR = '/kaggle/working/YOLO-UDD-v2.0'
os.chdir(WORK_DIR)
print(f"  ✅ Working directory: {WORK_DIR}")

# Clone repository
print("\n[Step 3/3] Cloning repository...")
if os.path.exists(REPO_DIR):
    import shutil
    shutil.rmtree(REPO_DIR)

!git clone -q https://github.com/kshitijkhede/YOLO-UDD-v2.0.git

if os.path.exists(REPO_DIR):
    os.chdir(REPO_DIR)
    if REPO_DIR not in sys.path:
        sys.path.insert(0, REPO_DIR)
    print(f"  ✅ Repository ready: {REPO_DIR}")
else:
    raise Exception("Clone failed!")

print("\n" + "="*70)
print("✅ Cell 1 Complete - Environment Ready!")
print("="*70)

In [None]:
# ======================================================================
# 📦 CELL 2: Verification & Dependencies
# ======================================================================
import os
import sys
import torch

print("="*70)
print("📦 CELL 2: Verification & Dependencies")
print("="*70)

# Ensure we're in the repository
REPO_DIR = '/kaggle/working/YOLO-UDD-v2.0'
if not os.path.exists(REPO_DIR):
    print(f"\n❌ Repository not found at {REPO_DIR}")
    print("⚠️  Please re-run Cell 1")
    raise Exception("Repository not found! Re-run Cell 1")

os.chdir(REPO_DIR)
if REPO_DIR not in sys.path:
    sys.path.insert(0, REPO_DIR)

# Verify repository structure
print("\n[Step 1/3] Verifying repository...")
required = ['models/', 'scripts/', 'utils/', 'configs/', 'data/', 'scripts/train.py']
all_ok = True
for item in required:
    if os.path.exists(item):
        print(f"  ✅ {item}")
    else:
        print(f"  ❌ {item} MISSING")
        all_ok = False

if not all_ok:
    raise Exception("Repository incomplete! Re-run Cell 1")

# Check GPU
print("\n[Step 2/3] Checking GPU...")
if torch.cuda.is_available():
    print(f"  ✅ GPU: {torch.cuda.get_device_name(0)}")
    print(f"  ✅ Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")
else:
    print("  ⚠️  NO GPU! Enable: Settings → GPU T4 x2")

# Install dependencies
print("\n[Step 3/3] Installing dependencies (~2 min)...")
!pip install -q torch>=2.0.0 torchvision>=0.15.0 albumentations>=1.3.0 \
    opencv-python-headless>=4.7.0 pycocotools>=2.0.6 tensorboard>=2.12.0 \
    tqdm pyyaml scikit-learn matplotlib seaborn gdown

print("  ✅ Dependencies installed")

print("\n" + "="*70)
print("✅ Cell 2 Complete - System Ready!")
print("="*70)

In [None]:
# ======================================================================
# 📦 CELL 3: Dataset Setup
# ======================================================================
# Downloads the dataset archive from Google Drive, unpacks it under
# /kaggle/working/, and locates the directory that contains both an
# `images` and an `annotations` sub-directory. The result is stored in
# DATASET_PATH; the cell raises if the download or the lookup fails.
import os
import zipfile

import gdown

print("="*70, "📦 CELL 3: Dataset Setup", "="*70, sep="\n")

# Download from Google Drive
FILE_ID = '17oRYriPgBnW9zowwmhImxdUpmHwOjgIp'
GDRIVE_URL = f'https://drive.google.com/uc?id={FILE_ID}'
ZIP_PATH = '/kaggle/working/trashcan.zip'

print("\n☁️  Downloading dataset from Google Drive (~180 MB)...")
gdown.download(GDRIVE_URL, ZIP_PATH, quiet=False)

# Guard clause: nothing below makes sense without the archive on disk.
if not os.path.exists(ZIP_PATH):
    raise Exception("Download failed!")

zip_megabytes = os.path.getsize(ZIP_PATH) / (1024 * 1024)
print(f"  ✅ Downloaded: {zip_megabytes:.1f} MB")

print("  📦 Extracting...")
# Extract using Python zipfile (handles Windows paths better)
with zipfile.ZipFile(ZIP_PATH, 'r') as archive:
    archive.extractall('/kaggle/working/')
print("  ✅ Extracted")

# Show what was extracted (hidden entries and the archive itself omitted)
print("\n🔍 Checking extracted contents...")
work_contents = [
    entry
    for entry in os.listdir('/kaggle/working/')
    if not (entry.startswith('.') or entry == 'trashcan.zip')
]
print(f"  Contents of /kaggle/working/: {work_contents}")

# First pass: probe a fixed list of likely locations, in order.
DATASET_PATH = None
for candidate in (
    '/kaggle/working/trashcan',
    '/kaggle/working/trashcan/trashcan',
    '/kaggle/working/data/trashcan',
    '/kaggle/working',
):
    print(f"  Checking: {candidate}")
    if not os.path.exists(candidate):
        continue

    # A dataset root is recognised by its images/ and annotations/ children.
    has_images = os.path.exists(os.path.join(candidate, 'images'))
    has_annotations = os.path.exists(os.path.join(candidate, 'annotations'))
    print(f"    Path exists | images: {has_images} | annotations: {has_annotations}")

    if has_images and has_annotations:
        DATASET_PATH = candidate
        print(f"    ✅ FOUND DATASET HERE!")
        break

# Second pass: walk the whole working tree and take the first match.
if DATASET_PATH is None:
    print("\n🔍 Searching for dataset structure...")
    for walk_root, subdirs, _ in os.walk('/kaggle/working/'):
        if {'images', 'annotations'} <= set(subdirs):
            DATASET_PATH = walk_root
            print(f"  ✅ Found dataset at: {DATASET_PATH}")
            break

if DATASET_PATH is None:
    print("\n❌ Dataset not found!")
    print("\nDebug - Complete directory tree:")
    for root, dirs, files in os.walk('/kaggle/working/'):
        level = root.replace('/kaggle/working/', '').count(os.sep)
        if level >= 3:  # Limit depth
            continue
        print(f"{'  ' * level}{os.path.basename(root)}/")
        child_indent = '  ' * (level + 1)
        # Only the first five directories and files per level are listed.
        for dir_name in dirs[:5]:
            print(f"{child_indent}{dir_name}/")
        for file_name in files[:5]:
            print(f"{child_indent}{file_name}")
    raise Exception("Dataset not found after extraction!")

print(f"\n✅ DATASET FOUND: {DATASET_PATH}")

# Verify structure
print("\n📂 Dataset Structure:")
images_dir = os.path.join(DATASET_PATH, 'images')
annotations_dir = os.path.join(DATASET_PATH, 'annotations')

# Show image counts for each split directory that exists
image_suffixes = ('.jpg', '.png', '.jpeg')
for split in ('train', 'val', 'test'):
    split_dir = os.path.join(images_dir, split)
    if os.path.exists(split_dir):
        image_count = sum(
            1 for name in os.listdir(split_dir) if name.endswith(image_suffixes)
        )
        print(f"  ✅ {split}: {image_count:,} images")

# Show annotation files with their sizes
if os.path.exists(annotations_dir):
    for json_name in os.listdir(annotations_dir):
        if not json_name.endswith('.json'):
            continue
        json_megabytes = os.path.getsize(os.path.join(annotations_dir, json_name)) / (1024 * 1024)
        print(f"  ✅ {json_name} ({json_megabytes:.1f} MB)")

print("\n" + "="*70)
print("✅ Cell 3 Complete - Dataset Ready!")
print("="*70)

In [None]:
# ======================================================================
# 🏗️  CELL 4: Build Model
# ======================================================================
# Imports YOLO_UDD from the cloned repository, instantiates it, reports
# its parameter counts, and runs one forward pass on random input.
import os
import sys
import torch

print("="*70, "🏗️  CELL 4: Build Model", "="*70, sep="\n")

# Make sure the repository is both the cwd and importable.
REPO_DIR = '/kaggle/working/YOLO-UDD-v2.0'
os.chdir(REPO_DIR)
if REPO_DIR not in sys.path:
    sys.path.insert(0, REPO_DIR)

print("\n[Step 1/2] Importing YOLO-UDD...")
from models.yolo_udd import YOLO_UDD
print("  ✅ YOLO-UDD imported")

print("\n[Step 2/2] Building model...")
model = YOLO_UDD(num_classes=22, pretrained_backbone=True)

# Tally all parameters and the trainable subset in a single pass.
total_params = 0
trainable_params = 0
for param in model.parameters():
    n_elements = param.numel()
    total_params += n_elements
    if param.requires_grad:
        trainable_params += n_elements

print("  ✅ Model built successfully")
print(f"  📊 Total parameters: {total_params:,}")
print(f"  📊 Trainable parameters: {trainable_params:,}")

# Smoke test: one 640x640 RGB image of random noise, no gradients.
print("\n[Test] Forward pass...")
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)
model = model.to(device)
dummy_input = torch.randn(1, 3, 640, 640).to(device)
with torch.no_grad():
    output = model(dummy_input)
print(f"  ✅ Output shape: {output.shape}")

print("\n" + "="*70, "✅ Cell 4 Complete - Model Ready!", "="*70, sep="\n")

In [None]:
# ======================================================================
# 🚀 CELL 5: Start Training
# ======================================================================
# Launches scripts/train.py from the repository as a child process using
# the current interpreter, and raises if it exits with a non-zero status.
import subprocess
import sys
import os

print("="*70)
print("🚀 CELL 5: Training (will take ~10 hours)")
print("="*70)

# Ensure we're in the repository (the script and config paths are relative)
REPO_DIR = '/kaggle/working/YOLO-UDD-v2.0'
os.chdir(REPO_DIR)

# Prefer the dataset location discovered by Cell 3 (DATASET_PATH), which
# may differ from the default when the archive unpacks to another layout.
# Fall back to the default path if Cell 3 has not run in this session.
DEFAULT_DATA_DIR = '/kaggle/working/trashcan'
data_dir = globals().get('DATASET_PATH') or DEFAULT_DATA_DIR
output_dir = '/kaggle/working/runs/training'
epochs = 100
batch_size = 8

if not os.path.isdir(data_dir):
    # Warn only; the training script reports its own error if needed.
    print(f"\n⚠️  Dataset directory not found: {data_dir} (did Cell 3 run?)")

# Training command (argument list, so no shell quoting is involved)
cmd = [
    sys.executable,
    'scripts/train.py',
    '--data_dir', data_dir,
    '--config', 'configs/train_config.yaml',
    '--output_dir', output_dir,
    '--epochs', str(epochs),
    '--batch_size', str(batch_size),
]

# Printed from the same values used to build the command above, so the
# summary cannot drift from what is actually executed.
print("\n📝 Training Configuration:")
print(f"  Dataset: {data_dir}")
print(f"  Epochs: {epochs}")
print(f"  Batch size: {batch_size}")
print(f"  Output: {output_dir}")

print("\n🏃 Starting training...\n")
print("="*70)

# Run training; output is not captured, so it streams as the job runs.
result = subprocess.run(cmd, capture_output=False, text=True)

if result.returncode != 0:
    print("\n" + "="*70)
    print("❌ Training failed!")
    print("="*70)
    raise RuntimeError(f"Training failed with code {result.returncode}")

print("\n" + "="*70)
print("✅ Cell 5 Complete - Training Finished!")
print("="*70)

In [None]:
# ======================================================================
# 📊 CELL 6: Results & Download
# ======================================================================
# Lists the `.pt` files in the training checkpoint directory with their
# sizes and points out `best.pt` when it is present.
import os
from IPython.display import FileLink

print("="*70, "📊 CELL 6: Results", "="*70, sep="\n")

# Location checked for checkpoint files
checkpoint_dir = '/kaggle/working/runs/training/checkpoints'

if not os.path.exists(checkpoint_dir):
    print("\n❌ Checkpoint directory not found")
    print("Check if training completed successfully")
else:
    checkpoints = sorted(
        name for name in os.listdir(checkpoint_dir) if name.endswith('.pt')
    )

    if not checkpoints:
        print("\n⚠️  No checkpoints found")
    else:
        print("\n✅ Training completed successfully!\n")
        print("📦 Available checkpoints:")
        for ckpt_name in checkpoints:
            ckpt_path = os.path.join(checkpoint_dir, ckpt_name)
            ckpt_megabytes = os.path.getsize(ckpt_path) / (1024 * 1024)
            print(f"  • {ckpt_name} ({ckpt_megabytes:.1f} MB)")

        # Highlight the best checkpoint if one named best.pt exists
        best_ckpt = os.path.join(checkpoint_dir, 'best.pt')
        if os.path.exists(best_ckpt):
            print(f"\n⭐ Best checkpoint: {best_ckpt}")

        print("\n💡 To download: Click the folder icon on left → Navigate to checkpoints → Download")

        print("\n" + "="*70, "🎉 ALL DONE! Training Complete!", "="*70, sep="\n")