In [None]:
# EDL Model Training Test Notebook
# Install dependencies if needed
import sys
import subprocess

def install_if_missing(package, import_name=None):
    """Install ``package`` with pip unless its module can already be imported.

    The pip distribution name and the importable module name are not always
    the same (``opencv-python`` is imported as ``cv2``, ``pyyaml`` as
    ``yaml``). Probing with the distribution name always fails for such
    packages and would trigger a ``pip install`` on every run.

    Args:
        package: Distribution name handed to ``pip install``.
        import_name: Module name used for the import probe. When omitted, a
            small table of known mismatches is consulted, falling back to
            ``package`` itself.

    Raises:
        subprocess.CalledProcessError: If the ``pip install`` command fails.
    """
    # Distribution name -> import name, for packages where the two differ.
    known_import_names = {
        "opencv-python": "cv2",
        "pyyaml": "yaml",
    }
    if import_name is None:
        import_name = known_import_names.get(package.lower(), package)

    try:
        __import__(import_name)
    except ImportError:
        # Run pip through the interpreter that is executing this code.
        subprocess.check_call([sys.executable, "-m", "pip", "install", package])

# Make sure each required distribution is present, in a fixed order
for required_package in ("torch", "opencv-python", "pyyaml"):
    install_if_missing(required_package)

print("Dependencies installed successfully!")

# Report the versions of the two libraries that expose __version__
torch_module = __import__("torch")
print("PyTorch version:", torch_module.__version__)
cv2_module = __import__("cv2")
print("OpenCV version:", cv2_module.__version__)

In [None]:
# Import EDL modules
import os
import sys
from pathlib import Path

# Add the parent directory to sys.path so the EDL package can be imported.
# The path is resolved (no literal "..") and only added when absent, so
# re-running this cell does not pile up duplicate entries.
edl_parent_dir = str(Path('..').resolve())
if edl_parent_dir not in sys.path:
    sys.path.append(edl_parent_dir)

# Import EDL components
from EDL.model import MiniYOLO
from EDL.data import YOLOTxtDataset, collate_fn
from EDL.engine import train_loop, assign_targets, compute_loss, decode_predictions
from EDL.utils import seed_everything, parse_device, make_dir, now_str

import torch
import yaml
import numpy as np

print("EDL modules imported successfully!")

# Query CUDA availability once and reuse the answer for the device report
cuda_available = torch.cuda.is_available()
print(f"CUDA available: {cuda_available}")
if cuda_available:
    print(f"CUDA device: {torch.cuda.get_device_name()}")

In [None]:
# Create sample dataset for testing
import cv2
import random

# Create the images/labels x train/val directory tree of the test dataset
for dataset_dir in (
    "test_dataset/images/train",
    "test_dataset/images/val",
    "test_dataset/labels/train",
    "test_dataset/labels/val",
):
    make_dir(dataset_dir)

# Generate sample images and labels
def create_sample_data(split, num_samples=10, root="test_dataset", img_size=640):
    """Write random images with random YOLO-format labels for one split.

    For each sample a noise image is saved to
    ``{root}/images/{split}/sample_XXX.jpg`` and a matching label file to
    ``{root}/labels/{split}/sample_XXX.txt``. Every label line has the form
    ``class cx cy w h`` with coordinates normalized to the image size, and
    every box lies fully inside the image.

    Args:
        split: Sub-directory name of the split, e.g. ``"train"`` or ``"val"``.
        num_samples: Number of image/label pairs to write.
        root: Dataset root directory.
        img_size: Side length in pixels of the square images.

    Raises:
        OSError: If OpenCV fails to write an image file.
    """
    image_dir = Path(root) / "images" / split
    label_dir = Path(root) / "labels" / split
    # Create the target folders so the function also works for a fresh root.
    image_dir.mkdir(parents=True, exist_ok=True)
    label_dir.mkdir(parents=True, exist_ok=True)

    # Keeps boxes inside the image even after rounding to 6 decimals.
    edge_margin = 1e-6

    for i in range(num_samples):
        # randint's upper bound is exclusive: 256 is needed to cover 0..255.
        img = np.random.randint(0, 256, (img_size, img_size, 3), dtype=np.uint8)
        img_path = str(image_dir / f"sample_{i:03d}.jpg")
        # cv2.imwrite signals failure through its return value, not an exception.
        if not cv2.imwrite(img_path, img):
            raise OSError(f"Failed to write image: {img_path}")

        # Create random YOLO format labels (class cx cy w h normalized)
        num_objects = random.randint(1, 5)
        labels = []
        for _ in range(num_objects):
            cls = 0  # single class for testing
            w = random.uniform(0.05, 0.3)
            h = random.uniform(0.05, 0.3)
            # Sample the size first, then restrict the centre so that
            # centre +/- half the size never leaves the [0, 1] range.
            half_w = w / 2 + edge_margin
            half_h = h / 2 + edge_margin
            cx = random.uniform(max(0.1, half_w), min(0.9, 1.0 - half_w))
            cy = random.uniform(max(0.1, half_h), min(0.9, 1.0 - half_h))
            labels.append(f"{cls} {cx:.6f} {cy:.6f} {w:.6f} {h:.6f}")

        # Save label file
        label_path = label_dir / f"sample_{i:03d}.txt"
        with open(label_path, 'w') as f:
            f.write('\n'.join(labels))

# Fill the train split first, then the val split
for split_name, sample_count in (("train", 20), ("val", 10)):
    create_sample_data(split_name, sample_count)

print("Sample dataset created!")

# Count the JPEG files that ended up in each image directory
train_image_dir = Path("test_dataset/images/train")
print("Train images:", len(list(train_image_dir.glob("*.jpg"))))
val_image_dir = Path("test_dataset/images/val")
print("Val images:", len(list(val_image_dir.glob("*.jpg"))))

In [None]:
# Dataset description: absolute root, per-split image folders, class names
dataset_root = Path('test_dataset').absolute()
dataset_config = {
    'path': str(dataset_root),
    'train': 'images/train',
    'val': 'images/val',
    'names': ['person'],  # single class for testing
}

# Serialize once; the same text is written to disk and echoed below
dataset_yaml_text = yaml.dump(dataset_config, default_flow_style=False)
with open('test_data.yaml', 'w') as f:
    f.write(dataset_yaml_text)

print("Dataset YAML created:")
print(dataset_yaml_text)

In [None]:
# Test dataset loading
from torch.utils.data import DataLoader

# Build one dataset per split from the same YAML file
dataset_yaml = 'test_data.yaml'
train_ds = YOLOTxtDataset(dataset_yaml, split='train', img_size=640)
val_ds = YOLOTxtDataset(dataset_yaml, split='val', img_size=640)

print(f"Train dataset: {len(train_ds)} samples")
print(f"Val dataset: {len(val_ds)} samples")
print(f"Number of classes: {train_ds.num_classes}")
print(f"Class names: {train_ds.names}")

# Pull a single shuffled batch of four through the project's collate function
train_dl = DataLoader(train_ds, batch_size=4, shuffle=True, collate_fn=collate_fn)
sample_batch = next(iter(train_dl))

# Summarize the batch dict: images, per-image label tensors, and paths
print(f"\nSample batch:")
batch_images = sample_batch['images']
print(f"Images shape: {batch_images.shape}")
batch_labels = sample_batch['labels']
print(f"Number of label tensors: {len(batch_labels)}")
print(f"First image labels shape: {batch_labels[0].shape}")
print(f"Paths: {len(sample_batch['paths'])}")

In [None]:
# Select the device: CUDA when available, otherwise CPU
device = parse_device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Build the model on that device and count its parameters
model = MiniYOLO(num_classes=1, channels=128).to(device)
param_count = sum(p.numel() for p in model.parameters())
print(f"Model parameters: {param_count:,}")

# Send the sample batch through the model without tracking gradients
imgs = sample_batch['images'].to(device)
print(f"Input shape: {imgs.shape}")

with torch.no_grad():
    pred = model(imgs)
print(f"Output shape: {pred.shape}")
print(f"Model stride: {model.stride}")

# Compare dimension 1 of the output with obj + classes + box
expected_channels = 1 + model.num_classes + 4
print(f"Expected output channels: {expected_channels}")
actual_channels = pred.shape[1]
print(f"Actual output channels: {actual_channels}")

# Build targets on an S x S grid (S = input height // 16) and evaluate the loss
labels = [label_tensor.to(device) for label_tensor in sample_batch['labels']]
B, _, H, W = imgs.shape
S = H // 16
obj_t, cls_t, box_t, mask = (
    target.to(device) for target in assign_targets(labels, S, model.num_classes)
)

loss, logs = compute_loss(pred, obj_t, cls_t, box_t, mask, model.num_classes)
print(f"\nLoss test:")
print(f"Total loss: {logs['loss']:.4f}")
print(f"Obj loss: {logs['obj_loss']:.4f}")
print(f"Cls loss: {logs['cls_loss']:.4f}")
print(f"Box loss: {logs['box_loss']:.4f}")

In [None]:
# Quick training test (few epochs)
import time

seed_everything(42)

# Settings shared by the training loop and the checkpoint metadata. They are
# defined once so the saved metadata cannot drift from what was trained.
num_epochs = 3
num_classes = 1
stride = 16     # input height // stride gives the target grid size S
img_size = 640  # same img_size the datasets were built with

# Training setup
model = MiniYOLO(num_classes=num_classes).to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3, weight_decay=5e-4)

# Create output directory
make_dir("runs/test_train")

# Quick training loop
model.train()
print(f"Starting quick training for {num_epochs} epochs...")

for epoch in range(num_epochs):
    epoch_loss = 0.0
    num_batches = 0
    # perf_counter is monotonic, so the measured duration is not affected by
    # wall-clock adjustments the way time.time() is.
    start_time = time.perf_counter()

    for batch_idx, batch in enumerate(train_dl):
        imgs = batch['images'].to(device)
        labels = [l.to(device) for l in batch['labels']]

        # Forward pass
        pred = model(imgs)

        # Compute targets and loss; dimension 2 of imgs is the input height
        S = imgs.shape[2] // stride
        obj_t, cls_t, box_t, mask = assign_targets(labels, S, model.num_classes)
        obj_t, cls_t, box_t, mask = obj_t.to(device), cls_t.to(device), box_t.to(device), mask.to(device)

        loss, logs = compute_loss(pred, obj_t, cls_t, box_t, mask, model.num_classes)

        # Backward pass with gradient-norm clipping
        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=10.0)
        optimizer.step()

        # float() keeps the running sum a plain Python number whether the
        # logged value is a float or a zero-dimensional tensor.
        batch_loss = float(logs['loss'])
        epoch_loss += batch_loss
        num_batches += 1

        if batch_idx % 2 == 0:  # Log every 2 batches
            print(f"Epoch {epoch+1}/{num_epochs}, Batch {batch_idx+1}, Loss: {batch_loss:.4f}")

    # max(1, ...) guards against division by zero when the loader is empty
    avg_loss = epoch_loss / max(1, num_batches)
    epoch_time = time.perf_counter() - start_time
    print(f"Epoch {epoch+1} completed: avg_loss={avg_loss:.4f}, time={epoch_time:.1f}s")

# Save test model together with the settings it was trained with
test_weights_path = "runs/test_train/test_model.pt"
meta = {
    'imgsz': img_size,
    'stride': stride,
    'names': ['person'],
    'num_classes': num_classes,
}
torch.save({'model': model.state_dict(), 'meta': meta}, test_weights_path)
print(f"Test model saved to: {test_weights_path}")

In [None]:
# Test full training using EDL train script
# Create an args-like object for the training function
class TrainingArgs:
    """Attribute bag standing in for parsed command-line training arguments.

    Called without arguments it yields the notebook's default settings. Any
    default can be replaced by keyword, e.g. ``TrainingArgs(epochs=1)``.

    Raises:
        TypeError: If an override names a setting that does not exist, which
            catches typos such as ``epoch=1``.
    """

    def __init__(self, **overrides):
        self.data = 'test_data.yaml'
        self.epochs = 5
        self.batch = 4
        self.imgsz = 640
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.lr = 1e-3
        self.workers = 0  # Set to 0 for notebook to avoid multiprocessing issues
        self.out = 'runs/test_full_train'
        self.seed = 42
        self.num_classes = None  # Will use from dataset
        self.log_interval = 2

        # Only settings defined above may be overridden.
        unknown = sorted(set(overrides) - set(vars(self)))
        if unknown:
            raise TypeError(f"Unknown training option(s): {', '.join(unknown)}")
        for option_name, option_value in overrides.items():
            setattr(self, option_name, option_value)

# Drive train_loop with the default TrainingArgs
print("Testing full training pipeline...")
args = TrainingArgs()

# A failure is reported with its traceback instead of aborting the cell
try:
    train_loop(args)
except Exception as e:
    print(f"❌ Training failed: {e}")
    import traceback
    traceback.print_exc()
else:
    print("✅ Training completed successfully!")