# 🧑‍💻 Human Detection - YOLOv8 Training

Notebook này hướng dẫn train model YOLOv8 để nhận diện người.

## 📋 Yêu cầu
- Google Colab với GPU (Runtime → Change runtime type → T4 GPU)
- Dữ liệu lưu trên Google Drive tại: `My Drive/Colab Notebooks/content/Human`

## 1️⃣ Setup Environment

In [None]:
# Install required packages
!pip install ultralytics -q

# Verify GPU
import torch
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"GPU: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'None'}")

In [None]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/gdrive')

# Define Data Path
# Path: /content/gdrive/My Drive/Colab Notebooks/content/Human
# Structure expected: Human/0 (no person) and Human/1 (person)
RAW_DATA_PATH = '/content/gdrive/My Drive/Colab Notebooks/content/Human'

import os
if os.path.exists(RAW_DATA_PATH):
    print(f"‚úÖ Found data path: {RAW_DATA_PATH}")
    print("Contents:")
    !ls "{RAW_DATA_PATH}"
else:
    print(f"‚ùå Path not found: {RAW_DATA_PATH}")
    print("Creating dummy folders for testing/verification...")
    # Remove this in production if you are sure data exists
    # os.makedirs(f"{RAW_DATA_PATH}/0", exist_ok=True)
    # os.makedirs(f"{RAW_DATA_PATH}/1", exist_ok=True)

## 2️⃣ Prepare YOLO Dataset

Chuyển đổi dữ liệu từ Drive (format classification) sang YOLO detection format.

In [None]:
import shutil
from pathlib import Path
import random

# Create YOLO dataset structure locally in Colab VM
DATASET_DIR = Path('./dataset')
if DATASET_DIR.exists():
    shutil.rmtree(DATASET_DIR)
    
for split in ['train', 'val']:
    (DATASET_DIR / split / 'images').mkdir(parents=True, exist_ok=True)
    (DATASET_DIR / split / 'labels').mkdir(parents=True, exist_ok=True)

print("Created local dataset structure:")
!tree ./dataset -L 2

In [None]:
def create_full_image_label(label_path: Path, class_id: int = 0):
    """Write a YOLO label file whose only box covers the whole image.

    Args:
        label_path: Destination of the label text file; it is created or
            overwritten.
        class_id: Source class of the image. ``1`` (person) produces one
            line, ``0 0.5 0.5 1.0 1.0`` (YOLO class 0, box centred on the
            image with full width and height). Any other value (no person)
            produces an empty file.
    """
    # An empty label file marks the image as containing no objects.
    contents = "0 0.5 0.5 1.0 1.0\n" if class_id == 1 else ""
    with open(label_path, 'w') as label_file:
        label_file.write(contents)

def process_drive_data(data_path: str, output_dir: Path, split_ratio: float = 0.8,
                       seed: "int | None" = None) -> "tuple[int, int]":
    """Copy a class-folder image dataset into a YOLO train/val layout.

    Images are read from ``<data_path>/0`` (no person) and ``<data_path>/1``
    (person), shuffled, and split into train and val. Each image is copied
    to ``<output_dir>/<split>/images`` under a sequential name, and a label
    file with the same stem is written to ``<output_dir>/<split>/labels``
    by ``create_full_image_label``.

    Args:
        data_path: Directory containing the class sub-folders ``0`` and ``1``.
        output_dir: Root of the YOLO dataset to populate. The split
            sub-folders are created if they do not exist yet.
        split_ratio: Fraction of the images assigned to the train split;
            the remainder goes to val.
        seed: Optional seed for the shuffle. With a seed, the same set of
            input files always yields the same split. With ``None`` the
            module-level ``random`` state is used, as before.

    Returns:
        ``(train_count, val_count)``; ``(0, 0)`` when no image was found.
    """
    image_suffixes = {'.jpg', '.jpeg', '.png', '.bmp'}
    all_images = []
    base_dir = Path(data_path)

    # Check folders 0 and 1
    for class_id in [0, 1]:
        class_dir = base_dir / str(class_id)
        if not class_dir.exists():
            print(f"‚ö†Ô∏è Warning: Folder {class_dir} not found!")
            continue

        print(f"Scanning {class_dir}...")
        # Sorted so the pre-shuffle order does not depend on the order in
        # which the filesystem lists entries; is_file() skips directories
        # whose name merely ends in an image suffix (copying one would fail).
        found = sorted(
            entry for entry in class_dir.glob('*')
            if entry.is_file() and entry.suffix.lower() in image_suffixes
        )
        all_images.extend((entry, class_id) for entry in found)
        print(f"  Found {len(found)} images in class {class_id}")

    if not all_images:
        return 0, 0

    # Shuffle and split. A dedicated generator is used when a seed is given
    # so the global random state is left untouched.
    rng = random if seed is None else random.Random(seed)
    rng.shuffle(all_images)
    split_idx = int(len(all_images) * split_ratio)
    train_images = all_images[:split_idx]
    val_images = all_images[split_idx:]

    print(f"Total: {len(all_images)} | Train: {len(train_images)} | Val: {len(val_images)}")

    # Process (copy from Drive to Colab local)
    for split, images in [('train', train_images), ('val', val_images)]:
        images_dir = output_dir / split / 'images'
        labels_dir = output_dir / split / 'labels'
        # Do not rely on another cell having created the folders.
        images_dir.mkdir(parents=True, exist_ok=True)
        labels_dir.mkdir(parents=True, exist_ok=True)

        for idx, (img_path, class_id) in enumerate(images):
            # Image and label share one stem so they can be matched up.
            stem = f"{split}_{idx:05d}"
            shutil.copy(img_path, images_dir / f"{stem}{img_path.suffix}")
            create_full_image_label(labels_dir / f"{stem}.txt", class_id)

    return len(train_images), len(val_images)

In [None]:
# Convert the Drive data into the local YOLO dataset and keep the split sizes;
# train_count is checked again before training starts.
train_count, val_count = process_drive_data(RAW_DATA_PATH, DATASET_DIR)
if train_count > 0:
    print("\nProcessing complete. Ready for training.")
else:
    # Do not claim readiness when nothing ended up in the train split.
    print("\nProcessing complete, but no training images were prepared. "
          "Check the data path and folder layout before training.")

In [None]:
# Write the dataset description file: dataset root, image folders for each
# split, and the single class name.
data_yaml = "\n".join([
    "",
    f"path: {DATASET_DIR.absolute()}",
    "train: train/images",
    "val: val/images",
    "names:",
    "  0: person",
    "nc: 1",
    "",
])

yaml_path = DATASET_DIR / 'data.yaml'
with open(yaml_path, 'w') as yaml_file:
    yaml_file.write(data_yaml)

print("Created data.yaml")

## 3️⃣ Train YOLOv8

In [None]:
from ultralytics import YOLO

# Load the 'yolov8m.pt' weights as the starting point for training.
model = YOLO('yolov8m.pt')

# Keyword arguments passed unchanged to model.train().
TRAINING_CONFIG = dict(
    data=str(DATASET_DIR / 'data.yaml'),
    epochs=100,
    imgsz=640,
    batch=16,
    patience=20,
    save=True,
    project='runs/detect',
    name='human_detection',
    exist_ok=True,
    augment=True,
)

# Only train when the dataset preparation step produced training images.
if train_count <= 0:
    print("‚ùå Kh√¥ng th·ªÉ train v√¨ kh√¥ng c√≥ d·ªØ li·ªáu!")
else:
    model.train(**TRAINING_CONFIG)

## 4️⃣ Save Model to Drive

In [None]:
# Destination for the trained model on Drive.
# It is stored next to the dataset: .../content/Human/models/
DRIVE_MODEL_DIR = f"{RAW_DATA_PATH}/models"

best_weight_path = 'runs/detect/human_detection/weights/best.pt'

if os.path.exists(best_weight_path):
    # Create the destination only when there is something to save, so a
    # failed or skipped training run does not leave an empty folder behind.
    os.makedirs(DRIVE_MODEL_DIR, exist_ok=True)
    shutil.copy(best_weight_path, f"{DRIVE_MODEL_DIR}/best.pt")
    print(f"‚úÖ ƒê√£ l∆∞u model v√†o Drive: {DRIVE_MODEL_DIR}/best.pt")

    # Optional: also download the weights to the local machine.
    from google.colab import files
    files.download(best_weight_path)
else:
    print("Warning: Kh√¥ng th·∫•y file best.pt")