# PPE Detection Model Training on Google Colab

This notebook trains a YOLOv12 (nano) model to detect Personal Protective Equipment (PPE) items.

## Setup Instructions
1. **Enable GPU**: Runtime → Change runtime type → GPU (T4)
2. **Upload dataset**: Run the upload cell below
3. **Start training**: Run all cells sequentially

## Training Time
- **GPU (T4)**: about 1.5-3 hours for the full 200 epochs (less if early stopping triggers)
- **CPU**: considerably longer (not recommended)


## Step 1: Install Dependencies


In [None]:
# Install Ultralytics YOLO
!pip install ultralytics -q

# Sanity-check the runtime: report the PyTorch build and whether a CUDA GPU is visible.
import torch

print(f"PyTorch version: {torch.__version__}")
cuda_ready = torch.cuda.is_available()
print(f"CUDA available: {cuda_ready}")

if not cuda_ready:
    # No accelerator visible: tell the user how to switch the Colab runtime.
    print("‚ö†Ô∏è WARNING: No GPU detected! Training will be very slow.")
    print("Please enable GPU: Runtime ‚Üí Change runtime type ‚Üí GPU")
else:
    gpu_name = torch.cuda.get_device_name(0)
    print(f"GPU: {gpu_name}")
    # Divide by 1e9 so the value is displayed in (decimal) GB.
    gpu_total_memory = torch.cuda.get_device_properties(0).total_memory
    print(f"GPU Memory: {gpu_total_memory / 1e9:.1f} GB")


## Step 2: Upload Dataset

Upload your `ppe_dataset.zip` file. This should contain:
- `datasets/images/train/` - Training images
- `datasets/images/val/` - Validation images
- `datasets/images/test/` - Test images
- `datasets/labels/train/` - Training labels
- `datasets/labels/val/` - Validation labels
- `datasets/labels/test/` - Test labels
- `datasets/data.yaml` - Dataset configuration


In [None]:
from google.colab import files
import zipfile
import os

def _count_entries(directory):
    """Return how many entries *directory* holds, or 0 when it is not a directory."""
    return len(os.listdir(directory)) if os.path.isdir(directory) else 0


# Upload dataset ZIP file
print("üì§ Please upload your ppe_dataset.zip file:")
uploaded = files.upload()

# Pick the first uploaded file with a .zip extension. The comparison is
# case-insensitive so an archive named e.g. "PPE_DATASET.ZIP" is accepted too.
zip_filename = next(
    (filename for filename in uploaded if filename.lower().endswith('.zip')),
    None,
)

if zip_filename is None:
    raise ValueError("No ZIP file found! Please upload ppe_dataset.zip")

print(f"‚úÖ Found ZIP file: {zip_filename}")

# Extract dataset into the current working directory
print("\nüì¶ Extracting dataset...")
with zipfile.ZipFile(zip_filename, 'r') as zip_ref:
    zip_ref.extractall('.')

print("‚úÖ Dataset extracted successfully!")

# Verify dataset structure: the archive must unpack to a top-level 'datasets' folder.
dataset_path = 'datasets'
if not os.path.exists(dataset_path):
    raise ValueError(f"Dataset folder '{dataset_path}' not found after extraction!")

print("\nüìÅ Dataset structure:")
# Report every images/labels split (train, val and test for both), so a missing
# or empty folder is visible before training starts. Missing folders count as 0.
for label, folder in (('Images', 'images'), ('Labels', 'labels')):
    for split in ('train', 'val', 'test'):
        n_files = _count_entries(os.path.join(dataset_path, folder, split))
        print(f"   - {label} {split}: {n_files} files")
print(f"   - data.yaml: {'‚úÖ' if os.path.exists(os.path.join(dataset_path, 'data.yaml')) else '‚ùå'}")


## Step 3: Update Dataset Path (if needed)

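Rewrite the `path` entry in `datasets/data.yaml` so it holds the absolute location of the extracted `datasets/` folder on Colab. The `train`, `val`, and `test` entries are left unchanged.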


In [None]:
import yaml
import os

# Read current data.yaml
yaml_path = 'datasets/data.yaml'
with open(yaml_path, 'r') as f:
    data = yaml.safe_load(f)

# safe_load returns None for an empty file; assigning data['path'] below would
# then fail with an unhelpful TypeError, so stop here with a clear message.
if not isinstance(data, dict):
    raise ValueError(f"{yaml_path} is empty or is not a YAML mapping")

# Point the dataset root at the absolute location of ./datasets
current_dir = os.path.abspath('.')
data['path'] = os.path.join(current_dir, 'datasets')

# Save updated yaml (overwrites the file in place)
with open(yaml_path, 'w') as f:
    yaml.dump(data, f, default_flow_style=False)

print(f"‚úÖ Updated data.yaml:")
print(f"   Path: {data['path']}")
print(f"   Train: {data['train']}")
print(f"   Val: {data['val']}")
# The file has already been rewritten at this point, so a dataset config without
# a 'test' split should not abort the cell with a KeyError in the summary.
print(f"   Test: {data.get('test', 'not set')}")
print(f"   Classes: {len(data['names'])}")


## Step 4: Start Training

This will train the **YOLOv12** model on your PPE dataset.

**Training Parameters:**
- Model: YOLOv12n (nano - latest, better accuracy)
- Epochs: 200 (extended training for maximum accuracy)
- Batch size: 16 (optimized for stability)
- Image size: 640x640
- Early stopping: 40 epochs patience (stops automatically if no improvement)
- Device: GPU (auto-detected)

**Note:** Training may finish early if the model stops improving (early stopping)


In [None]:
from ultralytics import YOLO
import torch
import os

# Train YOLOv12n on the PPE dataset and report where the best checkpoint was written.
print("=" * 60)
print("Training YOLOv12 on Construction-PPE Dataset")
print("=" * 60)

# Check GPU
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"\nüñ•Ô∏è  Device: {device}")
if device == 'cuda':
    print(f"   GPU: {torch.cuda.get_device_name(0)}")
    print(f"   Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
else:
    print("   ‚ö†Ô∏è  WARNING: No GPU! Training will be very slow.")

# Load pre-trained YOLOv12 nano model (latest, better accuracy)
print("\n[1/3] Loading YOLOv12n model...")
print("   YOLOv12: 1.2% better mAP than YOLOv11, same speed")
model = YOLO("yolo12n.pt")
print("‚úÖ Model loaded\n")

# Dataset path
dataset_yaml = 'datasets/data.yaml'
print(f"üìÅ Dataset: {dataset_yaml}")

# Training parameters (keep these messages in sync with the model.train() arguments below)
print("\n[2/3] Starting training...")
print("   - Epochs: 200 (extended training for maximum accuracy)")
print("   - Batch size: 16 (optimized for stability)")
print("   - Image size: 640")
print("   - Early stopping patience: 40 epochs (stops if no improvement)")
print("\n‚è≥ Training will take 1.5-3 hours on GPU...")
print("   (May finish earlier if model plateaus - early stopping)")
print("   (You can monitor progress below)\n")

# Train the model. Only the training call itself is guarded; failures are printed
# with a traceback instead of being re-raised, so the notebook keeps running.
try:
    results = model.train(
        data=dataset_yaml,
        epochs=200,  # Extended training - early stopping prevents overfitting
        imgsz=640,
        batch=16,  # Smaller batch for better stability
        device=device,
        project="runs/detect",
        name="ppe_detection",
        exist_ok=True,
        patience=40,  # Early stopping - stops if no improvement for 40 epochs
        save=True,
        plots=True,
        workers=4,
        cache=True,  # Cache images in RAM
        amp=True,  # Mixed precision for faster training
    )
except Exception as e:
    print(f"\n‚ùå Training error: {e}")
    import traceback
    traceback.print_exc()
else:
    print("\n" + "=" * 60)
    print("[3/3] Training Complete!")
    print("=" * 60)

    # Show results. Read the checkpoint location from the trainer rather than
    # assuming it, and fall back to the conventional <project>/<name> layout
    # when the trainer does not expose it.
    # NOTE(review): relies on `model.trainer.best` being populated by train();
    # confirm against the installed Ultralytics version.
    trainer = getattr(model, "trainer", None)
    recorded_best = getattr(trainer, "best", None)
    if recorded_best is not None:
        best_model_path = str(recorded_best)
    else:
        best_model_path = "runs/detect/ppe_detection/weights/best.pt"

    if os.path.exists(best_model_path):
        file_size = os.path.getsize(best_model_path) / (1024 * 1024)  # MB
        print(f"\n‚úÖ Model saved to: {best_model_path}")
        print(f"‚úÖ Model size: {file_size:.1f} MB")
    else:
        print("\n‚ö†Ô∏è  Model file not found!")


## Step 5: Download Trained Model

Download the trained `best.pt` model to your Mac.


In [None]:
from google.colab import files
import os

# Locate the trained best.pt checkpoint and hand it to the browser for download.
best_model_path = "runs/detect/ppe_detection/weights/best.pt"

# If the checkpoint is not at the conventional location, look for a
# 'ppe_detection' run anywhere under ./runs and take the most recently
# modified best.pt, instead of reporting a failure for a run that succeeded.
if not os.path.exists(best_model_path):
    import glob

    candidates = glob.glob("runs/**/ppe_detection/weights/best.pt", recursive=True)
    if candidates:
        best_model_path = max(candidates, key=os.path.getmtime)

if os.path.exists(best_model_path):
    print("üì• Downloading trained model...")
    print(f"   File: {best_model_path}")

    file_size = os.path.getsize(best_model_path) / (1024 * 1024)  # MB
    print(f"   Size: {file_size:.1f} MB")

    # Download file
    files.download(best_model_path)

    print("\n‚úÖ Download started!")
    print("\nüìã Next steps:")
    print("   1. Save the downloaded 'best.pt' file")
    print("   2. Copy it to your Mac:")
    print("      InsolareSafetySystem/flaskServer/runs/detect/ppe_detection/weights/")
    print("   3. Replace the existing best.pt file")
    print("   4. Restart your Flask server")
else:
    print("‚ùå Model file not found!")
    print("   Training may not have completed successfully.")
    print("   Check the training output above for errors.")
