In [None]:
# Run preprocessing (set FORCE_PREPROCESS=True to rebuild)\nFORCE_PREPROCESS = False\n\nneed_build = not (\n    os.path.isdir(os.path.join(OUTPUT_DIR, 'train')) and\n    os.path.isdir(os.path.join(OUTPUT_DIR, 'val')) and\n    os.path.isdir(os.path.join(OUTPUT_DIR, 'test'))\n)\n\nif FORCE_PREPROCESS or need_build:\n    print('Running preprocessing...')\n    preprocess_dataset(RAW_IMAGES_DIR, RAW_LABELS_DIR, OUTPUT_DIR)\nelse:\n    print('Using existing processed splits at:', OUTPUT_DIR)\n\ntrain_dir = os.path.join(OUTPUT_DIR, 'train')\nval_dir = os.path.join(OUTPUT_DIR, 'val')\ntest_dir = os.path.join(OUTPUT_DIR, 'test')\n\nprint('train_dir:', train_dir)\nprint('val_dir  :', val_dir)\nprint('test_dir :', test_dir)

In [None]:
# Dataset Preprocessing (embedded from scripts/preprocess_dataset.py)\nimport shutil\n\nIMG_EXTS = ['.jpg', '.jpeg', '.png', '.JPG', '.JPEG', '.PNG']\n\ndef _find_image_path(base_dir: str, stem: str):\n    for ext in IMG_EXTS:\n        p = os.path.join(base_dir, f"{stem}{ext}")\n        if os.path.exists(p):\n            return p\n    return None\n\ndef preprocess_dataset(images_dir: str, labels_dir: str, output_dir: str):\n    """\n    Map images into class folders per YOLO label files, preserving splits.\n      output_dir/\n        train/<class_id>/image.jpg\n        val/<class_id>/image.jpg\n        test/<class_id>/image.jpg\n    """\n    os.makedirs(output_dir, exist_ok=True)\n    totals = {'train': 0, 'val': 0, 'test': 0}\n    missing_images = 0\n\n    for subset in ['train', 'val', 'test']:\n        subset_images_dir = os.path.join(images_dir, subset)\n        subset_labels_dir = os.path.join(labels_dir, subset)\n        if not os.path.isdir(subset_images_dir) or not os.path.isdir(subset_labels_dir):\n            print(f"Skipping {subset}: missing images or labels directory")\n            continue\n        label_files = [f for f in os.listdir(subset_labels_dir) if f.endswith('.txt')]\n        for label_file in label_files:\n            stem = label_file[:-4]\n            label_path = os.path.join(subset_labels_dir, label_file)\n            class_ids = set()\n            with open(label_path, 'r') as fh:\n                for line in fh:\n                    line = line.strip()\n                    if not line:\n                        continue\n                    parts = line.split()\n                    if not parts:\n                        continue\n                    class_ids.add(parts[0])\n            img_path = _find_image_path(subset_images_dir, stem)\n            if not img_path:\n                missing_images += 1\n                continue\n            for cid in class_ids:\n                class_dir = os.path.join(output_dir, subset, str(cid))\n                os.makedirs(class_dir, exist_ok=True)\n                shutil.copy(img_path, os.path.join(class_dir, os.path.basename(img_path)))\n                totals[subset] += 1\n    print('Copy summary (copies, not unique images):', totals)\n    if missing_images:\n        print(f"Missing images for {missing_images} label files (different extensions?)")

In [None]:
# Colab/Local Setup and Paths\n# - Set BASE_DATA_DIR to where your data lives.\n# - Expects the following layout:\n#     BASE_DATA_DIR/\n#       images/{train,val,test}/image*.jpg|jpeg|png\n#       labels/{train,val,test}/image*.txt   # YOLO format\n#   The preprocessing below will create:\n#       image_label_mapping/{train,val,test}/{class_id}/image*.jpg\n\nimport os\n\nIN_COLAB = 'COLAB_GPU' in os.environ or os.getenv('COLAB_RELEASE_TAG') is not None\n\n# UPDATE THIS IF NEEDED (Colab default points to /content/data)\nBASE_DATA_DIR = '/content/data' if IN_COLAB else '/workspaces/ppe-compliance-detection/data'\n\nRAW_IMAGES_DIR = os.path.join(BASE_DATA_DIR, 'images')\nRAW_LABELS_DIR = os.path.join(BASE_DATA_DIR, 'labels')\nOUTPUT_DIR = os.path.join(BASE_DATA_DIR, 'image_label_mapping')\n\nprint('IN_COLAB:', IN_COLAB)\nprint('RAW_IMAGES_DIR:', RAW_IMAGES_DIR, 'exists:', os.path.isdir(RAW_IMAGES_DIR))\nprint('RAW_LABELS_DIR:', RAW_LABELS_DIR, 'exists:', os.path.isdir(RAW_LABELS_DIR))\nprint('OUTPUT_DIR:', OUTPUT_DIR, 'exists:', os.path.isdir(OUTPUT_DIR))

# PPE Compliance Detection Project

This notebook demonstrates the development of a deep learning model for detecting Personal Protective Equipment (PPE) compliance in construction environments. The goal is to classify images into multiple classes, including both compliant and non-compliant scenarios.

## Project Overview
- **Dataset**: Images of workers wearing or missing PPE (e.g., helmets, gloves, vests).
- **Objective**: Build a model to detect PPE compliance and identify missing equipment.
- **Approach**: Use transfer learning with a pre-trained MobileNetV2 model.

## Steps
1. Data Loading and Preprocessing
2. Model Definition and Compilation
3. Model Training with Callbacks
4. Evaluation and Visualization
5. Model Export for Deployment

## Data Preparation and Cleaning

In this section, we will prepare the dataset for training. This includes:
- Applying data augmentation techniques to improve model generalization.
- Normalizing the image data to ensure consistent input to the model.
- Use generators to load data from directories

In [1]:
# Import Required Libraries
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
import matplotlib.pyplot as plt
import numpy as np
import os

2025-12-18 07:57:26.980905: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2025-12-18 07:57:44.699742: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-12-18 07:58:02.678574: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.


In [None]:
# Load and Preprocess Data (using processed split folders)\nfrom tensorflow.keras.preprocessing.image import ImageDataGenerator\nfrom tensorflow.keras.applications.mobilenet_v2 import preprocess_input\n\n# If paths are not defined (e.g., running this cell alone), set defaults\nif 'train_dir' not in globals():\n    BASE_DATA_DIR = '/content/data' if ('COLAB_GPU' in os.environ or os.getenv('COLAB_RELEASE_TAG')) else '/workspaces/ppe-compliance-detection/data'\n    OUTPUT_DIR = os.path.join(BASE_DATA_DIR, 'image_label_mapping')\n    train_dir = os.path.join(OUTPUT_DIR, 'train')\n    val_dir = os.path.join(OUTPUT_DIR, 'val')\n    test_dir = os.path.join(OUTPUT_DIR, 'test')\n\n# Data augmentation + MobileNetV2 preprocessing for training\ntrain_datagen = ImageDataGenerator(\n    preprocessing_function=preprocess_input,\n    rotation_range=20,\n    width_shift_range=0.2,\n    height_shift_range=0.2,\n    shear_range=0.2,\n    zoom_range=0.2,\n    horizontal_flip=True,\n    fill_mode='nearest'\n)\n\n# Only preprocessing for val/test\nval_test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)\n\n# Generators\ntrain_generator = train_datagen.flow_from_directory(\n    train_dir, target_size=(224, 224), batch_size=32, class_mode='categorical'\n)\n\nval_generator = val_test_datagen.flow_from_directory(\n    val_dir, target_size=(224, 224), batch_size=32, class_mode='categorical'\n)\n\ntest_generator = val_test_datagen.flow_from_directory(\n    test_dir, target_size=(224, 224), batch_size=32, class_mode='categorical', shuffle=False\n)\n\nprint('Classes:', train_generator.class_indices)

Found 0 images belonging to 0 classes.
Found 0 images belonging to 0 classes.
Found 0 images belonging to 0 classes.


## Exploratory Data Analysis (EDA)

In this section, we will:
- Visualize the distribution of classes in the dataset.
- Analyze the image dimensions and aspect ratios.
- Identify any potential issues, such as class imbalance or missing data.

In [None]:
# Exploratory Data Analysis (EDA)
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Class distribution using counts from generator
labels = list(train_generator.class_indices.keys())
counts = np.bincount(train_generator.classes)

plt.figure(figsize=(10, 6))
plt.bar(range(len(labels)), counts, color='tab:blue')
plt.title('Class Distribution')
plt.xlabel('Classes')
plt.ylabel('Frequency')
plt.xticks(ticks=range(len(labels)), labels=labels, rotation=45)
plt.tight_layout()
plt.show()

# Inspect image dimensions from a single batch (fast and representative)
batch_x, batch_y = next(train_generator)
print('Sample batch shape:', batch_x.shape)  # (batch, height, width, channels)

heights = batch_x.shape[1]
widths = batch_x.shape[2]

plt.figure(figsize=(6, 4))
plt.bar(['height', 'width'], [heights, widths], color=['tab:green', 'tab:orange'])
plt.title('Sample Image Dimensions')
plt.ylabel('Pixels')
plt.show()

## Model Selection and Parameter Tuning

In this section, we will:
- Define the model architecture using transfer learning with MobileNetV2.
- Compile the model with appropriate loss functions and metrics.
- Tune hyperparameters such as learning rate, batch size, and number of epochs.

In [None]:
# Model Selection and Parameter Tuning
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D

# Load the pre-trained MobileNetV2 model
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Freeze the base model layers
for layer in base_model.layers:
    layer.trainable = False

# Add custom layers for classification
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dropout(0.5)(x)
x = Dense(128, activation='relu')(x)
x = Dropout(0.5)(x)
output = Dense(len(train_generator.class_indices), activation='softmax')(x)

# Define the model
model = Model(inputs=base_model.input, outputs=output)

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Display the model summary
model.summary()

## Model Training with Callbacks
In this section, we will:
- Includes early stopping and model checkpointing.
- Trains the model for up to 20 epochs.

In [None]:
# Model Training with Callbacks
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Define callbacks
callbacks = [
    EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True),
    ModelCheckpoint(filepath='best_model.h5', save_best_only=True)
]

# Train the model
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=20,
    callbacks=callbacks
)

## Evaluation and Visualization:
In this section, we will:
- Plots training and validation accuracy/loss.
- Evaluates the model on the test set and prints the results.

In [None]:
# Evaluation and Visualization
import matplotlib.pyplot as plt

# Plot training history
plt.figure(figsize=(12, 4))

# Plot accuracy
plt.subplot(1, 2, 1)
plt.plot(history.history['accuracy'], label='Training Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.title('Model Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()

# Plot loss
plt.subplot(1, 2, 2)
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Model Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

plt.show()

# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(test_generator)
print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")

## Model Export for Deployment:
In this section, we will save the trained model as ppe_compliance_model.h5.

In [None]:
# Model Export for Deployment

# Save the trained model
model.save('ppe_compliance_model.h5')
print("Model saved as 'ppe_compliance_model.h5'")