In [8]:
import os
import shutil

# WARNING: destructive. Do not run this cell on every run; it deletes everything in the runtime directory.

def reset_runtime_directory(runtime_path):
    """
    Removes whatever currently exists at ``runtime_path`` and leaves an empty
    directory in its place.

    This is destructive: every file and sub-folder below ``runtime_path`` is
    permanently deleted. A regular file or a symlink at ``runtime_path`` is
    removed as well (a symlink's target is left untouched).

    Parameters:
        runtime_path (str): Path to the runtime directory.
    """
    # lexists (rather than exists) also reports dangling symlinks, which would
    # otherwise survive and make the makedirs call below fail.
    if os.path.lexists(runtime_path):
        if os.path.isdir(runtime_path) and not os.path.islink(runtime_path):
            # A real directory: remove it together with everything inside it.
            shutil.rmtree(runtime_path)
        else:
            # A regular file or a symlink: rmtree refuses both, so unlink the
            # entry itself instead.
            os.remove(runtime_path)
        print(f"Deleted all files and folders in: {runtime_path}")
    else:
        print(f"Path does not exist: {runtime_path}")

    # Recreate the runtime directory
    os.makedirs(runtime_path, exist_ok=True)
    print(f"Recreated the runtime directory: {runtime_path}")

# Target of the reset: everything under this path is deleted by the call below.
runtime_path = "/content"
reset_runtime_directory(runtime_path)

Deleted all files and folders in: /content
Recreated the runtime directory: /content


In [1]:
import os
import zipfile
from google.colab import files

def download_and_extract_kaggle_dataset(
    dataset='ismailnasri20/driver-drowsiness-dataset-ddd',
    extract_dir='./driver_drowsiness_dataset',
):
    """
    Installs an uploaded Kaggle API token, downloads a Kaggle dataset and unpacks it.

    The steps are: prompt for an upload through ``google.colab.files``, copy
    ``kaggle.json`` from the working directory to ``~/.kaggle/`` with mode 600,
    run ``kaggle datasets download`` and extract the resulting zip archive.
    Every step is checked, so a missing token or a failed download raises
    immediately instead of surfacing later as a missing-zip error.

    Parameters:
        dataset (str): Kaggle dataset slug in ``owner/name`` form.
        extract_dir (str): Directory the archive is extracted into.

    Raises:
        FileNotFoundError: If ``kaggle.json`` is not in the working directory
            after the upload prompt.
        subprocess.CalledProcessError: If the ``kaggle`` command exits with a
            non-zero status.
    """
    # Local imports keep this cell self-contained; the cell's own import
    # block only provides os, zipfile and google.colab.files.
    import shutil
    import subprocess

    files.upload()

    credentials = 'kaggle.json'
    if not os.path.isfile(credentials):
        raise FileNotFoundError(
            "kaggle.json was not found in the working directory after the upload; "
            "upload your Kaggle API token under exactly that name."
        )

    # Same effect as: mkdir -p ~/.kaggle; cp kaggle.json ~/.kaggle/; chmod 600 ~/.kaggle/kaggle.json
    kaggle_dir = os.path.join(os.path.expanduser('~'), '.kaggle')
    os.makedirs(kaggle_dir, exist_ok=True)
    installed_token = shutil.copy(credentials, kaggle_dir)
    os.chmod(installed_token, 0o600)

    # check=True turns a failed download into an exception.
    # NOTE(review): unlike the `!kaggle ...` shell escape, the command's progress
    # output may not be echoed in the cell depending on the kernel - confirm.
    subprocess.run(['kaggle', 'datasets', 'download', '-d', dataset], check=True)

    # The archive is expected in the working directory, named after the last
    # part of the slug (for the default slug: driver-drowsiness-dataset-ddd.zip).
    dataset_zip = dataset.rsplit('/', 1)[-1] + '.zip'
    with zipfile.ZipFile(dataset_zip, 'r') as zip_ref:
        zip_ref.extractall(extract_dir)

# Example usage: prompts for a file upload, then downloads the dataset and
# extracts it into ./driver_drowsiness_dataset
download_and_extract_kaggle_dataset()


Saving kaggle.json to kaggle.json
Dataset URL: https://www.kaggle.com/datasets/ismailnasri20/driver-drowsiness-dataset-ddd
License(s): unknown
Downloading driver-drowsiness-dataset-ddd.zip to /content
100% 2.58G/2.58G [01:57<00:00, 25.0MB/s]
100% 2.58G/2.58G [01:57<00:00, 23.6MB/s]


In [2]:
import os

def setup_directories_and_labels(base_dir, train_dir, val_dir):
    """
    Makes sure the training and validation folders exist and returns the label mapping.

    Parameters:
        base_dir (str): Base directory containing the dataset (accepted but not used here).
        train_dir (str): Directory to store training data; created if missing.
        val_dir (str): Directory to store validation data; created if missing.

    Returns:
        dict: Mapping from class folder name to integer label id.
    """
    for split_dir in (train_dir, val_dir):
        os.makedirs(split_dir, exist_ok=True)

    return {'Drowsy': 0, 'Non Drowsy': 1}

# Where the extracted dataset lives, and where the train/validation split is written.
base_dir = './driver_drowsiness_dataset/Driver Drowsiness Dataset (DDD)'
train_dir, val_dir = './data_split/train', './data_split/val'

labels = setup_directories_and_labels(base_dir, train_dir, val_dir)
print(f"Labels: {labels}")

Labels: {'Drowsy': 0, 'Non Drowsy': 1}


In [3]:
def _copy_files_with_labels(source_dir, file_list, target_dir, label_value):
    """
    Copies files and creates corresponding label files.

    Parameters:
        source_dir (str): Source directory of the files.
        file_list (list): List of file names to copy.
        target_dir (str): Target directory for the copied files.
        label_value (int): The label value to write in the label file.
    """
    for file_name in file_list:
        # Copy the image file
        shutil.copy(os.path.join(source_dir, file_name), os.path.join(target_dir, file_name))

        # Create a corresponding label file
        label_file = os.path.splitext(file_name)[0] + '.txt'
        label_file_path = os.path.join(target_dir, label_file)
        with open(label_file_path, 'w') as f:
            f.write(f"{label_value} 0.5 0.5 1.0 1.0\n")

In [4]:
import os
import random
import shutil

def balance_classes(base_dir, train_dir, val_dir, labels, train_fraction=0.8, seed=42):
    """
    Equalizes the number of samples from each class and splits them into train and validation sets.

    Every class is down-sampled to the size of the smallest class, then the
    sample is split and copied (with generated label files) into
    ``<train_dir>/<label name lower-cased>`` and ``<val_dir>/<label name lower-cased>``.

    Selection is reproducible: file names are sorted before sampling and a
    private random generator seeded with ``seed`` is used. With the same
    dataset and seed the same files land in the same split on every run.
    Files already present in the target folders are never removed, so changing
    the seed or the dataset between runs can leave a sample in both splits.

    Parameters:
        base_dir (str): Path to the base dataset directory containing labeled data.
        train_dir (str): Path to the directory where training data will be stored.
        val_dir (str): Path to the directory where validation data will be stored.
        labels (dict): A dictionary where the key is the label name and the value is the label id.
        train_fraction (float): Share of each class's sample that goes to training. Default is 0.8.
        seed (int | None): Seed for the sampling generator; ``None`` gives a
            different sample on every run. Default is 42.

    Raises:
        ValueError: If ``labels`` is empty or ``train_fraction`` is not strictly between 0 and 1.
    """
    if not labels:
        raise ValueError("labels must contain at least one class")
    if not 0.0 < train_fraction < 1.0:
        raise ValueError(
            f"train_fraction must be strictly between 0 and 1, got {train_fraction}"
        )

    # Ensure train and validation directories exist
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(val_dir, exist_ok=True)

    # List each class folder once. Only regular files are kept: a
    # sub-directory would inflate the class size and make the copy step fail.
    # Names are sorted because directory listing order is arbitrary.
    files_by_label = {}
    for label_name in labels:
        with os.scandir(os.path.join(base_dir, label_name)) as entries:
            files_by_label[label_name] = sorted(
                entry.name for entry in entries if entry.is_file()
            )

    # Determine the minimum class size
    min_class_size = min(len(names) for names in files_by_label.values())

    # A private generator keeps the global `random` state untouched.
    rng = random.Random(seed)

    # Process each class
    for label_name, label_value in labels.items():
        label_dir = os.path.join(base_dir, label_name)

        # Balance the class by sampling a subset
        subset_files = rng.sample(files_by_label[label_name], min_class_size)

        # Split into train and validation sets
        train_size = int(train_fraction * len(subset_files))
        train_files = subset_files[:train_size]
        val_files = subset_files[train_size:]

        # Create subdirectories for the label
        train_label_dir = os.path.join(train_dir, label_name.lower())
        val_label_dir = os.path.join(val_dir, label_name.lower())
        os.makedirs(train_label_dir, exist_ok=True)
        os.makedirs(val_label_dir, exist_ok=True)

        # Copy files and create label files for both splits
        _copy_files_with_labels(label_dir, train_files, train_label_dir, label_value)
        _copy_files_with_labels(label_dir, val_files, val_label_dir, label_value)

    print("Data balanced and split into train and validation sets.")

# Build the balanced train/validation split on disk from the extracted dataset.
balance_classes(base_dir, train_dir, val_dir, labels)

Data balanced and split into train and validation sets.


In [5]:
import cv2

def _process_and_save_image(file_path, augmentation):
    """
    Reads an image, applies augmentations, and saves the processed image.

    Parameters:
        file_path (str): Path to the image file.
        augmentation (albumentations.Compose): Augmentation pipeline to apply.
    """
    # Read the image
    image = cv2.imread(file_path)
    if image is None:
        print(f"Failed to read image: {file_path}")
        return

    # Apply augmentations
    augmented = augmentation(image=image)
    augmented_image = augmented["image"]

    # Save the processed image
    cv2.imwrite(file_path, augmented_image)
    print(f"Processed and saved: {file_path}")

In [6]:
import os
import cv2
from albumentations import HorizontalFlip, RandomBrightnessContrast, Rotate, GaussianBlur, Compose

def apply_light_augmentation_in_folder(folder_path):
    """
    Runs a light augmentation pipeline over every .png image under a folder, sub-folders included.

    Each matching file is handed to ``_process_and_save_image`` together with
    the pipeline. The extension check is case insensitive.

    Parameters:
        folder_path (str): Path to the folder containing the images.
    """
    light_pipeline = Compose([
        # Occasional mirror image
        HorizontalFlip(p=0.2),
        # Occasional small tilt, limit of 5 degrees
        Rotate(limit=5, p=0.2),
        # Rare blur, kernel size between 3 and 5
        GaussianBlur(blur_limit=(3, 5), p=0.06),
        # Mild brightness/contrast shift on half of the images
        RandomBrightnessContrast(brightness_limit=0.1, contrast_limit=0.1, p=0.5),
    ])

    for current_dir, _subdirs, names in os.walk(folder_path):
        png_names = (name for name in names if name.lower().endswith('.png'))
        for png_name in png_names:
            _process_and_save_image(os.path.join(current_dir, png_name), light_pipeline)

# Training folders of the two classes; only the training split is augmented here.
train_drowsy_path = "/content/data_split/train/drowsy"
train_non_drowsy_path = "/content/data_split/train/non drowsy"

for class_folder in (train_drowsy_path, train_non_drowsy_path):
    apply_light_augmentation_in_folder(class_folder)

  check_for_updates()


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Processed and saved: /content/data_split/train/non drowsy/j0462.png
Processed and saved: /content/data_split/train/non drowsy/y1497.png
Processed and saved: /content/data_split/train/non drowsy/d0436.png
Processed and saved: /content/data_split/train/non drowsy/o0706.png
Processed and saved: /content/data_split/train/non drowsy/zb0207.png
Processed and saved: /content/data_split/train/non drowsy/p0141.png
Processed and saved: /content/data_split/train/non drowsy/y1285.png
Processed and saved: /content/data_split/train/non drowsy/m0364.png
Processed and saved: /content/data_split/train/non drowsy/j0135.png
Processed and saved: /content/data_split/train/non drowsy/zc0471.png
Processed and saved: /content/data_split/train/non drowsy/x0482.png
Processed and saved: /content/data_split/train/non drowsy/a0654.png
Processed and saved: /content/data_split/train/non drowsy/y1488.png
Processed and saved: /content/data_split/train/no

In [7]:
import os
from ultralytics import YOLO

def create_yaml_train_and_evaluate(train_dir, val_dir, output_yaml_path, model_path, epochs=1, img_size=128, batch_size=24,
                                   class_names=('drowsy', 'non_drowsy')):
    """
    Creates a YAML configuration file, trains a YOLO model, and evaluates its performance.

    Parameters:
        train_dir (str): Path to the training data directory.
        val_dir (str): Path to the validation data directory.
        output_yaml_path (str): Path to save the YAML configuration file. Missing
            parent directories are created.
        model_path (str): Path to the pre-trained YOLO model file.
        epochs (int): Number of training epochs. Default is 1.
        img_size (int): Image size for training. Default is 128.
        batch_size (int): Batch size for training. Default is 24.
        class_names (sequence of str): Class names in label-id order. Default is
            ('drowsy', 'non_drowsy').

    Returns:
        tuple: (f1_score, precision, recall) read from the validation metrics.
    """
    abs_train = os.path.abspath(train_dir)
    abs_val = os.path.abspath(val_dir)
    # The dataset root is the deepest folder containing both splits, so it
    # follows the arguments instead of assuming './data_split'.
    dataset_root = os.path.commonpath([abs_train, abs_val])
    class_names = list(class_names)

    # Create YAML configuration file
    # NOTE(review): whether the trainer reads the `weights` entry is not
    # established in this notebook - confirm against the Ultralytics docs.
    yaml_content = (
        "\n"
        f"path: {dataset_root}\n"
        f"train: {abs_train}\n"
        f"val: {abs_val}\n"
        f"nc: {len(class_names)}\n"
        f"names: {class_names}\n"
        f"weights: {[3.0] * len(class_names)}\n"
    )

    # Make sure the folder for the YAML file exists before writing it.
    os.makedirs(os.path.dirname(os.path.abspath(output_yaml_path)), exist_ok=True)
    with open(output_yaml_path, 'w') as f:
        f.write(yaml_content)
    print(f"YAML configuration file created at: {output_yaml_path}")

    # Train the YOLO model
    model = YOLO(model_path)
    model.train(data=output_yaml_path, epochs=epochs, imgsz=img_size, batch=batch_size)
    print("Model training completed.")

    # Evaluate the model
    metrics = model.val()
    precision = metrics.results_dict['metrics/precision(B)']
    recall = metrics.results_dict['metrics/recall(B)']
    # The small constant avoids a division by zero when both values are 0.
    f1_score = 2 * (precision * recall) / (precision + recall + 1e-6)

    # Print evaluation metrics
    print("Evaluation Results:")
    print("F1-Score:", f1_score)
    print("Precision:", precision)
    print("Recall:", recall)

    return f1_score, precision, recall

# Inputs for the training run: split folders, dataset YAML location and starting weights.
train_dir, val_dir = './data_split/train', './data_split/val'
train_data_path = './data_split/data.yaml'
pretrained_model_path = 'yolov8n.pt'

f1_score, precision, recall = create_yaml_train_and_evaluate(
    train_dir=train_dir,
    val_dir=val_dir,
    output_yaml_path=train_data_path,
    model_path=pretrained_model_path,
    epochs=1,
    img_size=128,
    batch_size=24,
)

YAML configuration file created at: ./data_split/data.yaml
Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n.pt to 'yolov8n.pt'...


100%|██████████| 6.25M/6.25M [00:00<00:00, 315MB/s]


Ultralytics 8.3.65 🚀 Python-3.11.11 torch-2.5.1+cu121 CUDA:0 (Tesla T4, 15102MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=./data_split/data.yaml, epochs=1, time=None, patience=100, batch=24, imgsz=128, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, sh

100%|██████████| 5.35M/5.35M [00:00<00:00, 158MB/s]


[34m[1mAMP: [0mchecks passed ✅


[34m[1mtrain: [0mScanning /content/data_split/train/drowsy... 31112 images, 0 backgrounds, 0 corrupt: 100%|██████████| 31112/31112 [00:24<00:00, 1247.81it/s]


[34m[1mtrain: [0mNew cache created: /content/data_split/train/drowsy.cache
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))


[34m[1mval: [0mScanning /content/data_split/val/drowsy... 7778 images, 0 backgrounds, 0 corrupt: 100%|██████████| 7778/7778 [00:07<00:00, 1034.86it/s]


[34m[1mval: [0mNew cache created: /content/data_split/val/drowsy.cache
Plotting labels to runs/detect/train/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.001667, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005625000000000001), 63 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 128 train, 128 val
Using 2 dataloader workers
Logging results to [1mruns/detect/train[0m
Starting training for 1 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        1/1     0.275G     0.2843     0.9519     0.9635         24        128: 100%|██████████| 1297/1297 [03:34<00:00,  6.04it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 163/163 [00:53<00:00,  3.07it/s]


                   all       7778       7778      0.988      0.982      0.995      0.988

1 epochs completed in 0.077 hours.
Optimizer stripped from runs/detect/train/weights/last.pt, 6.2MB
Optimizer stripped from runs/detect/train/weights/best.pt, 6.2MB

Validating runs/detect/train/weights/best.pt...
Ultralytics 8.3.65 🚀 Python-3.11.11 torch-2.5.1+cu121 CUDA:0 (Tesla T4, 15102MiB)
Model summary (fused): 168 layers, 3,006,038 parameters, 0 gradients, 8.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 163/163 [00:47<00:00,  3.44it/s]


                   all       7778       7778      0.988      0.981      0.995      0.988
                drowsy       3889       3889      0.976      0.999      0.995      0.995
            non_drowsy       3889       3889      0.999      0.964      0.995      0.982
Speed: 0.0ms preprocess, 0.3ms inference, 0.0ms loss, 1.3ms postprocess per image
Results saved to [1mruns/detect/train[0m
Model training completed.
Ultralytics 8.3.65 🚀 Python-3.11.11 torch-2.5.1+cu121 CUDA:0 (Tesla T4, 15102MiB)
Model summary (fused): 168 layers, 3,006,038 parameters, 0 gradients, 8.1 GFLOPs


[34m[1mval: [0mScanning /content/data_split/val/drowsy.cache... 7778 images, 0 backgrounds, 0 corrupt: 100%|██████████| 7778/7778 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 325/325 [00:51<00:00,  6.28it/s]


                   all       7778       7778      0.988      0.982      0.995      0.988
                drowsy       3889       3889      0.977      0.999      0.995      0.995
            non_drowsy       3889       3889      0.999      0.964      0.995      0.982
Speed: 0.0ms preprocess, 0.6ms inference, 0.0ms loss, 1.3ms postprocess per image
Results saved to [1mruns/detect/train2[0m
Evaluation Results:
F1-Score: 0.9850755244459597
Precision: 0.988321071110269
Recall: 0.9818522175600267
