You can run the cells up to the section headings without any manual steps or preloading. The output is saved as an MP4 file in the Files section. The sections that follow contain the code showing how the weights used here were obtained.


Person Model: Detects 'person'

Model 1: Detects  'mask'

Model 2: Detects 'hairnet'

Model 3: Detects 'safety vest','helmet'


convert 720x1280 to 360x640

The person model detects only the 'person' class.

Accessory detections are performed within the person bounding boxes.



In [None]:
# Fetch ByteTrack and switch into the checkout; later cells read files from /content/ByteTrack.
!git clone https://github.com/ifzhang/ByteTrack.git
%cd ByteTrack
# Install ByteTrack's own requirements and build its extension in place.
!pip install -r requirements.txt
!pip install cython_bbox
!python setup.py build_ext --inplace
# ultralytics provides the YOLO class imported by later cells.
# NOTE(review): the install log shows loguru and lap are already pulled in by requirements.txt.
!pip install ultralytics
!pip install loguru
!pip install lap

Cloning into 'ByteTrack'...
remote: Enumerating objects: 2007, done.[K
remote: Total 2007 (delta 0), reused 0 (delta 0), pack-reused 2007 (from 1)[K
Receiving objects: 100% (2007/2007), 79.60 MiB | 13.69 MiB/s, done.
Resolving deltas: 100% (1141/1141), done.
/content/ByteTrack
Collecting loguru (from -r requirements.txt (line 5))
  Downloading loguru-0.7.3-py3-none-any.whl.metadata (22 kB)
Collecting thop (from -r requirements.txt (line 10))
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl.metadata (2.7 kB)
Collecting ninja (from -r requirements.txt (line 11))
  Downloading ninja-1.11.1.2-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl.metadata (5.3 kB)
Collecting lap (from -r requirements.txt (line 14))
  Downloading lap-0.5.12-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.2 kB)
Collecting motmetrics (from -r requirements.txt (line 15))
  Downloading motmetrics-1.4.0-py3-none-any.whl.metadata (20 kB)
Col

In [None]:
import os
import cv2
import torch
import gdown
import warnings
import logging
import numpy as np
from IPython.display import HTML
from ultralytics import YOLO

# Suppress all Python warnings for the rest of the session.
warnings.filterwarnings(action="ignore")

# INFO-level logging so progress messages show up in the cell output.
logging.basicConfig(level=logging.INFO)

# Run on the GPU when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
logging.info(f"Using device: {device}")

# download files from Google Drive
def download_file(url, output_path):
    """Download a Google Drive file to ``output_path`` with gdown.

    Args:
        url: Download URL handed to ``gdown.download``.
        output_path: Local path the file is written to.

    Returns:
        The path returned by ``gdown.download``.

    Raises:
        RuntimeError: If gdown returns no path, i.e. nothing was saved.
        Exception: Any error raised by gdown is logged and re-raised unchanged.
    """
    try:
        saved_path = gdown.download(url, output_path, quiet=False)
    except Exception as e:
        logging.error(f"Error downloading {url}: {e}")
        raise

    # NOTE(review): some gdown releases signal a failed download by returning
    # None instead of raising; treat that as an error so a missing file is
    # reported here rather than when the weights/video are opened later.
    if saved_path is None:
        message = f"Error downloading {url}: gdown did not return a file path"
        logging.error(message)
        raise RuntimeError(message)
    return saved_path

# Input video: Google Drive source and the local file it is saved as.
video_url = "https://drive.google.com/uc?id=130btvPHooNMgRY6AWBkhullJeHViJJg1"
video_path = "original_video.mp4"
download_file(video_url, video_path)

# Google Drive sources for the four weight files.
weights_1_url = "https://drive.google.com/uc?id=1VVRD5WrZ3CV2EOPwXjv9ZyY53rdHqwHb"  # best-all.pt
weights_2_url = "https://drive.google.com/uc?id=11jxQFEYr4GV84YoxXCQjMHweZMrjRoG0"  # best_hairnet_2.pt
weights_3_url = "https://drive.google.com/uc?id=1o9pJQHl2vRV2MStwNjlcMZJ7qarxYhmg"  # ppe.pt
weights_4_url = "https://drive.google.com/uc?id=1W9vdVxUCIa6YEwV5kmxdyFOKxiMQR7Qm"  # safety-best.pt

# Fetch each weight file under its local name, in the same order as the URLs above.
for weights_url, weights_file in zip(
    (weights_1_url, weights_2_url, weights_3_url, weights_4_url),
    ("best-all.pt", "best_hairnet_2.pt", "ppe.pt", "safety-best.pt"),
):
    download_file(weights_url, weights_file)

# Load every detector onto the selected device.
person_model = YOLO('yolov8n.pt').to(device)   # YOLOv8 for detecting people
model1 = YOLO('best-all.pt').to(device)        # masks, per the original note
model2 = YOLO('best_hairnet_2.pt').to(device)  # hairnets, per the original note
model3 = YOLO('ppe.pt').to(device)
model4 = YOLO('safety-best.pt').to(device)

# Log the class names each model exposes so the labels can be checked.
logging.info(f"Model1 class names: {model1.names}")
logging.info(f"Model2 class names: {model2.names}")
logging.info(f"Model3 class names: {model3.names}")
logging.info(f"Model4 class names: {model4.names}")
logging.info(f"Person model class names: {person_model.names}")

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


Downloading...
From: https://drive.google.com/uc?id=130btvPHooNMgRY6AWBkhullJeHViJJg1
To: /content/ByteTrack/original_video.mp4
100%|██████████| 31.3M/31.3M [00:01<00:00, 19.0MB/s]
Downloading...
From: https://drive.google.com/uc?id=1VVRD5WrZ3CV2EOPwXjv9ZyY53rdHqwHb
To: /content/ByteTrack/best-all.pt
100%|██████████| 6.25M/6.25M [00:00<00:00, 92.3MB/s]
Downloading...
From: https://drive.google.com/uc?id=11jxQFEYr4GV84YoxXCQjMHweZMrjRoG0
To: /content/ByteTrack/best_hairnet_2.pt
100%|██████████| 22.5M/22.5M [00:00<00:00, 41.4MB/s]
Downloading...
From (original): https://drive.google.com/uc?id=1o9pJQHl2vRV2MStwNjlcMZJ7qarxYhmg
From (redirected): https://drive.google.com/uc?id=1o9pJQHl2vRV2MStwNjlcMZJ7qarxYhmg&confirm=t&uuid=11e28449-b91a-4c6b-a664-06dd58773168
To: /content/ByteTrack/ppe.pt
100%|██████████| 87.6M/87.6M [00:01<00:00, 62.3MB/s]
Downloading...
From: https://drive.google.com/uc?id=1W9vdVxUCIa6YEwV5kmxdyFOKxiMQR7Qm
To: /content/ByteTrack/safety-best.pt
100%|██████████| 22.5M/22

Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n.pt to 'yolov8n.pt'...



100%|██████████| 6.25M/6.25M [00:00<00:00, 421MB/s]


In [None]:
import os
import cv2
import torch
import numpy as np
import logging
from ultralytics import YOLO
from yolox.tracker.byte_tracker import BYTETracker
# Re-create the ``np.float`` alias (removed from recent NumPy releases).
# NOTE(review): presumably needed because the imported ByteTrack code still refers to it.
np.float = float
# Setup logging
logging.basicConfig(level=logging.INFO)

# Initialize YOLO models
device = 'cuda' if torch.cuda.is_available() else 'cpu'
person_model = YOLO('/content/ByteTrack/yolov8n.pt').to(device)  # Person detection
model1 = YOLO('/content/ByteTrack/ppe.pt').to(device)            # Accessory detection (ppe.pt)
model2 = YOLO('/content/ByteTrack/best-all.pt').to(device)       # Accessory detection (best-all.pt)
# ppe.pt used to be loaded a second time under this name; keep the name as an
# alias of model1 so the weights are only loaded once.
ppe_model = model1
# One entry per distinct network: listing ppe.pt twice only repeated the same
# inference on every crop without changing the merged detections.
# NOTE(review): best_hairnet_2.pt and safety-best.pt are downloaded earlier in
# the notebook but are not loaded here - confirm that is intended.
models = [model1, model2]

# Constants
CONF_THRESHOLD_PERSON = 0.8  # minimum confidence passed to the person detector
# Per-accessory minimum confidence; keys are the lower-case class names that are tracked.
CONF_THRESHOLDS = {'mask': 0.8, 'hair net': 0.8, 'hardhat': 0.9, 'safety vest': 0.8}
HOLD_FRAMES = 30     # value the per-accessory hold counter is reset to on a detection
person_states = {}   # track id -> per-accessory state, filled by update_accessory_states

# ByteTrack parameters
class ByteTrackArgs:
    """Attribute holder that is passed to ``BYTETracker`` as its settings object."""

    def __init__(self):
        # Threshold settings read by the tracker.
        self.track_thresh, self.match_thresh = 0.6, 0.8
        # Track buffer length and the MOT20 switch.
        self.track_buffer, self.mot20 = 30, False

# Module-level tracker instance; main() calls tracker.update() on it for every frame.
tracker_args = ByteTrackArgs()
tracker = BYTETracker(tracker_args, frame_rate=30)

# Accessory state updating
def update_accessory_states(person_id, detected_objects, person_states, hold_frames):
    """Update one person's accessory flags in ``person_states`` in place.

    Args:
        person_id: Key identifying the person inside ``person_states``.
        detected_objects: Container of accessory names detected in the current
            frame; only membership (``in``) is used.
        person_states: Dict mapping a person id to per-accessory
            ``{'state', 'hold_counter'}`` dicts. Unknown ids are initialised
            with every accessory off.
        hold_frames: Value the hold counter is reset to whenever the accessory
            is detected.

    Returns:
        None. ``person_states`` is modified in place.
    """
    if person_id not in person_states:
        # Initialize states for a new person (key order is the label order used when drawing)
        person_states[person_id] = {
            'hardhat': {'state': False, 'hold_counter': 0},
            'safety vest': {'state': False, 'hold_counter': 0},
            'mask': {'state': False, 'hold_counter': 0},
            'hair net': {'state': False, 'hold_counter': 0},
        }

    accessories = person_states[person_id]

    # Update states for all accessories
    for obj, state_info in accessories.items():
        if obj in detected_objects:
            state_info['hold_counter'] = hold_frames
            state_info['state'] = True
        elif state_info['hold_counter'] > 0:
            # Not seen this frame: keep the current state while the hold counter runs down.
            state_info['hold_counter'] -= 1
        else:
            state_info['state'] = False

    # Enforce logical consistency: hardhat implies hairnet.
    # The former trailing branch ("hair net set -> clear hardhat") could only
    # run when hardhat was already False, so it never changed anything and has
    # been removed.
    if accessories['hardhat']['state']:
        accessories['hair net']['state'] = True


# Process accessory detections
def process_accessory_detections(person_crop, models):
    """Run every accessory model on one person crop and collect confident hits.

    Args:
        person_crop: Image region for one person (a slice of the frame).
        models: Iterable of detectors exposing ``predict`` and ``names``.

    Returns:
        Dict mapping an accessory name (a key of ``CONF_THRESHOLDS``) to the
        highest confidence seen for it across all models. Empty when the crop
        has no pixels or nothing reaches its threshold.
    """
    # A box clipped to the frame can collapse to zero area; there is nothing
    # to run inference on in that case.
    if getattr(person_crop, 'size', None) == 0:
        return {}

    detected_objects = {}
    for model in models:
        # verbose=False stops the per-crop log line that floods the cell output.
        results = model.predict(person_crop, verbose=False)
        for result in results:
            for box in result.boxes:
                cls_id = int(box.cls[0])
                # Normalise separators so labels such as 'hair-net' match the
                # 'hair net' key used in CONF_THRESHOLDS.
                cls_name = model.names[cls_id].lower().replace('-', ' ').replace('_', ' ')
                conf = float(box.conf[0])
                threshold = CONF_THRESHOLDS.get(cls_name)
                if threshold is not None and conf >= threshold:
                    detected_objects[cls_name] = max(detected_objects.get(cls_name, 0), conf)
    return detected_objects

def resolve_conflicts(detected_objects):
    """Keep only the more confident of 'hair net' and 'hardhat' when both are present.

    The dict is modified in place and also returned. When the two confidences
    are equal, 'hardhat' is the one that is kept.
    """
    both_present = 'hair net' in detected_objects and 'hardhat' in detected_objects
    if not both_present:
        return detected_objects

    hairnet_wins = detected_objects['hair net'] > detected_objects['hardhat']
    loser = 'hardhat' if hairnet_wins else 'hair net'
    detected_objects.pop(loser)
    return detected_objects


# Main video processing function
def main():
    """Annotate the input video with per-person accessory states and save it.

    Each frame goes through ``person_model``; the 'person' boxes are passed to
    the module-level ByteTrack ``tracker``; every activated track is cropped,
    run through the accessory ``models``, conflict-resolved, and its entry in
    ``person_states`` is updated and drawn. Every frame that is read is written
    to the output, including frames without any person detection.

    Raises:
        RuntimeError: If the input video cannot be opened.
    """
    video_path = "/content/ByteTrack/original_video.mp4"
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise RuntimeError(f"Could not open video: {video_path}")
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # Keep the fractional rate (e.g. 29.97): truncating it to int changes the
    # playback speed of the output. Fall back to 30 if the file reports none.
    fps = cap.get(cv2.CAP_PROP_FPS) or 30.0

    # Configure output video
    output_video_path = "processed_video_bytetrack_7.mp4"
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

    frame_count = 0
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Person detection (verbose=False avoids one log line per frame)
            results = person_model.predict(frame, conf=CONF_THRESHOLD_PERSON, verbose=False)
            detections = []
            for box in results[0].boxes:
                cls_id = int(box.cls[0])
                if person_model.names[cls_id] == 'person':
                    xmin, ymin, xmax, ymax = map(int, box.xyxy[0].tolist())
                    detections.append([xmin, ymin, xmax, ymax, float(box.conf[0])])

            # Always give the tracker an (N, 5) array, also when N == 0. Frames
            # without detections used to be skipped entirely, which dropped
            # them from the output video and stopped tracks from ageing.
            detections = np.array(detections, dtype=float).reshape(-1, 5)

            # Track using ByteTrack
            online_tracks = tracker.update(detections, [frame_width, frame_height], [frame_width, frame_height])

            # Process tracks
            for track in online_tracks:
                if not track.is_activated:
                    continue

                track_id = track.track_id
                xmin, ymin, xmax, ymax = map(int, track.tlbr)

                # Ensure bounding box is within frame
                xmin, ymin = max(0, xmin), max(0, ymin)
                xmax, ymax = min(frame_width, xmax), min(frame_height, ymax)
                if xmax <= xmin or ymax <= ymin:
                    # Box lies outside the frame: nothing to crop or draw.
                    continue

                # Crop the person
                person_crop = frame[ymin:ymax, xmin:xmax]

                # Accessory detection
                detected_objects = process_accessory_detections(person_crop, models)
                detected_objects = resolve_conflicts(detected_objects)

                # Update states
                update_accessory_states(track_id, detected_objects, person_states, HOLD_FRAMES)

                # Draw bounding box and labels (stacked upwards from the box top)
                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (255, 0, 0), 2)
                label_y = ymin - 10
                for obj, state_info in person_states[track_id].items():
                    color = (0, 255, 0) if state_info['state'] else (0, 0, 255)
                    label = f"{obj}: {'using' if state_info['state'] else 'removed'}"
                    cv2.putText(frame, label, (xmin, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
                    label_y -= 15

            out.write(frame)
            frame_count += 1
            if frame_count % 30 == 0:
                logging.info(f"Processed {frame_count} frames.")
    finally:
        # Release the capture and finalise the output file even if a frame fails.
        cap.release()
        out.release()
    print(f"Processed video saved at {os.path.abspath(output_video_path)}")

if __name__ == "__main__":
    main()

[1;30;43mGörüntülenen çıkış son 5000 satıra kısaltıldı.[0m
Speed: 1.9ms preprocess, 9.4ms inference, 2.9ms postprocess per image at shape (1, 3, 640, 384)

0: 640x224 1 Hardhat, 1 Mask, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 22.9ms
Speed: 2.7ms preprocess, 22.9ms inference, 4.7ms postprocess per image at shape (1, 3, 640, 224)

0: 640x224 1 hair-net, 14.3ms
Speed: 1.5ms preprocess, 14.3ms inference, 3.3ms postprocess per image at shape (1, 3, 640, 224)

0: 640x224 1 Hardhat, 1 Mask, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 18.8ms
Speed: 1.5ms preprocess, 18.8ms inference, 1.9ms postprocess per image at shape (1, 3, 640, 224)

0: 640x384 1 person, 15.5ms
Speed: 3.5ms preprocess, 15.5ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 384)

0: 640x224 1 Hardhat, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 23.5ms
Speed: 1.5ms preprocess, 23.5ms inference, 2.1ms postprocess per image at shape (1, 3, 640, 224)

0: 640x224 1 hair-net, 18.5ms
Speed: 1.6ms preprocess, 18.5ms inference, 4

# ByteTrack

In [None]:
import os
import cv2
import torch
import numpy as np
import logging
from ultralytics import YOLO
from yolox.tracker.byte_tracker import BYTETracker
# Re-create the ``np.float`` alias (removed from recent NumPy releases).
# NOTE(review): presumably needed because the imported ByteTrack code still refers to it.
np.float = float
# Setup logging
logging.basicConfig(level=logging.INFO)

# Initialize YOLO models
device = 'cuda' if torch.cuda.is_available() else 'cpu'
person_model = YOLO('/content/ByteTrack/yolov8n.pt').to(device)  # Person detection
model1 = YOLO('/content/ByteTrack/ppe.pt').to(device)            # Accessory detection (ppe.pt)
model2 = YOLO('/content/ByteTrack/best-all.pt').to(device)       # Accessory detection (best-all.pt)
# ppe.pt used to be loaded a second time under this name; keep the name as an
# alias of model1 so the weights are only loaded once.
ppe_model = model1
# One entry per distinct network: listing ppe.pt twice only repeated the same
# inference on every crop without changing the merged detections.
# NOTE(review): best_hairnet_2.pt and safety-best.pt are downloaded earlier in
# the notebook but are not loaded here - confirm that is intended.
models = [model1, model2]

# Constants
CONF_THRESHOLD_PERSON = 0.8  # minimum confidence passed to the person detector
# Per-accessory minimum confidence; keys are the lower-case class names that are tracked.
CONF_THRESHOLDS = {'mask': 0.8, 'hair net': 0.8, 'hardhat': 0.9, 'safety vest': 0.8}
HOLD_FRAMES = 30     # value the per-accessory hold counter is reset to on a detection
person_states = {}   # track id -> per-accessory state, filled by update_accessory_states

# ByteTrack parameters
class ByteTrackArgs:
    """Attribute holder that is passed to ``BYTETracker`` as its settings object."""

    def __init__(self):
        # Threshold settings read by the tracker.
        self.track_thresh, self.match_thresh = 0.6, 0.8
        # Track buffer length and the MOT20 switch.
        self.track_buffer, self.mot20 = 30, False

# Module-level tracker instance; main() calls tracker.update() on it for every frame.
tracker_args = ByteTrackArgs()
tracker = BYTETracker(tracker_args, frame_rate=30)

# Accessory state updating
def update_accessory_states(person_id, detected_objects, person_states, hold_frames):
    """Update one person's accessory flags in ``person_states`` in place.

    Args:
        person_id: Key identifying the person inside ``person_states``.
        detected_objects: Container of accessory names detected in the current
            frame; only membership (``in``) is used.
        person_states: Dict mapping a person id to per-accessory
            ``{'state', 'hold_counter'}`` dicts. Unknown ids are initialised
            with every accessory off.
        hold_frames: Value the hold counter is reset to whenever the accessory
            is detected.

    Returns:
        None. ``person_states`` is modified in place.
    """
    if person_id not in person_states:
        # Initialize states for a new person (key order is the label order used when drawing)
        person_states[person_id] = {
            'hardhat': {'state': False, 'hold_counter': 0},
            'safety vest': {'state': False, 'hold_counter': 0},
            'mask': {'state': False, 'hold_counter': 0},
            'hair net': {'state': False, 'hold_counter': 0},
        }

    accessories = person_states[person_id]

    # Update states for all accessories
    for obj, state_info in accessories.items():
        if obj in detected_objects:
            state_info['hold_counter'] = hold_frames
            state_info['state'] = True
        elif state_info['hold_counter'] > 0:
            # Not seen this frame: keep the current state while the hold counter runs down.
            state_info['hold_counter'] -= 1
        else:
            state_info['state'] = False

    # Enforce logical consistency: hardhat implies hairnet. When hardhat is
    # off, the hair net state is left exactly as the loop above set it (the
    # former empty ``elif ...: pass`` branch did nothing and has been dropped).
    if accessories['hardhat']['state']:
        accessories['hair net']['state'] = True


# Process accessory detections
def process_accessory_detections(person_crop, models):
    """Run every accessory model on one person crop and collect confident hits.

    Args:
        person_crop: Image region for one person (a slice of the frame).
        models: Iterable of detectors exposing ``predict`` and ``names``.

    Returns:
        Dict mapping an accessory name (a key of ``CONF_THRESHOLDS``) to the
        highest confidence seen for it across all models. Empty when the crop
        has no pixels or nothing reaches its threshold.
    """
    # A box clipped to the frame can collapse to zero area; there is nothing
    # to run inference on in that case.
    if getattr(person_crop, 'size', None) == 0:
        return {}

    detected_objects = {}
    for model in models:
        # verbose=False stops the per-crop log line that floods the cell output.
        results = model.predict(person_crop, verbose=False)
        for result in results:
            for box in result.boxes:
                cls_id = int(box.cls[0])
                # Normalise separators so labels such as 'hair-net' match the
                # 'hair net' key used in CONF_THRESHOLDS.
                cls_name = model.names[cls_id].lower().replace('-', ' ').replace('_', ' ')
                conf = float(box.conf[0])
                threshold = CONF_THRESHOLDS.get(cls_name)
                if threshold is not None and conf >= threshold:
                    detected_objects[cls_name] = max(detected_objects.get(cls_name, 0), conf)
    return detected_objects

# Main video processing function
def main():
    """Annotate the input video with per-person accessory states and save it.

    Each frame goes through ``person_model``; the 'person' boxes are passed to
    the module-level ByteTrack ``tracker``; every activated track is cropped,
    run through the accessory ``models``, and its entry in ``person_states`` is
    updated and drawn. Every frame that is read is written to the output,
    including frames without any person detection.

    Raises:
        RuntimeError: If the input video cannot be opened.
    """
    video_path = "/content/ByteTrack/original_video.mp4"
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise RuntimeError(f"Could not open video: {video_path}")
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # Keep the fractional rate (e.g. 29.97): truncating it to int changes the
    # playback speed of the output. Fall back to 30 if the file reports none.
    fps = cap.get(cv2.CAP_PROP_FPS) or 30.0

    # Configure output video
    output_video_path = "processed_video_bytetrack_5.mp4"
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

    frame_count = 0
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Person detection (verbose=False avoids one log line per frame)
            results = person_model.predict(frame, conf=CONF_THRESHOLD_PERSON, verbose=False)
            detections = []
            for box in results[0].boxes:
                cls_id = int(box.cls[0])
                if person_model.names[cls_id] == 'person':
                    xmin, ymin, xmax, ymax = map(int, box.xyxy[0].tolist())
                    detections.append([xmin, ymin, xmax, ymax, float(box.conf[0])])

            # Always give the tracker an (N, 5) array, also when N == 0. Frames
            # without detections used to be skipped entirely, which dropped
            # them from the output video and stopped tracks from ageing.
            detections = np.array(detections, dtype=float).reshape(-1, 5)

            # Track using ByteTrack
            online_tracks = tracker.update(detections, [frame_width, frame_height], [frame_width, frame_height])

            # Process tracks
            for track in online_tracks:
                if not track.is_activated:
                    continue

                track_id = track.track_id
                xmin, ymin, xmax, ymax = map(int, track.tlbr)

                # Ensure bounding box is within frame
                xmin, ymin = max(0, xmin), max(0, ymin)
                xmax, ymax = min(frame_width, xmax), min(frame_height, ymax)
                if xmax <= xmin or ymax <= ymin:
                    # Box lies outside the frame: nothing to crop or draw.
                    continue

                # Crop the person
                person_crop = frame[ymin:ymax, xmin:xmax]

                # Accessory detection
                detected_objects = process_accessory_detections(person_crop, models)

                # Update states
                update_accessory_states(track_id, detected_objects, person_states, HOLD_FRAMES)

                # Draw bounding box and labels (stacked upwards from the box top)
                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (255, 0, 0), 2)
                label_y = ymin - 10
                for obj, state_info in person_states[track_id].items():
                    color = (0, 255, 0) if state_info['state'] else (0, 0, 255)
                    label = f"{obj}: {'using' if state_info['state'] else 'removed'}"
                    cv2.putText(frame, label, (xmin, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
                    label_y -= 15

            out.write(frame)
            frame_count += 1
            if frame_count % 30 == 0:
                logging.info(f"Processed {frame_count} frames.")
    finally:
        # Release the capture and finalise the output file even if a frame fails.
        cap.release()
        out.release()
    print(f"Processed video saved at {os.path.abspath(output_video_path)}")

if __name__ == "__main__":
    main()

[1;30;43mGörüntülenen çıkış son 5000 satıra kısaltıldı.[0m
Speed: 2.0ms preprocess, 10.6ms inference, 1.6ms postprocess per image at shape (1, 3, 640, 384)

0: 640x224 1 Hardhat, 1 Mask, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 18.1ms
Speed: 1.6ms preprocess, 18.1ms inference, 1.8ms postprocess per image at shape (1, 3, 640, 224)

0: 640x224 1 human, 1 helmet, 9.0ms
Speed: 1.7ms preprocess, 9.0ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 224)

0: 640x224 1 Hardhat, 1 Mask, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 18.1ms
Speed: 1.8ms preprocess, 18.1ms inference, 3.5ms postprocess per image at shape (1, 3, 640, 224)

0: 640x384 1 person, 13.5ms
Speed: 2.1ms preprocess, 13.5ms inference, 1.6ms postprocess per image at shape (1, 3, 640, 384)

0: 640x224 1 Hardhat, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 18.1ms
Speed: 1.6ms preprocess, 18.1ms inference, 1.8ms postprocess per image at shape (1, 3, 640, 224)

0: 640x224 1 human, 1 helmet, 14.1ms
Speed: 3.0ms preprocess, 14.1ms

In [None]:
import os
import cv2
import torch
import numpy as np
import logging
from ultralytics import YOLO
from yolox.tracker.byte_tracker import BYTETracker
# Re-create the ``np.float`` alias (removed from recent NumPy releases).
# NOTE(review): presumably needed because the imported ByteTrack code still refers to it.
np.float = float
# Setup logging
logging.basicConfig(level=logging.INFO)

# Initialize YOLO models
device = 'cuda' if torch.cuda.is_available() else 'cpu'
person_model = YOLO('/content/ByteTrack/yolov8n.pt').to(device)  # Person detection
model1 = YOLO('/content/ByteTrack/ppe.pt').to(device)            # Accessory detection (ppe.pt)
model2 = YOLO('/content/ByteTrack/best-all.pt').to(device)       # Accessory detection (best-all.pt)
# ppe.pt used to be loaded a second time under this name; keep the name as an
# alias of model1 so the weights are only loaded once.
ppe_model = model1
# One entry per distinct network: listing ppe.pt twice only repeated the same
# inference on every crop without changing the merged detections.
# NOTE(review): best_hairnet_2.pt and safety-best.pt are downloaded earlier in
# the notebook but are not loaded here - confirm that is intended.
models = [model1, model2]

# Constants
CONF_THRESHOLD_PERSON = 0.8  # minimum confidence passed to the person detector
# Per-accessory minimum confidence; keys are the lower-case class names that are tracked.
CONF_THRESHOLDS = {'mask': 0.8, 'hair net': 0.8, 'hardhat': 0.8, 'safety vest': 0.8}
HOLD_FRAMES = 30     # value the per-accessory hold counter is reset to on a detection
person_states = {}   # track id -> per-accessory state, filled by update_accessory_states

# ByteTrack parameters
class ByteTrackArgs:
    """Attribute holder that is passed to ``BYTETracker`` as its settings object."""

    def __init__(self):
        # Threshold settings read by the tracker.
        self.track_thresh, self.match_thresh = 0.6, 0.8
        # Track buffer length and the MOT20 switch.
        self.track_buffer, self.mot20 = 30, False

# Module-level tracker instance; main() calls tracker.update() on it for every frame.
tracker_args = ByteTrackArgs()
tracker = BYTETracker(tracker_args, frame_rate=30)

# Accessory state updating
def update_accessory_states(person_id, detected_objects, person_states, hold_frames):
    """Refresh the accessory flags stored for ``person_id`` in ``person_states``.

    An accessory named in ``detected_objects`` is switched on and its hold
    counter is reset to ``hold_frames``. An accessory that is not named keeps
    its state while its counter counts down and is switched off once the
    counter is at zero. Unknown ids start with every accessory off. The
    function mutates ``person_states`` and returns nothing.
    """
    if person_id not in person_states:
        person_states[person_id] = {
            name: {'state': False, 'hold_counter': 0}
            for name in ('hardhat', 'safety vest', 'mask', 'hair net')
        }

    tracked = person_states[person_id]
    for name in tracked:
        info = tracked[name]
        if name in detected_objects:
            # Seen this frame: switch on and restart the hold window.
            info.update(state=True, hold_counter=hold_frames)
            continue
        if info['hold_counter'] > 0:
            info['hold_counter'] -= 1
        else:
            info['state'] = False


# Process accessory detections
def process_accessory_detections(person_crop, models):
    """Run every accessory model on one person crop and collect confident hits.

    Args:
        person_crop: Image region for one person (a slice of the frame).
        models: Iterable of detectors exposing ``predict`` and ``names``.

    Returns:
        Dict mapping an accessory name (a key of ``CONF_THRESHOLDS``) to the
        highest confidence seen for it across all models. Empty when the crop
        has no pixels or nothing reaches its threshold.
    """
    # A box clipped to the frame can collapse to zero area; there is nothing
    # to run inference on in that case.
    if getattr(person_crop, 'size', None) == 0:
        return {}

    detected_objects = {}
    for model in models:
        # verbose=False stops the per-crop log line that floods the cell output.
        results = model.predict(person_crop, verbose=False)
        for result in results:
            for box in result.boxes:
                cls_id = int(box.cls[0])
                # Normalise separators so labels such as 'hair-net' match the
                # 'hair net' key used in CONF_THRESHOLDS.
                cls_name = model.names[cls_id].lower().replace('-', ' ').replace('_', ' ')
                conf = float(box.conf[0])
                threshold = CONF_THRESHOLDS.get(cls_name)
                if threshold is not None and conf >= threshold:
                    detected_objects[cls_name] = max(detected_objects.get(cls_name, 0), conf)
    return detected_objects

# Main video processing function
def main():
    """Annotate the input video with per-person accessory states and save it.

    Each frame goes through ``person_model``; the 'person' boxes are passed to
    the module-level ByteTrack ``tracker``; every activated track is cropped,
    run through the accessory ``models``, and its entry in ``person_states`` is
    updated and drawn. Every frame that is read is written to the output,
    including frames without any person detection.

    Raises:
        RuntimeError: If the input video cannot be opened.
    """
    video_path = "/content/ByteTrack/original_video.mp4"
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise RuntimeError(f"Could not open video: {video_path}")
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # Keep the fractional rate (e.g. 29.97): truncating it to int changes the
    # playback speed of the output. Fall back to 30 if the file reports none.
    fps = cap.get(cv2.CAP_PROP_FPS) or 30.0

    # Configure output video
    output_video_path = "processed_video_bytetrack_noif.mp4"
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

    frame_count = 0
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Person detection (verbose=False avoids one log line per frame)
            results = person_model.predict(frame, conf=CONF_THRESHOLD_PERSON, verbose=False)
            detections = []
            for box in results[0].boxes:
                cls_id = int(box.cls[0])
                if person_model.names[cls_id] == 'person':
                    xmin, ymin, xmax, ymax = map(int, box.xyxy[0].tolist())
                    detections.append([xmin, ymin, xmax, ymax, float(box.conf[0])])

            # Always give the tracker an (N, 5) array, also when N == 0. Frames
            # without detections used to be skipped entirely, which dropped
            # them from the output video and stopped tracks from ageing.
            detections = np.array(detections, dtype=float).reshape(-1, 5)

            # Track using ByteTrack
            online_tracks = tracker.update(detections, [frame_width, frame_height], [frame_width, frame_height])

            # Process tracks
            for track in online_tracks:
                if not track.is_activated:
                    continue

                track_id = track.track_id
                xmin, ymin, xmax, ymax = map(int, track.tlbr)

                # Ensure bounding box is within frame
                xmin, ymin = max(0, xmin), max(0, ymin)
                xmax, ymax = min(frame_width, xmax), min(frame_height, ymax)
                if xmax <= xmin or ymax <= ymin:
                    # Box lies outside the frame: nothing to crop or draw.
                    continue

                # Crop the person
                person_crop = frame[ymin:ymax, xmin:xmax]

                # Accessory detection
                detected_objects = process_accessory_detections(person_crop, models)

                # Update states
                update_accessory_states(track_id, detected_objects, person_states, HOLD_FRAMES)

                # Draw bounding box and labels (stacked upwards from the box top)
                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (255, 0, 0), 2)
                label_y = ymin - 10
                for obj, state_info in person_states[track_id].items():
                    color = (0, 255, 0) if state_info['state'] else (0, 0, 255)
                    label = f"{obj}: {'using' if state_info['state'] else 'removed'}"
                    cv2.putText(frame, label, (xmin, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
                    label_y -= 15

            out.write(frame)
            frame_count += 1
            if frame_count % 30 == 0:
                logging.info(f"Processed {frame_count} frames.")
    finally:
        # Release the capture and finalise the output file even if a frame fails.
        cap.release()
        out.release()
    print(f"Processed video saved at {os.path.abspath(output_video_path)}")

if __name__ == "__main__":
    main()

[1;30;43mGörüntülenen çıkış son 5000 satıra kısaltıldı.[0m
Speed: 2.1ms preprocess, 8.7ms inference, 1.6ms postprocess per image at shape (1, 3, 640, 384)

0: 640x224 1 Hardhat, 1 Mask, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 18.6ms
Speed: 3.9ms preprocess, 18.6ms inference, 3.5ms postprocess per image at shape (1, 3, 640, 224)

0: 640x224 1 human, 1 helmet, 10.1ms
Speed: 1.5ms preprocess, 10.1ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 224)

0: 640x224 1 Hardhat, 1 Mask, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 19.3ms
Speed: 1.5ms preprocess, 19.3ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 224)

0: 640x384 1 person, 11.8ms
Speed: 2.1ms preprocess, 11.8ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 384)

0: 640x224 1 Hardhat, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 17.3ms
Speed: 1.6ms preprocess, 17.3ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 224)

0: 640x224 1 human, 1 helmet, 9.5ms
Speed: 1.5ms preprocess, 9.5ms 

In [None]:
import os
import cv2
import torch
import numpy as np
import logging
from ultralytics import YOLO
from yolox.tracker.byte_tracker import BYTETracker
# Re-create the ``np.float`` alias (removed from recent NumPy releases).
# NOTE(review): presumably needed because the imported ByteTrack code still refers to it.
np.float = float
# Setup logging
logging.basicConfig(level=logging.INFO)

# Initialize YOLO models
device = 'cuda' if torch.cuda.is_available() else 'cpu'
person_model = YOLO('/content/ByteTrack/yolov8n.pt').to(device)  # Person detection
model1 = YOLO('/content/ByteTrack/ppe.pt').to(device)            # Accessory detection (ppe.pt)
model2 = YOLO('/content/ByteTrack/best-all.pt').to(device)       # Accessory detection (best-all.pt)
# ppe.pt used to be loaded a second time under this name; keep the name as an
# alias of model1 so the weights are only loaded once.
ppe_model = model1
# One entry per distinct network: listing ppe.pt twice only repeated the same
# inference on every crop without changing the merged detections.
# NOTE(review): best_hairnet_2.pt and safety-best.pt are downloaded earlier in
# the notebook but are not loaded here - confirm that is intended.
models = [model1, model2]

# Constants
CONF_THRESHOLD_PERSON = 0.8  # minimum confidence passed to the person detector
# Per-accessory minimum confidence; keys are the lower-case class names that are tracked.
CONF_THRESHOLDS = {'mask': 0.8, 'hair net': 0.8, 'hardhat': 0.8, 'safety vest': 0.8}
HOLD_FRAMES = 30     # value the per-accessory hold counter is reset to on a detection
person_states = {}   # track id -> per-accessory state, filled by update_accessory_states

# ByteTrack parameters
class ByteTrackArgs:
    """Attribute holder that is passed to ``BYTETracker`` as its settings object."""

    def __init__(self):
        # Threshold settings read by the tracker.
        self.track_thresh, self.match_thresh = 0.6, 0.8
        # Track buffer length and the MOT20 switch.
        self.track_buffer, self.mot20 = 30, False

# Module-level tracker instance; main() calls tracker.update() on it for every frame.
tracker_args = ByteTrackArgs()
tracker = BYTETracker(tracker_args, frame_rate=30)

# Accessory state updating
def update_accessory_states(person_id, detected_objects, person_states, hold_frames):
    """Refresh the accessory flags stored for ``person_id`` in ``person_states``.

    An accessory named in ``detected_objects`` is switched on and its hold
    counter is reset to ``hold_frames``. An accessory that is not named keeps
    its state while its counter counts down and is switched off once the
    counter is at zero. Afterwards an active 'hardhat' also forces 'hair net'
    on. Unknown ids start with every accessory off. The function mutates
    ``person_states`` and returns nothing.
    """
    if person_id not in person_states:
        person_states[person_id] = {
            name: {'state': False, 'hold_counter': 0}
            for name in ('hardhat', 'safety vest', 'mask', 'hair net')
        }

    tracked = person_states[person_id]
    for name in tracked:
        info = tracked[name]
        if name in detected_objects:
            # Seen this frame: switch on and restart the hold window.
            info.update(state=True, hold_counter=hold_frames)
            continue
        if info['hold_counter'] > 0:
            info['hold_counter'] -= 1
        else:
            info['state'] = False

    # Consistency rule: a hardhat counts as a hairnet too.
    if tracked['hardhat']['state']:
        tracked['hair net']['state'] = True

# Process accessory detections
def process_accessory_detections(person_crop, models):
    """Run every accessory model on one person crop and collect confident hits.

    Args:
        person_crop: Image region for one person (a slice of the frame).
        models: Iterable of detectors exposing ``predict`` and ``names``.

    Returns:
        Dict mapping an accessory name (a key of ``CONF_THRESHOLDS``) to the
        highest confidence seen for it across all models. Empty when the crop
        has no pixels or nothing reaches its threshold.
    """
    # A box clipped to the frame can collapse to zero area; there is nothing
    # to run inference on in that case.
    if getattr(person_crop, 'size', None) == 0:
        return {}

    detected_objects = {}
    for model in models:
        # verbose=False stops the per-crop log line that floods the cell output.
        results = model.predict(person_crop, verbose=False)
        for result in results:
            for box in result.boxes:
                cls_id = int(box.cls[0])
                # Normalise separators so labels such as 'hair-net' match the
                # 'hair net' key used in CONF_THRESHOLDS.
                cls_name = model.names[cls_id].lower().replace('-', ' ').replace('_', ' ')
                conf = float(box.conf[0])
                threshold = CONF_THRESHOLDS.get(cls_name)
                if threshold is not None and conf >= threshold:
                    detected_objects[cls_name] = max(detected_objects.get(cls_name, 0), conf)
    return detected_objects

# Main video processing function
def main():
    """Annotate the input video with per-person accessory states and save it.

    Each frame goes through ``person_model``; the 'person' boxes are passed to
    the module-level ByteTrack ``tracker``; every activated track is cropped,
    run through the accessory ``models``, and its entry in ``person_states`` is
    updated and drawn. Every frame that is read is written to the output,
    including frames without any person detection.

    Raises:
        RuntimeError: If the input video cannot be opened.
    """
    video_path = "/content/ByteTrack/original_video.mp4"
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise RuntimeError(f"Could not open video: {video_path}")
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # Keep the fractional rate (e.g. 29.97): truncating it to int changes the
    # playback speed of the output. Fall back to 30 if the file reports none.
    fps = cap.get(cv2.CAP_PROP_FPS) or 30.0

    # Configure output video
    output_video_path = "processed_video_bytetrack_4.mp4"
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

    frame_count = 0
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Person detection (verbose=False avoids one log line per frame)
            results = person_model.predict(frame, conf=CONF_THRESHOLD_PERSON, verbose=False)
            detections = []
            for box in results[0].boxes:
                cls_id = int(box.cls[0])
                if person_model.names[cls_id] == 'person':
                    xmin, ymin, xmax, ymax = map(int, box.xyxy[0].tolist())
                    detections.append([xmin, ymin, xmax, ymax, float(box.conf[0])])

            # Always give the tracker an (N, 5) array, also when N == 0. Frames
            # without detections used to be skipped entirely, which dropped
            # them from the output video and stopped tracks from ageing.
            detections = np.array(detections, dtype=float).reshape(-1, 5)

            # Track using ByteTrack
            online_tracks = tracker.update(detections, [frame_width, frame_height], [frame_width, frame_height])

            # Process tracks
            for track in online_tracks:
                if not track.is_activated:
                    continue

                track_id = track.track_id
                xmin, ymin, xmax, ymax = map(int, track.tlbr)

                # Ensure bounding box is within frame
                xmin, ymin = max(0, xmin), max(0, ymin)
                xmax, ymax = min(frame_width, xmax), min(frame_height, ymax)
                if xmax <= xmin or ymax <= ymin:
                    # Box lies outside the frame: nothing to crop or draw.
                    continue

                # Crop the person
                person_crop = frame[ymin:ymax, xmin:xmax]

                # Accessory detection
                detected_objects = process_accessory_detections(person_crop, models)

                # Update states
                update_accessory_states(track_id, detected_objects, person_states, HOLD_FRAMES)

                # Draw bounding box and labels (stacked upwards from the box top)
                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (255, 0, 0), 2)
                label_y = ymin - 10
                for obj, state_info in person_states[track_id].items():
                    color = (0, 255, 0) if state_info['state'] else (0, 0, 255)
                    label = f"{obj}: {'using' if state_info['state'] else 'removed'}"
                    cv2.putText(frame, label, (xmin, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
                    label_y -= 15

            out.write(frame)
            frame_count += 1
            if frame_count % 30 == 0:
                logging.info(f"Processed {frame_count} frames.")
    finally:
        # Release the capture and finalise the output file even if a frame fails.
        cap.release()
        out.release()
    print(f"Processed video saved at {os.path.abspath(output_video_path)}")

if __name__ == "__main__":
    main()

[1;30;43mGörüntülenen çıkış son 5000 satıra kısaltıldı.[0m
Speed: 3.3ms preprocess, 9.2ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 384)

0: 640x224 1 Hardhat, 1 Mask, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 16.6ms
Speed: 4.3ms preprocess, 16.6ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 224)

0: 640x224 1 human, 1 helmet, 10.1ms
Speed: 1.6ms preprocess, 10.1ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 224)

0: 640x224 1 Hardhat, 1 Mask, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 15.8ms
Speed: 1.5ms preprocess, 15.8ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 224)

0: 640x384 1 person, 10.2ms
Speed: 2.1ms preprocess, 10.2ms inference, 1.6ms postprocess per image at shape (1, 3, 640, 384)

0: 640x224 1 Hardhat, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 15.8ms
Speed: 1.6ms preprocess, 15.8ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 224)

0: 640x224 1 human, 1 helmet, 9.6ms
Speed: 1.9ms preprocess, 9.6ms 

In [None]:
import os
import cv2
import torch
import numpy as np
import logging
from ultralytics import YOLO
from yolox.tracker.byte_tracker import BYTETracker
# The ByteTrack (yolox) tracker imported above presumably still refers to the
# `np.float` alias that NumPy 1.24 removed; restore it so the tracker can run.
np.float = float
# Setup logging
logging.basicConfig(level=logging.INFO)

# Initialize YOLO models
device = 'cuda' if torch.cuda.is_available() else 'cpu'
person_model = YOLO('/content/ByteTrack/yolov8n.pt').to(device)  # Person detection
model1 = YOLO('/content/ByteTrack/ppe.pt').to(device)            # Accessory detection
model2 = YOLO('/content/ByteTrack/best-all.pt').to(device)       # Accessory detection
# ppe.pt is already loaded as model1. Loading it a second time and listing it
# twice made every person crop go through the same weights twice for identical
# detections, so the name is kept as an alias and the list holds each model once.
ppe_model = model1
models = [model1, model2]

# Constants
CONF_THRESHOLD_PERSON = 0.8  # minimum confidence for a person detection
# Minimum confidence per accessory class (keys are lower-cased class names).
CONF_THRESHOLDS = {'mask': 0.8, 'hair net': 0.8, 'hardhat': 0.8, 'safety vest': 0.8}
HOLD_FRAMES = 30    # value written to an accessory's hold counter on detection
person_states = {}  # track id -> per-accessory state, filled by update_accessory_states

# ByteTrack parameters
class ByteTrackArgs:
    """Plain settings holder handed to ``BYTETracker`` right after this class.

    The four attributes are fixed values; what each one controls is defined by
    the ByteTrack implementation, not by this notebook.
    """

    def __init__(self):
        # Same four settings, stored as instance attributes in one step.
        self.__dict__.update(
            track_thresh=0.6,
            match_thresh=0.8,
            track_buffer=30,
            mot20=False,
        )

# Build the tracker once at module level from the settings holder; frame_rate is
# fixed at 30 here rather than read from the input video.
# NOTE(review): because `tracker` is a module-level object, calling main() again
# without re-running this cell reuses the same tracker instance.
tracker_args = ByteTrackArgs()
tracker = BYTETracker(tracker_args, frame_rate=30)

# Accessory state updating
def update_accessory_states(person_id, detected_objects, person_states, hold_frames):
    """Refresh one person's accessory flags inside ``person_states`` (in place).

    Args:
        person_id: Key identifying the person (the track id in this notebook).
        detected_objects: Container of accessory names seen for this person in
            the current frame; only membership is checked.
        person_states: Dict mapping person id -> accessory name ->
            ``{'state': bool, 'hold_counter': int}``. A missing person gets an
            all-``False`` entry first.
        hold_frames: Value the hold counter is reset to on every detection.

    An accessory that is not detected keeps its state while its counter is
    counted down to zero and is switched off on the following miss. A hardhat in
    the "on" state always forces 'hair net' to "on" as well.
    """
    tracked_items = ('hardhat', 'safety vest', 'mask', 'hair net')
    accessories = person_states.setdefault(
        person_id,
        {item: {'state': False, 'hold_counter': 0} for item in tracked_items},
    )

    for name, entry in accessories.items():
        if name in detected_objects:
            # Seen in this frame: switch on and restart the hold window.
            entry['state'] = True
            entry['hold_counter'] = hold_frames
            continue
        if entry['hold_counter'] > 0:
            # Missed, but still inside the hold window: keep the current state.
            entry['hold_counter'] -= 1
            continue
        entry['state'] = False

    # Enforce logical consistency: hardhat implies hairnet
    if accessories['hardhat']['state']:
        accessories['hair net']['state'] = True

# Process accessory detections
def process_accessory_detections(person_crop, models, thresholds=None):
    """Run every accessory model on one person crop and keep the confident hits.

    Args:
        person_crop: Image region of one person, passed to ``model.predict``.
        models: Iterable of detectors exposing ``predict(...)`` and ``names``.
        thresholds: Optional mapping of lower-cased class name -> minimum
            confidence. Defaults to the module-level ``CONF_THRESHOLDS``.

    Returns:
        Dict mapping each accepted class name to the highest confidence seen for
        it across all models. Empty when the crop has no pixels.
    """
    if thresholds is None:
        thresholds = CONF_THRESHOLDS

    # A crop without pixels (degenerate or off-frame box) cannot be inferred on.
    if getattr(person_crop, 'size', None) == 0:
        return {}

    # The detectors name the same item differently: this notebook's prediction
    # logs show 'helmet' from one model and 'Hardhat' from another. Map the
    # variants onto the keys used by the thresholds and the state table so that
    # such detections are not silently dropped.
    aliases = {'helmet': 'hardhat', 'hairnet': 'hair net'}

    detected_objects = {}
    for model in models:
        # verbose=False stops Ultralytics from printing a line per call, which
        # otherwise floods the cell output.
        for result in model.predict(person_crop, verbose=False):
            for box in result.boxes:
                raw_name = model.names[int(box.cls[0])].lower()
                cls_name = aliases.get(raw_name, raw_name)
                conf = float(box.conf[0])
                if cls_name in thresholds and conf >= thresholds[cls_name]:
                    detected_objects[cls_name] = max(detected_objects.get(cls_name, 0), conf)
    return detected_objects

# Main video processing function
def main():
    """Annotate every frame of the input video with accessory states and save it.

    Per frame: detect people with ``person_model``, track them with the
    module-level ``tracker``, run ``process_accessory_detections`` on each active
    track's crop, smooth the result with ``update_accessory_states`` and draw the
    box plus one "using"/"removed" label per accessory. Frames without any
    person detection are written unchanged, so the output keeps the frame count
    of the input.
    """
    video_path = "/content/ByteTrack/original_video.mp4"
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        # Report the problem instead of silently writing an empty output file.
        logging.error(f"Could not open input video: {video_path}")
        return

    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # Keep fractional rates such as 29.97 (truncating changes playback speed);
    # fall back to 30 when the container reports no rate.
    fps = cap.get(cv2.CAP_PROP_FPS) or 30.0

    # Configure output video
    output_video_path = "processed_video_bytetrack_5.mp4"
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

    frame_count = 0
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Person detection (verbose=False avoids one log line per frame)
            results = person_model.predict(frame, conf=CONF_THRESHOLD_PERSON, verbose=False)
            detections = []
            for box in results[0].boxes:
                cls_id = int(box.cls[0])
                if person_model.names[cls_id] == 'person':
                    xmin, ymin, xmax, ymax = map(int, box.xyxy[0].tolist())
                    detections.append([xmin, ymin, xmax, ymax, float(box.conf[0])])

            # The tracker is only updated when there is at least one detection;
            # the frame is still written to the output either way.
            online_tracks = []
            if detections:
                online_tracks = tracker.update(
                    np.array(detections),
                    [frame_width, frame_height],
                    [frame_width, frame_height],
                )

            # Process tracks
            for track in online_tracks:
                if not track.is_activated:
                    continue

                track_id = track.track_id
                xmin, ymin, xmax, ymax = map(int, track.tlbr)

                # Ensure bounding box is within frame
                xmin, ymin = max(0, xmin), max(0, ymin)
                xmax, ymax = min(frame_width, xmax), min(frame_height, ymax)
                if xmax <= xmin or ymax <= ymin:
                    # Nothing is left of the box after clamping; an empty crop
                    # cannot be passed to the accessory models.
                    continue

                # Crop the person
                person_crop = frame[ymin:ymax, xmin:xmax]

                # Accessory detection
                detected_objects = process_accessory_detections(person_crop, models)

                # Update states
                update_accessory_states(track_id, detected_objects, person_states, HOLD_FRAMES)

                # Draw bounding box and labels (labels stack upwards from the box top)
                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (255, 0, 0), 2)
                label_y = ymin - 10
                for obj, state_info in person_states[track_id].items():
                    color = (0, 255, 0) if state_info['state'] else (0, 0, 255)
                    label = f"{obj}: {'using' if state_info['state'] else 'removed'}"
                    cv2.putText(frame, label, (xmin, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
                    label_y -= 15

            out.write(frame)
            frame_count += 1
            if frame_count % 30 == 0:
                logging.info(f"Processed {frame_count} frames.")
    finally:
        # Release both handles even if a frame fails, so the partial output is finalised.
        cap.release()
        out.release()

    print(f"Processed video saved at {os.path.abspath(output_video_path)}")

# Inside a notebook cell __name__ is "__main__", so the pipeline runs when the cell runs.
if __name__ == "__main__":
    main()

In [None]:
import os
import cv2
import torch
import numpy as np
import logging
from ultralytics import YOLO
from yolox.tracker.byte_tracker import BYTETracker
# The ByteTrack (yolox) tracker imported above presumably still refers to the
# `np.float` alias that NumPy 1.24 removed; restore it so the tracker can run.
np.float = float
# Setup logging
logging.basicConfig(level=logging.INFO)

# Initialize YOLO models
device = 'cuda' if torch.cuda.is_available() else 'cpu'
person_model = YOLO('/content/ByteTrack/yolov8n.pt').to(device)  # Person detection
model1 = YOLO('/content/ByteTrack/ppe.pt').to(device)            # Accessory detection
model2 = YOLO('/content/ByteTrack/best-all.pt').to(device)       # Accessory detection
# ppe.pt is already loaded as model1. Loading it a second time and listing it
# twice made every person crop go through the same weights twice for identical
# detections, so the name is kept as an alias and the list holds each model once.
ppe_model = model1
models = [model1, model2]

# Constants
CONF_THRESHOLD_PERSON = 0.8  # minimum confidence for a person detection
# Minimum confidence per accessory class (keys are lower-cased class names).
CONF_THRESHOLDS = {'mask': 0.7, 'hair net': 0.7, 'hardhat': 0.7, 'safety vest': 0.8}
HOLD_FRAMES = 30    # value written to an accessory's hold counter on detection
person_states = {}  # track id -> per-accessory state, filled by update_accessory_states

# ByteTrack parameters
class ByteTrackArgs:
    """Plain settings holder handed to ``BYTETracker`` right after this class.

    The four attributes are fixed values; what each one controls is defined by
    the ByteTrack implementation, not by this notebook.
    """

    def __init__(self):
        # Same four settings, stored as instance attributes in one step.
        self.__dict__.update(
            track_thresh=0.6,
            match_thresh=0.8,
            track_buffer=30,
            mot20=False,
        )

# Build the tracker once at module level from the settings holder; frame_rate is
# fixed at 30 here rather than read from the input video.
# NOTE(review): because `tracker` is a module-level object, calling main() again
# without re-running this cell reuses the same tracker instance.
tracker_args = ByteTrackArgs()
tracker = BYTETracker(tracker_args, frame_rate=30)

# Accessory state updating
def update_accessory_states(person_id, detected_objects, person_states, hold_frames):
    """Refresh one person's accessory flags inside ``person_states`` (in place).

    Args:
        person_id: Key identifying the person (the track id in this notebook).
        detected_objects: Container of accessory names seen for this person in
            the current frame; only membership is checked.
        person_states: Dict mapping person id -> accessory name ->
            ``{'state': bool, 'hold_counter': int}``. A missing person gets an
            all-``False`` entry first.
        hold_frames: Value the hold counter is reset to on every detection.

    An accessory that is not detected keeps its state while its counter is
    counted down to zero and is switched off on the following miss.

    In this variant 'hair net' is driven only by its own detections: a worn
    hardhat does not force it on. The former trailing check could only fire when
    'hair net' was already in ``detected_objects`` (and therefore already
    switched on by the loop), so it was a no-op that merely added a dependency
    on the global ``CONF_THRESHOLDS``; it has been removed.
    """
    if person_id not in person_states:
        person_states[person_id] = {
            'hardhat': {'state': False, 'hold_counter': 0},
            'safety vest': {'state': False, 'hold_counter': 0},
            'mask': {'state': False, 'hold_counter': 0},
            'hair net': {'state': False, 'hold_counter': 0},
        }

    for obj, state_info in person_states[person_id].items():
        if obj in detected_objects:
            # Seen in this frame: switch on and restart the hold window.
            state_info['hold_counter'] = hold_frames
            state_info['state'] = True
        elif state_info['hold_counter'] > 0:
            # Missed, but still inside the hold window: keep the current state.
            state_info['hold_counter'] -= 1
        else:
            state_info['state'] = False


# Process accessory detections
def process_accessory_detections(person_crop, models, thresholds=None):
    """Run every accessory model on one person crop and keep the confident hits.

    Args:
        person_crop: Image region of one person, passed to ``model.predict``.
        models: Iterable of detectors exposing ``predict(...)`` and ``names``.
        thresholds: Optional mapping of lower-cased class name -> minimum
            confidence. Defaults to the module-level ``CONF_THRESHOLDS``.

    Returns:
        Dict mapping each accepted class name to the highest confidence seen for
        it across all models. Empty when the crop has no pixels.
    """
    if thresholds is None:
        thresholds = CONF_THRESHOLDS

    # A crop without pixels (degenerate or off-frame box) cannot be inferred on.
    if getattr(person_crop, 'size', None) == 0:
        return {}

    # The detectors name the same item differently: this notebook's prediction
    # logs show 'helmet' from one model and 'Hardhat' from another. Map the
    # variants onto the keys used by the thresholds and the state table so that
    # such detections are not silently dropped.
    aliases = {'helmet': 'hardhat', 'hairnet': 'hair net'}

    detected_objects = {}
    for model in models:
        # verbose=False stops Ultralytics from printing a line per call, which
        # otherwise floods the cell output.
        for result in model.predict(person_crop, verbose=False):
            for box in result.boxes:
                raw_name = model.names[int(box.cls[0])].lower()
                cls_name = aliases.get(raw_name, raw_name)
                conf = float(box.conf[0])
                if cls_name in thresholds and conf >= thresholds[cls_name]:
                    detected_objects[cls_name] = max(detected_objects.get(cls_name, 0), conf)
    return detected_objects

# Main video processing function
def main():
    """Annotate every frame of the input video with accessory states and save it.

    Per frame: detect people with ``person_model``, track them with the
    module-level ``tracker``, run ``process_accessory_detections`` on each active
    track's crop, smooth the result with ``update_accessory_states`` and draw the
    box plus one "using"/"removed" label per accessory. Frames without any
    person detection are written unchanged, so the output keeps the frame count
    of the input.
    """
    video_path = "/content/ByteTrack/original_video.mp4"
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        # Report the problem instead of silently writing an empty output file.
        logging.error(f"Could not open input video: {video_path}")
        return

    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # Keep fractional rates such as 29.97 (truncating changes playback speed);
    # fall back to 30 when the container reports no rate.
    fps = cap.get(cv2.CAP_PROP_FPS) or 30.0

    # Configure output video
    output_video_path = "processed_video_bytetrack_3.mp4"
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

    frame_count = 0
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Person detection (verbose=False avoids one log line per frame)
            results = person_model.predict(frame, conf=CONF_THRESHOLD_PERSON, verbose=False)
            detections = []
            for box in results[0].boxes:
                cls_id = int(box.cls[0])
                if person_model.names[cls_id] == 'person':
                    xmin, ymin, xmax, ymax = map(int, box.xyxy[0].tolist())
                    detections.append([xmin, ymin, xmax, ymax, float(box.conf[0])])

            # The tracker is only updated when there is at least one detection;
            # the frame is still written to the output either way.
            online_tracks = []
            if detections:
                online_tracks = tracker.update(
                    np.array(detections),
                    [frame_width, frame_height],
                    [frame_width, frame_height],
                )

            # Process tracks
            for track in online_tracks:
                if not track.is_activated:
                    continue

                track_id = track.track_id
                xmin, ymin, xmax, ymax = map(int, track.tlbr)

                # Ensure bounding box is within frame
                xmin, ymin = max(0, xmin), max(0, ymin)
                xmax, ymax = min(frame_width, xmax), min(frame_height, ymax)
                if xmax <= xmin or ymax <= ymin:
                    # Nothing is left of the box after clamping; an empty crop
                    # cannot be passed to the accessory models.
                    continue

                # Crop the person
                person_crop = frame[ymin:ymax, xmin:xmax]

                # Accessory detection
                detected_objects = process_accessory_detections(person_crop, models)

                # Update states
                update_accessory_states(track_id, detected_objects, person_states, HOLD_FRAMES)

                # Draw bounding box and labels (labels stack upwards from the box top)
                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (255, 0, 0), 2)
                label_y = ymin - 10
                for obj, state_info in person_states[track_id].items():
                    color = (0, 255, 0) if state_info['state'] else (0, 0, 255)
                    label = f"{obj}: {'using' if state_info['state'] else 'removed'}"
                    cv2.putText(frame, label, (xmin, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
                    label_y -= 15

            out.write(frame)
            frame_count += 1
            if frame_count % 30 == 0:
                logging.info(f"Processed {frame_count} frames.")
    finally:
        # Release both handles even if a frame fails, so the partial output is finalised.
        cap.release()
        out.release()

    print(f"Processed video saved at {os.path.abspath(output_video_path)}")

# Inside a notebook cell __name__ is "__main__", so the pipeline runs when the cell runs.
if __name__ == "__main__":
    main()

[1;30;43mGörüntülenen çıkış son 5000 satıra kısaltıldı.[0m
Speed: 2.2ms preprocess, 12.6ms inference, 1.6ms postprocess per image at shape (1, 3, 640, 384)

0: 640x224 1 Hardhat, 1 Mask, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 21.8ms
Speed: 1.9ms preprocess, 21.8ms inference, 1.8ms postprocess per image at shape (1, 3, 640, 224)

0: 640x224 1 human, 1 helmet, 13.4ms
Speed: 1.5ms preprocess, 13.4ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 224)

0: 640x224 1 Hardhat, 1 Mask, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 21.6ms
Speed: 1.6ms preprocess, 21.6ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 224)

0: 640x384 1 person, 17.1ms
Speed: 4.1ms preprocess, 17.1ms inference, 7.9ms postprocess per image at shape (1, 3, 640, 384)

0: 640x224 1 Hardhat, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 25.2ms
Speed: 1.8ms preprocess, 25.2ms inference, 1.6ms postprocess per image at shape (1, 3, 640, 224)

0: 640x224 1 human, 1 helmet, 11.8ms
Speed: 1.8ms preprocess, 11.8

In [None]:
import os
import cv2
import torch
import numpy as np
import logging
from ultralytics import YOLO
from yolox.tracker.byte_tracker import BYTETracker
# The ByteTrack (yolox) tracker imported above presumably still refers to the
# `np.float` alias that NumPy 1.24 removed; restore it so the tracker can run.
np.float = float
# Setup logging
logging.basicConfig(level=logging.INFO)

# Initialize YOLO models
device = 'cuda' if torch.cuda.is_available() else 'cpu'
person_model = YOLO('/content/ByteTrack/yolov8n.pt').to(device)  # Person detection
model1 = YOLO('/content/ByteTrack/ppe.pt').to(device)            # Accessory detection
model2 = YOLO('/content/ByteTrack/best-all.pt').to(device)       # Accessory detection
# ppe.pt is already loaded as model1. Loading it a second time and listing it
# twice made every person crop go through the same weights twice for identical
# detections, so the name is kept as an alias and the list holds each model once.
ppe_model = model1
models = [model1, model2]

# Constants
CONF_THRESHOLD_PERSON = 0.8  # minimum confidence for a person detection
# Minimum confidence per accessory class (keys are lower-cased class names).
CONF_THRESHOLDS = {'mask': 0.5, 'hair net': 0.5, 'hardhat': 0.7, 'safety vest': 0.75}
HOLD_FRAMES = 30    # value written to an accessory's hold counter on detection
person_states = {}  # track id -> per-accessory state, filled by update_accessory_states

# ByteTrack parameters
class ByteTrackArgs:
    """Plain settings holder handed to ``BYTETracker`` right after this class.

    The four attributes are fixed values; what each one controls is defined by
    the ByteTrack implementation, not by this notebook.
    """

    def __init__(self):
        # Same four settings, stored as instance attributes in one step.
        self.__dict__.update(
            track_thresh=0.6,
            match_thresh=0.8,
            track_buffer=30,
            mot20=False,
        )

# Build the tracker once at module level from the settings holder; frame_rate is
# fixed at 30 here rather than read from the input video.
# NOTE(review): because `tracker` is a module-level object, calling main() again
# without re-running this cell reuses the same tracker instance.
tracker_args = ByteTrackArgs()
tracker = BYTETracker(tracker_args, frame_rate=30)

# Accessory state updating
def update_accessory_states(person_id, detected_objects, person_states, hold_frames):
    """Refresh one person's accessory flags inside ``person_states`` (in place).

    Args:
        person_id: Key identifying the person (the track id in this notebook).
        detected_objects: Container of accessory names seen for this person in
            the current frame; only membership is checked.
        person_states: Dict mapping person id -> accessory name ->
            ``{'state': bool, 'hold_counter': int}``. A missing person gets an
            all-``False`` entry first.
        hold_frames: Value the hold counter is reset to on every detection.

    An accessory that is not detected keeps its state while its counter is
    counted down to zero and is switched off on the following miss.

    In this variant 'hair net' is driven only by its own detections: a worn
    hardhat does not force it on. The former trailing check could only fire when
    'hair net' was already in ``detected_objects`` (and therefore already
    switched on by the loop), so it was a no-op that merely added a dependency
    on the global ``CONF_THRESHOLDS``; it has been removed.
    """
    if person_id not in person_states:
        person_states[person_id] = {
            'hardhat': {'state': False, 'hold_counter': 0},
            'safety vest': {'state': False, 'hold_counter': 0},
            'mask': {'state': False, 'hold_counter': 0},
            'hair net': {'state': False, 'hold_counter': 0},
        }

    for obj, state_info in person_states[person_id].items():
        if obj in detected_objects:
            # Seen in this frame: switch on and restart the hold window.
            state_info['hold_counter'] = hold_frames
            state_info['state'] = True
        elif state_info['hold_counter'] > 0:
            # Missed, but still inside the hold window: keep the current state.
            state_info['hold_counter'] -= 1
        else:
            state_info['state'] = False


# Process accessory detections
def process_accessory_detections(person_crop, models, thresholds=None):
    """Run every accessory model on one person crop and keep the confident hits.

    Args:
        person_crop: Image region of one person, passed to ``model.predict``.
        models: Iterable of detectors exposing ``predict(...)`` and ``names``.
        thresholds: Optional mapping of lower-cased class name -> minimum
            confidence. Defaults to the module-level ``CONF_THRESHOLDS``.

    Returns:
        Dict mapping each accepted class name to the highest confidence seen for
        it across all models. Empty when the crop has no pixels.
    """
    if thresholds is None:
        thresholds = CONF_THRESHOLDS

    # A crop without pixels (degenerate or off-frame box) cannot be inferred on.
    if getattr(person_crop, 'size', None) == 0:
        return {}

    # The detectors name the same item differently: this notebook's prediction
    # logs show 'helmet' from one model and 'Hardhat' from another. Map the
    # variants onto the keys used by the thresholds and the state table so that
    # such detections are not silently dropped.
    aliases = {'helmet': 'hardhat', 'hairnet': 'hair net'}

    detected_objects = {}
    for model in models:
        # verbose=False stops Ultralytics from printing a line per call, which
        # otherwise floods the cell output.
        for result in model.predict(person_crop, verbose=False):
            for box in result.boxes:
                raw_name = model.names[int(box.cls[0])].lower()
                cls_name = aliases.get(raw_name, raw_name)
                conf = float(box.conf[0])
                if cls_name in thresholds and conf >= thresholds[cls_name]:
                    detected_objects[cls_name] = max(detected_objects.get(cls_name, 0), conf)
    return detected_objects

# Main video processing function
def main():
    """Annotate every frame of the input video with accessory states and save it.

    Per frame: detect people with ``person_model``, track them with the
    module-level ``tracker``, run ``process_accessory_detections`` on each active
    track's crop, smooth the result with ``update_accessory_states`` and draw the
    box plus one "using"/"removed" label per accessory. Frames without any
    person detection are written unchanged, so the output keeps the frame count
    of the input.
    """
    video_path = "/content/ByteTrack/original_video.mp4"
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        # Report the problem instead of silently writing an empty output file.
        logging.error(f"Could not open input video: {video_path}")
        return

    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # Keep fractional rates such as 29.97 (truncating changes playback speed);
    # fall back to 30 when the container reports no rate.
    fps = cap.get(cv2.CAP_PROP_FPS) or 30.0

    # Configure output video
    output_video_path = "processed_video_bytetrack_2.mp4"
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

    frame_count = 0
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Person detection (verbose=False avoids one log line per frame)
            results = person_model.predict(frame, conf=CONF_THRESHOLD_PERSON, verbose=False)
            detections = []
            for box in results[0].boxes:
                cls_id = int(box.cls[0])
                if person_model.names[cls_id] == 'person':
                    xmin, ymin, xmax, ymax = map(int, box.xyxy[0].tolist())
                    detections.append([xmin, ymin, xmax, ymax, float(box.conf[0])])

            # The tracker is only updated when there is at least one detection;
            # the frame is still written to the output either way.
            online_tracks = []
            if detections:
                online_tracks = tracker.update(
                    np.array(detections),
                    [frame_width, frame_height],
                    [frame_width, frame_height],
                )

            # Process tracks
            for track in online_tracks:
                if not track.is_activated:
                    continue

                track_id = track.track_id
                xmin, ymin, xmax, ymax = map(int, track.tlbr)

                # Ensure bounding box is within frame
                xmin, ymin = max(0, xmin), max(0, ymin)
                xmax, ymax = min(frame_width, xmax), min(frame_height, ymax)
                if xmax <= xmin or ymax <= ymin:
                    # Nothing is left of the box after clamping; an empty crop
                    # cannot be passed to the accessory models.
                    continue

                # Crop the person
                person_crop = frame[ymin:ymax, xmin:xmax]

                # Accessory detection
                detected_objects = process_accessory_detections(person_crop, models)

                # Update states
                update_accessory_states(track_id, detected_objects, person_states, HOLD_FRAMES)

                # Draw bounding box and labels (labels stack upwards from the box top)
                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (255, 0, 0), 2)
                label_y = ymin - 10
                for obj, state_info in person_states[track_id].items():
                    color = (0, 255, 0) if state_info['state'] else (0, 0, 255)
                    label = f"{obj}: {'using' if state_info['state'] else 'removed'}"
                    cv2.putText(frame, label, (xmin, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
                    label_y -= 15

            out.write(frame)
            frame_count += 1
            if frame_count % 30 == 0:
                logging.info(f"Processed {frame_count} frames.")
    finally:
        # Release both handles even if a frame fails, so the partial output is finalised.
        cap.release()
        out.release()

    print(f"Processed video saved at {os.path.abspath(output_video_path)}")

# Inside a notebook cell __name__ is "__main__", so the pipeline runs when the cell runs.
if __name__ == "__main__":
    main()

[1;30;43mGörüntülenen çıkış son 5000 satıra kısaltıldı.[0m
Speed: 2.2ms preprocess, 9.1ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 384)

0: 640x224 1 Hardhat, 1 Mask, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 17.3ms
Speed: 2.1ms preprocess, 17.3ms inference, 1.2ms postprocess per image at shape (1, 3, 640, 224)

0: 640x224 1 human, 1 helmet, 10.6ms
Speed: 2.0ms preprocess, 10.6ms inference, 1.6ms postprocess per image at shape (1, 3, 640, 224)

0: 640x224 1 Hardhat, 1 Mask, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 17.8ms
Speed: 1.8ms preprocess, 17.8ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 224)

0: 640x384 1 person, 11.6ms
Speed: 2.4ms preprocess, 11.6ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 384)

0: 640x224 1 Hardhat, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 16.9ms
Speed: 2.0ms preprocess, 16.9ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 224)

0: 640x224 1 human, 1 helmet, 10.5ms
Speed: 1.8ms preprocess, 10.5m

In [None]:
import os
import cv2
import torch
import numpy as np
import logging
from ultralytics import YOLO
from yolox.tracker.byte_tracker import BYTETracker
# The ByteTrack (yolox) tracker imported above presumably still refers to the
# `np.float` alias that NumPy 1.24 removed; restore it so the tracker can run.
np.float = float
# Setup logging
logging.basicConfig(level=logging.INFO)

# Initialize YOLO models
device = 'cuda' if torch.cuda.is_available() else 'cpu'
person_model = YOLO('/content/ByteTrack/yolov8n.pt').to(device)  # Person detection
model1 = YOLO('/content/ByteTrack/ppe.pt').to(device)            # Accessory detection
model2 = YOLO('/content/ByteTrack/best-all.pt').to(device)       # Accessory detection
# ppe.pt is already loaded as model1. Loading it a second time and listing it
# twice made every person crop go through the same weights twice for identical
# detections, so the name is kept as an alias and the list holds each model once.
ppe_model = model1
models = [model1, model2]

# Constants
CONF_THRESHOLD_PERSON = 0.8  # minimum confidence for a person detection
# Minimum confidence per accessory class (keys are lower-cased class names).
CONF_THRESHOLDS = {'mask': 0.7, 'hair net': 0.7, 'hardhat': 0.7, 'safety vest': 0.8}
HOLD_FRAMES = 30    # value written to an accessory's hold counter on detection
person_states = {}  # track id -> per-accessory state, filled by update_accessory_states

# ByteTrack parameters
class ByteTrackArgs:
    """Plain settings holder handed to ``BYTETracker`` right after this class.

    The four attributes are fixed values; what each one controls is defined by
    the ByteTrack implementation, not by this notebook.
    """

    def __init__(self):
        # Same four settings, stored as instance attributes in one step.
        self.__dict__.update(
            track_thresh=0.6,
            match_thresh=0.8,
            track_buffer=30,
            mot20=False,
        )

# Build the tracker once at module level from the settings holder; frame_rate is
# fixed at 30 here rather than read from the input video.
# NOTE(review): because `tracker` is a module-level object, calling main() again
# without re-running this cell reuses the same tracker instance.
tracker_args = ByteTrackArgs()
tracker = BYTETracker(tracker_args, frame_rate=30)

# Accessory state updating
def update_accessory_states(person_id, detected_objects, person_states, hold_frames):
    """Refresh one person's accessory flags inside ``person_states`` (in place).

    Args:
        person_id: Key identifying the person (the track id in this notebook).
        detected_objects: Container of accessory names seen for this person in
            the current frame; only membership is checked.
        person_states: Dict mapping person id -> accessory name ->
            ``{'state': bool, 'hold_counter': int}``. A missing person gets an
            all-``False`` entry first.
        hold_frames: Value the hold counter is reset to on every detection.

    An accessory that is not detected keeps its state while its counter is
    counted down to zero and is switched off on the following miss. A hardhat in
    the "on" state always forces 'hair net' to "on" as well.
    """
    tracked_items = ('hardhat', 'safety vest', 'mask', 'hair net')
    accessories = person_states.setdefault(
        person_id,
        {item: {'state': False, 'hold_counter': 0} for item in tracked_items},
    )

    for name, entry in accessories.items():
        if name in detected_objects:
            # Seen in this frame: switch on and restart the hold window.
            entry['state'] = True
            entry['hold_counter'] = hold_frames
            continue
        if entry['hold_counter'] > 0:
            # Missed, but still inside the hold window: keep the current state.
            entry['hold_counter'] -= 1
            continue
        entry['state'] = False

    # Enforce logical consistency: hardhat implies hairnet
    if accessories['hardhat']['state']:
        accessories['hair net']['state'] = True

# Process accessory detections
def process_accessory_detections(person_crop, models, thresholds=None):
    """Run every accessory model on one person crop and keep the confident hits.

    Args:
        person_crop: Image region of one person, passed to ``model.predict``.
        models: Iterable of detectors exposing ``predict(...)`` and ``names``.
        thresholds: Optional mapping of lower-cased class name -> minimum
            confidence. Defaults to the module-level ``CONF_THRESHOLDS``.

    Returns:
        Dict mapping each accepted class name to the highest confidence seen for
        it across all models. Empty when the crop has no pixels.
    """
    if thresholds is None:
        thresholds = CONF_THRESHOLDS

    # A crop without pixels (degenerate or off-frame box) cannot be inferred on.
    if getattr(person_crop, 'size', None) == 0:
        return {}

    # The detectors name the same item differently: this notebook's prediction
    # logs show 'helmet' from one model and 'Hardhat' from another. Map the
    # variants onto the keys used by the thresholds and the state table so that
    # such detections are not silently dropped.
    aliases = {'helmet': 'hardhat', 'hairnet': 'hair net'}

    detected_objects = {}
    for model in models:
        # verbose=False stops Ultralytics from printing a line per call, which
        # otherwise floods the cell output.
        for result in model.predict(person_crop, verbose=False):
            for box in result.boxes:
                raw_name = model.names[int(box.cls[0])].lower()
                cls_name = aliases.get(raw_name, raw_name)
                conf = float(box.conf[0])
                if cls_name in thresholds and conf >= thresholds[cls_name]:
                    detected_objects[cls_name] = max(detected_objects.get(cls_name, 0), conf)
    return detected_objects

# Main video processing function
def main():
    """Annotate every frame of the input video with accessory states and save it.

    Per frame: detect people with ``person_model``, track them with the
    module-level ``tracker``, run ``process_accessory_detections`` on each active
    track's crop, smooth the result with ``update_accessory_states`` and draw the
    box plus one "using"/"removed" label per accessory. Frames without any
    person detection are written unchanged, so the output keeps the frame count
    of the input.
    """
    video_path = "/content/ByteTrack/original_video.mp4"
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        # Report the problem instead of silently writing an empty output file.
        logging.error(f"Could not open input video: {video_path}")
        return

    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # Keep fractional rates such as 29.97 (truncating changes playback speed);
    # fall back to 30 when the container reports no rate.
    fps = cap.get(cv2.CAP_PROP_FPS) or 30.0

    # Configure output video
    output_video_path = "processed_video_bytetrack_1.mp4"
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

    frame_count = 0
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Person detection (verbose=False avoids one log line per frame)
            results = person_model.predict(frame, conf=CONF_THRESHOLD_PERSON, verbose=False)
            detections = []
            for box in results[0].boxes:
                cls_id = int(box.cls[0])
                if person_model.names[cls_id] == 'person':
                    xmin, ymin, xmax, ymax = map(int, box.xyxy[0].tolist())
                    detections.append([xmin, ymin, xmax, ymax, float(box.conf[0])])

            # The tracker is only updated when there is at least one detection;
            # the frame is still written to the output either way.
            online_tracks = []
            if detections:
                online_tracks = tracker.update(
                    np.array(detections),
                    [frame_width, frame_height],
                    [frame_width, frame_height],
                )

            # Process tracks
            for track in online_tracks:
                if not track.is_activated:
                    continue

                track_id = track.track_id
                xmin, ymin, xmax, ymax = map(int, track.tlbr)

                # Ensure bounding box is within frame
                xmin, ymin = max(0, xmin), max(0, ymin)
                xmax, ymax = min(frame_width, xmax), min(frame_height, ymax)
                if xmax <= xmin or ymax <= ymin:
                    # Nothing is left of the box after clamping; an empty crop
                    # cannot be passed to the accessory models.
                    continue

                # Crop the person
                person_crop = frame[ymin:ymax, xmin:xmax]

                # Accessory detection
                detected_objects = process_accessory_detections(person_crop, models)

                # Update states
                update_accessory_states(track_id, detected_objects, person_states, HOLD_FRAMES)

                # Draw bounding box and labels (labels stack upwards from the box top)
                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (255, 0, 0), 2)
                label_y = ymin - 10
                for obj, state_info in person_states[track_id].items():
                    color = (0, 255, 0) if state_info['state'] else (0, 0, 255)
                    label = f"{obj}: {'using' if state_info['state'] else 'removed'}"
                    cv2.putText(frame, label, (xmin, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
                    label_y -= 15

            out.write(frame)
            frame_count += 1
            if frame_count % 30 == 0:
                logging.info(f"Processed {frame_count} frames.")
    finally:
        # Release both handles even if a frame fails, so the partial output is finalised.
        cap.release()
        out.release()

    print(f"Processed video saved at {os.path.abspath(output_video_path)}")

# Inside a notebook cell __name__ is "__main__", so the pipeline runs when the cell runs.
if __name__ == "__main__":
    main()

[1;30;43mGörüntülenen çıkış son 5000 satıra kısaltıldı.[0m
Speed: 1.9ms preprocess, 10.2ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 384)

0: 640x224 1 Hardhat, 1 Mask, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 21.8ms
Speed: 1.6ms preprocess, 21.8ms inference, 1.8ms postprocess per image at shape (1, 3, 640, 224)

0: 640x224 1 human, 1 helmet, 12.3ms
Speed: 1.6ms preprocess, 12.3ms inference, 1.9ms postprocess per image at shape (1, 3, 640, 224)

0: 640x224 1 Hardhat, 1 Mask, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 20.4ms
Speed: 1.6ms preprocess, 20.4ms inference, 1.6ms postprocess per image at shape (1, 3, 640, 224)

0: 640x384 1 person, 12.4ms
Speed: 2.1ms preprocess, 12.4ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 384)

0: 640x224 1 Hardhat, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 19.7ms
Speed: 1.6ms preprocess, 19.7ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 224)

0: 640x224 1 human, 1 helmet, 9.8ms
Speed: 1.7ms preprocess, 9.8ms

In [None]:
import os
import cv2
import torch
import numpy as np
import logging
from ultralytics import YOLO
from yolox.tracker.byte_tracker import BYTETracker
# The ByteTrack (yolox) tracker imported above presumably still refers to the
# `np.float` alias that NumPy 1.24 removed; restore it so the tracker can run.
np.float = float
# Setup logging
logging.basicConfig(level=logging.INFO)

# Initialize YOLO models
device = 'cuda' if torch.cuda.is_available() else 'cpu'
person_model = YOLO('/content/ByteTrack/yolov8n.pt').to(device)  # Person detection
model1 = YOLO('/content/ByteTrack/ppe.pt').to(device)            # Accessory detection
model2 = YOLO('/content/ByteTrack/best-all.pt').to(device)       # Accessory detection
# ppe.pt is already loaded as model1. Loading it a second time and listing it
# twice made every person crop go through the same weights twice for identical
# detections, so the name is kept as an alias and the list holds each model once.
ppe_model = model1
models = [model1, model2]

# Constants
CONF_THRESHOLD_PERSON = 0.8  # minimum confidence for a person detection
# Minimum confidence per accessory class (keys are lower-cased class names).
CONF_THRESHOLDS = {'mask': 0.5, 'hair net': 0.5, 'hardhat': 0.7, 'safety vest': 0.75}
HOLD_FRAMES = 30    # value written to an accessory's hold counter on detection
person_states = {}  # track id -> per-accessory state, filled by update_accessory_states

# ByteTrack parameters
class ByteTrackArgs:
    """Plain settings holder handed to ``BYTETracker`` right after this class.

    The four attributes are fixed values; what each one controls is defined by
    the ByteTrack implementation, not by this notebook.
    """

    def __init__(self):
        # Same four settings, stored as instance attributes in one step.
        self.__dict__.update(
            track_thresh=0.6,
            match_thresh=0.8,
            track_buffer=30,
            mot20=False,
        )

# Build the tracker once at module level from the settings holder; frame_rate is
# fixed at 30 here rather than read from the input video.
# NOTE(review): because `tracker` is a module-level object, calling main() again
# without re-running this cell reuses the same tracker instance.
tracker_args = ByteTrackArgs()
tracker = BYTETracker(tracker_args, frame_rate=30)

# Accessory state updating
def update_accessory_states(person_id, detected_objects, person_states, hold_frames):
    """Refresh one person's accessory flags inside ``person_states`` (in place).

    Args:
        person_id: Key identifying the person (the track id in this notebook).
        detected_objects: Container of accessory names seen for this person in
            the current frame; only membership is checked.
        person_states: Dict mapping person id -> accessory name ->
            ``{'state': bool, 'hold_counter': int}``. A missing person gets an
            all-``False`` entry first.
        hold_frames: Value the hold counter is reset to on every detection.

    An accessory that is not detected keeps its state while its counter is
    counted down to zero and is switched off on the following miss. A hardhat in
    the "on" state always forces 'hair net' to "on" as well.
    """
    tracked_items = ('hardhat', 'safety vest', 'mask', 'hair net')
    accessories = person_states.setdefault(
        person_id,
        {item: {'state': False, 'hold_counter': 0} for item in tracked_items},
    )

    for name, entry in accessories.items():
        if name in detected_objects:
            # Seen in this frame: switch on and restart the hold window.
            entry['state'] = True
            entry['hold_counter'] = hold_frames
            continue
        if entry['hold_counter'] > 0:
            # Missed, but still inside the hold window: keep the current state.
            entry['hold_counter'] -= 1
            continue
        entry['state'] = False

    # Enforce logical consistency: hardhat implies hairnet
    if accessories['hardhat']['state']:
        accessories['hair net']['state'] = True

# Process accessory detections
def process_accessory_detections(person_crop, models, thresholds=None):
    """Run every accessory model on one person crop and keep the confident hits.

    Args:
        person_crop: Image region of one person, passed to ``model.predict``.
        models: Iterable of detectors exposing ``predict(...)`` and ``names``.
        thresholds: Optional mapping of lower-cased class name -> minimum
            confidence. Defaults to the module-level ``CONF_THRESHOLDS``.

    Returns:
        Dict mapping each accepted class name to the highest confidence seen for
        it across all models. Empty when the crop has no pixels.
    """
    if thresholds is None:
        thresholds = CONF_THRESHOLDS

    # A crop without pixels (degenerate or off-frame box) cannot be inferred on.
    if getattr(person_crop, 'size', None) == 0:
        return {}

    # The detectors name the same item differently: this notebook's prediction
    # logs show 'helmet' from one model and 'Hardhat' from another. Map the
    # variants onto the keys used by the thresholds and the state table so that
    # such detections are not silently dropped.
    aliases = {'helmet': 'hardhat', 'hairnet': 'hair net'}

    detected_objects = {}
    for model in models:
        # verbose=False stops Ultralytics from printing a line per call, which
        # otherwise floods the cell output.
        for result in model.predict(person_crop, verbose=False):
            for box in result.boxes:
                raw_name = model.names[int(box.cls[0])].lower()
                cls_name = aliases.get(raw_name, raw_name)
                conf = float(box.conf[0])
                if cls_name in thresholds and conf >= thresholds[cls_name]:
                    detected_objects[cls_name] = max(detected_objects.get(cls_name, 0), conf)
    return detected_objects

# Main video processing function
def main():
    """Annotate every frame of the input video with accessory states and save it.

    Per frame: detect people with ``person_model``, track them with the
    module-level ``tracker``, run ``process_accessory_detections`` on each active
    track's crop, smooth the result with ``update_accessory_states`` and draw the
    box plus one "using"/"removed" label per accessory. Frames without any
    person detection are written unchanged, so the output keeps the frame count
    of the input.
    """
    video_path = "/content/ByteTrack/original_video.mp4"
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        # Report the problem instead of silently writing an empty output file.
        logging.error(f"Could not open input video: {video_path}")
        return

    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # Keep fractional rates such as 29.97 (truncating changes playback speed);
    # fall back to 30 when the container reports no rate.
    fps = cap.get(cv2.CAP_PROP_FPS) or 30.0

    # Configure output video
    output_video_path = "processed_video_bytetrack.mp4"
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

    frame_count = 0
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Person detection (verbose=False avoids one log line per frame)
            results = person_model.predict(frame, conf=CONF_THRESHOLD_PERSON, verbose=False)
            detections = []
            for box in results[0].boxes:
                cls_id = int(box.cls[0])
                if person_model.names[cls_id] == 'person':
                    xmin, ymin, xmax, ymax = map(int, box.xyxy[0].tolist())
                    detections.append([xmin, ymin, xmax, ymax, float(box.conf[0])])

            # The tracker is only updated when there is at least one detection;
            # the frame is still written to the output either way.
            online_tracks = []
            if detections:
                online_tracks = tracker.update(
                    np.array(detections),
                    [frame_width, frame_height],
                    [frame_width, frame_height],
                )

            # Process tracks
            for track in online_tracks:
                if not track.is_activated:
                    continue

                track_id = track.track_id
                xmin, ymin, xmax, ymax = map(int, track.tlbr)

                # Ensure bounding box is within frame
                xmin, ymin = max(0, xmin), max(0, ymin)
                xmax, ymax = min(frame_width, xmax), min(frame_height, ymax)
                if xmax <= xmin or ymax <= ymin:
                    # Nothing is left of the box after clamping; an empty crop
                    # cannot be passed to the accessory models.
                    continue

                # Crop the person
                person_crop = frame[ymin:ymax, xmin:xmax]

                # Accessory detection
                detected_objects = process_accessory_detections(person_crop, models)

                # Update states
                update_accessory_states(track_id, detected_objects, person_states, HOLD_FRAMES)

                # Draw bounding box and labels (labels stack upwards from the box top)
                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (255, 0, 0), 2)
                label_y = ymin - 10
                for obj, state_info in person_states[track_id].items():
                    color = (0, 255, 0) if state_info['state'] else (0, 0, 255)
                    label = f"{obj}: {'using' if state_info['state'] else 'removed'}"
                    cv2.putText(frame, label, (xmin, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
                    label_y -= 15

            out.write(frame)
            frame_count += 1
            if frame_count % 30 == 0:
                logging.info(f"Processed {frame_count} frames.")
    finally:
        # Release both handles even if a frame fails, so the partial output is finalised.
        cap.release()
        out.release()

    print(f"Processed video saved at {os.path.abspath(output_video_path)}")

# Inside a notebook cell __name__ is "__main__", so the pipeline runs when the cell runs.
if __name__ == "__main__":
    main()

[1;30;43mGörüntülenen çıkış son 5000 satıra kısaltıldı.[0m

0: 640x224 1 Hardhat, 1 Mask, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 28.8ms
Speed: 2.0ms preprocess, 28.8ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 224)

0: 640x224 1 human, 1 helmet, 12.6ms
Speed: 1.8ms preprocess, 12.6ms inference, 1.6ms postprocess per image at shape (1, 3, 640, 224)

0: 640x224 1 Hardhat, 1 Mask, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 19.6ms
Speed: 1.8ms preprocess, 19.6ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 224)

0: 640x384 1 person, 11.9ms
Speed: 3.3ms preprocess, 11.9ms inference, 1.9ms postprocess per image at shape (1, 3, 640, 384)

0: 640x224 1 Hardhat, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 18.4ms
Speed: 2.9ms preprocess, 18.4ms inference, 1.8ms postprocess per image at shape (1, 3, 640, 224)

0: 640x224 1 human, 1 helmet, 11.2ms
Speed: 1.6ms preprocess, 11.2ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 224)

0: 640x224 1 Hardhat, 1 NO-M

# deep sort tracking


In [None]:
!pip install deep-sort-realtime
!git clone https://github.com/ZQPei/deep_sort_pytorch.git
!pip install ultralytics

Collecting deep-sort-realtime
  Downloading deep_sort_realtime-1.3.2-py3-none-any.whl.metadata (12 kB)
Downloading deep_sort_realtime-1.3.2-py3-none-any.whl (8.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.4/8.4 MB[0m [31m22.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: deep-sort-realtime
Successfully installed deep-sort-realtime-1.3.2
Cloning into 'deep_sort_pytorch'...
remote: Enumerating objects: 1078, done.[K
remote: Counting objects: 100% (352/352), done.[K
remote: Compressing objects: 100% (258/258), done.[K
remote: Total 1078 (delta 155), reused 261 (delta 90), pack-reused 726 (from 1)[K
Receiving objects: 100% (1078/1078), 29.15 MiB | 1.01 MiB/s, done.
Resolving deltas: 100% (506/506), done.
Collecting ultralytics
  Downloading ultralytics-8.3.47-py3-none-any.whl.metadata (35 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.12-py3-none-any.whl.metadata (9.4 kB)
Downloading ultralyti

In [None]:
from deep_sort_realtime.deepsort_tracker import DeepSort
import os
import cv2
import torch
import gdown
import warnings
import logging
from IPython.display import HTML
from ultralytics import YOLO

# Silence all Python warnings for the rest of the session.
warnings.filterwarnings("ignore")

# basicConfig() is a no-op once the root logger already has handlers (an
# earlier cell configures logging too); force=True replaces them so the
# INFO messages below are actually emitted.
logging.basicConfig(level=logging.INFO, force=True)

# Run the models on the GPU when one is available, otherwise on the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
logging.info(f"Using device: {device}")

# download files from Google Drive
def download_file(url, output_path):
    """Download a Google Drive file to `output_path` using gdown.

    Args:
        url: Google Drive download URL.
        output_path: Local path the file is written to.

    Returns:
        The value returned by `gdown.download` (the written file's path).

    Raises:
        RuntimeError: if gdown returns None instead of a path.
        Exception: any error raised by gdown is logged and re-raised.
    """
    try:
        saved_path = gdown.download(url, output_path, quiet=False)
        # A None result means nothing was saved; treat it as a failure here
        # instead of letting a later YOLO(...) load fail on a missing file.
        if saved_path is None:
            raise RuntimeError(f"gdown did not return a file for {url}")
    except Exception as e:
        logging.error(f"Error downloading {url}: {e}")
        raise
    return saved_path

# --- Remote sources -------------------------------------------------------
video_url = "https://drive.google.com/uc?id=130btvPHooNMgRY6AWBkhullJeHViJJg1"
video_path = "original_video.mp4"

weights_1_url = "https://drive.google.com/uc?id=1VVRD5WrZ3CV2EOPwXjv9ZyY53rdHqwHb"  # best-all.pt
weights_2_url = "https://drive.google.com/uc?id=1Z3-XuG6ln-slgioy3RRFVgrqx1XO5YDB"  # best-helmet-vest-mask.pt
weights_3_url = "https://drive.google.com/uc?id=1o9pJQHl2vRV2MStwNjlcMZJ7qarxYhmg"  # ppe.pt
weights_4_url = "https://drive.google.com/uc?id=1W9vdVxUCIa6YEwV5kmxdyFOKxiMQR7Qm"  # safety-best.pt

# --- Fetch the video first, then the four weight files, in this order ------
# The second weight file is stored locally as "best-helmet-vest-maskt.pt";
# the same spelling is used when it is loaded below.
_download_plan = (
    (video_url, video_path),
    (weights_1_url, "best-all.pt"),
    (weights_2_url, "best-helmet-vest-maskt.pt"),
    (weights_3_url, "ppe.pt"),
    (weights_4_url, "safety-best.pt"),
)
for _source_url, _local_name in _download_plan:
    download_file(_source_url, _local_name)

# --- Load every model onto the selected device -----------------------------
person_model = YOLO('yolov8n.pt').to(device)  # stock YOLOv8n weights, used for person detection
model1 = YOLO('best-all.pt').to(device)
model2 = YOLO('best-helmet-vest-maskt.pt').to(device)
model3 = YOLO('ppe.pt').to(device)
model4 = YOLO('safety-best.pt').to(device)

# --- Log the class names each model exposes ---------------------------------
for _label, _loaded in (
    ("Model1", model1),
    ("Model2", model2),
    ("Model3", model3),
    ("Model4", model4),
    ("Person model", person_model),
):
    logging.info(f"{_label} class names: {_loaded.names}")



Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


Downloading...
From: https://drive.google.com/uc?id=130btvPHooNMgRY6AWBkhullJeHViJJg1
To: /content/original_video.mp4
100%|██████████| 31.3M/31.3M [00:00<00:00, 145MB/s]
Downloading...
From: https://drive.google.com/uc?id=1VVRD5WrZ3CV2EOPwXjv9ZyY53rdHqwHb
To: /content/best-all.pt
100%|██████████| 6.25M/6.25M [00:00<00:00, 110MB/s]
Downloading...
From: https://drive.google.com/uc?id=1Z3-XuG6ln-slgioy3RRFVgrqx1XO5YDB
To: /content/best-helmet-vest-maskt.pt
100%|██████████| 6.24M/6.24M [00:00<00:00, 96.2MB/s]
Downloading...
From (original): https://drive.google.com/uc?id=1o9pJQHl2vRV2MStwNjlcMZJ7qarxYhmg
From (redirected): https://drive.google.com/uc?id=1o9pJQHl2vRV2MStwNjlcMZJ7qarxYhmg&confirm=t&uuid=3c05435f-d773-406f-858d-c59fe251413f
To: /content/ppe.pt
100%|██████████| 87.6M/87.6M [00:01<00:00, 77.8MB/s]
Downloading...
From: https://drive.google.com/uc?id=1W9vdVxUCIa6YEwV5kmxdyFOKxiMQR7Qm
To: /content/safety-best.pt
100%|██████████| 22.5M/22.5M [00:00<00:00, 188MB/s]

Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n.pt to 'yolov8n.pt'...



100%|██████████| 6.25M/6.25M [00:00<00:00, 194MB/s]


In [None]:
# Load YOLO models
person_model = YOLO('yolov8n.pt').to(device)  # YOLOv8n, used for detecting people
model1 = YOLO('ppe.pt').to(device)            # PPE weights (logged classes include Hardhat, Mask, NO-Mask, NO-Safety Vest)
model2 = YOLO('best-all.pt').to(device)       # second accessory detector
# 'ppe.pt' is already loaded as model1: reuse that object instead of loading
# a second copy of the same weights.
ppe_model = model1

# Every entry is run on each person crop, so the shared ppe.pt model is
# listed once; running identical weights twice yields identical detections.
models = [model1, model2]

# Initialize Deep SORT tracker
tracker = DeepSort(max_age=30, n_init=5, max_iou_distance=0.4, nn_budget=100)

# Constants
# Minimum confidence for person detections. A stray
# `person_model(conf=..., classes=[0])` call used to sit here; with no source
# it ran inference on nothing useful, so it was removed.
CONF_THRESHOLD_PERSON = 0.8

# Per-class minimum confidence; keys are normalised (lower-case, spaces) class names.
CONF_THRESHOLDS = {'mask': 0.5, 'hair net': 0.5, 'hardhat': 0.6, 'safety vest': 0.6}
HOLD_FRAMES = 30    # value passed to update_accessory_states as hold_frames
person_states = {}  # track_id -> per-accessory state dicts

# Normalize class names
def normalize_class_name(name):
    """Return `name` lower-cased, with each underscore and hyphen replaced by a space."""
    separators_to_space = str.maketrans({'_': ' ', '-': ' '})
    return name.translate(separators_to_space).lower()

# Process accessory detections
def process_accessory_detections(person_crop, models):
    """Run every accessory model on a person crop and keep confident detections.

    Args:
        person_crop: An image, or a list/tuple of images, handed to each model.
            Images whose shape contains a zero dimension are skipped because
            inference on an empty crop fails.
        models: Iterable of models; each is called on the crop(s) and must
            expose a `names` mapping from class id to class name.

    Returns:
        dict mapping normalised class name -> highest confidence seen, limited
        to classes listed in CONF_THRESHOLDS whose confidence reaches the
        class threshold. Any other class name is ignored.
    """
    def _is_empty(image):
        # True for array-like images with at least one zero-length dimension.
        return hasattr(image, 'shape') and 0 in image.shape

    if isinstance(person_crop, (list, tuple)):
        person_crop = [image for image in person_crop if not _is_empty(image)]
        if not person_crop:
            return {}
    elif _is_empty(person_crop):
        return {}

    detected_objects = {}
    # Iterate over the models directly: zipping with a fixed list of three
    # names silently dropped any model after the third.
    for model in models:
        # verbose=False stops Ultralytics from printing a log line per call.
        for result in model(person_crop, verbose=False):
            for box in result.boxes:
                cls_name = normalize_class_name(model.names[int(box.cls[0])])
                conf = float(box.conf[0])
                if cls_name in CONF_THRESHOLDS and conf >= CONF_THRESHOLDS[cls_name]:
                    detected_objects[cls_name] = max(detected_objects.get(cls_name, 0), conf)
    return detected_objects

# Update accessory states
def update_accessory_states(person_id, detected_objects, person_states, hold_frames):
    """Refresh the per-accessory flags of one tracked person, in place.

    A person seen for the first time gets an entry in `person_states` with all
    four accessories flagged False and a hold counter of 0. Then, per accessory:
      * present in `detected_objects`: flag set, counter reloaded with `hold_frames`;
      * absent with a positive counter: counter decremented, flag untouched;
      * absent with the counter at zero: flag cleared.
    Finally, a set 'hardhat' flag forces the 'hair net' flag on.

    Returns None; `person_states` is mutated.
    """
    if person_id not in person_states:
        person_states[person_id] = {
            accessory: {'state': False, 'hold_counter': 0}
            for accessory in ('hardhat', 'safety vest', 'mask', 'hair net')
        }
    accessories = person_states[person_id]

    for accessory, info in accessories.items():
        if accessory in detected_objects:
            info['hold_counter'] = hold_frames
            info['state'] = True
            continue
        if info['hold_counter'] > 0:
            info['hold_counter'] -= 1
            continue
        info['state'] = False

    # Logical consistency rule: a hardhat implies a hair net.
    if accessories['hardhat']['state']:
        accessories['hair net']['state'] = True

def smooth_bounding_box(track, bbox_history, smoothing_window=5):
    """Return the track's box averaged over its most recent stored boxes.

    Args:
        track: Object with a `track_id` attribute and a `to_tlbr()` method
            yielding four coordinates (xmin, ymin, xmax, ymax).
        bbox_history: dict mapping track id -> list of past integer boxes;
            updated in place.
        smoothing_window: Once the stored list is longer than this, its
            oldest box is dropped.

    Returns:
        Tuple of four ints: the per-coordinate mean of the stored boxes,
        truncated with int().
    """
    track_id = track.track_id
    left, top, right, bottom = (int(value) for value in track.to_tlbr())

    # Fetch (or create) this track's history and record the current box.
    history = bbox_history.setdefault(track_id, [])
    history.append((left, top, right, bottom))
    if len(history) > smoothing_window:
        del history[0]  # discard the oldest box

    # zip(*history) groups all xmin values, all ymin values, and so on.
    return tuple(int(sum(column) / len(column)) for column in zip(*history))



# Main function
def main():
    """Track people in the input video, detect their accessories and save an annotated copy.

    For each frame: detect people, update the Deep SORT tracker, and for every
    confirmed track updated in this frame crop the (smoothed) box, run the
    accessory models on the crop, update the per-person states and draw the
    box plus one "using"/"removed" label per accessory.

    Reads module globals: person_model, models, tracker, person_states,
    CONF_THRESHOLD_PERSON, HOLD_FRAMES.

    Raises:
        IOError: if the input video cannot be opened.
    """
    video_path = "original_video.mp4"
    output_video_path = "processed_video_stable_04.mp4"

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise IOError(f"Could not open video: {video_path}")

    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # Keep fractional rates such as 29.97 (int() would change the playback
    # speed); 30.0 is an arbitrary fallback when no rate is reported.
    fps = cap.get(cv2.CAP_PROP_FPS) or 30.0

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

    # The smoothing history must outlive a single frame; re-creating it per
    # frame left every track with a one-box history, i.e. no smoothing.
    bbox_history = {}
    frame_count = 0
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Detect people with the person-specific threshold (class 0 only).
            results = person_model(frame, conf=CONF_THRESHOLD_PERSON, classes=[0], verbose=False)
            detections = []
            for box in results[0].boxes:
                if person_model.names[int(box.cls[0])] != 'person':
                    continue
                xmin, ymin, xmax, ymax = map(int, box.xyxy[0].tolist())
                conf = float(box.conf[0])
                # deep_sort_realtime expects ([left, top, width, height], confidence, class),
                # not corner coordinates.
                detections.append(([xmin, ymin, xmax - xmin, ymax - ymin], conf, 'person'))

            # Update tracker
            tracks = tracker.update_tracks(detections, frame=frame)

            # Draw on a copy so overlays of one person never end up inside
            # the crop taken for another person in the same frame.
            annotated = frame.copy()
            for track in tracks:
                if not track.is_confirmed() or track.time_since_update > 0:
                    continue

                # Get and smooth bounding box, then clamp it to the frame.
                xmin, ymin, xmax, ymax = smooth_bounding_box(track, bbox_history)
                xmin, ymin = max(0, xmin), max(0, ymin)
                xmax, ymax = min(frame_width, xmax), min(frame_height, ymax)
                if xmax <= xmin or ymax <= ymin:
                    continue  # box lies outside the frame: nothing to crop

                person_crop = frame[ymin:ymax, xmin:xmax]

                # Detect accessories and update states
                detected_objects = process_accessory_detections([person_crop], models)
                update_accessory_states(track.track_id, detected_objects, person_states, HOLD_FRAMES)

                # Draw bounding box and accessory labels (stacked upwards from the box top)
                cv2.rectangle(annotated, (xmin, ymin), (xmax, ymax), (255, 0, 0), 2)
                label_y = ymin - 10
                for obj, state_info in person_states[track.track_id].items():
                    color = (0, 255, 0) if state_info['state'] else (0, 0, 255)
                    label = f"{obj}: {'using' if state_info['state'] else 'removed'}"
                    cv2.putText(annotated, label, (xmin, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
                    label_y -= 15

            out.write(annotated)
            frame_count += 1
            if frame_count % 30 == 0:
                logging.info(f"Processed {frame_count} frames.")
    finally:
        # Release the capture and finalise the output file even on errors.
        cap.release()
        out.release()

    print(f"Processed video saved at {os.path.abspath(output_video_path)}")

if __name__ == "__main__":
    main()


[1;30;43mGörüntülenen çıkış son 5000 satıra kısaltıldı.[0m
Speed: 2.6ms preprocess, 27.9ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 320)

0: 640x320 1 helmet, 1 vest, 8.3ms
Speed: 2.6ms preprocess, 8.3ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 320)

0: 640x320 1 Hardhat, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 29.6ms
Speed: 2.4ms preprocess, 29.6ms inference, 1.6ms postprocess per image at shape (1, 3, 640, 320)

0: 640x384 1 person, 8.5ms
Speed: 2.2ms preprocess, 8.5ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 384)

0: 640x320 1 Hardhat, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 25.9ms
Speed: 3.0ms preprocess, 25.9ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 320)

0: 640x320 1 helmet, 1 vest, 16.6ms
Speed: 2.2ms preprocess, 16.6ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 320)

0: 640x320 1 Hardhat, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 26.0ms
Speed: 2.5ms preprocess, 26.0ms inference, 1.5ms po

In [None]:
# Load YOLO models
person_model = YOLO('yolov8n.pt').to(device)  # YOLOv8n, used for detecting people
model1 = YOLO('ppe.pt').to(device)            # PPE weights (logged classes include Hardhat, Mask, NO-Mask, NO-Safety Vest)
model2 = YOLO('best-all.pt').to(device)       # second accessory detector
# 'ppe.pt' is already loaded as model1: reuse that object instead of loading
# a second copy of the same weights.
ppe_model = model1

# Every entry is run on each person crop, so the shared ppe.pt model is
# listed once; running identical weights twice yields identical detections.
models = [model1, model2]

# Initialize Deep SORT tracker
tracker = DeepSort(max_age=30, n_init=5, max_iou_distance=0.6, nn_budget=100)

# Constants
CONF_THRESHOLD = 0.7  # NOTE(review): not referenced by the code in this cell
# Per-class minimum confidence; keys are normalised (lower-case, spaces) class names.
CONF_THRESHOLDS = {'mask': 0.5, 'hair net': 0.5, 'hardhat': 0.6, 'safety vest': 0.6}
HOLD_FRAMES = 30    # value passed to update_accessory_states as hold_frames
person_states = {}  # track_id -> per-accessory state dicts

# Normalize class names
def normalize_class_name(name):
    """Return `name` lower-cased, with each underscore and hyphen replaced by a space."""
    separators_to_space = str.maketrans({'_': ' ', '-': ' '})
    return name.translate(separators_to_space).lower()

# Process accessory detections
def process_accessory_detections(person_crop, models):
    """Run every accessory model on a person crop and keep confident detections.

    Args:
        person_crop: An image, or a list/tuple of images, handed to each model.
            Images whose shape contains a zero dimension are skipped because
            inference on an empty crop fails.
        models: Iterable of models; each is called on the crop(s) and must
            expose a `names` mapping from class id to class name.

    Returns:
        dict mapping normalised class name -> highest confidence seen, limited
        to classes listed in CONF_THRESHOLDS whose confidence reaches the
        class threshold. Any other class name is ignored.
    """
    def _is_empty(image):
        # True for array-like images with at least one zero-length dimension.
        return hasattr(image, 'shape') and 0 in image.shape

    if isinstance(person_crop, (list, tuple)):
        person_crop = [image for image in person_crop if not _is_empty(image)]
        if not person_crop:
            return {}
    elif _is_empty(person_crop):
        return {}

    detected_objects = {}
    # Iterate over the models directly: zipping with a fixed list of three
    # names silently dropped any model after the third.
    for model in models:
        # verbose=False stops Ultralytics from printing a log line per call.
        for result in model(person_crop, verbose=False):
            for box in result.boxes:
                cls_name = normalize_class_name(model.names[int(box.cls[0])])
                conf = float(box.conf[0])
                if cls_name in CONF_THRESHOLDS and conf >= CONF_THRESHOLDS[cls_name]:
                    detected_objects[cls_name] = max(detected_objects.get(cls_name, 0), conf)
    return detected_objects

# Update accessory states
def update_accessory_states(person_id, detected_objects, person_states, hold_frames):
    """Refresh the per-accessory flags of one tracked person, in place.

    A person seen for the first time gets an entry in `person_states` with all
    four accessories flagged False and a hold counter of 0. Then, per accessory:
      * present in `detected_objects`: flag set, counter reloaded with `hold_frames`;
      * absent with a positive counter: counter decremented, flag untouched;
      * absent with the counter at zero: flag cleared.
    Finally, a set 'hardhat' flag forces the 'hair net' flag on.

    Returns None; `person_states` is mutated.
    """
    if person_id not in person_states:
        person_states[person_id] = {
            accessory: {'state': False, 'hold_counter': 0}
            for accessory in ('hardhat', 'safety vest', 'mask', 'hair net')
        }
    accessories = person_states[person_id]

    for accessory, info in accessories.items():
        if accessory in detected_objects:
            info['hold_counter'] = hold_frames
            info['state'] = True
            continue
        if info['hold_counter'] > 0:
            info['hold_counter'] -= 1
            continue
        info['state'] = False

    # Logical consistency rule: a hardhat implies a hair net.
    if accessories['hardhat']['state']:
        accessories['hair net']['state'] = True

def smooth_bounding_box(track, bbox_history, smoothing_window=5):
    """Return the track's box averaged over its most recent stored boxes.

    Args:
        track: Object with a `track_id` attribute and a `to_tlbr()` method
            yielding four coordinates (xmin, ymin, xmax, ymax).
        bbox_history: dict mapping track id -> list of past integer boxes;
            updated in place.
        smoothing_window: Once the stored list is longer than this, its
            oldest box is dropped.

    Returns:
        Tuple of four ints: the per-coordinate mean of the stored boxes,
        truncated with int().
    """
    track_id = track.track_id
    left, top, right, bottom = (int(value) for value in track.to_tlbr())

    # Fetch (or create) this track's history and record the current box.
    history = bbox_history.setdefault(track_id, [])
    history.append((left, top, right, bottom))
    if len(history) > smoothing_window:
        del history[0]  # discard the oldest box

    # zip(*history) groups all xmin values, all ymin values, and so on.
    return tuple(int(sum(column) / len(column)) for column in zip(*history))



# Main function
def main():
    """Track people in the input video, detect their accessories and save an annotated copy.

    For each frame: detect people, update the Deep SORT tracker, and for every
    confirmed track updated in this frame crop the (smoothed) box, run the
    accessory models on the crop, update the per-person states and draw the
    box plus one "using"/"removed" label per accessory.

    Reads module globals: person_model, models, tracker, person_states,
    CONF_THRESHOLDS, HOLD_FRAMES.

    Raises:
        IOError: if the input video cannot be opened.
    """
    video_path = "original_video.mp4"
    output_video_path = "processed_video_stable_06_smoothtracker.mp4"

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise IOError(f"Could not open video: {video_path}")

    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # Keep fractional rates such as 29.97 (int() would change the playback
    # speed); 30.0 is an arbitrary fallback when no rate is reported.
    fps = cap.get(cv2.CAP_PROP_FPS) or 30.0

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

    # The smoothing history must outlive a single frame; re-creating it per
    # frame left every track with a one-box history, i.e. no smoothing.
    bbox_history = {}
    frame_count = 0
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Detect people (class 0 only).
            # NOTE(review): the person detector is thresholded with the 'mask'
            # confidence; confirm whether a dedicated person threshold was intended.
            results = person_model(frame, conf=CONF_THRESHOLDS['mask'], classes=[0], verbose=False)
            detections = []
            for box in results[0].boxes:
                if person_model.names[int(box.cls[0])] != 'person':
                    continue
                xmin, ymin, xmax, ymax = map(int, box.xyxy[0].tolist())
                conf = float(box.conf[0])
                # deep_sort_realtime expects ([left, top, width, height], confidence, class),
                # not corner coordinates.
                detections.append(([xmin, ymin, xmax - xmin, ymax - ymin], conf, 'person'))

            # Update tracker
            tracks = tracker.update_tracks(detections, frame=frame)

            # Draw on a copy so overlays of one person never end up inside
            # the crop taken for another person in the same frame.
            annotated = frame.copy()
            for track in tracks:
                if not track.is_confirmed() or track.time_since_update > 0:
                    continue

                # Get and smooth bounding box, then clamp it to the frame.
                xmin, ymin, xmax, ymax = smooth_bounding_box(track, bbox_history)
                xmin, ymin = max(0, xmin), max(0, ymin)
                xmax, ymax = min(frame_width, xmax), min(frame_height, ymax)
                if xmax <= xmin or ymax <= ymin:
                    continue  # box lies outside the frame: nothing to crop

                person_crop = frame[ymin:ymax, xmin:xmax]

                # Detect accessories and update states
                detected_objects = process_accessory_detections([person_crop], models)
                update_accessory_states(track.track_id, detected_objects, person_states, HOLD_FRAMES)

                # Draw bounding box and accessory labels (stacked upwards from the box top)
                cv2.rectangle(annotated, (xmin, ymin), (xmax, ymax), (255, 0, 0), 2)
                label_y = ymin - 10
                for obj, state_info in person_states[track.track_id].items():
                    color = (0, 255, 0) if state_info['state'] else (0, 0, 255)
                    label = f"{obj}: {'using' if state_info['state'] else 'removed'}"
                    cv2.putText(annotated, label, (xmin, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
                    label_y -= 15

            out.write(annotated)
            frame_count += 1
            if frame_count % 30 == 0:
                logging.info(f"Processed {frame_count} frames.")
    finally:
        # Release the capture and finalise the output file even on errors.
        cap.release()
        out.release()

    print(f"Processed video saved at {os.path.abspath(output_video_path)}")

if __name__ == "__main__":
    main()


[1;30;43mGörüntülenen çıkış son 5000 satıra kısaltıldı.[0m
Speed: 2.2ms preprocess, 25.6ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 320)

0: 640x320 1 helmet, 1 vest, 10.1ms
Speed: 3.7ms preprocess, 10.1ms inference, 6.0ms postprocess per image at shape (1, 3, 640, 320)

0: 640x320 1 Hardhat, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 29.0ms
Speed: 2.5ms preprocess, 29.0ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 320)

0: 640x384 1 person, 7.7ms
Speed: 2.4ms preprocess, 7.7ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 384)

0: 640x320 1 Hardhat, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 23.4ms
Speed: 2.2ms preprocess, 23.4ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 320)

0: 640x320 1 helmet, 1 vest, 14.2ms
Speed: 2.5ms preprocess, 14.2ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 320)

0: 640x320 1 Hardhat, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 25.9ms
Speed: 2.1ms preprocess, 25.9ms inference, 4.7ms 

In [None]:
# Download the dataset as a zip archive
import shutil

from google.colab import files

dataset_path = '/content/video'
# make_archive returns the path of the archive it created
# ('video.zip' in the current working directory).
archive_path = shutil.make_archive('video', 'zip', dataset_path)

# Download the zip file; the directory itself was being passed here before.
files.download(archive_path)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Load YOLO models
person_model = YOLO('yolov8n.pt').to(device)  # YOLOv8n, used for detecting people
model1 = YOLO('ppe.pt').to(device)            # PPE weights (logged classes include Hardhat, Mask, NO-Mask, NO-Safety Vest)
model2 = YOLO('best-all.pt').to(device)       # second accessory detector
# 'ppe.pt' is already loaded as model1: reuse that object instead of loading
# a second copy of the same weights.
ppe_model = model1

# Initialize Deep SORT tracker
tracker = DeepSort(max_age=15, n_init=6, max_iou_distance=0.6, nn_budget=100)

# Constants
CONF_THRESHOLD = 0.7  # NOTE(review): not referenced by the code in this cell
# Per-class minimum confidence; keys are normalised (lower-case, spaces) class names.
CONF_THRESHOLDS = {'mask': 0.5, 'hair net': 0.5, 'hardhat': 0.6, 'safety vest': 0.6}
HOLD_FRAMES = 30    # value passed to update_accessory_states as hold_frames
person_states = {}  # track_id -> per-accessory state dicts

# Normalize class names
def normalize_class_name(name):
    """Return `name` lower-cased, with each underscore and hyphen replaced by a space."""
    separators_to_space = str.maketrans({'_': ' ', '-': ' '})
    return name.translate(separators_to_space).lower()

# Process accessory detections
def process_accessory_detections(person_crop, models):
    """Run every accessory model on a person crop and keep confident detections.

    Args:
        person_crop: An image, or a list/tuple of images, handed to each model.
            Images whose shape contains a zero dimension are skipped because
            inference on an empty crop fails.
        models: Iterable of models; each is called on the crop(s) and must
            expose a `names` mapping from class id to class name.

    Returns:
        dict mapping normalised class name -> highest confidence seen, limited
        to classes listed in CONF_THRESHOLDS whose confidence reaches the
        class threshold. Any other class name is ignored.
    """
    def _is_empty(image):
        # True for array-like images with at least one zero-length dimension.
        return hasattr(image, 'shape') and 0 in image.shape

    if isinstance(person_crop, (list, tuple)):
        person_crop = [image for image in person_crop if not _is_empty(image)]
        if not person_crop:
            return {}
    elif _is_empty(person_crop):
        return {}

    detected_objects = {}
    # Iterate over the models directly: zipping with a fixed list of three
    # names silently dropped any model after the third.
    for model in models:
        # verbose=False stops Ultralytics from printing a log line per call.
        for result in model(person_crop, verbose=False):
            for box in result.boxes:
                cls_name = normalize_class_name(model.names[int(box.cls[0])])
                conf = float(box.conf[0])
                if cls_name in CONF_THRESHOLDS and conf >= CONF_THRESHOLDS[cls_name]:
                    detected_objects[cls_name] = max(detected_objects.get(cls_name, 0), conf)
    return detected_objects

# Update accessory states
def update_accessory_states(person_id, detected_objects, person_states, hold_frames):
    """Refresh the per-accessory flags of one tracked person, in place.

    A person seen for the first time gets an entry in `person_states` with all
    four accessories flagged False and a hold counter of 0. Then, per accessory:
      * present in `detected_objects`: flag set, counter reloaded with `hold_frames`;
      * absent with a positive counter: counter decremented, flag untouched;
      * absent with the counter at zero: flag cleared.
    Finally, a set 'hardhat' flag forces the 'hair net' flag on.

    Returns None; `person_states` is mutated.
    """
    if person_id not in person_states:
        person_states[person_id] = {
            accessory: {'state': False, 'hold_counter': 0}
            for accessory in ('hardhat', 'safety vest', 'mask', 'hair net')
        }
    accessories = person_states[person_id]

    for accessory, info in accessories.items():
        if accessory in detected_objects:
            info['hold_counter'] = hold_frames
            info['state'] = True
            continue
        if info['hold_counter'] > 0:
            info['hold_counter'] -= 1
            continue
        info['state'] = False

    # Logical consistency rule: a hardhat implies a hair net.
    if accessories['hardhat']['state']:
        accessories['hair net']['state'] = True



# Main function
def main():
    """Track people in the input video, detect their accessories and save an annotated copy.

    For each frame: detect people, update the Deep SORT tracker, and for every
    confirmed track updated in this frame crop its box, run the accessory
    models on the crop, update the per-person states and draw the box plus
    one "using"/"removed" label per accessory.

    Reads module globals: person_model, model1, model2, ppe_model, tracker,
    person_states, CONF_THRESHOLDS, HOLD_FRAMES.

    Raises:
        IOError: if the input video cannot be opened.
    """
    video_path = "original_video.mp4"
    output_video_path = "processed_video_stable_06.mp4"

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise IOError(f"Could not open video: {video_path}")

    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # Keep fractional rates such as 29.97 (int() would change the playback
    # speed); 30.0 is an arbitrary fallback when no rate is reported.
    fps = cap.get(cv2.CAP_PROP_FPS) or 30.0

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

    # Build the accessory-model list once instead of once per track, and drop
    # repeated objects so a model bound to two names runs only once per crop.
    models = []
    for candidate in (model1, model2, ppe_model):
        if all(candidate is not kept for kept in models):
            models.append(candidate)

    frame_count = 0
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Detect people (class 0 only).
            # NOTE(review): the person detector is thresholded with the 'mask'
            # confidence; confirm whether a dedicated person threshold was intended.
            results = person_model(frame, conf=CONF_THRESHOLDS['mask'], classes=[0], verbose=False)
            detections = []
            for box in results[0].boxes:
                if person_model.names[int(box.cls[0])] != 'person':
                    continue
                xmin, ymin, xmax, ymax = map(int, box.xyxy[0].tolist())
                conf = float(box.conf[0])
                # deep_sort_realtime expects ([left, top, width, height], confidence, class),
                # not corner coordinates.
                detections.append(([xmin, ymin, xmax - xmin, ymax - ymin], conf, 'person'))

            # Update tracker
            tracks = tracker.update_tracks(detections, frame=frame)

            # Draw on a copy so overlays of one person never end up inside
            # the crop taken for another person in the same frame.
            annotated = frame.copy()
            for track in tracks:
                if not track.is_confirmed() or track.time_since_update > 0:
                    continue

                xmin, ymin, xmax, ymax = map(int, track.to_tlbr())
                person_id = track.track_id

                # Clamp the box to the frame and skip it when nothing is left.
                xmin, ymin = max(0, xmin), max(0, ymin)
                xmax, ymax = min(frame_width, xmax), min(frame_height, ymax)
                if xmax <= xmin or ymax <= ymin:
                    continue

                person_crop = frame[ymin:ymax, xmin:xmax]
                detected_objects = process_accessory_detections(person_crop, models)
                update_accessory_states(person_id, detected_objects, person_states, HOLD_FRAMES)

                # Draw bounding box and accessory labels (stacked upwards from the box top)
                cv2.rectangle(annotated, (xmin, ymin), (xmax, ymax), (255, 0, 0), 2)
                label_y = ymin - 10
                for obj, state_info in person_states[person_id].items():
                    color = (0, 255, 0) if state_info['state'] else (0, 0, 255)
                    label = f"{obj}: {'using' if state_info['state'] else 'removed'}"
                    cv2.putText(annotated, label, (xmin, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
                    label_y -= 15

            out.write(annotated)
            frame_count += 1
            if frame_count % 30 == 0:
                logging.info(f"Processed {frame_count} frames.")
    finally:
        # Release the capture and finalise the output file even on errors.
        cap.release()
        out.release()

    print(f"Processed video saved at {os.path.abspath(output_video_path)}")

if __name__ == "__main__":
    main()


[1;30;43mGörüntülenen çıkış son 5000 satıra kısaltıldı.[0m
Speed: 3.2ms preprocess, 32.9ms inference, 1.6ms postprocess per image at shape (1, 3, 640, 320)

0: 640x320 1 helmet, 1 vest, 22.8ms
Speed: 2.3ms preprocess, 22.8ms inference, 1.6ms postprocess per image at shape (1, 3, 640, 320)

0: 640x320 1 Hardhat, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 30.5ms
Speed: 2.3ms preprocess, 30.5ms inference, 1.6ms postprocess per image at shape (1, 3, 640, 320)

0: 640x384 1 person, 10.9ms
Speed: 2.0ms preprocess, 10.9ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 384)

0: 640x320 1 Hardhat, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 23.5ms
Speed: 3.1ms preprocess, 23.5ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 320)

0: 640x320 1 helmet, 1 vest, 10.5ms
Speed: 9.2ms preprocess, 10.5ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 320)

0: 640x320 1 Hardhat, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 23.4ms
Speed: 3.2ms preprocess, 23.4ms inference, 1.8m

In [None]:
# Load YOLO models
person_model = YOLO('yolov8n.pt').to(device)  # YOLOv8n, used for detecting people
model1 = YOLO('ppe.pt').to(device)            # PPE weights (logged classes include Hardhat, Mask, NO-Mask, NO-Safety Vest)
model2 = YOLO('best-all.pt').to(device)       # second accessory detector
# 'ppe.pt' is already loaded as model1: reuse that object instead of loading
# a second copy of the same weights.
ppe_model = model1

# Initialize Deep SORT tracker
tracker = DeepSort(max_age=15, n_init=3, max_iou_distance=0.5, nn_budget=100)

# Constants
# Per-class minimum confidence; keys are normalised (lower-case, spaces) class names.
CONF_THRESHOLDS = {'mask': 0.5, 'hair net': 0.5, 'hardhat': 0.6, 'safety vest': 0.6}
HOLD_FRAMES = 30    # value passed to update_accessory_states as hold_frames
person_states = {}  # track_id -> per-accessory state dicts

# Normalize class names
def normalize_class_name(name):
    """Return `name` lower-cased, with each underscore and hyphen replaced by a space."""
    separators_to_space = str.maketrans({'_': ' ', '-': ' '})
    return name.translate(separators_to_space).lower()

# Process accessory detections
def process_accessory_detections(person_crop, models):
    """Run every accessory model on a person crop and keep confident detections.

    Args:
        person_crop: An image, or a list/tuple of images, handed to each model.
            Images whose shape contains a zero dimension are skipped because
            inference on an empty crop fails.
        models: Iterable of models; each is called on the crop(s) and must
            expose a `names` mapping from class id to class name.

    Returns:
        dict mapping normalised class name -> highest confidence seen, limited
        to classes listed in CONF_THRESHOLDS whose confidence reaches the
        class threshold. Any other class name is ignored.
    """
    def _is_empty(image):
        # True for array-like images with at least one zero-length dimension.
        return hasattr(image, 'shape') and 0 in image.shape

    if isinstance(person_crop, (list, tuple)):
        person_crop = [image for image in person_crop if not _is_empty(image)]
        if not person_crop:
            return {}
    elif _is_empty(person_crop):
        return {}

    detected_objects = {}
    # Iterate over the models directly: zipping with a fixed list of three
    # names silently dropped any model after the third.
    for model in models:
        # verbose=False stops Ultralytics from printing a log line per call.
        for result in model(person_crop, verbose=False):
            for box in result.boxes:
                cls_name = normalize_class_name(model.names[int(box.cls[0])])
                conf = float(box.conf[0])
                if cls_name in CONF_THRESHOLDS and conf >= CONF_THRESHOLDS[cls_name]:
                    detected_objects[cls_name] = max(detected_objects.get(cls_name, 0), conf)
    return detected_objects

# Update accessory states
def update_accessory_states(person_id, detected_objects, person_states, hold_frames):
    """Refresh the per-accessory flags of one tracked person, in place.

    A person seen for the first time gets an entry in `person_states` with all
    four accessories flagged False and a hold counter of 0. Then, per accessory:
      * present in `detected_objects`: flag set, counter reloaded with `hold_frames`;
      * absent with a positive counter: counter decremented, flag untouched;
      * absent with the counter at zero: flag cleared.
    Finally, a set 'hardhat' flag forces the 'hair net' flag on.

    Returns None; `person_states` is mutated.
    """
    if person_id not in person_states:
        person_states[person_id] = {
            accessory: {'state': False, 'hold_counter': 0}
            for accessory in ('hardhat', 'safety vest', 'mask', 'hair net')
        }
    accessories = person_states[person_id]

    for accessory, info in accessories.items():
        if accessory in detected_objects:
            info['hold_counter'] = hold_frames
            info['state'] = True
            continue
        if info['hold_counter'] > 0:
            info['hold_counter'] -= 1
            continue
        info['state'] = False

    # Logical consistency rule: a hardhat implies a hair net.
    if accessories['hardhat']['state']:
        accessories['hair net']['state'] = True

# Main function
def main():
    """Track people in the input video, detect their accessories and save an annotated copy.

    For each frame: detect people, update the Deep SORT tracker, and for every
    confirmed track updated in this frame crop its box, run the accessory
    models on the crop, update the per-person states and draw the box plus
    one "using"/"removed" label per accessory.

    Reads module globals: person_model, model1, model2, ppe_model, tracker,
    person_states, CONF_THRESHOLDS, HOLD_FRAMES.

    Raises:
        IOError: if the input video cannot be opened.
    """
    video_path = "original_video.mp4"
    output_video_path = "processed_video_stable_05.mp4"

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise IOError(f"Could not open video: {video_path}")

    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # Keep fractional rates such as 29.97 (int() would change the playback
    # speed); 30.0 is an arbitrary fallback when no rate is reported.
    fps = cap.get(cv2.CAP_PROP_FPS) or 30.0

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

    # Build the accessory-model list once instead of once per track, and drop
    # repeated objects so a model bound to two names runs only once per crop.
    models = []
    for candidate in (model1, model2, ppe_model):
        if all(candidate is not kept for kept in models):
            models.append(candidate)

    frame_count = 0
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Detect people (class 0 only).
            # NOTE(review): the person detector is thresholded with the 'mask'
            # confidence; confirm whether a dedicated person threshold was intended.
            results = person_model(frame, conf=CONF_THRESHOLDS['mask'], classes=[0], verbose=False)
            detections = []
            for box in results[0].boxes:
                if person_model.names[int(box.cls[0])] != 'person':
                    continue
                xmin, ymin, xmax, ymax = map(int, box.xyxy[0].tolist())
                conf = float(box.conf[0])
                # deep_sort_realtime expects ([left, top, width, height], confidence, class),
                # not corner coordinates.
                detections.append(([xmin, ymin, xmax - xmin, ymax - ymin], conf, 'person'))

            # Update tracker
            tracks = tracker.update_tracks(detections, frame=frame)

            # Draw on a copy so overlays of one person never end up inside
            # the crop taken for another person in the same frame.
            annotated = frame.copy()
            for track in tracks:
                if not track.is_confirmed() or track.time_since_update > 0:
                    continue

                xmin, ymin, xmax, ymax = map(int, track.to_tlbr())
                person_id = track.track_id

                # Clamp the box to the frame and skip it when nothing is left.
                xmin, ymin = max(0, xmin), max(0, ymin)
                xmax, ymax = min(frame_width, xmax), min(frame_height, ymax)
                if xmax <= xmin or ymax <= ymin:
                    continue

                person_crop = frame[ymin:ymax, xmin:xmax]
                detected_objects = process_accessory_detections(person_crop, models)
                update_accessory_states(person_id, detected_objects, person_states, HOLD_FRAMES)

                # Draw bounding box and accessory labels (stacked upwards from the box top)
                cv2.rectangle(annotated, (xmin, ymin), (xmax, ymax), (255, 0, 0), 2)
                label_y = ymin - 10
                for obj, state_info in person_states[person_id].items():
                    color = (0, 255, 0) if state_info['state'] else (0, 0, 255)
                    label = f"{obj}: {'using' if state_info['state'] else 'removed'}"
                    cv2.putText(annotated, label, (xmin, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
                    label_y -= 15

            out.write(annotated)
            frame_count += 1
            if frame_count % 30 == 0:
                logging.info(f"Processed {frame_count} frames.")
    finally:
        # Release the capture and finalise the output file even on errors.
        cap.release()
        out.release()

    print(f"Processed video saved at {os.path.abspath(output_video_path)}")

if __name__ == "__main__":
    main()


[1;30;43mGörüntülenen çıkış son 5000 satıra kısaltıldı.[0m
Speed: 2.3ms preprocess, 41.4ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 320)

0: 640x320 1 helmet, 1 vest, 18.6ms
Speed: 4.1ms preprocess, 18.6ms inference, 6.4ms postprocess per image at shape (1, 3, 640, 320)

0: 640x320 1 Hardhat, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 30.7ms
Speed: 2.1ms preprocess, 30.7ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 320)

0: 640x384 1 person, 11.9ms
Speed: 2.2ms preprocess, 11.9ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 384)

0: 640x320 1 Hardhat, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 34.3ms
Speed: 4.5ms preprocess, 34.3ms inference, 1.6ms postprocess per image at shape (1, 3, 640, 320)

0: 640x320 1 helmet, 1 vest, 22.6ms
Speed: 2.1ms preprocess, 22.6ms inference, 3.4ms postprocess per image at shape (1, 3, 640, 320)

0: 640x320 1 Hardhat, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 28.3ms
Speed: 2.2ms preprocess, 28.3ms inference, 1.5m

In [None]:
# Load YOLO models
person_model = YOLO('yolov8n.pt').to(device)  # YOLOv8n, used for detecting people
model1 = YOLO('ppe.pt').to(device)            # PPE weights (logged classes include Hardhat, Mask, NO-Mask, NO-Safety Vest)
model2 = YOLO('best-all.pt').to(device)       # second accessory detector
# 'ppe.pt' is already loaded as model1: reuse that object instead of loading
# a second copy of the same weights.
ppe_model = model1

# Initialize Deep SORT tracker
tracker = DeepSort(max_age=15, n_init=3, max_iou_distance=0.7, nn_budget=100)

# Constants
# Per-class minimum confidence; keys are normalised (lower-case, spaces) class names.
CONF_THRESHOLDS = {'mask': 0.5, 'hair net': 0.5, 'hardhat': 0.6, 'safety vest': 0.6}
HOLD_FRAMES = 30    # value passed to update_accessory_states as hold_frames
person_states = {}  # track_id -> per-accessory state dicts

# Normalize class names
def normalize_class_name(name):
    """Return `name` lower-cased, with each underscore and hyphen replaced by a space."""
    separators_to_space = str.maketrans({'_': ' ', '-': ' '})
    return name.translate(separators_to_space).lower()

# Process accessory detections
def process_accessory_detections(person_crop, models):
    """Run every accessory model on a person crop and keep confident detections.

    Args:
        person_crop: An image, or a list/tuple of images, handed to each model.
            Images whose shape contains a zero dimension are skipped because
            inference on an empty crop fails.
        models: Iterable of models; each is called on the crop(s) and must
            expose a `names` mapping from class id to class name.

    Returns:
        dict mapping normalised class name -> highest confidence seen, limited
        to classes listed in CONF_THRESHOLDS whose confidence reaches the
        class threshold. Any other class name is ignored.
    """
    def _is_empty(image):
        # True for array-like images with at least one zero-length dimension.
        return hasattr(image, 'shape') and 0 in image.shape

    if isinstance(person_crop, (list, tuple)):
        person_crop = [image for image in person_crop if not _is_empty(image)]
        if not person_crop:
            return {}
    elif _is_empty(person_crop):
        return {}

    detected_objects = {}
    # Iterate over the models directly: zipping with a fixed list of three
    # names silently dropped any model after the third.
    for model in models:
        # verbose=False stops Ultralytics from printing a log line per call.
        for result in model(person_crop, verbose=False):
            for box in result.boxes:
                cls_name = normalize_class_name(model.names[int(box.cls[0])])
                conf = float(box.conf[0])
                if cls_name in CONF_THRESHOLDS and conf >= CONF_THRESHOLDS[cls_name]:
                    detected_objects[cls_name] = max(detected_objects.get(cls_name, 0), conf)
    return detected_objects

# Update accessory states
def update_accessory_states(person_id, detected_objects, person_states, hold_frames):
    """Refresh the per-accessory flags of one tracked person, in place.

    A person seen for the first time gets an entry in `person_states` with all
    four accessories flagged False and a hold counter of 0. Then, per accessory:
      * present in `detected_objects`: flag set, counter reloaded with `hold_frames`;
      * absent with a positive counter: counter decremented, flag untouched;
      * absent with the counter at zero: flag cleared.
    Finally, a set 'hardhat' flag forces the 'hair net' flag on.

    Returns None; `person_states` is mutated.
    """
    if person_id not in person_states:
        person_states[person_id] = {
            accessory: {'state': False, 'hold_counter': 0}
            for accessory in ('hardhat', 'safety vest', 'mask', 'hair net')
        }
    accessories = person_states[person_id]

    for accessory, info in accessories.items():
        if accessory in detected_objects:
            info['hold_counter'] = hold_frames
            info['state'] = True
            continue
        if info['hold_counter'] > 0:
            info['hold_counter'] -= 1
            continue
        info['state'] = False

    # Logical consistency rule: a hardhat implies a hair net.
    if accessories['hardhat']['state']:
        accessories['hair net']['state'] = True

# Main function
def main():
    """Track people in the input video and annotate their PPE status.

    Reads "original_video.mp4", detects people with ``person_model``, tracks
    them with ``tracker``, runs the accessory models on each confirmed
    track's crop, and writes the annotated frames to
    "processed_video_stable.mp4".

    Raises:
        IOError: if the input video cannot be opened.
    """
    video_path = "original_video.mp4"
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        raise IOError(f"Could not open video: {video_path}")

    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # Keep fractional rates (e.g. 29.97) instead of truncating them, and fall
    # back to 30 when the container reports no frame rate (0.0).
    fps = cap.get(cv2.CAP_PROP_FPS) or 30.0

    output_video_path = "processed_video_stable.mp4"
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

    models = [model1, model2, ppe_model]
    frame_count = 0
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Detect people
            # NOTE(review): the 'mask' threshold is reused as the person
            # confidence here — confirm this is intended.
            results = person_model(frame, conf=CONF_THRESHOLDS['mask'], classes=[0])
            detections = []
            for box in results[0].boxes:
                cls_id = int(box.cls[0])
                if person_model.names[cls_id] == 'person':
                    xmin, ymin, xmax, ymax = map(int, box.xyxy[0].tolist())
                    conf = float(box.conf[0])
                    # deep_sort_realtime expects ([left, top, width, height],
                    # confidence, class), not corner coordinates.
                    detections.append(([xmin, ymin, xmax - xmin, ymax - ymin], conf, 'person'))

            # Update tracker
            tracks = tracker.update_tracks(detections, frame=frame)

            # Draw on a copy so annotations of one person never end up inside
            # the crop that is analysed for another person.
            annotated = frame.copy()

            for track in tracks:
                if not track.is_confirmed() or track.time_since_update > 0:
                    continue

                xmin, ymin, xmax, ymax = map(int, track.to_tlbr())
                person_id = track.track_id

                # Clamp the track box to the frame.
                xmin, ymin = max(0, xmin), max(0, ymin)
                xmax, ymax = min(frame_width, xmax), min(frame_height, ymax)
                if xmax <= xmin or ymax <= ymin:
                    # Degenerate box after clamping: nothing to analyse.
                    continue

                person_crop = frame[ymin:ymax, xmin:xmax]
                detected_objects = process_accessory_detections(person_crop, models)
                update_accessory_states(person_id, detected_objects, person_states, HOLD_FRAMES)

                cv2.rectangle(annotated, (xmin, ymin), (xmax, ymax), (255, 0, 0), 2)
                label_y = ymin - 10
                for obj, state_info in person_states[person_id].items():
                    color = (0, 255, 0) if state_info['state'] else (0, 0, 255)
                    label = f"{obj}: {'using' if state_info['state'] else 'removed'}"
                    cv2.putText(annotated, label, (xmin, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
                    label_y -= 15

            out.write(annotated)
            frame_count += 1
            if frame_count % 30 == 0:
                logging.info(f"Processed {frame_count} frames.")
    finally:
        # Release the capture and finalize the output file even on errors.
        cap.release()
        out.release()

    print(f"Processed video saved at {os.path.abspath(output_video_path)}")

# Entry point: run the pipeline when this cell/script is executed directly.
if __name__ == "__main__":
    main()


[1;30;43mGörüntülenen çıkış son 5000 satıra kısaltıldı.[0m
Speed: 2.4ms preprocess, 24.8ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 320)

0: 640x320 1 helmet, 1 vest, 9.4ms
Speed: 2.6ms preprocess, 9.4ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 320)

0: 640x320 1 Hardhat, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 23.4ms
Speed: 2.3ms preprocess, 23.4ms inference, 1.6ms postprocess per image at shape (1, 3, 640, 320)

0: 640x384 1 person, 17.7ms
Speed: 2.1ms preprocess, 17.7ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 384)

0: 640x320 1 Hardhat, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 25.0ms
Speed: 2.2ms preprocess, 25.0ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 320)

0: 640x320 1 helmet, 1 vest, 14.9ms
Speed: 2.6ms preprocess, 14.9ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 320)

0: 640x320 1 Hardhat, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 23.4ms
Speed: 2.5ms preprocess, 23.4ms inference, 5.7ms 

In [None]:
# Print each model's class-id -> class-name mapping to see which labels it emits.
# NOTE(review): model3 and model4 are not defined in the neighbouring cells;
# this cell presumably relies on earlier kernel state — verify before re-running.
print(model1.names)
print(model2.names)
print(model3.names)
print(model4.names)

{0: 'human', 1: 'helmet', 2: 'vest', 3: 'hair-net', 4: 'mask'}
{0: 'Helmet', 1: 'Mask', 2: 'Safety Vest'}
{0: 'Hardhat', 1: 'Mask', 2: 'NO-Hardhat', 3: 'NO-Mask', 4: 'NO-Safety Vest', 5: 'Person', 6: 'Safety Cone', 7: 'Safety Vest', 8: 'machinery', 9: 'vehicle'}
{0: 'safety vest'}


In [None]:
# Load YOLO models
# NOTE(review): model1 and ppe_model both load 'ppe.pt', so the same weights are
# run twice on every crop — confirm whether model1 should use a dedicated
# mask checkpoint.
person_model = YOLO('yolov8n.pt').to(device)  # YOLOv8 for detecting people
model1 = YOLO('ppe.pt').to(device)           # Intended for mask detection
model2 = YOLO('best-all.pt').to(device)      # Intended for hairnet detection
ppe_model = YOLO('ppe.pt').to(device)        # Intended for hardhat and safety vest detection

# Initialize Deep SORT tracker
tracker = DeepSort(max_age=15, n_init=3, max_iou_distance=0.7, nn_budget=100)

# Constants
CONF_THRESHOLD = 0.6  # Confidence passed to the person detector in main()
HOLD_FRAMES = 30  # Value the hold counter is reset to on each detection
person_states = {}  # track id -> per-accessory state dict

# Normalize class names
def normalize_class_name(name):
    """Lowercase ``name`` and turn underscores and hyphens into spaces."""
    spaced = name
    for separator in ('_', '-'):
        spaced = spaced.replace(separator, ' ')
    return spaced.lower()

# Process accessory detections
def process_accessory_detections(person_crop, models):
    """Return the set of tracked accessory names found in ``person_crop``.

    Args:
        person_crop: Input handed unchanged to each of the three models.
        models: Sequence whose first three entries are the mask, hairnet and
            PPE models, in that order; each exposes ``names``.

    Returns:
        set of normalized class names, restricted to 'mask', 'hair net',
        'hardhat' and 'safety vest'.
    """
    tracked_names = ('mask', 'hair net', 'hardhat', 'safety vest')

    # Run all three models first (mask, hairnet, PPE), then scan their results.
    outputs = [models[index](person_crop) for index in range(3)]

    normalized_labels = (
        normalize_class_name(model.names[int(box.cls[0])])
        for model, results in zip(models, outputs)
        for result in results
        for box in result.boxes
    )
    return {label for label in normalized_labels if label in tracked_names}

# Update accessory states
def update_accessory_states(person_id, detected_objects, person_states, hold_frames):
    """Update, in place, the held accessory states of one tracked person.

    Args:
        person_id: Key of the person inside ``person_states``.
        detected_objects: Container of accessory names seen in this frame.
        person_states: dict of person id -> {accessory: {'state', 'hold_counter'}};
            an entry is created the first time a person id is seen.
        hold_frames: Hold-counter value assigned whenever an accessory is detected.
    """
    if person_id not in person_states:
        # First sighting: every accessory starts as absent with no grace period.
        person_states[person_id] = {
            accessory: {'state': False, 'hold_counter': 0}
            for accessory in ('hardhat', 'safety vest', 'mask', 'hair net')
        }
    current = person_states[person_id]

    for accessory in current:
        info = current[accessory]
        if accessory in detected_objects:
            info['state'], info['hold_counter'] = True, hold_frames
            continue
        if info['hold_counter'] > 0:
            # Still within the grace period: keep the state, burn one frame.
            info['hold_counter'] -= 1
        else:
            info['state'] = False

    # Logical consistency rule: an active hardhat forces the hair net flag on.
    if current['hardhat']['state']:
        current['hair net']['state'] = True

# Main function
def main():
    """Track people in the input video and annotate their PPE status.

    Reads "original_video.mp4", detects people with ``person_model``, tracks
    them with ``tracker``, runs the accessory models on each confirmed
    track's crop, and writes the annotated frames to "processed_video.mp4".

    Raises:
        IOError: if the input video cannot be opened.
    """
    video_path = "original_video.mp4"
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        raise IOError(f"Could not open video: {video_path}")

    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # Keep fractional rates (e.g. 29.97) instead of truncating them, and fall
    # back to 30 when the container reports no frame rate (0.0).
    fps = cap.get(cv2.CAP_PROP_FPS) or 30.0

    # Output video configuration
    output_video_path = "processed_video.mp4"
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

    models = [model1, model2, ppe_model]
    frame_count = 0
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Detect people with YOLO
            results = person_model(frame, conf=CONF_THRESHOLD, classes=[0])
            detections = []
            for box in results[0].boxes:
                cls_id = int(box.cls[0])
                if person_model.names[cls_id] == 'person':
                    xmin, ymin, xmax, ymax = map(int, box.xyxy[0].tolist())
                    conf = float(box.conf[0])
                    # deep_sort_realtime expects ([left, top, width, height],
                    # confidence, class), not corner coordinates.
                    detections.append(([xmin, ymin, xmax - xmin, ymax - ymin], conf, 'person'))

            # Update tracker
            tracks = tracker.update_tracks(detections, frame=frame)

            # Draw on a copy so annotations of one person never end up inside
            # the crop that is analysed for another person.
            annotated = frame.copy()

            # Process each track
            for track in tracks:
                if not track.is_confirmed() or track.time_since_update > 0:
                    continue

                # Get bounding box and track ID
                xmin, ymin, xmax, ymax = map(int, track.to_tlbr())
                person_id = track.track_id

                # Ensure bounding box is within frame dimensions
                xmin, ymin = max(0, xmin), max(0, ymin)
                xmax, ymax = min(frame_width, xmax), min(frame_height, ymax)
                if xmax <= xmin or ymax <= ymin:
                    # Degenerate box after clamping: nothing to analyse.
                    continue

                # Crop the person from the clean (un-annotated) frame
                person_crop = frame[ymin:ymax, xmin:xmax]

                # Detect accessories within the bounding box
                detected_objects = process_accessory_detections([person_crop], models)

                # Update accessory states
                update_accessory_states(person_id, detected_objects, person_states, HOLD_FRAMES)

                # Draw bounding box and accessory labels
                cv2.rectangle(annotated, (xmin, ymin), (xmax, ymax), (255, 0, 0), 2)
                label_y = ymin - 10
                for obj, state_info in person_states[person_id].items():
                    color = (0, 255, 0) if state_info['state'] else (0, 0, 255)
                    label = f"{obj}: {'using' if state_info['state'] else 'removed'}"
                    cv2.putText(annotated, label, (xmin, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
                    label_y -= 15

            # Write frame to output video
            out.write(annotated)
            frame_count += 1
            if frame_count % 30 == 0:
                logging.info(f"Processed {frame_count} frames.")
    finally:
        # Release the capture and finalize the output file even on errors.
        cap.release()
        out.release()

    print(f"Processed video saved at {os.path.abspath(output_video_path)}")

# Entry point: run the pipeline when this cell/script is executed directly.
if __name__ == "__main__":
    main()

[1;30;43mGörüntülenen çıkış son 5000 satıra kısaltıldı.[0m
Speed: 2.4ms preprocess, 32.4ms inference, 1.6ms postprocess per image at shape (1, 3, 640, 320)

0: 640x320 1 helmet, 1 vest, 12.7ms
Speed: 2.2ms preprocess, 12.7ms inference, 4.9ms postprocess per image at shape (1, 3, 640, 320)

0: 640x320 1 Hardhat, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 32.4ms
Speed: 2.2ms preprocess, 32.4ms inference, 1.6ms postprocess per image at shape (1, 3, 640, 320)

0: 640x384 1 person, 15.9ms
Speed: 4.8ms preprocess, 15.9ms inference, 4.1ms postprocess per image at shape (1, 3, 640, 384)

0: 640x320 1 Hardhat, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 32.3ms
Speed: 2.9ms preprocess, 32.3ms inference, 1.8ms postprocess per image at shape (1, 3, 640, 320)

0: 640x320 1 helmet, 1 vest, 22.1ms
Speed: 2.4ms preprocess, 22.1ms inference, 1.8ms postprocess per image at shape (1, 3, 640, 320)

0: 640x320 1 Hardhat, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 41.7ms
Speed: 2.5ms preprocess, 41.7ms inference, 1.7m

In [None]:
# Load YOLO models
# NOTE(review): model1 and ppe_model both load 'ppe.pt', so the same weights are
# run twice on every crop — confirm whether model1 should use a dedicated
# mask checkpoint.
person_model = YOLO('yolov8n.pt').to(device)  # YOLOv8 for detecting people
model1 = YOLO('ppe.pt').to(device)      # Intended for mask detection
model2 = YOLO('best-all.pt').to(device)   # Intended for hairnet detection
ppe_model = YOLO('ppe.pt').to(device)         # Intended for hardhat and safety vest detection

# Initialize Deep SORT tracker
tracker = DeepSort(max_age=15, n_init=3, max_iou_distance=0.7, nn_budget=100)

# Constants
CONF_THRESHOLD = 0.6  # Confidence passed to the person detector in main()
HOLD_FRAMES = 30  # Keep a detection valid for 30 frames (about 1 second at 30 FPS)

# Accessory states for each person (track id -> per-accessory state dict)
person_states = {}

# Normalize class names
def normalize_class_name(name):
    """Return ``name`` in lowercase with '_' and '-' each replaced by one space."""
    pieces = name.replace('-', '_').split('_')
    return ' '.join(pieces).lower()

# Process accessory detections
def process_accessory_detections(person_crop, models):
    """Collect which tracked accessories the three models report for a crop.

    Args:
        person_crop: Input handed unchanged to each of the three models.
        models: Sequence whose first three entries are the mask, hairnet and
            PPE models, in that order; each exposes ``names``.

    Returns:
        set of normalized class names, restricted to 'mask', 'hair net',
        'hardhat' and 'safety vest'.
    """
    tracked_names = ['mask', 'hair net', 'hardhat', 'safety vest']
    found = set()

    # All three inferences run before any result is inspected.
    mask_results = models[0](person_crop)
    hairnet_results = models[1](person_crop)
    ppe_results = models[2](person_crop)

    paired = zip(models, (mask_results, hairnet_results, ppe_results))
    for model, results in paired:
        for result in results:
            labels = [normalize_class_name(model.names[int(box.cls[0])]) for box in result.boxes]
            found.update(label for label in labels if label in tracked_names)
    return found

# Update accessory states
def update_accessory_states(person_id, detected_objects, person_states, hold_frames):
    """Update, in place, the held accessory states of one tracked person.

    Args:
        person_id: Key of the person inside ``person_states``.
        detected_objects: Container of accessory names seen in this frame.
        person_states: dict of person id -> {accessory: {'state', 'hold_counter'}};
            an entry is created the first time a person id is seen.
        hold_frames: Hold-counter value assigned whenever an accessory is detected.
    """
    accessories = person_states.setdefault(person_id, {
        accessory: {'state': False, 'hold_counter': 0}
        for accessory in ('hardhat', 'safety vest', 'mask', 'hair net')
    })

    for accessory, info in accessories.items():
        if accessory in detected_objects:
            # Detected: mark as worn and restart the grace period.
            info['state'] = True
            info['hold_counter'] = hold_frames
        elif info['hold_counter'] > 0:
            # Not detected, grace period still running: state is left as is.
            info['hold_counter'] -= 1
        else:
            # Grace period exhausted.
            info['state'] = False

# Main function
def main():
    """Track people in the input video and annotate their PPE status.

    Reads "original_video.mp4", detects people with ``person_model``, tracks
    them with ``tracker``, runs the accessory models on each confirmed
    track's crop, and writes the annotated frames to "processed_video.mp4".

    Raises:
        IOError: if the input video cannot be opened.
    """
    video_path = "original_video.mp4"
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        raise IOError(f"Could not open video: {video_path}")

    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # Keep fractional rates (e.g. 29.97) instead of truncating them, and fall
    # back to 30 when the container reports no frame rate (0.0).
    fps = cap.get(cv2.CAP_PROP_FPS) or 30.0

    # Output video configuration
    output_video_path = "processed_video.mp4"
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

    models = [model1, model2, ppe_model]
    frame_count = 0
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Detect people with YOLO
            results = person_model(frame, conf=CONF_THRESHOLD, classes=[0])
            detections = []
            for box in results[0].boxes:
                cls_id = int(box.cls[0])
                if person_model.names[cls_id] == 'person':
                    xmin, ymin, xmax, ymax = map(int, box.xyxy[0].tolist())
                    conf = float(box.conf[0])
                    # deep_sort_realtime expects ([left, top, width, height],
                    # confidence, class), not corner coordinates.
                    detections.append(([xmin, ymin, xmax - xmin, ymax - ymin], conf, 'person'))

            # Update tracker
            tracks = tracker.update_tracks(detections, frame=frame)

            # Draw on a copy so annotations of one person never end up inside
            # the crop that is analysed for another person.
            annotated = frame.copy()

            # Process each track
            for track in tracks:
                if not track.is_confirmed() or track.time_since_update > 0:
                    continue

                # Get bounding box and track ID
                xmin, ymin, xmax, ymax = map(int, track.to_tlbr())
                person_id = track.track_id

                # Ensure bounding box is within frame dimensions
                xmin, ymin = max(0, xmin), max(0, ymin)
                xmax, ymax = min(frame_width, xmax), min(frame_height, ymax)
                if xmax <= xmin or ymax <= ymin:
                    # Degenerate box after clamping: nothing to analyse.
                    continue

                # Crop the person from the clean (un-annotated) frame
                person_crop = frame[ymin:ymax, xmin:xmax]

                # Detect accessories within the person's bounding box
                detected_objects = process_accessory_detections([person_crop], models)

                # Update accessory states
                update_accessory_states(person_id, detected_objects, person_states, HOLD_FRAMES)

                # Draw bounding box and accessory labels
                cv2.rectangle(annotated, (xmin, ymin), (xmax, ymax), (255, 0, 0), 2)
                label_y = ymin - 10
                for obj, state_info in person_states[person_id].items():
                    color = (0, 255, 0) if state_info['state'] else (0, 0, 255)
                    label = f"{obj}: {'using' if state_info['state'] else 'removed'}"
                    cv2.putText(annotated, label, (xmin, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
                    label_y -= 15

            # Write frame to output video
            out.write(annotated)
            frame_count += 1
            if frame_count % 30 == 0:
                logging.info(f"Processed {frame_count} frames.")
    finally:
        # Release the capture and finalize the output file even on errors.
        cap.release()
        out.release()

    print(f"Processed video saved at {os.path.abspath(output_video_path)}")

# Entry point: run the pipeline when this cell/script is executed directly.
if __name__ == "__main__":
    main()


[1;30;43mGörüntülenen çıkış son 5000 satıra kısaltıldı.[0m
Speed: 2.5ms preprocess, 23.7ms inference, 1.6ms postprocess per image at shape (1, 3, 640, 320)

0: 640x320 1 helmet, 1 vest, 10.1ms
Speed: 3.5ms preprocess, 10.1ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 320)

0: 640x320 1 Hardhat, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 24.5ms
Speed: 2.2ms preprocess, 24.5ms inference, 5.5ms postprocess per image at shape (1, 3, 640, 320)

0: 640x384 1 person, 9.8ms
Speed: 2.2ms preprocess, 9.8ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 384)

0: 640x320 1 Hardhat, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 27.1ms
Speed: 3.7ms preprocess, 27.1ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 320)

0: 640x320 1 helmet, 1 vest, 14.2ms
Speed: 3.7ms preprocess, 14.2ms inference, 2.7ms postprocess per image at shape (1, 3, 640, 320)

0: 640x320 1 Hardhat, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 22.6ms
Speed: 2.3ms preprocess, 22.6ms inference, 1.5ms 

In [None]:
from deep_sort_realtime.deepsort_tracker import DeepSort
import os
import cv2
import torch
import gdown
import warnings
import logging
from IPython.display import HTML
from ultralytics import YOLO

# Do not show warnings (this silences every Python warning for the whole session).
warnings.filterwarnings("ignore")

# Emit INFO-level log records (used for the progress messages below).
logging.basicConfig(level=logging.INFO)

# Run the models on the GPU when CUDA is available, otherwise on the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
logging.info(f"Using device: {device}")

# download files from Google Drive
def download_file(url, output_path):
    """Download ``url`` to ``output_path`` using gdown.

    Args:
        url: Download URL (the callers pass Google Drive ``uc?id=...`` links).
        output_path: Local path the file is written to.

    Returns:
        The value returned by ``gdown.download`` (the output path on success).

    Raises:
        Exception: any error raised by gdown is logged and re-raised.
        RuntimeError: if gdown signals failure by returning ``None``.
    """
    try:
        downloaded = gdown.download(url, output_path, quiet=False)
    except Exception as e:
        logging.error(f"Error downloading {url}: {e}")
        raise

    # gdown can report a failed download by returning None instead of raising;
    # treat that as an error so later cells do not run with a missing file.
    if downloaded is None:
        logging.error(f"Error downloading {url}: no file was written to {output_path}")
        raise RuntimeError(f"Download failed for {url}")
    return downloaded

# Input video (downloaded from Google Drive into the working directory)
video_url = "https://drive.google.com/uc?id=130btvPHooNMgRY6AWBkhullJeHViJJg1"
video_path = "original_video.mp4"
download_file(video_url, video_path)

# Model weights; the trailing comment names the local file each URL is saved to
weights_1_url = "https://drive.google.com/uc?id=1VVRD5WrZ3CV2EOPwXjv9ZyY53rdHqwHb"  # hairnet_best.pt
weights_2_url = "https://drive.google.com/uc?id=1Z3-XuG6ln-slgioy3RRFVgrqx1XO5YDB"  # mask_best.pt
weights_3_url = "https://drive.google.com/uc?id=1o9pJQHl2vRV2MStwNjlcMZJ7qarxYhmg"  # ppe.pt (hardhat / safety vest model)

download_file(weights_1_url, "hairnet_best.pt")
download_file(weights_2_url, "mask_best.pt")
download_file(weights_3_url, "ppe.pt")
# Load YOLO models
person_model = YOLO('yolov8n.pt').to(device)  # YOLOv8 for detecting people
model1 = YOLO('mask_best.pt').to(device)      # Detects masks
model2 = YOLO('hairnet_best.pt').to(device)   # Detects hairnets
ppe_model = YOLO('ppe.pt').to(device)         # Detects hardhat and safety vest

# Initialize Deep SORT tracker
tracker = DeepSort(max_age=30, n_init=3, max_iou_distance=0.7, max_cosine_distance=0.4, nn_budget=100)

# Constants
CONF_THRESHOLD = 0.6  # Confidence threshold passed to the person detector
HOLD_FRAMES = 90      # Hold a detection for 90 frames (about 3 seconds at 30 FPS)

# Initialize accessory states for tracking (track id -> per-accessory state dict)
person_states = {}

# Normalize class names
def normalize_class_name(name):
    """Lowercase ``name`` after mapping each '_' and '-' to a single space."""
    separator_map = {ord('_'): ' ', ord('-'): ' '}
    return name.translate(separator_map).lower()

# Process accessory detections within person bounding boxes
def process_accessory_detections(person_crop, models):
    """Detect accessories in a person crop, one responsibility per model.

    Args:
        person_crop: Input handed unchanged to each model.
        models: Sequence of three models: index 0 contributes only 'mask',
            index 1 only 'hair net', index 2 'hardhat' and 'safety vest'.

    Returns:
        set of the normalized accessory names that were detected.
    """
    # Accepted (normalized) labels for the model at the same position.
    accepted_by_position = (
        ('mask',),                    # Model1: mask detection
        ('hair net',),                # Model2: hairnet detection
        ('hardhat', 'safety vest'),   # PPE model: hardhat and safety vest
    )

    found = set()
    for position, accepted in enumerate(accepted_by_position):
        model = models[position]
        for result in model(person_crop):
            for box in result.boxes:
                label = normalize_class_name(model.names[int(box.cls[0])])
                if label in accepted:
                    found.add(label)
    return found

# Update accessory states with temporal smoothing
def update_accessory_states(person_id, detected_objects, person_states, hold_frames):
    """Apply temporal smoothing to one person's accessory flags, in place.

    Args:
        person_id: Key of the person inside ``person_states``.
        detected_objects: Container of accessory names seen in this frame.
        person_states: dict of person id -> {accessory: {'state', 'hold_counter'}};
            an entry is created the first time a person id is seen.
        hold_frames: Hold-counter value assigned whenever an accessory is detected.
    """
    if person_id not in person_states:
        # New person: all accessories start as absent.
        person_states[person_id] = {
            accessory: {'state': False, 'hold_counter': 0}
            for accessory in ('hardhat', 'safety vest', 'mask', 'hair net')
        }
    tracked = person_states[person_id]

    for accessory in tracked:
        info = tracked[accessory]
        if accessory in detected_objects:
            # Detected: switch on and restart the hold period.
            info['state'], info['hold_counter'] = True, hold_frames
            continue
        if info['hold_counter'] > 0:
            # Missed this frame but the hold period has not expired yet.
            info['hold_counter'] -= 1
        else:
            info['state'] = False

# Main function
def main():
    """Track people in the input video and annotate their PPE status.

    Reads "original_video.mp4", detects people with ``person_model``, tracks
    them with ``tracker``, runs the accessory models on each confirmed
    track's crop, and writes the annotated frames to
    "detected_video_deepsort_90fps.mp4".

    Raises:
        IOError: if the input video cannot be opened.
    """
    video_path = "original_video.mp4"
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        raise IOError(f"Could not open video: {video_path}")

    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # Keep fractional rates (e.g. 29.97) instead of truncating them, and fall
    # back to 30 when the container reports no frame rate (0.0).
    fps = cap.get(cv2.CAP_PROP_FPS) or 30.0

    # Output video configuration
    output_video_path = "detected_video_deepsort_90fps.mp4"
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

    models = [model1, model2, ppe_model]
    frame_count = 0
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Detect people with YOLO
            results = person_model(frame, conf=CONF_THRESHOLD, classes=[0])  # Only detect 'person'
            detections = []
            for box in results[0].boxes:
                cls_id = int(box.cls[0])
                if person_model.names[cls_id] == 'person':
                    xmin, ymin, xmax, ymax = map(int, box.xyxy[0].tolist())
                    conf = float(box.conf[0])
                    # deep_sort_realtime expects ([left, top, width, height],
                    # confidence, class), not corner coordinates.
                    detections.append(([xmin, ymin, xmax - xmin, ymax - ymin], conf, 'person'))

            # Update tracker
            tracks = tracker.update_tracks(detections, frame=frame)

            # Draw on a copy so annotations of one person never end up inside
            # the crop that is analysed for another person.
            annotated = frame.copy()

            # Process each track
            for track in tracks:
                if not track.is_confirmed() or track.time_since_update > 0:
                    continue

                # Get bounding box and track ID
                xmin, ymin, xmax, ymax = map(int, track.to_tlbr())
                person_id = track.track_id

                # Clamp to the frame: negative coordinates would otherwise be
                # interpreted as from-the-end indices when slicing.
                xmin, ymin = max(0, xmin), max(0, ymin)
                xmax, ymax = min(frame_width, xmax), min(frame_height, ymax)
                if xmax <= xmin or ymax <= ymin:
                    # Degenerate box after clamping: nothing to analyse.
                    continue

                # Crop the person from the clean (un-annotated) frame
                person_crop = frame[ymin:ymax, xmin:xmax]

                # Detect accessories within the person's bounding box
                detected_objects = process_accessory_detections([person_crop], models)

                # Update accessory states
                update_accessory_states(person_id, detected_objects, person_states, HOLD_FRAMES)

                # Draw bounding box and accessory labels
                cv2.rectangle(annotated, (xmin, ymin), (xmax, ymax), (255, 0, 0), 2)  # Blue for person
                label_y = ymin - 10
                for obj, state_info in person_states[person_id].items():
                    color = (0, 255, 0) if state_info['state'] else (0, 0, 255)  # Green if present, Red otherwise
                    label = f"{obj}: {'using' if state_info['state'] else 'removed'}"
                    cv2.putText(annotated, label, (xmin, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
                    label_y -= 15

            # Write frame to output video
            out.write(annotated)
            frame_count += 1
            if frame_count % 30 == 0:
                logging.info(f"Processed {frame_count} frames.")
    finally:
        # Release the capture and finalize the output file even on errors.
        cap.release()
        out.release()

    print(f"Processed video saved at {os.path.abspath(output_video_path)}")

# Entry point: run the pipeline when this cell/script is executed directly.
if __name__ == "__main__":
    main()


Downloading...
From: https://drive.google.com/uc?id=130btvPHooNMgRY6AWBkhullJeHViJJg1
To: /content/original_video.mp4
100%|██████████| 31.3M/31.3M [00:00<00:00, 102MB/s] 
Downloading...
From: https://drive.google.com/uc?id=1VVRD5WrZ3CV2EOPwXjv9ZyY53rdHqwHb
To: /content/hairnet_best.pt
100%|██████████| 6.25M/6.25M [00:00<00:00, 31.1MB/s]
Downloading...
From: https://drive.google.com/uc?id=1Z3-XuG6ln-slgioy3RRFVgrqx1XO5YDB
To: /content/mask_best.pt
100%|██████████| 6.24M/6.24M [00:00<00:00, 38.9MB/s]
Downloading...
From (original): https://drive.google.com/uc?id=1o9pJQHl2vRV2MStwNjlcMZJ7qarxYhmg
From (redirected): https://drive.google.com/uc?id=1o9pJQHl2vRV2MStwNjlcMZJ7qarxYhmg&confirm=t&uuid=3a37efbc-ef8e-419e-89cb-1da42b6e78ea
To: /content/ppe.pt
100%|██████████| 87.6M/87.6M [00:01<00:00, 79.7MB/s]


[1;30;43mGörüntülenen çıkış son 5000 satıra kısaltıldı.[0m
Speed: 2.2ms preprocess, 14.1ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 320)

0: 640x320 1 helmet, 1 vest, 9.3ms
Speed: 2.3ms preprocess, 9.3ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 320)

0: 640x320 1 Hardhat, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 34.1ms
Speed: 2.4ms preprocess, 34.1ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 320)

0: 640x384 1 person, 15.4ms
Speed: 1.9ms preprocess, 15.4ms inference, 2.7ms postprocess per image at shape (1, 3, 640, 384)

0: 640x320 1 Helmet, 15.2ms
Speed: 2.7ms preprocess, 15.2ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 320)

0: 640x320 1 helmet, 1 vest, 16.9ms
Speed: 2.2ms preprocess, 16.9ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 320)

0: 640x320 1 Hardhat, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 36.1ms
Speed: 2.9ms preprocess, 36.1ms inference, 1.4ms postprocess per image at shape (1, 3, 64

In [None]:
!pip install deep-sort-realtime
!git clone https://github.com/ZQPei/deep_sort_pytorch.git
!pip install ultralytics

Collecting deep-sort-realtime
  Downloading deep_sort_realtime-1.3.2-py3-none-any.whl.metadata (12 kB)
Downloading deep_sort_realtime-1.3.2-py3-none-any.whl (8.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.4/8.4 MB[0m [31m35.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: deep-sort-realtime
Successfully installed deep-sort-realtime-1.3.2
Cloning into 'deep_sort_pytorch'...
remote: Enumerating objects: 1078, done.[K
remote: Counting objects: 100% (352/352), done.[K
remote: Compressing objects: 100% (258/258), done.[K
remote: Total 1078 (delta 155), reused 261 (delta 90), pack-reused 726 (from 1)[K
Receiving objects: 100% (1078/1078), 29.15 MiB | 15.75 MiB/s, done.
Resolving deltas: 100% (506/506), done.


In [None]:
from deep_sort_realtime.deepsort_tracker import DeepSort
import os
import cv2
import torch
import gdown
import warnings
import logging
from IPython.display import HTML
from ultralytics import YOLO

# Do not show warnings (this silences every Python warning for the whole session).
warnings.filterwarnings("ignore")

# Emit INFO-level log records (used for the progress messages below).
logging.basicConfig(level=logging.INFO)

# Run the models on the GPU when CUDA is available, otherwise on the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
logging.info(f"Using device: {device}")

# download files from Google Drive
def download_file(url, output_path):
    """Download ``url`` to ``output_path`` using gdown.

    Args:
        url: Download URL (the callers pass Google Drive ``uc?id=...`` links).
        output_path: Local path the file is written to.

    Returns:
        The value returned by ``gdown.download`` (the output path on success).

    Raises:
        Exception: any error raised by gdown is logged and re-raised.
        RuntimeError: if gdown signals failure by returning ``None``.
    """
    try:
        downloaded = gdown.download(url, output_path, quiet=False)
    except Exception as e:
        logging.error(f"Error downloading {url}: {e}")
        raise

    # gdown can report a failed download by returning None instead of raising;
    # treat that as an error so later cells do not run with a missing file.
    if downloaded is None:
        logging.error(f"Error downloading {url}: no file was written to {output_path}")
        raise RuntimeError(f"Download failed for {url}")
    return downloaded

# Input video (downloaded from Google Drive into the working directory)
video_url = "https://drive.google.com/uc?id=130btvPHooNMgRY6AWBkhullJeHViJJg1"
video_path = "original_video.mp4"
download_file(video_url, video_path)

# Model weights; the trailing comment names the local file each URL is saved to
weights_1_url = "https://drive.google.com/uc?id=1VVRD5WrZ3CV2EOPwXjv9ZyY53rdHqwHb"  # hairnet_best.pt
weights_2_url = "https://drive.google.com/uc?id=1Z3-XuG6ln-slgioy3RRFVgrqx1XO5YDB"  # mask_best.pt
weights_3_url = "https://drive.google.com/uc?id=1o9pJQHl2vRV2MStwNjlcMZJ7qarxYhmg"  # ppe.pt (hardhat / safety vest model)

download_file(weights_1_url, "hairnet_best.pt")
download_file(weights_2_url, "mask_best.pt")
download_file(weights_3_url, "ppe.pt")
# Load YOLO models
person_model = YOLO('yolov8n.pt').to(device)  # YOLOv8 for detecting people
model1 = YOLO('mask_best.pt').to(device)      # Detects masks
model2 = YOLO('hairnet_best.pt').to(device)   # Detects hairnets
ppe_model = YOLO('ppe.pt').to(device)         # Detects hardhat and safety vest

# Initialize Deep SORT tracker
tracker = DeepSort(max_age=30, n_init=3, max_iou_distance=0.7, max_cosine_distance=0.4, nn_budget=100)

# Constants
CONF_THRESHOLD = 0.6  # Confidence threshold passed to the person detector
HOLD_FRAMES = 30      # Hold a detection for 30 frames (about 1 second at 30 FPS)

# Initialize accessory states for tracking (track id -> per-accessory state dict)
person_states = {}

# Normalize class names
def normalize_class_name(name):
    """Return the lowercase form of ``name`` with '_' and '-' shown as spaces."""
    lowered = name.lower()
    without_underscores = lowered.replace('_', ' ')
    return without_underscores.replace('-', ' ')

# Process accessory detections within person bounding boxes
def process_accessory_detections(person_crop, models):
    """Detect accessories in a person crop, one responsibility per model.

    Args:
        person_crop: Input handed unchanged to each model.
        models: Sequence of three models: index 0 contributes only 'mask',
            index 1 only 'hair net', index 2 'hardhat' and 'safety vest'.

    Returns:
        set of the normalized accessory names that were detected.
    """
    def labels_from(position):
        # Run the model at ``position`` and yield its normalized class labels.
        model = models[position]
        for result in model(person_crop):
            for box in result.boxes:
                yield normalize_class_name(model.names[int(box.cls[0])])

    found = set()

    # Model1: mask detection
    found.update(label for label in labels_from(0) if label == 'mask')

    # Model2: hairnet detection
    found.update(label for label in labels_from(1) if label == 'hair net')

    # PPE model: hardhat and safety vest detection
    found.update(label for label in labels_from(2) if label in ['hardhat', 'safety vest'])

    return found

# Update accessory states with temporal smoothing
def update_accessory_states(person_id, detected_objects, person_states, hold_frames):
    """Apply temporal smoothing to one person's accessory flags, in place.

    Args:
        person_id: Key of the person inside ``person_states``.
        detected_objects: Container of accessory names seen in this frame.
        person_states: dict of person id -> {accessory: {'state', 'hold_counter'}};
            an entry is created the first time a person id is seen.
        hold_frames: Hold-counter value assigned whenever an accessory is detected.
    """
    tracked = person_states.setdefault(person_id, {
        accessory: {'state': False, 'hold_counter': 0}
        for accessory in ('hardhat', 'safety vest', 'mask', 'hair net')
    })

    for accessory, info in tracked.items():
        seen_now = accessory in detected_objects
        if seen_now:
            # Detected: switch on and restart the hold period.
            info['state'] = True
            info['hold_counter'] = hold_frames
        elif info['hold_counter'] > 0:
            # Missed this frame but the hold period has not expired yet.
            info['hold_counter'] -= 1
        else:
            # Hold period expired.
            info['state'] = False

# Main function
def main():
    """Track people through the input video and overlay each one's PPE status.

    Reads ``original_video.mp4``, detects people with ``person_model``, feeds
    the detections to the module-level ``tracker``, runs the accessory models
    on every confirmed track's crop, smooths the result per track id through
    ``update_accessory_states`` and writes the annotated frames to
    ``detected_video_deepsort.mp4``.

    Raises:
        IOError: If the input video cannot be opened.
    """
    video_path = "original_video.mp4"
    output_video_path = "detected_video_deepsort.mp4"

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        raise IOError(f"Could not open input video: {video_path}")

    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # Keep fractional rates such as 29.97 (int() would truncate them) and fall
    # back to 30 when the container does not report a rate (OpenCV returns 0).
    fps = cap.get(cv2.CAP_PROP_FPS) or 30.0

    # Output video configuration
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

    # The accessory models do not change between frames, so build the list once.
    models = [model1, model2, ppe_model]

    frame_count = 0
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Detect people with YOLO
            results = person_model(frame, conf=CONF_THRESHOLD, classes=[0])  # Only detect 'person'
            detections = []
            for box in results[0].boxes:
                cls_id = int(box.cls[0])
                if person_model.names[cls_id] != 'person':
                    continue
                xmin, ymin, xmax, ymax = map(int, box.xyxy[0].tolist())
                width, height = xmax - xmin, ymax - ymin
                if width <= 0 or height <= 0:
                    continue
                # NOTE(review): `tracker` is created outside this function; the
                # update_tracks(..., frame=...) call matches deep_sort_realtime,
                # whose detections are ([left, top, width, height], conf, class),
                # not corner coordinates.
                detections.append(([xmin, ymin, width, height], float(box.conf[0]), 'person'))

            # Update tracker
            tracks = tracker.update_tracks(detections, frame=frame)

            # Process each track
            for track in tracks:
                if not track.is_confirmed() or track.time_since_update > 0:
                    continue

                # Get bounding box and track ID
                xmin, ymin, xmax, ymax = map(int, track.to_tlbr())
                person_id = track.track_id

                # Predicted track boxes can leave the frame; negative indices
                # would silently slice from the opposite edge, so clamp first.
                xmin, ymin = max(0, xmin), max(0, ymin)
                xmax, ymax = min(frame_width, xmax), min(frame_height, ymax)
                if xmax <= xmin or ymax <= ymin:
                    continue  # nothing left to crop

                # Crop the person from the frame
                person_crop = frame[ymin:ymax, xmin:xmax]

                # Detect accessories within the person's bounding box
                detected_objects = process_accessory_detections([person_crop], models)

                # Update accessory states
                update_accessory_states(person_id, detected_objects, person_states, HOLD_FRAMES)

                # Draw bounding box and accessory labels
                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (255, 0, 0), 2)  # Blue for person
                label_y = ymin - 10
                for obj, state_info in person_states[person_id].items():
                    color = (0, 255, 0) if state_info['state'] else (0, 0, 255)  # Green if present, Red otherwise
                    label = f"{obj}: {'using' if state_info['state'] else 'removed'}"
                    cv2.putText(frame, label, (xmin, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
                    label_y -= 15

            # Write frame to output video
            out.write(frame)
            frame_count += 1
            if frame_count % 30 == 0:
                logging.info(f"Processed {frame_count} frames.")
    finally:
        # Release the handles even when a frame fails, so the partial output
        # file is finalized and the input is not left locked.
        cap.release()
        out.release()

    print(f"Processed video saved at {os.path.abspath(output_video_path)}")

if __name__ == "__main__":
    main()


Downloading...
From: https://drive.google.com/uc?id=130btvPHooNMgRY6AWBkhullJeHViJJg1
To: /content/original_video.mp4
100%|██████████| 31.3M/31.3M [00:00<00:00, 73.3MB/s]
Downloading...
From: https://drive.google.com/uc?id=1VVRD5WrZ3CV2EOPwXjv9ZyY53rdHqwHb
To: /content/hairnet_best.pt
100%|██████████| 6.25M/6.25M [00:00<00:00, 39.1MB/s]
Downloading...
From: https://drive.google.com/uc?id=1Z3-XuG6ln-slgioy3RRFVgrqx1XO5YDB
To: /content/mask_best.pt
100%|██████████| 6.24M/6.24M [00:00<00:00, 41.6MB/s]
Downloading...
From (original): https://drive.google.com/uc?id=1o9pJQHl2vRV2MStwNjlcMZJ7qarxYhmg
From (redirected): https://drive.google.com/uc?id=1o9pJQHl2vRV2MStwNjlcMZJ7qarxYhmg&confirm=t&uuid=1d11b682-0baa-4b06-8e3c-d4db26f4e40f
To: /content/ppe.pt
100%|██████████| 87.6M/87.6M [00:01<00:00, 58.5MB/s]


[1;30;43mGörüntülenen çıkış son 5000 satıra kısaltıldı.[0m
Speed: 2.4ms preprocess, 12.2ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 320)

0: 640x320 1 helmet, 1 vest, 13.6ms
Speed: 2.5ms preprocess, 13.6ms inference, 1.9ms postprocess per image at shape (1, 3, 640, 320)

0: 640x320 1 Hardhat, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 39.3ms
Speed: 2.5ms preprocess, 39.3ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 320)

0: 640x384 1 person, 16.8ms
Speed: 2.4ms preprocess, 16.8ms inference, 3.1ms postprocess per image at shape (1, 3, 640, 384)

0: 640x320 1 Helmet, 17.8ms
Speed: 4.5ms preprocess, 17.8ms inference, 3.2ms postprocess per image at shape (1, 3, 640, 320)

0: 640x320 1 helmet, 1 vest, 20.0ms
Speed: 3.9ms preprocess, 20.0ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 320)

0: 640x320 1 Hardhat, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 44.2ms
Speed: 3.7ms preprocess, 44.2ms inference, 1.5ms postprocess per image at shape (1, 3, 

In [None]:
# Load YOLO models
# NOTE(review): `device` and `YOLO` are not defined in this cell; they must
# already exist in the kernel from an earlier cell.
person_model = YOLO('yolov8n.pt').to(device)  # YOLOv8 for detecting people
# NOTE(review): model1 and ppe_model load the same 'ppe.pt' weights, and main()
# passes both to process_accessory_detections(), so that network runs twice
# per person crop. Confirm whether model1 was meant to be a dedicated mask model.
model1 = YOLO('ppe.pt').to(device)           # Detects masks
# NOTE(review): 'best-all.pt' is not downloaded by the nearby cells; presumably
# it has to be provided manually - confirm.
model2 = YOLO('best-all.pt').to(device)      # Detects hairnets
ppe_model = YOLO('ppe.pt').to(device)        # Detects hardhat and safety vest

# Constants
# Minimum confidence passed to person_model in main().
CONF_THRESHOLD = 0.6
# Per-accessory minimum confidence keyed by normalized class name; classes that
# are not listed here are ignored by process_accessory_detections().
CONF_THRESHOLDS = {'mask': 0.5, 'hair net': 0.5, 'hardhat': 0.6, 'safety vest': 0.6}
HOLD_FRAMES = 20  # Frames to keep the detection stable
# person_id -> per-accessory state dict, filled by update_accessory_states().
person_states = {}

# Normalize class names
def normalize_class_name(name):
    """Return ``name`` lower-cased with underscores and hyphens turned into spaces."""
    separators_to_space = str.maketrans({'_': ' ', '-': ' '})
    return name.translate(separators_to_space).lower()

# Process accessory detections
def process_accessory_detections(person_crop, models):
    """Run every accessory model on a person crop and keep confident hits.

    Args:
        person_crop: Image passed unchanged to each model. When it exposes a
            ``size`` attribute equal to 0 (an empty array slice) no model is
            run, because inference on an empty image fails.
        models: Iterable of callables with a ``names`` mapping from class id
            to class name; each call returns results exposing ``boxes`` whose
            items provide ``cls`` and ``conf``.

    Returns:
        Dict mapping normalized class name to the highest confidence seen for
        it. Only classes listed in ``CONF_THRESHOLDS`` whose confidence reaches
        their threshold are included.
    """
    detected_objects = {}

    # A zero-area crop (degenerate bounding box) has nothing to detect.
    if getattr(person_crop, 'size', None) == 0:
        return detected_objects

    for model in models:
        for result in model(person_crop):
            for box in result.boxes:
                cls_name = normalize_class_name(model.names[int(box.cls[0])])
                conf = float(box.conf[0])
                threshold = CONF_THRESHOLDS.get(cls_name)
                if threshold is not None and conf >= threshold:
                    # Keep the best score when a class is reported more than once.
                    detected_objects[cls_name] = max(detected_objects.get(cls_name, 0), conf)
    return detected_objects

# Update accessory states
def update_accessory_states(person_id, detected_objects, person_states, hold_frames):
    """Update one person's smoothed accessory flags and apply the hardhat rule.

    Args:
        person_id: Key identifying the person inside ``person_states``.
        detected_objects: Container of accessory names seen this frame; only
            membership (``in``) is used.
        person_states: Dict mapping person ids to per-accessory state dicts;
            a default entry is created for unknown ids. Mutated in place.
        hold_frames: Value the hold counter is reset to on a detection.

    Returns:
        None.
    """
    if person_id not in person_states:
        person_states[person_id] = {
            accessory: {'state': False, 'hold_counter': 0}
            for accessory in ('hardhat', 'safety vest', 'mask', 'hair net')
        }

    entry = person_states[person_id]
    for accessory, info in entry.items():
        if accessory in detected_objects:
            # Seen this frame: re-arm the hold counter and mark as present.
            info.update(hold_counter=hold_frames, state=True)
        elif info['hold_counter'] > 0:
            # Missed, but the grace period has not run out yet.
            info['hold_counter'] -= 1
        else:
            info['state'] = False

    # Logical consistency rule from the notebook: a worn hardhat implies a
    # hair net. Only the flag is forced; the hair net hold counter is untouched.
    if entry['hardhat']['state']:
        entry['hair net']['state'] = True

# Main function
def main():
    """Annotate every detected person in the video with smoothed PPE labels.

    Reads ``original_video.mp4``, detects people with ``person_model``, runs
    the accessory models on each person crop, smooths the result through
    ``update_accessory_states`` and writes the annotated frames to
    ``processed_video_without_tracker.mp4``.

    No tracker is used: ``person_id`` is simply the index of the detection
    within the current frame, so the smoothed state follows detection order
    rather than a physical person.

    Raises:
        IOError: If the input video cannot be opened.
    """
    video_path = "original_video.mp4"
    output_video_path = "processed_video_without_tracker.mp4"

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        raise IOError(f"Could not open input video: {video_path}")

    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # Keep fractional rates such as 29.97 (int() would truncate them) and fall
    # back to 30 when the container does not report a rate (OpenCV returns 0).
    fps = cap.get(cv2.CAP_PROP_FPS) or 30.0

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

    # The accessory models do not change between frames, so build the list once.
    models = [model1, model2, ppe_model]

    frame_count = 0
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Detect people
            results = person_model(frame, conf=CONF_THRESHOLD, classes=[0])  # Only detect 'person'
            person_id = 0  # Index of the person within this frame
            for box in results[0].boxes:
                cls_id = int(box.cls[0])
                if person_model.names[cls_id] != 'person':
                    continue

                xmin, ymin, xmax, ymax = map(int, box.xyxy[0].tolist())
                xmin, ymin = max(0, xmin), max(0, ymin)
                xmax, ymax = min(frame_width, xmax), min(frame_height, ymax)
                if xmax <= xmin or ymax <= ymin:
                    continue  # degenerate box: an empty crop would break inference

                # Crop the person from the frame
                person_crop = frame[ymin:ymax, xmin:xmax]

                # Detect accessories within the person's bounding box
                detected_objects = process_accessory_detections(person_crop, models)

                # Update accessory states
                update_accessory_states(person_id, detected_objects, person_states, HOLD_FRAMES)

                # Draw bounding box and accessory labels
                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (255, 0, 0), 2)
                label_y = ymin - 10
                for obj, state_info in person_states[person_id].items():
                    color = (0, 255, 0) if state_info['state'] else (0, 0, 255)
                    label = f"{obj}: {'using' if state_info['state'] else 'removed'}"
                    cv2.putText(frame, label, (xmin, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
                    label_y -= 15

                person_id += 1

            # Write frame to output video
            out.write(frame)
            frame_count += 1
            if frame_count % 30 == 0:
                logging.info(f"Processed {frame_count} frames.")
    finally:
        # Release the handles even when a frame fails, so the partial output
        # file is finalized and the input is not left locked.
        cap.release()
        out.release()

    print(f"Processed video saved at {os.path.abspath(output_video_path)}")

if __name__ == "__main__":
    main()


[1;30;43mGörüntülenen çıkış son 5000 satıra kısaltıldı.[0m
Speed: 1.9ms preprocess, 9.9ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 224)

0: 640x224 1 Hardhat, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 16.8ms
Speed: 1.5ms preprocess, 16.8ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 224)

0: 640x384 1 person, 9.0ms
Speed: 2.1ms preprocess, 9.0ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 384)

0: 640x224 1 Hardhat, 2 NO-Masks, 1 NO-Safety Vest, 1 Person, 16.8ms
Speed: 1.4ms preprocess, 16.8ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 224)

0: 640x224 1 helmet, 9.2ms
Speed: 1.4ms preprocess, 9.2ms inference, 1.6ms postprocess per image at shape (1, 3, 640, 224)

0: 640x224 1 Hardhat, 2 NO-Masks, 1 NO-Safety Vest, 1 Person, 20.0ms
Speed: 1.4ms preprocess, 20.0ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 224)

0: 640x384 1 person, 16.4ms
Speed: 1.9ms preprocess, 16.4ms inference, 1.8ms postprocess per i

In [None]:
!pip install ultralytics
import os
import cv2
import torch
import gdown
import warnings
import logging
from IPython.display import HTML
from ultralytics import YOLO

# Silence every Python warning for the rest of the session.
# NOTE(review): this blanket filter also hides deprecation warnings raised by
# the libraries used below.
warnings.filterwarnings("ignore")

# Emit INFO-level records (device message, download errors, progress logs).
logging.basicConfig(level=logging.INFO)

# Run inference on the GPU when PyTorch can see one, otherwise on the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
logging.info(f"Using device: {device}")

# download files from Google Drive
def download_file(url, output_path):
    """Download ``url`` to ``output_path`` with gdown, failing loudly on error.

    Args:
        url: Source URL (the notebook passes Google Drive ``uc?id=`` links).
        output_path: Destination file path.

    Returns:
        The path reported by ``gdown.download``.

    Raises:
        RuntimeError: If gdown signals failure by returning ``None``.
        Exception: Any error raised by gdown is logged and re-raised.
    """
    try:
        saved_path = gdown.download(url, output_path, quiet=False)
        # Some gdown releases report a failed download (e.g. access denied) by
        # returning None instead of raising; treat that as an error so a
        # missing file is noticed here and not later when it is loaded.
        if saved_path is None:
            raise RuntimeError(f"gdown returned no file for {output_path}")
    except Exception as e:
        logging.error(f"Error downloading {url}: {e}")
        raise
    return saved_path

# Input video, fetched from Google Drive into the working directory.
video_url = "https://drive.google.com/uc?id=130btvPHooNMgRY6AWBkhullJeHViJJg1"
video_path = "original_video.mp4"
download_file(video_url, video_path)

# Google Drive links for the three accessory weight files.
weights_1_url = "https://drive.google.com/uc?id=11jxQFEYr4GV84YoxXCQjMHweZMrjRoG0"  # hairnet_best.pt
weights_2_url = "https://drive.google.com/uc?id=1Z3-XuG6ln-slgioy3RRFVgrqx1XO5YDB"  # mask_best.pt
weights_3_url = "https://drive.google.com/uc?id=1o9pJQHl2vRV2MStwNjlcMZJ7qarxYhmg"  # saved below as ppe.pt (previously labelled safety_vest_best.pt)

download_file(weights_1_url, "hairnet_best.pt")
download_file(weights_2_url, "mask_best.pt")
download_file(weights_3_url, "ppe.pt")


# Load each network once and move it to the selected device.
person_model = YOLO('yolov8n.pt').to(device)   # YOLOv8 for detecting people
model1 = YOLO('mask_best.pt').to(device)       # Detects masks
model2 = YOLO('hairnet_best.pt').to(device)    # Detects hairnets
ppe_model = YOLO('ppe.pt').to(device)          # Detects hardhat and safety vest

# Confidence threshold (passed to person_model in main())
CONF_THRESHOLD = 0.6

# Accessory states with temporal smoothing
# Define a reasonable hold time based on FPS
cap = cv2.VideoCapture(video_path)
fps = int(cap.get(cv2.CAP_PROP_FPS))
# The capture is only opened to read the frame rate; release it right away so
# the file handle does not stay open for the rest of the session.
cap.release()
HOLD_FRAMES = int(fps * 1.0)  # hold for one second after losing detection
# NOTE(review): main() in this cell passes hold_frames=10 and does not read
# HOLD_FRAMES - confirm which value is intended.

# Initialize accessory states
# One shared record per accessory (not per person): 'state' is the smoothed
# flag and 'hold_counter' the remaining grace frames.
accessory_states = {
    'hardhat': {'state': False, 'hold_counter': 0},
    'safety vest': {'state': False, 'hold_counter': 0},
    'mask': {'state': False, 'hold_counter': 0},
    'hair net': {'state': False, 'hold_counter': 0},
}

# NOTE(review): not read by any code in this cell.
STATE_CHANGE_THRESHOLDS = {
    'hardhat': 60,
    'safety vest': 60,
    'mask': 60,
    'hair net': 60,
}

def normalize_class_name(name):
    """Lower-case ``name`` and replace every ``_`` or ``-`` with a space."""
    normalized = name.lower()
    for separator in ('_', '-'):
        normalized = normalized.replace(separator, ' ')
    return normalized

# Process accessory detections inside person bounding boxes
def process_accessory_detections(person_crops, models):
    """Collect the accessory classes the three models report for the crops.

    Args:
        person_crops: Input passed unchanged to each model call.
        models: Indexable sequence of three callables with a ``names`` mapping:
            index 0 is searched for 'mask', index 1 for 'hair net' and index 2
            for 'hardhat' / 'safety vest'. Any other class is ignored.

    Returns:
        Set of normalized accessory names that were detected.
    """
    # Accepted classes, in the same position as the model that reports them.
    accepted_per_model = (
        ('mask',),
        ('hair net',),
        ('hardhat', 'safety vest'),
    )

    found = set()
    for position, accepted in enumerate(accepted_per_model):
        model = models[position]
        for result in model(person_crops):
            for box in result.boxes:
                label = normalize_class_name(model.names[int(box.cls[0])])
                if label in accepted:
                    found.add(label)
    return found

def update_accessory_states(person, detected_objects, accessory_states, hold_frames):
    """Advance the shared hold-counter state and copy it onto ``person``.

    Args:
        person: Dict with an 'object_presence' dict that receives the current
            flag of every accessory.
        detected_objects: Container of accessory names seen this frame; only
            membership (``in``) is used.
        accessory_states: Dict mapping accessory name to a dict with 'state'
            and 'hold_counter'. Mutated in place.
        hold_frames: Value the hold counter is reset to on a detection.

    Returns:
        None.
    """
    for accessory, info in accessory_states.items():
        if accessory in detected_objects:
            # Seen this frame: re-arm the hold counter and mark as present.
            info['hold_counter'], info['state'] = hold_frames, True
        elif info['hold_counter'] > 0:
            # Missed, but the grace period has not run out yet.
            info['hold_counter'] -= 1
        else:
            # Grace period used up: the accessory counts as absent.
            info['state'] = False

        # Expose the smoothed flag on the person record.
        person['object_presence'][accessory] = info['state']

def main():
    """Annotate every detected person in the video with smoothed PPE labels.

    Reads ``original_video.mp4``, detects people with ``person_model``, runs
    the three accessory models on each person crop, smooths the result through
    the shared ``accessory_states`` and writes the annotated frames to
    ``detected_video.mp4``.

    NOTE(review): ``accessory_states`` is a single shared record, so when
    several people are visible they all update and display the same state.

    Raises:
        IOError: If the input video cannot be opened.
    """
    video_path = "original_video.mp4"
    output_video_path = "detected_video.mp4"

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        raise IOError(f"Could not open input video: {video_path}")

    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # Keep fractional rates such as 29.97 (int() would truncate them) and fall
    # back to 30 when the container does not report a rate (OpenCV returns 0).
    fps = cap.get(cv2.CAP_PROP_FPS) or 30.0

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

    # The accessory models do not change between frames, so build the list once.
    models = [model1, model2, ppe_model]

    frame_count = 0
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Detect people
            results = person_model(frame, conf=CONF_THRESHOLD, classes=[0])  # Only detect 'person'
            persons = []
            for box in results[0].boxes:
                cls_id = int(box.cls[0])
                if person_model.names[cls_id] == 'person':
                    xmin, ymin, xmax, ymax = map(int, box.xyxy[0].tolist())
                    # Clamp to the frame so slicing never wraps around.
                    xmin, ymin = max(0, xmin), max(0, ymin)
                    xmax, ymax = min(frame_width, xmax), min(frame_height, ymax)
                    if xmax > xmin and ymax > ymin:
                        persons.append((xmin, ymin, xmax, ymax))
                    # else: degenerate box - an empty crop would break inference

            # Process accessory detection for each person
            for person_box in persons:
                xmin, ymin, xmax, ymax = person_box
                person_crop = frame[ymin:ymax, xmin:xmax]

                # Detect accessories within the person's bounding box
                detected_objects = process_accessory_detections([person_crop], models)

                # Update accessory states
                # NOTE(review): the hold is hard-coded to 10 frames here; the
                # cell-level HOLD_FRAMES constant is not used - confirm intent.
                person = {'bbox': person_box, 'object_presence': {}}
                update_accessory_states(person, detected_objects, accessory_states, hold_frames=10)

                # Draw bounding box and labels
                color = (255, 0, 0)  # Blue for person bounding box
                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), color, 2)

                label_y = ymin - 10
                for obj, present in person['object_presence'].items():
                    obj_color = (0, 255, 0) if present else (0, 0, 255)  # Green if present, Red otherwise
                    label = f"{obj}: {'using' if present else 'removed'}"
                    cv2.putText(frame, label, (xmin, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, obj_color, 2)
                    label_y -= 15

            # Write frame to output video
            out.write(frame)
            frame_count += 1
            if frame_count % 30 == 0:
                logging.info(f"Processed {frame_count} frames.")
    finally:
        # Release the handles even when a frame fails, so the partial output
        # file is finalized and the input is not left locked.
        cap.release()
        out.release()

    print(f"Processed video saved at {os.path.abspath(output_video_path)}")

if __name__ == "__main__":
    main()




Downloading...
From: https://drive.google.com/uc?id=130btvPHooNMgRY6AWBkhullJeHViJJg1
To: /content/original_video.mp4
100%|██████████| 31.3M/31.3M [00:01<00:00, 22.4MB/s]
Downloading...
From: https://drive.google.com/uc?id=11jxQFEYr4GV84YoxXCQjMHweZMrjRoG0
To: /content/hairnet_best.pt
100%|██████████| 22.5M/22.5M [00:00<00:00, 41.7MB/s]
Downloading...
From: https://drive.google.com/uc?id=1Z3-XuG6ln-slgioy3RRFVgrqx1XO5YDB
To: /content/mask_best.pt
100%|██████████| 6.24M/6.24M [00:00<00:00, 41.6MB/s]
Downloading...
From (original): https://drive.google.com/uc?id=1o9pJQHl2vRV2MStwNjlcMZJ7qarxYhmg
From (redirected): https://drive.google.com/uc?id=1o9pJQHl2vRV2MStwNjlcMZJ7qarxYhmg&confirm=t&uuid=d5bc6be1-72a7-4e76-91d8-30faa9ce3f0a
To: /content/ppe.pt
100%|██████████| 87.6M/87.6M [00:01<00:00, 58.9MB/s]


[1;30;43mGörüntülenen çıkış son 5000 satıra kısaltıldı.[0m
Speed: 1.6ms preprocess, 25.1ms inference, 1.8ms postprocess per image at shape (1, 3, 640, 224)

0: 640x384 1 person, 9.3ms
Speed: 2.2ms preprocess, 9.3ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 384)

0: 640x224 1 Helmet, 9.8ms
Speed: 1.7ms preprocess, 9.8ms inference, 1.8ms postprocess per image at shape (1, 3, 640, 224)

0: 640x224 (no detections), 10.2ms
Speed: 1.5ms preprocess, 10.2ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 224)

0: 640x224 1 Hardhat, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 25.7ms
Speed: 1.6ms preprocess, 25.7ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 224)

0: 640x384 1 person, 10.2ms
Speed: 2.2ms preprocess, 10.2ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 384)

0: 640x224 1 Helmet, 10.1ms
Speed: 1.6ms preprocess, 10.1ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 224)

0: 640x224 (no detections), 9.8ms
Speed: 1

In [None]:
import os
import cv2
import torch
import gdown
import warnings
import logging
from IPython.display import HTML
from ultralytics import YOLO

# Silence every Python warning for the rest of the session.
# NOTE(review): this blanket filter also hides deprecation warnings raised by
# the libraries used below.
warnings.filterwarnings("ignore")

# Emit INFO-level records (device message, download errors, progress logs).
logging.basicConfig(level=logging.INFO)

# Run inference on the GPU when PyTorch can see one, otherwise on the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
logging.info(f"Using device: {device}")

# download files from Google Drive
def download_file(url, output_path):
    """Download ``url`` to ``output_path`` with gdown, failing loudly on error.

    Args:
        url: Source URL (the notebook passes Google Drive ``uc?id=`` links).
        output_path: Destination file path.

    Returns:
        The path reported by ``gdown.download``.

    Raises:
        RuntimeError: If gdown signals failure by returning ``None``.
        Exception: Any error raised by gdown is logged and re-raised.
    """
    try:
        saved_path = gdown.download(url, output_path, quiet=False)
        # Some gdown releases report a failed download (e.g. access denied) by
        # returning None instead of raising; treat that as an error so a
        # missing file is noticed here and not later when it is loaded.
        if saved_path is None:
            raise RuntimeError(f"gdown returned no file for {output_path}")
    except Exception as e:
        logging.error(f"Error downloading {url}: {e}")
        raise
    return saved_path

# Input video, fetched from Google Drive into the working directory.
video_url = "https://drive.google.com/uc?id=130btvPHooNMgRY6AWBkhullJeHViJJg1"
video_path = "original_video.mp4"
download_file(video_url, video_path)

# Google Drive links for the three accessory weight files.
weights_1_url = "https://drive.google.com/uc?id=11jxQFEYr4GV84YoxXCQjMHweZMrjRoG0"  # hairnet_best.pt
weights_2_url = "https://drive.google.com/uc?id=1Z3-XuG6ln-slgioy3RRFVgrqx1XO5YDB"  # mask_best.pt
weights_3_url = "https://drive.google.com/uc?id=1o9pJQHl2vRV2MStwNjlcMZJ7qarxYhmg"  # saved below as ppe.pt (previously labelled safety_vest_best.pt)

download_file(weights_1_url, "hairnet_best.pt")
download_file(weights_2_url, "mask_best.pt")
download_file(weights_3_url, "ppe.pt")


# Load each network once and move it to the selected device.
person_model = YOLO('yolov8n.pt').to(device)   # YOLOv8 for detecting people
model1 = YOLO('mask_best.pt').to(device)       # Detects masks
model2 = YOLO('hairnet_best.pt').to(device)    # Detects hairnets
ppe_model = YOLO('ppe.pt').to(device)          # Detects hardhat and safety vest

# Confidence threshold (passed to person_model in main())
CONF_THRESHOLD = 0.6

# Accessory states with temporal smoothing
# One shared record per accessory (not per person), maintained by
# update_accessory_states(): 'state' is the smoothed flag, 'hold_counter' the
# remaining grace frames and 'counter' the misses counted once the grace
# period is over.
# NOTE(review): three accessories start with both counters at 30 while
# 'safety vest' starts at 0 - confirm this asymmetry is intended.
accessory_states = {
    'hardhat': {'state': False, 'counter': 30,'hold_counter': 30},
    'safety vest': {'state': False, 'counter': 0,'hold_counter': 0},
    'mask': {'state': False, 'counter': 30,'hold_counter': 30},
    'hair net': {'state': False, 'counter': 30, 'hold_counter': 30},
}

# NOTE(review): not read by any code in this cell; main() passes a literal
# hold_frames=10 instead.
STATE_CHANGE_THRESHOLDS = {
    'hardhat': 30,
    'safety vest': 30,
    'mask': 30,
    'hair net': 30,
}

def normalize_class_name(name):
    """Map a raw class label to its lower-case form with ``_``/``-`` as spaces."""
    spaced = ''.join(' ' if char in ('_', '-') else char for char in name)
    return spaced.lower()

# Process accessory detections inside person bounding boxes
def process_accessory_detections(person_crops, models):
    """Collect the accessory classes the three models report for the crops.

    Args:
        person_crops: Input passed unchanged to each model call.
        models: Indexable sequence of three callables with a ``names`` mapping:
            index 0 is searched for 'mask', index 1 for 'hair net' and index 2
            for 'hardhat' / 'safety vest'. Any other class is ignored.

    Returns:
        Set of normalized accessory names that were detected.
    """
    found = set()

    def scan(model, accepted):
        # Run one model and record each reported class that is in `accepted`.
        for result in model(person_crops):
            for box in result.boxes:
                label = normalize_class_name(model.names[int(box.cls[0])])
                if label in accepted:
                    found.add(label)

    scan(models[0], ('mask',))                      # Model1: mask detection
    scan(models[1], ('hair net',))                  # Model2: hairnet detection
    scan(models[2], ('hardhat', 'safety vest'))     # PPE model
    return found

# Update accessory states with temporal smoothing
def update_accessory_states(person, detected_objects, accessory_states, hold_frames):
    """Advance the shared smoothing state and copy it onto ``person``.

    An accessory turns on as soon as it is detected. After the last detection
    it stays on while ``hold_counter`` runs down, and then for a further run
    of misses counted in ``counter``; it turns off once that run reaches
    ``hold_frames``.

    Args:
        person: Dict with an 'object_presence' dict that receives the current
            flag of every accessory.
        detected_objects: Container of accessory names seen this frame; only
            membership (``in``) is used.
        accessory_states: Dict mapping accessory name to a dict with 'state',
            'counter' and 'hold_counter'. Mutated in place.
        hold_frames: Hold-counter reset value and miss-run length needed to
            switch an accessory off.

    Returns:
        None.
    """
    for obj, state_info in accessory_states.items():
        if obj in detected_objects:
            # Reset hold_counter and set state to True
            state_info['hold_counter'] = hold_frames
            state_info['state'] = True
            # A detection ends the current run of misses. Without this reset,
            # misses from separate episodes add up and switch the accessory
            # off earlier than hold_frames consecutive misses.
            state_info['counter'] = 0
        elif state_info['hold_counter'] > 0:
            # Decrement hold_counter if not detected
            state_info['hold_counter'] -= 1
        else:
            # Count this miss towards the state change
            state_info['counter'] += 1
            if state_info['counter'] >= hold_frames:
                state_info['state'] = False
                state_info['counter'] = 0  # Reset counter when state changes

        # Update the person's object presence state
        person['object_presence'][obj] = state_info['state']

def main():
    """Annotate every detected person in the video with smoothed PPE labels.

    Reads ``original_video.mp4``, detects people with ``person_model``, runs
    the three accessory models on each person crop, smooths the result through
    the shared ``accessory_states`` and writes the annotated frames to
    ``detected_video.mp4``.

    NOTE(review): ``accessory_states`` is a single shared record, so when
    several people are visible they all update and display the same state.

    Raises:
        IOError: If the input video cannot be opened.
    """
    video_path = "original_video.mp4"
    output_video_path = "detected_video.mp4"

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        raise IOError(f"Could not open input video: {video_path}")

    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # Keep fractional rates such as 29.97 (int() would truncate them) and fall
    # back to 30 when the container does not report a rate (OpenCV returns 0).
    fps = cap.get(cv2.CAP_PROP_FPS) or 30.0

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

    # The accessory models do not change between frames, so build the list once.
    models = [model1, model2, ppe_model]

    frame_count = 0
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Detect people
            results = person_model(frame, conf=CONF_THRESHOLD, classes=[0])  # Only detect 'person'
            persons = []
            for box in results[0].boxes:
                cls_id = int(box.cls[0])
                if person_model.names[cls_id] == 'person':
                    xmin, ymin, xmax, ymax = map(int, box.xyxy[0].tolist())
                    # Clamp to the frame so slicing never wraps around.
                    xmin, ymin = max(0, xmin), max(0, ymin)
                    xmax, ymax = min(frame_width, xmax), min(frame_height, ymax)
                    if xmax > xmin and ymax > ymin:
                        persons.append((xmin, ymin, xmax, ymax))
                    # else: degenerate box - an empty crop would break inference

            # Process accessory detection for each person
            for person_box in persons:
                xmin, ymin, xmax, ymax = person_box
                person_crop = frame[ymin:ymax, xmin:xmax]

                # Detect accessories within the person's bounding box
                detected_objects = process_accessory_detections([person_crop], models)

                # Update accessory states (hold hard-coded to 10 frames)
                person = {'bbox': person_box, 'object_presence': {}}
                update_accessory_states(person, detected_objects, accessory_states, hold_frames=10)

                # Draw bounding box and labels
                color = (255, 0, 0)  # Blue for person bounding box
                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), color, 2)

                label_y = ymin - 10
                for obj, present in person['object_presence'].items():
                    obj_color = (0, 255, 0) if present else (0, 0, 255)  # Green if present, Red otherwise
                    label = f"{obj}: {'using' if present else 'removed'}"
                    cv2.putText(frame, label, (xmin, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, obj_color, 2)
                    label_y -= 15

            # Write frame to output video
            out.write(frame)
            frame_count += 1
            if frame_count % 30 == 0:
                logging.info(f"Processed {frame_count} frames.")
    finally:
        # Release the handles even when a frame fails, so the partial output
        # file is finalized and the input is not left locked.
        cap.release()
        out.release()

    print(f"Processed video saved at {os.path.abspath(output_video_path)}")

if __name__ == "__main__":
    main()


Downloading...
From: https://drive.google.com/uc?id=130btvPHooNMgRY6AWBkhullJeHViJJg1
To: /content/original_video.mp4
100%|██████████| 31.3M/31.3M [00:00<00:00, 168MB/s]
Downloading...
From: https://drive.google.com/uc?id=11jxQFEYr4GV84YoxXCQjMHweZMrjRoG0
To: /content/hairnet_best.pt
100%|██████████| 22.5M/22.5M [00:00<00:00, 157MB/s]
Downloading...
From: https://drive.google.com/uc?id=1Z3-XuG6ln-slgioy3RRFVgrqx1XO5YDB
To: /content/mask_best.pt
100%|██████████| 6.24M/6.24M [00:00<00:00, 165MB/s]
Downloading...
From (original): https://drive.google.com/uc?id=1o9pJQHl2vRV2MStwNjlcMZJ7qarxYhmg
From (redirected): https://drive.google.com/uc?id=1o9pJQHl2vRV2MStwNjlcMZJ7qarxYhmg&confirm=t&uuid=3dd4f604-93bd-4a10-8024-34efd50e4a1d
To: /content/ppe.pt
100%|██████████| 87.6M/87.6M [00:00<00:00, 148MB/s]


[1;30;43mGörüntülenen çıkış son 5000 satıra kısaltıldı.[0m
Speed: 2.7ms preprocess, 25.5ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 224)

0: 640x384 1 person, 17.2ms
Speed: 2.6ms preprocess, 17.2ms inference, 2.1ms postprocess per image at shape (1, 3, 640, 384)

0: 640x224 1 Helmet, 17.4ms
Speed: 1.8ms preprocess, 17.4ms inference, 1.8ms postprocess per image at shape (1, 3, 640, 224)

0: 640x224 (no detections), 17.4ms
Speed: 1.7ms preprocess, 17.4ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 224)

0: 640x224 1 Hardhat, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 27.2ms
Speed: 1.5ms preprocess, 27.2ms inference, 1.6ms postprocess per image at shape (1, 3, 640, 224)

0: 640x384 1 person, 17.0ms
Speed: 2.1ms preprocess, 17.0ms inference, 1.8ms postprocess per image at shape (1, 3, 640, 384)

0: 640x224 1 Helmet, 13.7ms
Speed: 1.6ms preprocess, 13.7ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 224)

0: 640x224 (no detections), 11.3ms
Spe

In [None]:
import os
import cv2
import torch
import gdown
import warnings
import logging
from IPython.display import HTML
from ultralytics import YOLO

# Silence every Python warning for the rest of the session.
# NOTE(review): this blanket filter also hides deprecation warnings raised by
# the libraries used below.
warnings.filterwarnings("ignore")

# Emit INFO-level records (device message and download errors).
logging.basicConfig(level=logging.INFO)

# Run inference on the GPU when PyTorch can see one, otherwise on the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
logging.info(f"Using device: {device}")

# download files from Google Drive
def download_file(url, output_path):
    """Download ``url`` to ``output_path`` with gdown, failing loudly on error.

    Args:
        url: Source URL (the notebook passes Google Drive ``uc?id=`` links).
        output_path: Destination file path.

    Returns:
        The path reported by ``gdown.download``.

    Raises:
        RuntimeError: If gdown signals failure by returning ``None``.
        Exception: Any error raised by gdown is logged and re-raised.
    """
    try:
        saved_path = gdown.download(url, output_path, quiet=False)
        # Some gdown releases report a failed download (e.g. access denied) by
        # returning None instead of raising; treat that as an error so a
        # missing file is noticed here and not later when it is loaded.
        if saved_path is None:
            raise RuntimeError(f"gdown returned no file for {output_path}")
    except Exception as e:
        logging.error(f"Error downloading {url}: {e}")
        raise
    return saved_path

# Input video, fetched from Google Drive into the working directory.
video_url = "https://drive.google.com/uc?id=130btvPHooNMgRY6AWBkhullJeHViJJg1"
video_path = "original_video.mp4"
download_file(video_url, video_path)

# Google Drive links for the three accessory weight files.
weights_1_url = "https://drive.google.com/uc?id=11jxQFEYr4GV84YoxXCQjMHweZMrjRoG0"  # hairnet_best.pt
weights_2_url = "https://drive.google.com/uc?id=1Z3-XuG6ln-slgioy3RRFVgrqx1XO5YDB"  # mask_best.pt
weights_3_url = "https://drive.google.com/uc?id=1o9pJQHl2vRV2MStwNjlcMZJ7qarxYhmg"  # saved below as ppe.pt (previously labelled safety_vest_best.pt)

download_file(weights_1_url, "hairnet_best.pt")
download_file(weights_2_url, "mask_best.pt")
download_file(weights_3_url, "ppe.pt")
# Load models (this cell loads no person detector; main() analyses whole frames)
model1 = YOLO('mask_best.pt').to(device)       # Detects masks
model2 = YOLO('hairnet_best.pt').to(device)    # Detects hairnets
ppe_model = YOLO('ppe.pt').to(device)          # Replaces model3, detects 'hardhat' and 'safety vest'

# Confidence threshold
# NOTE(review): not referenced by the rest of this cell; the accessory models
# are called without an explicit confidence argument.
CONF_THRESHOLD = 0.5

# Updated accessory states
# One shared record per accessory: 'state' is the smoothed flag and 'counter'
# the number of consecutive frames that disagreed with it, as maintained by
# update_accessory_states().
# NOTE(review): only 'hair net' carries a 'hold_counter' key, and this cell's
# update_accessory_states() never reads it.
accessory_states = {
    'hardhat': {'state': False, 'counter': 0},
    'safety vest': {'state': False, 'counter': 0},
    'mask': {'state': False, 'counter': 0},
    'hair net': {'state': False, 'counter': 0, 'hold_counter': 0},
}

# Temporal smoothing thresholds
# NOTE(review): not read by any code in this cell; main() passes a literal
# hold_frames=10 instead.
STATE_CHANGE_THRESHOLDS = {
    'hardhat': 30,
    'safety vest': 30,
    'mask': 30,
    'hair net': 30,
}

# Normalize class names
def normalize_class_name(name):
    """Return the lower-case label with underscores and hyphens as spaces."""
    without_underscores = name.replace('_', ' ')
    without_hyphens = without_underscores.replace('-', ' ')
    return without_hyphens.lower()

# Process accessory detections
def process_accessory_detections(person_crop, models):
    """Collect accessory classes from the first result of each of three models.

    Args:
        person_crop: Input passed unchanged to each model call.
        models: Indexable sequence of three callables with a ``names`` mapping:
            index 0 is searched for 'mask', index 1 for 'hair net' and index 2
            for 'hardhat' / 'safety vest'. Any other class is ignored.

    Returns:
        Set of normalized accessory names that were detected. Only the first
        element of each model's result list is inspected.
    """
    accepted_per_model = [{'mask'}, {'hair net'}, {'hardhat', 'safety vest'}]

    detected = set()
    for position, accepted in enumerate(accepted_per_model):
        detector = models[position]
        first_result = detector(person_crop)[0]
        for box in first_result.boxes:
            label = normalize_class_name(detector.names[int(box.cls[0])])
            if label in accepted:
                detected.add(label)
    return detected

# Update accessory states with temporal smoothing
def update_accessory_states(person, detected_objects, accessory_states, hold_frames):
    """Debounce the accessory flags and copy them onto ``person``.

    A flag only flips after the detection result has disagreed with it for
    ``hold_frames`` consecutive calls; any agreeing call restarts the count.

    Args:
        person: Dict with an 'object_presence' dict that receives the current
            flag of every accessory.
        detected_objects: Container of accessory names seen this frame; only
            membership (``in``) is used.
        accessory_states: Dict mapping accessory name to a dict with 'state'
            and 'counter'. Mutated in place.
        hold_frames: Number of consecutive disagreeing frames needed to flip.

    Returns:
        None.
    """
    for accessory, info in accessory_states.items():
        seen = accessory in detected_objects

        if seen == info['state']:
            # Observation agrees with the current flag: restart the count.
            info['counter'] = 0
        else:
            info['counter'] += 1
            if info['counter'] >= hold_frames:
                # Enough consecutive disagreement: adopt the observed value.
                info['state'] = seen
                info['counter'] = 0

        person['object_presence'][accessory] = info['state']

# Main function
def main():
    """Annotate ``original_video.mp4`` with smoothed accessory states and save the result.

    Every frame is treated as a single person filling the whole image: the
    accessory models run on the full frame, the shared ``accessory_states``
    are updated with a 10-frame hold, and one "using"/"removed" label per
    accessory is drawn before the frame is written to ``detected_video.mp4``.

    Raises:
        FileNotFoundError: If the input video cannot be opened.
    """
    video_path = "original_video.mp4"
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        # Fail loudly instead of silently writing an empty output video.
        cap.release()
        raise FileNotFoundError(f"Unable to open video: {video_path}")

    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # Keep fractional frame rates (e.g. 29.97); truncating them changes playback speed.
    fps = cap.get(cv2.CAP_PROP_FPS)

    # Output video writer
    output_video_path = "detected_video.mp4"
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

    # Loop invariants: the model list and the text row assigned to each accessory.
    models = [model1, model2, ppe_model]
    label_rows = {obj: 50 + 30 * row for row, obj in enumerate(accessory_states)}

    frame_count = 0
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Assumes a single person in frame: the whole frame is the person crop.
            person = {'bbox': (0, 0, frame_width, frame_height), 'object_presence': {}}
            person_crop = frame

            detected_objects = process_accessory_detections([person_crop], models)
            update_accessory_states(person, detected_objects, accessory_states, hold_frames=10)

            # Annotate frame: green when the accessory is in use, red otherwise.
            for obj, present in person['object_presence'].items():
                color = (0, 255, 0) if present else (0, 0, 255)
                label = f"{obj}: {'using' if present else 'removed'}"
                cv2.putText(frame, label, (50, label_rows[obj]),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)

            out.write(frame)
            frame_count += 1
    finally:
        # Release the capture and finalize the output file even if a frame fails.
        cap.release()
        out.release()

    print(f"Processed video saved at {os.path.abspath(output_video_path)}")

if __name__ == "__main__":
    main()


Downloading...
From: https://drive.google.com/uc?id=130btvPHooNMgRY6AWBkhullJeHViJJg1
To: /content/original_video.mp4
100%|██████████| 31.3M/31.3M [00:00<00:00, 163MB/s]
Downloading...
From: https://drive.google.com/uc?id=11jxQFEYr4GV84YoxXCQjMHweZMrjRoG0
To: /content/hairnet_best.pt
100%|██████████| 22.5M/22.5M [00:00<00:00, 91.1MB/s]
Downloading...
From: https://drive.google.com/uc?id=1Z3-XuG6ln-slgioy3RRFVgrqx1XO5YDB
To: /content/mask_best.pt
100%|██████████| 6.24M/6.24M [00:00<00:00, 186MB/s]
Downloading...
From (original): https://drive.google.com/uc?id=1o9pJQHl2vRV2MStwNjlcMZJ7qarxYhmg
From (redirected): https://drive.google.com/uc?id=1o9pJQHl2vRV2MStwNjlcMZJ7qarxYhmg&confirm=t&uuid=37d2ff35-8b08-46e6-84bf-d28cfd8aff21
To: /content/ppe.pt
100%|██████████| 87.6M/87.6M [00:00<00:00, 90.9MB/s]


[1;30;43mGörüntülenen çıkış son 5000 satıra kısaltıldı.[0m
Speed: 3.5ms preprocess, 16.2ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 Hardhat, 1 Mask, 1 NO-Safety Vest, 1 Person, 21.0ms
Speed: 2.3ms preprocess, 21.0ms inference, 5.1ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 Helmet, 1 Safety Vest, 10.3ms
Speed: 2.1ms preprocess, 10.3ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 (no detections), 15.8ms
Speed: 2.1ms preprocess, 15.8ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 Hardhat, 1 Mask, 1 NO-Safety Vest, 1 Person, 21.3ms
Speed: 2.1ms preprocess, 21.3ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 Helmet, 1 Safety Vest, 13.5ms
Speed: 3.8ms preprocess, 13.5ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 (no detections), 13.4ms
Speed: 2.4ms preprocess, 13.4ms inference, 0.8ms postprocess per 

In [None]:
!pip install ultralytics
import os
import cv2
import torch
import gdown
import warnings
import logging
from IPython.display import HTML
from ultralytics import YOLO

# do not show warnings
warnings.filterwarnings("ignore")

logging.basicConfig(level=logging.INFO)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
logging.info(f"Using device: {device}")

# download files from Google Drive
def download_file(url, output_path):
    """Download ``url`` to ``output_path`` with gdown, logging and re-raising failures.

    Args:
        url: Google Drive download URL.
        output_path: Local file name to write.

    Raises:
        RuntimeError: If gdown reports no output file.
        Exception: Any error raised by gdown is logged and re-raised unchanged.
    """
    try:
        saved_path = gdown.download(url, output_path, quiet=False)
        if saved_path is None:
            # NOTE(review): some gdown releases appear to signal failure by
            # returning None instead of raising; treat that as an error too.
            raise RuntimeError(f"gdown produced no output file for {output_path}")
    except Exception as e:
        logging.error(f"Error downloading {url}: {e}")
        raise

# Fetch the input video and the three accessory-model weight files into the
# current working directory.
video_url = "https://drive.google.com/uc?id=130btvPHooNMgRY6AWBkhullJeHViJJg1"
video_path = "original_video.mp4"
download_file(video_url, video_path)

# NOTE: the weights_N numbering does not follow the modelN numbering used when
# the files are loaded (hairnet_best.pt is loaded as model2, mask_best.pt as model1).
weights_1_url = "https://drive.google.com/uc?id=11jxQFEYr4GV84YoxXCQjMHweZMrjRoG0"  # hairnet_best.pt
weights_2_url = "https://drive.google.com/uc?id=1Z3-XuG6ln-slgioy3RRFVgrqx1XO5YDB"  # mask_best.pt
weights_3_url = "https://drive.google.com/uc?id=1o9pJQHl2vRV2MStwNjlcMZJ7qarxYhmg"  # saved as safety_vest_best.pt; same Drive id an earlier cell's log saved as ppe.pt

download_file(weights_1_url, "hairnet_best.pt")
download_file(weights_2_url, "mask_best.pt")
download_file(weights_3_url, "safety_vest_best.pt")

def resize_video(input_path, output_path, resize_width=360, resize_height=640):
    """Re-encode ``input_path`` at ``resize_width`` x ``resize_height`` if needed.

    Args:
        input_path: Video to read.
        output_path: Where the resized copy is written (mp4v codec).
        resize_width: Target frame width in pixels.
        resize_height: Target frame height in pixels.

    Returns:
        ``output_path`` when a resized copy was written, or ``input_path``
        unchanged when the video already has the target dimensions.

    Raises:
        FileNotFoundError: If ``input_path`` cannot be opened.
    """
    cap = cv2.VideoCapture(input_path)
    try:
        if not cap.isOpened():
            # An unopened capture reports 0x0, which would otherwise lead to an
            # empty "resized" file being written and returned.
            raise FileNotFoundError(f"Unable to open video: {input_path}")

        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)

        if frame_width == resize_width and frame_height == resize_height:
            logging.info("Resizing not required.")
            return input_path

        logging.info(f"Resizing video from ({frame_width}, {frame_height}) to "
                     f"({resize_width}, {resize_height})")
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out_resized = cv2.VideoWriter(output_path, fourcc, fps,
                                      (resize_width, resize_height))
        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                out_resized.write(cv2.resize(frame, (resize_width, resize_height)))
        finally:
            # Finalize the output file even if a frame fails mid-way.
            out_resized.release()
        return output_path
    finally:
        cap.release()

# Resize the input once; resize_video returns the original path when no resize is needed.
resized_video_path = 'resized_video.mp4'
resized_video_path = resize_video(video_path, resized_video_path)

# Load the three accessory models and the person detector onto the selected device.
model1 = YOLO('mask_best.pt').to(device)       # only its 'mask' class is consumed by process_accessory_detections
model2 = YOLO('hairnet_best.pt').to(device)    # only its 'hair net' class is consumed
model3 = YOLO('safety_vest_best.pt').to(device)  # third accessory model; logged output shows PPE classes such as 'Hardhat' / 'NO-Safety Vest'
person_model = YOLO('yolov8n.pt').to(device)   # called with classes=[0] to keep only 'person'

# Adjust confidence and IoU thresholds
# NOTE(review): these look like YOLOv5-hub style attribute assignments; ultralytics
# YOLO objects presumably take conf/iou as call-time arguments instead — confirm
# that these assignments have any effect on inference.
model1.conf = 0.5
model2.conf = 0.5
model3.conf = 0.5
person_model.conf = 0.5

model1.iou = 0.45
model2.iou = 0.45
model3.iou = 0.45
person_model.iou = 0.45

# Print model class names for verification
logging.info(f"Model1 class names: {model1.names}")
logging.info(f"Model2 class names: {model2.names}")
logging.info(f"Model3 class names: {model3.names}")
logging.info(f"Person model class names: {person_model.names}")

# Function to normalize class names
def normalize_class_name(name):
    """Return ``name`` with '_' and '-' replaced by spaces, then lower-cased."""
    cleaned = name
    for separator in ('_', '-'):
        cleaned = cleaned.replace(separator, ' ')
    return cleaned.lower()

# Classes tracked per person. Every entry except 'person' becomes (after
# normalize_class_name) a key of each person's 'object_presence' dict.
DETECTION_CLASSES = ['person', 'Hardhat', 'Safety Vest', 'mask', 'hair net']

# Shared, mutable smoothing state keyed by normalized class name.
#   'state'        - currently reported presence
#   'counter'      - consecutive frames disagreeing with 'state'
#   'hold_counter' - 'hair net' only: frames left to keep reporting True after the last detection
accessory_states = {
    'hardhat': {'state': False, 'counter': 0},
    'mask': {'state': False, 'counter': 0},
    'hair net': {'state': False, 'counter': 0, 'hold_counter': 0},
    'safety vest': {'state': False, 'counter': 0},
}

# Number of consecutive disagreeing frames required before a state flips.
# The 'hair net' entry is not read by update_accessory_states, which handles
# that class through the hold counter instead.
STATE_CHANGE_THRESHOLDS = {
    'hardhat': 20,
    'mask': 20,
    'hair net': 20,
    'safety vest':20,
}

# Function to process person detections and select the highest-confidence person
def process_person_detections(frame, person_model):
    """Detect people in ``frame`` and keep only the most confident one.

    Args:
        frame: Image passed straight to ``person_model``.
        person_model: Detector called as ``person_model(frame, classes=[0])``.

    Returns:
        list: ``[]`` when nobody is found, otherwise a one-element list with a
        dict holding ``'bbox'`` (integer ``(xmin, ymin, xmax, ymax)``),
        ``'conf'`` and ``'object_presence'`` (every non-person entry of
        ``DETECTION_CLASSES``, normalized, initialised to ``False``).
    """
    candidates = []
    for result in person_model(frame, classes=[0]):
        for box in result.boxes:
            if person_model.names[int(box.cls[0])] != 'person':
                continue
            left, top, right, bottom = (int(value) for value in box.xyxy[0].tolist())
            candidates.append({
                'bbox': (left, top, right, bottom),
                'conf': float(box.conf[0]),
                'object_presence': {normalize_class_name(cls): False
                                    for cls in DETECTION_CLASSES if cls != 'person'},
            })

    if not candidates:
        return []
    # max() keeps the first of equally confident candidates, like a stable descending sort.
    return [max(candidates, key=lambda candidate: candidate['conf'])]

# Function to process accessory detections
def process_accessory_detections(person_crops, models):
    """Run the accessory models on each person crop and report what was found.

    ``models`` is positional: ``models[0]`` contributes 'mask', ``models[1]``
    contributes 'hair net' and ``models[2]`` contributes 'hardhat' and
    'safety vest'. Class names are compared after ``normalize_class_name``,
    so the accepted names below are lower-case; this is what lets model 3's
    'Hardhat' / 'Safety Vest' classes be recognised.

    Args:
        person_crops: List of image crops, one per person.
        models: Sequence of the three accessory models, in the order above.

    Returns:
        list[set]: For each crop, the normalized names of the detected accessories.
    """
    if not person_crops:
        return []

    # Accepted (normalized) class names, by position in ``models``.
    wanted_by_position = (
        {'mask'},
        {'hair net'},
        {'hardhat', 'safety vest'},
    )

    # One batched inference call per model, covering all crops.
    per_model = [
        (model, wanted, model(person_crops))
        for model, wanted in zip(models, wanted_by_position)
    ]

    detected_objects_list = []
    for idx in range(len(person_crops)):
        detected_objects = set()
        for model, wanted, results in per_model:
            for box in results[idx].boxes:
                label = normalize_class_name(model.names[int(box.cls[0])])
                if label in wanted:
                    detected_objects.add(label)
        detected_objects_list.append(detected_objects)
    return detected_objects_list

# Function to update accessory states with temporal smoothing

# Update accessory states function
def update_accessory_states(person, detected_objects, accessory_states, hold_frames):
    """Smooth per-accessory presence over time and publish it on ``person``.

    For every key of ``person['object_presence']`` (normalized with
    ``normalize_class_name``):

    * 'hair net' uses a hold: a detection sets ``'hold_counter'`` to
      ``hold_frames`` and reports True; each miss consumes one hold frame and
      still reports True until the counter reaches zero.
    * every other accessory flips its state only after the detection has
      disagreed with it for ``STATE_CHANGE_THRESHOLDS[name]`` (default 5)
      consecutive calls.

    Keys with no entry in ``accessory_states`` are logged and skipped.
    ``accessory_states`` and ``person`` are modified in place.
    """
    object_presence = person['object_presence']

    # Iterate over a snapshot: writing the normalized key below can add a new
    # entry, which would raise "dictionary changed size during iteration".
    for obj in list(object_presence):
        obj_normalized = normalize_class_name(obj)

        state_info = accessory_states.get(obj_normalized)
        if state_info is None:
            logging.warning(f"Error: {obj_normalized} not found in accessory_states.")
            logging.warning(f"Available keys: {list(accessory_states.keys())}")
            continue

        detected = obj_normalized in detected_objects
        current_state = state_info['state']

        if obj_normalized == 'hair net':
            if detected:
                state_info['hold_counter'] = hold_frames
                current_state = True
            else:
                remaining = state_info.get('hold_counter', 0)
                current_state = remaining > 0
                state_info['hold_counter'] = remaining - 1 if remaining > 0 else remaining
        else:
            # Temporal smoothing for the other accessories.
            threshold = STATE_CHANGE_THRESHOLDS.get(obj_normalized, 5)
            if detected == current_state:
                state_info['counter'] = 0
            else:
                state_info['counter'] += 1
                if state_info['counter'] >= threshold:
                    current_state = detected
                    state_info['counter'] = 0

        state_info['state'] = current_state
        object_presence[obj_normalized] = current_state

def main():
    """Run person + accessory detection over the resized video and save an annotated copy.

    For each frame the most confident person is located, the accessory models
    run on that person's crop, the shared ``accessory_states`` are smoothed,
    and the person box plus one "using"/"removed" label per accessory are
    drawn. Frames are written to ``detected_video.mp4`` and an HTML video tag
    for the result is displayed.

    Raises:
        FileNotFoundError: If the resized video cannot be opened.
    """
    # Local import: only ``HTML`` is imported at the top of the cell.
    from IPython.display import display

    # Initialize video capture
    cap = cv2.VideoCapture(resized_video_path)
    if not cap.isOpened():
        # Fail loudly instead of silently writing an empty output video.
        cap.release()
        raise FileNotFoundError(f"Unable to open video: {resized_video_path}")

    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)

    # Calculate hold frames for 'hair net'
    HOLD_DURATION_SECONDS = 1  # Adjust as needed
    hold_frames = int(fps * HOLD_DURATION_SECONDS)

    # Define the codec and create VideoWriter object
    output_video_path = "detected_video.mp4"
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_video_path, fourcc, fps,
                          (frame_width, frame_height))

    models = [model1, model2, model3]  # loop invariant
    dy = 20  # vertical spacing between label lines

    # Frame-by-frame processing
    frame_count = 0
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    logging.info(f"Total frames to process: {total_frames}")

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # At most one person (the most confident) is returned.
            persons = process_person_detections(frame, person_model)

            if persons:
                person = persons[0]
                xmin, ymin, xmax, ymax = person['bbox']
                person_crop = frame[ymin:ymax, xmin:xmax]

                # A degenerate box gives an empty crop; treat it as "nothing detected"
                # rather than passing an empty image to the models.
                if person_crop.size > 0:
                    detected_objects = process_accessory_detections([person_crop], models)[0]
                else:
                    detected_objects = set()

                update_accessory_states(person, detected_objects, accessory_states, hold_frames)

                conf = person['conf']
                object_presence = person['object_presence']

                # Draw person bounding box
                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (255, 0, 0), 2)

                # Stack labels upward above the box when they fit; otherwise stack
                # them downward inside the box so they are not drawn off-frame.
                top_label_y = ymin - 30 - (len(object_presence) - 1) * dy
                if top_label_y >= dy:
                    title_y, label_y, step = ymin - 10, ymin - 30, -dy
                else:
                    title_y, label_y, step = ymin + 20, ymin + 40, dy

                cv2.putText(frame, f'Person {conf:.2f}', (xmin, title_y),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 2)

                label_x = xmin
                for i, (obj, present) in enumerate(object_presence.items()):
                    if present:
                        color = (0, 255, 0)  # Green
                        label = f"{obj}: using"
                    else:
                        color = (0, 0, 255)  # Red
                        label = f"{obj}: removed"
                    cv2.putText(frame, label, (label_x, label_y + i * step),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)

            out.write(frame)
            frame_count += 1

            if frame_count % 30 == 0:
                logging.info(f"Processed {frame_count}/{total_frames} frames.")
    finally:
        # Release the capture and finalize the output file even if a frame fails.
        cap.release()
        out.release()

    logging.info(f"Processing complete. Total frames processed: {frame_count}")

    # Function to display video in Google Colab
    def display_video(path, width=800):
        video_tag = f"""
        <video width="{width}" height="auto" controls>
            <source src="{path}" type="video/mp4">
        </video>
        """
        return HTML(video_tag)

    # A bare expression inside a function is not rendered by the notebook;
    # the HTML object has to be passed to display() explicitly.
    display(display_video(output_video_path))

if __name__ == '__main__':
    main()



Downloading...
From: https://drive.google.com/uc?id=130btvPHooNMgRY6AWBkhullJeHViJJg1
To: /content/original_video.mp4
100%|██████████| 31.3M/31.3M [00:00<00:00, 109MB/s]
Downloading...
From: https://drive.google.com/uc?id=11jxQFEYr4GV84YoxXCQjMHweZMrjRoG0
To: /content/hairnet_best.pt
100%|██████████| 22.5M/22.5M [00:00<00:00, 190MB/s]
Downloading...
From: https://drive.google.com/uc?id=1Z3-XuG6ln-slgioy3RRFVgrqx1XO5YDB
To: /content/mask_best.pt
100%|██████████| 6.24M/6.24M [00:00<00:00, 194MB/s]
Downloading...
From (original): https://drive.google.com/uc?id=1o9pJQHl2vRV2MStwNjlcMZJ7qarxYhmg
From (redirected): https://drive.google.com/uc?id=1o9pJQHl2vRV2MStwNjlcMZJ7qarxYhmg&confirm=t&uuid=a034d102-e9ac-425a-8a4e-f323a42bdfc3
To: /content/safety_vest_best.pt
100%|██████████| 87.6M/87.6M [00:00<00:00, 166MB/s]


[1;30;43mGörüntülenen çıkış son 5000 satıra kısaltıldı.[0m
0: 640x224 1 Helmet, 15.4ms
Speed: 1.4ms preprocess, 15.4ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 224)

0: 640x224 (no detections), 14.8ms
Speed: 1.2ms preprocess, 14.8ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 224)

0: 640x224 1 Hardhat, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 23.3ms
Speed: 1.3ms preprocess, 23.3ms inference, 1.6ms postprocess per image at shape (1, 3, 640, 224)

0: 640x384 1 person, 10.4ms
Speed: 1.4ms preprocess, 10.4ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 384)

0: 640x224 (no detections), 10.4ms
Speed: 1.3ms preprocess, 10.4ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 224)

0: 640x224 (no detections), 10.7ms
Speed: 2.8ms preprocess, 10.7ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 224)

0: 640x224 1 Hardhat, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 21.3ms
Speed: 1.3ms preprocess, 21.3ms inference, 1.8ms postp

In [None]:
from ultralytics import YOLO
import cv2
import math

def video_detection(path_x):
    """Yield each frame of ``path_x`` annotated with detections from ``ppe.pt``.

    This is a generator: every readable frame is run through the model,
    detections with confidence above 0.5 are drawn (green for worn PPE, red
    for missing PPE, orange for machinery/vehicles, magenta otherwise) and
    the annotated frame is yielded. Iteration ends when the source has no
    more frames.

    Args:
        path_x: Anything accepted by ``cv2.VideoCapture`` (file path or camera index).
    """
    cap = cv2.VideoCapture(path_x)
    frame_width = int(cap.get(3))
    frame_height = int(cap.get(4))
    # NOTE(review): no frame is ever written to this writer; it is kept only so
    # 'output.avi' is still created, and is released below.
    out = cv2.VideoWriter('output.avi', cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 10,
                          (frame_width, frame_height))

    model = YOLO("ppe.pt")
    classNames = ['Hardhat', 'Mask', 'NO-Hardhat', 'NO-Mask', 'NO-Safety Vest', 'Person', 'Safety Cone',
                  'Safety Vest', 'machinery', 'vehicle']
    worn = {'Mask', 'Hardhat', 'Safety Vest'}
    missing = {'NO-Hardhat', 'NO-Mask', 'NO-Safety Vest'}
    equipment = {'machinery', 'vehicle'}

    try:
        while True:
            success, img = cap.read()
            if not success:
                # End of stream (or read failure): stop instead of feeding None to the model.
                break

            for r in model(img, stream=True):
                for box in r.boxes:
                    x1, y1, x2, y2 = (int(v) for v in box.xyxy[0])
                    conf = math.ceil((box.conf[0] * 100)) / 100
                    class_name = classNames[int(box.cls[0])]
                    label = f'{class_name}{conf}'
                    t_size = cv2.getTextSize(label, 0, fontScale=1, thickness=2)[0]
                    c2 = x1 + t_size[0], y1 - t_size[1] - 3

                    if class_name in worn:
                        color = (0, 255, 0)
                    elif class_name in missing:
                        color = (0, 0, 255)
                    elif class_name in equipment:
                        color = (0, 149, 255)
                    else:
                        color = (85, 45, 255)

                    if conf > 0.5:
                        cv2.rectangle(img, (x1, y1), (x2, y2), color, 3)
                        cv2.rectangle(img, (x1, y1), c2, color, -1, cv2.LINE_AA)  # filled label background
                        cv2.putText(img, label, (x1, y1 - 2), 0, 1, [255, 255, 255],
                                    thickness=1, lineType=cv2.LINE_AA)

            yield img
    finally:
        # Runs when the stream ends or the generator is closed.
        cap.release()
        out.release()
cv2.destroyAllWindows()

In [None]:
#from git weigths
#test on video
!pip install ultralytics
from ultralytics import YOLO
import cv2
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu

# Clone the YOLOv5 repository
!git clone https://github.com/ultralytics/yolov5

# Install requirements
%cd yolov5
!pip install -r requirements.txt

# Import and load the model
import torch # Import torch

# Load the model using torch.hub.load
model = torch.hub.load('ultralytics/yolov5', 'custom', path='/content/best-vinayakmane.pt')

conf_threshold = 0.5
# A torch.hub YOLOv5 model is configured through attributes and called directly;
# it has no ultralytics-v8 style ``predict(..., conf=...)`` method.
model.conf = conf_threshold

video_path = "orj_1.mp4"
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # The `%cd yolov5` above changes the working directory, so this relative
    # path is resolved inside the cloned repository.
    cap.release()
    raise FileNotFoundError(f"Unable to open video: {os.path.abspath(video_path)}"
                            if 'os' in globals() else f"Unable to open video: {video_path}")

width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)  # keep fractional rates such as 29.97

output_path = "output_video_vinayakname.mp4"
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, fourcc, fps, (360, 640))

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        frame_resized = cv2.resize(frame, (360, 640))

        # The hub model expects RGB input for numpy images; OpenCV frames are BGR.
        results = model(cv2.cvtColor(frame_resized, cv2.COLOR_BGR2RGB))

        # results.xyxy[0] holds one row per detection for the first (only) image.
        num_detections = len(results.xyxy[0])
        if num_detections > 0:
            print(f"Detections found: {num_detections}")
        else:
            print("No detections.")

        # render() draws the boxes and returns the annotated RGB images.
        annotated_frame = cv2.cvtColor(results.render()[0], cv2.COLOR_RGB2BGR)
        out.write(annotated_frame)
finally:
    cap.release()
    out.release()

print(f"Processed video saved at {output_path}")

Looking in indexes: https://download.pytorch.org/whl/cpu
fatal: destination path 'yolov5' already exists and is not an empty directory.
/content/yolov5


Downloading: "https://github.com/ultralytics/yolov5/zipball/master" to /root/.cache/torch/hub/master.zip
YOLOv5 🚀 2024-12-6 Python-3.10.12 torch-2.5.1+cu121 CUDA:0 (Tesla T4, 15102MiB)

Fusing layers... 
custom_YOLOv5m summary: 232 layers, 7257306 parameters, 0 gradients
Adding AutoShape... 


Processed video saved at output_video_vinayakname.mp4


In [None]:
from ultralytics import YOLO
import cv2
import os

# Load the YOLO model
model = YOLO('/content/ppe.pt')

# Confidence threshold
conf_threshold = 0.5

# Input video path
video_path = "orj_1.mp4"
cap = cv2.VideoCapture(video_path)

# Validate the input before creating the writer, so a bad path does not leave
# an empty output file behind. Raising stops this cell only, whereas exit()
# inside a notebook ends the kernel session.
if not cap.isOpened():
    cap.release()
    raise FileNotFoundError(f"Error: Unable to open the video file: {video_path}")

# Get video properties
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)  # keep fractional rates such as 29.97

# Output video path
output_path = "output_video.mp4"
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))  # Match original video dimensions

print(f"Processing video: {video_path}")
print(f"Original Dimensions: {width}x{height}, FPS: {fps}")

# Process video frame-by-frame
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # verbose=False suppresses the per-frame log lines that flood the cell output.
        results = model.predict(frame, conf=conf_threshold, verbose=False)

        # Annotate the frame with the detection results and append it to the output.
        annotated_frame = results[0].plot()
        out.write(annotated_frame)
finally:
    # Release resources even if inference fails mid-video.
    cap.release()
    out.release()

print(f"Processed video saved at {os.path.abspath(output_path)}")


In [None]:
from ultralytics import YOLO
import cv2
import os

# Load the YOLO model
model = YOLO('/content/ppe.pt')

# Confidence threshold
conf_threshold = 0.5

# Input video path
video_path = "orj_1.mp4"
cap = cv2.VideoCapture(video_path)

# Validate the input before creating the writer, so a bad path does not leave
# an empty output file behind. Raising stops this cell only, whereas exit()
# inside a notebook ends the kernel session.
if not cap.isOpened():
    cap.release()
    raise FileNotFoundError(f"Error: Unable to open the video file: {video_path}")

# Get video properties
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)  # keep fractional rates such as 29.97

# Output video path
output_path = "output_video.mp4"
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))  # Match original video dimensions

print(f"Processing video: {video_path}")
print(f"Original Dimensions: {width}x{height}, FPS: {fps}")

# Process video frame-by-frame
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # verbose=False suppresses the per-frame log lines that flood the cell output.
        results = model.predict(frame, conf=conf_threshold, verbose=False)

        # Annotate the frame with the detection results and append it to the output.
        annotated_frame = results[0].plot()
        out.write(annotated_frame)
finally:
    # Release resources even if inference fails mid-video.
    cap.release()
    out.release()

print(f"Processed video saved at {os.path.abspath(output_path)}")


[1;30;43mGörüntülenen çıkış son 5000 satıra kısaltıldı.[0m
Speed: 2.0ms preprocess, 19.9ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 NO-Hardhat, 1 NO-Safety Vest, 1 Person, 19.3ms
Speed: 2.2ms preprocess, 19.3ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 NO-Hardhat, 1 NO-Safety Vest, 1 Person, 19.0ms
Speed: 1.8ms preprocess, 19.0ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 NO-Hardhat, 1 NO-Safety Vest, 1 Person, 21.0ms
Speed: 2.1ms preprocess, 21.0ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 NO-Hardhat, 1 NO-Safety Vest, 1 Person, 20.0ms
Speed: 2.4ms preprocess, 20.0ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 NO-Hardhat, 1 NO-Safety Vest, 1 Person, 19.0ms
Speed: 2.2ms preprocess, 19.0ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 NO-Hardhat, 1 NO-Safety Vest, 1 Person

In [None]:
#from git weigths
#test on video
from ultralytics import YOLO
import cv2

model = YOLO('/content/ppe.pt')

conf_threshold = 0.5

video_path = "orj_1.mp4"
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # Without this check an unreadable input silently yields an empty output
    # video followed by a "saved" message.
    cap.release()
    raise FileNotFoundError(f"Unable to open video: {video_path}")

width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)  # keep fractional rates such as 29.97

output_path = "output_video_snehilsanyal.mp4"
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, fourcc, fps, (360, 640))

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Frames are resized to the 360x640 size the writer was opened with.
        frame_resized = cv2.resize(frame, (360, 640))

        results = model.predict(frame_resized, conf=conf_threshold)

        num_detections = len(results[0].boxes)
        if num_detections > 0:
            print(f"Detections found: {num_detections}")
        else:
            print("No detections.")

        annotated_frame = results[0].plot()
        out.write(annotated_frame)
finally:
    # Release resources even if inference fails mid-video.
    cap.release()
    out.release()

print(f"Processed video saved at {output_path}")

Processed video saved at output_video_snehilsanyal.mp4


In [None]:
!pip install ultralytics

import os
import cv2
import torch
import gdown
import warnings
import logging
from IPython.display import HTML
from ultralytics import YOLO
import numpy as np

# Suppress warnings
warnings.filterwarnings("ignore")

logging.basicConfig(level=logging.INFO)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
logging.info(f"Using device: {device}")

# Function to download files from Google Drive
def download_file(url, output_path):
    """Download ``url`` to ``output_path`` with gdown, logging and re-raising failures.

    Args:
        url: Google Drive download URL.
        output_path: Local file name to write.

    Raises:
        RuntimeError: If gdown reports no output file.
        Exception: Any error raised by gdown is logged and re-raised unchanged.
    """
    try:
        saved_path = gdown.download(url, output_path, quiet=False)
        if saved_path is None:
            # NOTE(review): some gdown releases appear to signal failure by
            # returning None instead of raising; treat that as an error too.
            raise RuntimeError(f"gdown produced no output file for {output_path}")
    except Exception as e:
        logging.error(f"Error downloading {url}: {e}")
        raise

# Download video and weights
# Fetch the input video into the current working directory.
video_url = "https://drive.google.com/uc?id=130btvPHooNMgRY6AWBkhullJeHViJJg1"
video_path = "original_video.mp4"
download_file(video_url, video_path)

# Single weights file used for all accessory classes in this cell; saved locally as all_best.pt.
weights_url = "https://drive.google.com/uc?id=1VVRD5WrZ3CV2EOPwXjv9ZyY53rdHqwHb"  # all_best.pt
download_file(weights_url, "all_best.pt")

def resize_video(input_path, output_path, resize_width=360, resize_height=640):
    """Re-encode ``input_path`` at ``resize_width`` x ``resize_height`` if needed.

    Args:
        input_path: Video to read.
        output_path: Where the resized copy is written (mp4v codec).
        resize_width: Target frame width in pixels.
        resize_height: Target frame height in pixels.

    Returns:
        ``output_path`` when a resized copy was written, or ``input_path``
        unchanged when the video already has the target dimensions.

    Raises:
        FileNotFoundError: If ``input_path`` cannot be opened.
    """
    cap = cv2.VideoCapture(input_path)
    try:
        if not cap.isOpened():
            # An unopened capture reports 0x0, which would otherwise lead to an
            # empty "resized" file being written and returned.
            raise FileNotFoundError(f"Unable to open video: {input_path}")

        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)

        if frame_width == resize_width and frame_height == resize_height:
            logging.info("Resizing not required.")
            return input_path

        logging.info(f"Resizing video from ({frame_width}, {frame_height}) to "
                     f"({resize_width}, {resize_height})")
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out_resized = cv2.VideoWriter(output_path, fourcc, fps,
                                      (resize_width, resize_height))
        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                out_resized.write(cv2.resize(frame, (resize_width, resize_height)))
        finally:
            # Finalize the output file even if a frame fails mid-way.
            out_resized.release()
        return output_path
    finally:
        cap.release()

# Resize the input once; resize_video returns the original path when no resize is needed.
resized_video_path = 'resized_video.mp4'
resized_video_path = resize_video(video_path, resized_video_path)

# Load models
model1 = YOLO('all_best.pt').to(device)  # single accessory model for this cell
person_model = YOLO('yolov8n.pt').to(device)  # called with classes=[0] to keep only 'person'

# Adjust confidence thresholds
# NOTE(review): these look like YOLOv5-hub style attribute assignments; ultralytics
# YOLO objects presumably take conf as a call-time argument instead — confirm
# that these assignments have any effect on inference.
model1.conf = 0.5
person_model.conf = 0.5

# Print model class names for verification
logging.info(f"Model1 class names: {model1.names}")
print(f"Person model class names: {person_model.names}")

# Function to normalize class names
def normalize_class_name(name):
    """Return ``name`` lower-cased, with underscores and hyphens replaced by spaces."""
    separators_to_space = str.maketrans({'_': ' ', '-': ' '})
    spaced = name.translate(separators_to_space)
    return spaced.lower()

# Classes tracked per person. Every entry except 'person' becomes (after
# normalize_class_name) a key of each person's 'object_presence' dict.
DETECTION_CLASSES = ['person', 'helmet', 'vest', 'mask', 'hair net']

# Shared, mutable per-accessory state keyed by normalized class name.
# NOTE(review): the function consuming 'counter' / 'hold_counter' for this cell
# is not visible here — presumably the same smoothing as the earlier cell; confirm.
accessory_states = {
    'helmet': {'state': False, 'counter': 0},
    'mask': {'state': False, 'counter': 0},
    'hair net': {'state': False, 'counter': 0, 'hold_counter': 0},
    'vest': {'state': False, 'counter': 0},
}

# Set thresholds for state change confirmation (per accessory, in frames — TODO confirm unit against the consumer)
STATE_CHANGE_THRESHOLDS = {
    'helmet': 5,
    'mask': 20,
    'hair net': 10,
    'vest': 10,
}

# Minimum detection confidence per normalized class; process_accessory_detections
# skips boxes below it and falls back to 0.5 for classes not listed here.
class_confidences = {
    'helmet': 0.5,
    'mask': 0.4,
    'hair net': 0.3,
    'vest': 0.5
}

# Function to calculate IoU
def calculate_iou(boxA, boxB):
    """Return the intersection-over-union of two ``(xmin, ymin, xmax, ymax)`` boxes.

    Returns ``0.0`` when the boxes do not overlap (or only touch).
    """
    overlap_width = min(boxA[2], boxB[2]) - max(boxA[0], boxB[0])
    overlap_height = min(boxA[3], boxB[3]) - max(boxA[1], boxB[1])

    # A negative extent on either axis means the boxes are disjoint on that axis.
    intersection = max(0, overlap_width) * max(0, overlap_height)
    if intersection == 0:
        return 0.0

    area_a = (boxA[2] - boxA[0]) * (boxA[3] - boxA[1])
    area_b = (boxB[2] - boxB[0]) * (boxB[3] - boxB[1])
    union = area_a + area_b - intersection
    return intersection / float(union)

# Function to check if accessory is in expected region
def is_accessory_in_region(accessory_box, person_box, region='torso'):
    """Tell whether ``accessory_box`` overlaps the named band of ``person_box``.

    The 'head' band is the top 30% of the person box and the 'torso' band
    spans 30%-70% of its height, both at full box width. The accessory counts
    as inside when its IoU with the band exceeds 0.1.

    Returns:
        bool: ``False`` for any other ``region`` value.
    """
    left, top, right, bottom = person_box
    height = bottom - top

    if region not in ('torso', 'head'):
        return False

    if region == 'head':
        band_top, band_bottom = top, top + int(height * 0.3)
    else:
        band_top, band_bottom = top + int(height * 0.3), top + int(height * 0.7)

    band = (left, band_top, right, band_bottom)
    return calculate_iou(accessory_box, band) > 0.1  # Adjust threshold as needed

# Function to process person detections and select the highest-confidence person
def process_person_detections(frame, person_model, conf_threshold=None):
    """Detect people in ``frame`` and keep only the most confident one.

    Args:
        frame: Image handed unchanged to ``person_model``.
        person_model: Callable detector. Its results expose ``boxes`` whose
            items carry ``cls``, ``conf`` and ``xyxy``; ``names`` maps class
            ids to class names.
        conf_threshold: Minimum confidence forwarded to the detector as
            ``conf=``. When None, the ``conf`` attribute set on
            ``person_model`` (if any) is used instead.

    Returns:
        A one-element list holding a dict with 'bbox' (int xmin, ymin, xmax,
        ymax), 'conf' and 'object_presence' (every non-person entry of
        DETECTION_CLASSES mapped to False), or an empty list when nobody is
        detected.
    """
    if conf_threshold is None:
        # Ultralytics YOLOv8 takes the threshold as a call argument; a plain
        # `person_model.conf = ...` assignment is not read by the predict call,
        # so forward that value explicitly.
        conf_threshold = getattr(person_model, 'conf', None)
    predict_kwargs = {'classes': [0]}
    if conf_threshold is not None:
        predict_kwargs['conf'] = conf_threshold
    results_person = person_model(frame, **predict_kwargs)

    best_person = None
    for result in results_person:
        for box in result.boxes:
            cls_name = person_model.names[int(box.cls[0])]
            if cls_name != 'person':
                continue
            conf = float(box.conf[0])
            # Strict comparison keeps the first box on ties, like a stable sort
            if best_person is not None and conf <= best_person['conf']:
                continue
            xmin, ymin, xmax, ymax = map(int, box.xyxy[0].tolist())
            best_person = {
                'bbox': (xmin, ymin, xmax, ymax),
                'conf': conf,
                'object_presence': {normalize_class_name(cls): False
                                    for cls in DETECTION_CLASSES if cls != 'person'}
            }

    return [best_person] if best_person is not None else []

# Function to process accessory detections with spatial validation and conflict resolution
def process_accessory_detections(person_crops, models, person_boxes):
    """Detect accessories in each person crop and validate where they sit.

    Args:
        person_crops: Images cropped to one person each; passed as a batch to
            ``models[0]``.
        models: Sequence whose first element is the accessory detector.
        person_boxes: For every crop, the (xmin, ymin, xmax, ymax) box the crop
            was cut from, in full-frame coordinates.

    Returns:
        A list with one set of accessory names per crop. A detection counts
        only if it meets its class threshold in ``class_confidences`` and, for
        helmet / mask / hair net / vest, lies in the expected body region.
        When several head accessories survive, only the highest-priority one
        (helmet > hair net > mask) is kept.
    """
    accessory_model = models[0]
    results_model1 = accessory_model(person_crops)

    # Body region each accessory has to overlap to be accepted
    expected_region = {'helmet': 'head', 'mask': 'head', 'hair net': 'head', 'vest': 'torso'}
    # NOTE(review): this priority also discards a mask whenever a helmet or
    # hair net is present - confirm these are really meant to be exclusive.
    head_priority = ['helmet', 'hair net', 'mask']

    detected_objects_list = []
    for idx in range(len(person_crops)):
        person_box = person_boxes[idx]
        offset_x, offset_y = person_box[0], person_box[1]
        highest_confidences = {}

        for box in results_model1[idx].boxes:
            cls_name = accessory_model.names[int(box.cls[0])]
            cls_name_normalized = normalize_class_name(cls_name)
            if cls_name_normalized == 'hairnet':
                # Accept the one-word spelling; the rest of the pipeline uses 'hair net'
                cls_name_normalized = 'hair net'

            conf = float(box.conf[0])
            if conf < class_confidences.get(cls_name_normalized, 0.5):
                continue  # Skip detections below the threshold

            # The detector ran on the crop, so its boxes are relative to the
            # crop's top-left corner. Shift them into frame coordinates before
            # comparing with the frame-coordinate person box.
            xmin_a, ymin_a, xmax_a, ymax_a = map(int, box.xyxy[0].tolist())
            accessory_box = (xmin_a + offset_x, ymin_a + offset_y,
                             xmax_a + offset_x, ymax_a + offset_y)

            # Spatial validation
            region = expected_region.get(cls_name_normalized)
            if region is not None and not is_accessory_in_region(
                    accessory_box, person_box, region=region):
                continue

            # Remember the best confidence seen for each class
            if (cls_name_normalized not in highest_confidences or
                    conf > highest_confidences[cls_name_normalized]):
                highest_confidences[cls_name_normalized] = conf

        # Conflict resolution: keep only the highest-priority head accessory
        present_head = [name for name in head_priority if name in highest_confidences]
        for lower_priority in present_head[1:]:
            del highest_confidences[lower_priority]

        detected_objects_list.append(set(highest_confidences.keys()))
    return detected_objects_list

# Function to update accessory states with temporal smoothing
def update_accessory_states(person, detected_objects, accessory_states, hold_frames):
    """Fold this frame's detections into the smoothed accessory states.

    'hair net' uses a hold timer: a detection sets the state to True and
    reloads the timer with ``hold_frames``; without a detection the state stays
    True while the timer counts down to zero. Every other accessory flips only
    after the detection has disagreed with the stored state for as many
    consecutive frames as its entry in STATE_CHANGE_THRESHOLDS (default 5).

    The resulting state is written to both ``accessory_states`` and
    ``person['object_presence']``.
    """
    for obj in person['object_presence'].keys():
        key = normalize_class_name(obj)
        seen_now = key in detected_objects

        info = accessory_states[key]
        new_state = info['state']

        if key == 'hair net':
            if seen_now:
                info['hold_counter'] = hold_frames
                new_state = True
            elif info.get('hold_counter', 0) > 0:
                info['hold_counter'] -= 1
                new_state = True
            else:
                new_state = False
            # Make sure the timer key exists even if it was never initialised
            info.setdefault('hold_counter', 0)
        else:
            # Debounce: count consecutive frames that contradict the stored state
            pending = info['counter']
            threshold = STATE_CHANGE_THRESHOLDS.get(key, 5)
            if seen_now == new_state:
                pending = 0
            else:
                pending += 1
                if pending >= threshold:
                    new_state, pending = seen_now, 0
            info['counter'] = pending

        info['state'] = new_state
        person['object_presence'][key] = new_state

def main():
    """Annotate the resized video with per-frame PPE status for one person.

    For every frame of ``resized_video_path`` the highest-confidence person is
    detected, the accessory model is run on that person's crop, the accessory
    states are smoothed over time and the result is drawn on the frame, which
    is written to ``detected_video.mp4``. The finished video is then displayed
    in the notebook.
    """
    # Imported locally: the notebook's import cell only pulls in HTML
    from IPython.display import display

    # Initialize video capture
    cap = cv2.VideoCapture(resized_video_path)
    if not cap.isOpened():
        cap.release()
        logging.error(f"Could not open video: {resized_video_path}")
        return

    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)

    # Calculate hold frames for 'hair net'
    HOLD_DURATION_SECONDS = 1  # Adjust as needed
    hold_frames = int(fps * HOLD_DURATION_SECONDS)

    # Define the codec and create VideoWriter object
    output_video_path = "detected_video.mp4"
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_video_path, fourcc, fps,
                          (frame_width, frame_height))

    # Frame-by-frame processing
    frame_count = 0
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    logging.info(f"Total frames to process: {total_frames}")

    models = [model1]  # loop-invariant, so built once

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            # Process person detections and select highest-confidence person
            persons = process_person_detections(frame, person_model)
            person = persons[0] if persons else None

            if person is not None:
                xmin, ymin, xmax, ymax = person['bbox']
                person_crop = frame[ymin:ymax, xmin:xmax]
                if person_crop.size == 0:
                    # A zero-area box yields an empty image; skip it
                    person = None

            if person is not None:
                # Process accessory detections with spatial validation
                person_boxes = [person['bbox']]
                detected_objects_list = process_accessory_detections(
                    [person_crop], models, person_boxes)

                # Update accessory states for the person
                detected_objects = detected_objects_list[0]
                update_accessory_states(person, detected_objects, accessory_states, hold_frames)

                conf = person['conf']
                object_presence = person['object_presence']

                # Draw person bounding box
                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (255, 0, 0), 2)
                cv2.putText(frame, f'Person {conf:.2f}', (xmin, ymin - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 2)

                # Starting position for labels; each label is stacked dy pixels higher
                label_x = xmin
                label_y = ymin - 30
                dy = 20

                for i, (obj, present) in enumerate(object_presence.items()):
                    if present:
                        color = (0, 255, 0)  # Green
                        label = f"{obj}: using"
                    else:
                        color = (0, 0, 255)  # Red
                        label = f"{obj}: removed"
                    cv2.putText(frame, label, (label_x, label_y - i * dy),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)

            out.write(frame)
            frame_count += 1

            if frame_count % 30 == 0:
                logging.info(f"Processed {frame_count}/{total_frames} frames.")
    finally:
        # Release the handles even if a frame fails, so the output file is closed
        cap.release()
        out.release()

    logging.info(f"Processing complete. Total frames processed: {frame_count}")

    # Function to display video in Google Colab
    def display_video(path, width=800):
        video_tag = f"""
        <video width="{width}" height="auto" controls>
            <source src="{path}" type="video/mp4">
        </video>
        """
        return HTML(video_tag)

    # An HTML object created inside a function is not rendered automatically;
    # it has to be passed to display() explicitly.
    display(display_video(output_video_path))

if __name__ == '__main__':
    main()


Collecting ultralytics
  Downloading ultralytics-8.3.40-py3-none-any.whl.metadata (35 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.12-py3-none-any.whl.metadata (9.4 kB)
Downloading ultralytics-8.3.40-py3-none-any.whl (898 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m898.5/898.5 kB[0m [31m20.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics_thop-2.0.12-py3-none-any.whl (26 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.3.40 ultralytics-thop-2.0.12
Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


Downloading...
From: https://drive.google.com/uc?id=130btvPHooNMgRY6AWBkhullJeHViJJg1
To: /content/original_video.mp4
100%|██████████| 31.3M/31.3M [00:00<00:00, 95.6MB/s]
Downloading...
From: https://drive.google.com/uc?id=1VVRD5WrZ3CV2EOPwXjv9ZyY53rdHqwHb
To: /content/all_best.pt
100%|██████████| 6.25M/6.25M [00:00<00:00, 19.8MB/s]


Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n.pt to 'yolov8n.pt'...


100%|██████████| 6.25M/6.25M [00:00<00:00, 90.0MB/s]


Person model class names: {0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted plant', 59: 'bed', 60: 'dining table', 61: 'toilet', 62: 'tv', 63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard', 67:

In [None]:
!pip install ultralytics
import os
import cv2
import torch
import gdown
import warnings
import logging
from IPython.display import HTML
from ultralytics import YOLO

# Silence library warnings to keep the cell output readable
warnings.filterwarnings("ignore")

logging.basicConfig(level=logging.INFO)

# Run on the GPU when one is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
logging.info(f"Using device: {device}")

# download files from Google Drive
def download_file(url, output_path):
    """Download ``url`` to ``output_path`` with gdown.

    Returns:
        The value returned by ``gdown.download``.

    Raises:
        RuntimeError: If gdown reports failure by returning None.
        Exception: Any error raised by gdown is logged and re-raised.
    """
    try:
        downloaded = gdown.download(url, output_path, quiet=False)
    except Exception as e:
        logging.error(f"Error downloading {url}: {e}")
        raise
    if downloaded is None:
        # gdown can signal a failed download by returning None instead of
        # raising; treat that as an error rather than continuing without the file.
        logging.error(f"Error downloading {url}: no file was written to {output_path}")
        raise RuntimeError(f"Download failed for {url}")
    return downloaded

# Google Drive sources for the input video and the accessory-model weights
video_url = "https://drive.google.com/uc?id=130btvPHooNMgRY6AWBkhullJeHViJJg1"
video_path = "original_video.mp4"
weights_1_url = "https://drive.google.com/uc?id=1VVRD5WrZ3CV2EOPwXjv9ZyY53rdHqwHb"

# Fetch the video first, then the weights
for source_url, destination in ((video_url, video_path), (weights_1_url, "all_best.pt")):
    download_file(source_url, destination)


def resize_video(input_path, output_path, resize_width=360, resize_height=640):
    """Re-encode a video at ``resize_width`` x ``resize_height`` if needed.

    Args:
        input_path: Video to read.
        output_path: Where the resized copy is written (mp4v codec).
        resize_width: Target frame width in pixels.
        resize_height: Target frame height in pixels.

    Returns:
        ``output_path`` when a resized copy was written, or ``input_path`` when
        the video already has the requested size.

    Raises:
        IOError: If ``input_path`` cannot be opened.
    """
    cap = cv2.VideoCapture(input_path)
    out_resized = None
    try:
        if not cap.isOpened():
            # An unopened capture reports 0x0 frames, which would otherwise
            # produce an empty output file without any error.
            logging.error(f"Could not open video: {input_path}")
            raise IOError(f"Could not open video: {input_path}")

        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)

        if frame_width == resize_width and frame_height == resize_height:
            logging.info("Resizing not required.")
            return input_path

        logging.info(f"Resizing video from ({frame_width}, {frame_height}) to "
                     f"({resize_width}, {resize_height})")
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out_resized = cv2.VideoWriter(output_path, fourcc, fps,
                                      (resize_width, resize_height))

        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frame_resized = cv2.resize(frame, (resize_width, resize_height))
            out_resized.write(frame_resized)

        return output_path
    finally:
        # Always release the handles, including when a frame fails mid-way
        cap.release()
        if out_resized is not None:
            out_resized.release()

# Resize the downloaded video; resize_video returns the path that should be used
resized_video_path = 'resized_video.mp4'
resized_video_path = resize_video(video_path, resized_video_path)

model1 = YOLO('all_best.pt').to(device)

person_model = YOLO('yolov8n.pt').to(device)   # Detects 'person'

# Adjust confidence and IoU thresholds
# NOTE(review): Ultralytics YOLOv8 takes the confidence threshold as a `conf=`
# argument of the predict call; confirm that these attribute assignments are
# actually honoured by the calls that use the models.
model1.conf = 0.5
person_model.conf = 0.5

#model1.iou = 0.45
#person_model.iou = 0.45

# Print model class names for verification
logging.info(f"Model1 class names: {model1.names}")
# This line previously printed model1.names under the person-model label
print(f"Person model class names: {person_model.names}")

# Function to normalize class names
def normalize_class_name(name):
    """Return ``name`` lower-cased with underscores and hyphens turned into spaces."""
    separators_to_spaces = str.maketrans('_-', '  ')
    return name.translate(separators_to_spaces).lower()

# Classes handled by the pipeline; 'person' is the only non-accessory entry
DETECTION_CLASSES = ['person', 'helmet', 'vest', 'mask', 'hair net']

# Per-accessory smoothing state: the confirmed on/off flag plus a frame counter.
# Only 'hair net' additionally carries a hold counter.
accessory_states = {
    accessory: {'state': False, 'counter': 0}
    for accessory in ('helmet', 'mask', 'hair net', 'vest')
}
accessory_states['hair net']['hold_counter'] = 0

# Number of consecutive disagreeing frames needed before a state flips
STATE_CHANGE_THRESHOLDS = dict([
    ('helmet', 5),
    ('mask', 20),
    ('hair net', 10),
    ('vest', 10),
])

# Function to process person detections and select the highest-confidence person
def process_person_detections(frame, person_model, conf_threshold=None):
    """Detect people in ``frame`` and keep only the most confident one.

    Args:
        frame: Image handed unchanged to ``person_model``.
        person_model: Callable detector. Its results expose ``boxes`` whose
            items carry ``cls``, ``conf`` and ``xyxy``; ``names`` maps class
            ids to class names.
        conf_threshold: Minimum confidence forwarded to the detector as
            ``conf=``. When None, the ``conf`` attribute set on
            ``person_model`` (if any) is used instead.

    Returns:
        A one-element list holding a dict with 'bbox' (int xmin, ymin, xmax,
        ymax), 'conf' and 'object_presence' (every non-person entry of
        DETECTION_CLASSES mapped to False), or an empty list when nobody is
        detected.
    """
    if conf_threshold is None:
        # Ultralytics YOLOv8 takes the threshold as a call argument; a plain
        # `person_model.conf = ...` assignment is not read by the predict call,
        # so forward that value explicitly.
        conf_threshold = getattr(person_model, 'conf', None)
    predict_kwargs = {'classes': [0]}
    if conf_threshold is not None:
        predict_kwargs['conf'] = conf_threshold
    results_person = person_model(frame, **predict_kwargs)

    best_person = None
    for result in results_person:
        for box in result.boxes:
            cls_name = person_model.names[int(box.cls[0])]
            if cls_name != 'person':
                continue
            conf = float(box.conf[0])
            # Strict comparison keeps the first box on ties, like a stable sort
            if best_person is not None and conf <= best_person['conf']:
                continue
            xmin, ymin, xmax, ymax = map(int, box.xyxy[0].tolist())
            best_person = {
                'bbox': (xmin, ymin, xmax, ymax),
                'conf': conf,
                'object_presence': {normalize_class_name(cls): False
                                    for cls in DETECTION_CLASSES if cls != 'person'}
            }

    return [best_person] if best_person is not None else []

# Function to process accessory detections
def process_accessory_detections(person_crops, models, conf_threshold=None):
    """Detect tracked accessories in each person crop.

    Args:
        person_crops: Images cropped to one person each; passed as a batch to
            ``models[0]``.
        models: Sequence whose first element is the accessory detector.
        conf_threshold: Minimum confidence forwarded to the detector as
            ``conf=``. When None, the ``conf`` attribute set on the detector
            (if any) is used instead.

    Returns:
        A list with one set per crop containing the normalized names of the
        detected accessories ('helmet', 'mask', 'vest', 'hair net').
    """
    accessory_model = models[0]
    if conf_threshold is None:
        # A plain `model.conf = ...` assignment is not read by the Ultralytics
        # YOLOv8 predict call, so forward that value explicitly.
        conf_threshold = getattr(accessory_model, 'conf', None)
    predict_kwargs = {}
    if conf_threshold is not None:
        predict_kwargs['conf'] = conf_threshold
    results_model1 = accessory_model(person_crops, **predict_kwargs)

    # The accessory states are keyed by 'hair net' (with a space). The previous
    # filter listed 'hairnet', so a hair net could never be reported whichever
    # spelling the model used.
    tracked = ('helmet', 'mask', 'vest', 'hair net')

    detected_objects_list = []
    for idx in range(len(person_crops)):
        detected_objects = set()
        for box in results_model1[idx].boxes:
            cls_name = accessory_model.names[int(box.cls[0])]
            cls_name_normalized = normalize_class_name(cls_name)
            if cls_name_normalized == 'hairnet':
                cls_name_normalized = 'hair net'
            if cls_name_normalized in tracked:
                detected_objects.add(cls_name_normalized)
        detected_objects_list.append(detected_objects)
    return detected_objects_list

# Function to update accessory states with temporal smoothing
def update_accessory_states(person, detected_objects, accessory_states, hold_frames):
    """Fold this frame's detections into the smoothed accessory states.

    'hair net' uses a hold timer: a detection sets the state to True and
    reloads the timer with ``hold_frames``; without a detection the state stays
    True while the timer counts down to zero. Every other accessory flips only
    after the detection has disagreed with the stored state for as many
    consecutive frames as its entry in STATE_CHANGE_THRESHOLDS (default 5).

    The resulting state is written to both ``accessory_states`` and
    ``person['object_presence']``.
    """
    for obj in person['object_presence'].keys():
        key = normalize_class_name(obj)
        seen_now = key in detected_objects

        info = accessory_states[key]
        new_state = info['state']

        if key == 'hair net':
            if seen_now:
                info['hold_counter'] = hold_frames
                new_state = True
            elif info.get('hold_counter', 0) > 0:
                info['hold_counter'] -= 1
                new_state = True
            else:
                new_state = False
            # Make sure the timer key exists even if it was never initialised
            info.setdefault('hold_counter', 0)
        else:
            # Debounce: count consecutive frames that contradict the stored state
            pending = info['counter']
            threshold = STATE_CHANGE_THRESHOLDS.get(key, 5)
            if seen_now == new_state:
                pending = 0
            else:
                pending += 1
                if pending >= threshold:
                    new_state, pending = seen_now, 0
            info['counter'] = pending

        info['state'] = new_state
        person['object_presence'][key] = new_state

def main():
    """Annotate the resized video with per-frame PPE status for one person.

    For every frame of ``resized_video_path`` the highest-confidence person is
    detected, the accessory model is run on that person's crop, the accessory
    states are smoothed over time and the result is drawn on the frame, which
    is written to ``detected_video.mp4``. The finished video is then displayed
    in the notebook.
    """
    # Imported locally: the notebook's import cell only pulls in HTML
    from IPython.display import display

    # Initialize video capture
    cap = cv2.VideoCapture(resized_video_path)
    if not cap.isOpened():
        cap.release()
        logging.error(f"Could not open video: {resized_video_path}")
        return

    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)

    # Calculate hold frames for 'hair net'
    HOLD_DURATION_SECONDS = 1  # Adjust as needed
    hold_frames = int(fps * HOLD_DURATION_SECONDS)

    # Define the codec and create VideoWriter object
    output_video_path = "detected_video.mp4"
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_video_path, fourcc, fps,
                          (frame_width, frame_height))

    # Frame-by-frame processing
    frame_count = 0
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    logging.info(f"Total frames to process: {total_frames}")

    models = [model1]  # loop-invariant, so built once

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            # Process person detections and select highest-confidence person
            persons = process_person_detections(frame, person_model)
            person = persons[0] if persons else None

            if person is not None:
                xmin, ymin, xmax, ymax = person['bbox']
                person_crop = frame[ymin:ymax, xmin:xmax]
                if person_crop.size == 0:
                    # A zero-area box yields an empty image; skip it
                    person = None

            if person is not None:
                # Process accessory detections
                detected_objects_list = process_accessory_detections([person_crop], models)

                # Update accessory states for the person
                detected_objects = detected_objects_list[0]
                update_accessory_states(person, detected_objects, accessory_states, hold_frames)

                conf = person['conf']
                object_presence = person['object_presence']

                # Draw person bounding box
                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (255, 0, 0), 2)
                cv2.putText(frame, f'Person {conf:.2f}', (xmin, ymin - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 2)

                # Starting position for labels; each label is stacked dy pixels higher
                label_x = xmin
                label_y = ymin - 30
                dy = 20

                for i, (obj, present) in enumerate(object_presence.items()):
                    if present:
                        color = (0, 255, 0)  # Green
                        label = f"{obj}: using"
                    else:
                        color = (0, 0, 255)  # Red
                        label = f"{obj}: removed"
                    cv2.putText(frame, label, (label_x, label_y - i * dy),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)

            out.write(frame)
            frame_count += 1

            if frame_count % 30 == 0:
                logging.info(f"Processed {frame_count}/{total_frames} frames.")
    finally:
        # Release the handles even if a frame fails, so the output file is closed
        cap.release()
        out.release()

    logging.info(f"Processing complete. Total frames processed: {frame_count}")

    # Function to display video in Google Colab
    def display_video(path, width=800):
        video_tag = f"""
        <video width="{width}" height="auto" controls>
            <source src="{path}" type="video/mp4">
        </video>
        """
        return HTML(video_tag)

    # An HTML object created inside a function is not rendered automatically;
    # it has to be passed to display() explicitly.
    display(display_video(output_video_path))

if __name__ == '__main__':
    main()



Downloading...
From: https://drive.google.com/uc?id=130btvPHooNMgRY6AWBkhullJeHViJJg1
To: /content/original_video.mp4
100%|██████████| 31.3M/31.3M [00:00<00:00, 51.5MB/s]
Downloading...
From: https://drive.google.com/uc?id=1VVRD5WrZ3CV2EOPwXjv9ZyY53rdHqwHb
To: /content/all_best.pt
100%|██████████| 6.25M/6.25M [00:00<00:00, 165MB/s]


[1;30;43mGörüntülenen çıkış son 5000 satıra kısaltıldı.[0m
0: 640x288 1 vest, 1 mask, 13.2ms
Speed: 1.8ms preprocess, 13.2ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 288)

0: 640x384 1 person, 13.6ms
Speed: 1.6ms preprocess, 13.6ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 384)

0: 640x288 1 vest, 1 mask, 16.5ms
Speed: 1.5ms preprocess, 16.5ms inference, 5.7ms postprocess per image at shape (1, 3, 640, 288)

0: 640x384 1 person, 11.0ms
Speed: 1.6ms preprocess, 11.0ms inference, 1.9ms postprocess per image at shape (1, 3, 640, 384)

0: 640x288 1 vest, 1 mask, 11.6ms
Speed: 2.3ms preprocess, 11.6ms inference, 1.6ms postprocess per image at shape (1, 3, 640, 288)

0: 640x384 1 person, 13.8ms
Speed: 1.6ms preprocess, 13.8ms inference, 2.2ms postprocess per image at shape (1, 3, 640, 384)

0: 640x288 1 vest, 1 mask, 9.3ms
Speed: 1.7ms preprocess, 9.3ms inference, 1.6ms postprocess per image at shape (1, 3, 640, 288)

0: 640x384 1 person, 9.0ms
Speed: 1

In [None]:
!pip install ultralytics
import os
import cv2
import torch
import warnings
from ultralytics import YOLO
from collections import deque
from IPython.display import HTML

# Suppress warnings
warnings.filterwarnings("ignore")

# Load YOLO models on the GPU when one is available, otherwise on the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
person_model = YOLO('yolov8n.pt').to(device)  # Model for person detection
accessory_model = YOLO('best-all.pt').to(device)  # Accessory model

# Set detection thresholds
# NOTE(review): Ultralytics YOLOv8 takes `conf` / `iou` as arguments of the
# predict call; confirm that these attribute assignments are actually honoured.
person_model.conf = accessory_model.conf = 0.5
accessory_model.iou = 0.35

# Spatial validation
def is_within_zone(person_bbox, object_bbox, expected_area):
    """Check whether an object's top edge falls in a vertical third of a person box.

    'head' is the top third of ``person_bbox`` and 'torso' the middle third
    (integer division, bounds inclusive). Only the object's ``ymin`` is
    compared. Any other ``expected_area`` yields False.
    """
    _, person_ymin, _, person_ymax = person_bbox
    _, obj_ymin, _, _ = object_bbox

    person_height = person_ymax - person_ymin
    end_of_first_third = person_ymin + person_height // 3
    end_of_second_third = person_ymin + 2 * person_height // 3

    if expected_area == 'head':
        zone_top, zone_bottom = person_ymin, end_of_first_third
    elif expected_area == 'torso':
        zone_top, zone_bottom = end_of_first_third, end_of_second_third
    else:
        return False
    return zone_top <= obj_ymin <= zone_bottom

# Temporal smoothing
# Number of most recent frames remembered per accessory
history_length = 10
# One bounded history of raw detection flags for each accessory
detection_history = {
    accessory: deque(maxlen=history_length)
    for accessory in ('helmet', 'mask', 'hairnet', 'vest')
}

def smooth_detections(current_state, accessory):
    """Record ``current_state`` for ``accessory`` and return the majority vote.

    The flag is appended to the accessory's history in ``detection_history``.
    The result is True when more than half (integer division) of the stored
    flags are truthy.
    """
    history = detection_history[accessory]
    history.append(current_state)
    positives = sum(history)
    return positives > len(history) // 2

# Process person detections
def process_person_detections(frame, person_model, conf_threshold=None):
    """Detect every person in ``frame``.

    Args:
        frame: Image handed unchanged to ``person_model``.
        person_model: Callable detector. Its results expose ``boxes`` whose
            items carry ``cls`` and ``xyxy``; ``names`` maps class ids to names.
        conf_threshold: Minimum confidence forwarded to the detector as
            ``conf=``. When None, the ``conf`` attribute set on
            ``person_model`` (if any) is used instead.

    Returns:
        A list of ``{'bbox': (xmin, ymin, xmax, ymax)}`` dicts with integer
        coordinates, one per detected person.
    """
    if conf_threshold is None:
        # A plain `person_model.conf = ...` assignment is not read by the
        # Ultralytics YOLOv8 predict call, so forward that value explicitly.
        conf_threshold = getattr(person_model, 'conf', None)
    predict_kwargs = {'classes': [0]}  # Detect persons only
    if conf_threshold is not None:
        predict_kwargs['conf'] = conf_threshold
    results_person = person_model(frame, **predict_kwargs)

    persons = []
    for result in results_person:
        for box in result.boxes:
            cls_name = person_model.names[int(box.cls[0])]
            if cls_name == 'person':
                xmin, ymin, xmax, ymax = map(int, box.xyxy[0].tolist())
                persons.append({'bbox': (xmin, ymin, xmax, ymax)})
    return persons

# Process accessory detections
def process_accessories(person_bbox, frame, accessory_model):
    """Detect accessories inside one person's bounding box.

    Args:
        person_bbox: (xmin, ymin, xmax, ymax) of the person in ``frame``.
        frame: Full image; the person region is cropped from it by slicing.
        accessory_model: Callable detector run on the crop. Its results expose
            ``boxes`` with ``cls``, ``conf`` and ``xyxy``; ``names`` maps class
            ids to names.

    Returns:
        The set of class names that pass the spatial check (helmet in the head
        zone, vest in the torso zone) and meet their confidence threshold.
        An empty crop yields an empty set.
    """
    xmin, ymin, xmax, ymax = person_bbox
    person_crop = frame[ymin:ymax, xmin:xmax]
    if person_crop.size == 0:
        # Zero-area box: there is nothing to run the detector on
        return set()

    # Per-class confidence thresholds; classes not listed fall back to 0.5
    thresholds = {'helmet': 0.6, 'mask': 0.5, 'hairnet': 0.6, 'vest': 0.5}
    # Zone of the person box in which each validated class has to start
    expected_zone = {'helmet': 'head', 'vest': 'torso'}

    accessory_results = accessory_model(person_crop)
    detected_objects = set()

    for result in accessory_results:
        for box in result.boxes:
            cls_name = accessory_model.names[int(box.cls[0])]
            confidence = float(box.conf[0])
            obj_xmin, obj_ymin, obj_xmax, obj_ymax = map(int, box.xyxy[0].tolist())

            # The detector ran on the crop, so its boxes are relative to the
            # crop's top-left corner. Shift them into frame coordinates before
            # comparing with the frame-coordinate person box.
            object_bbox = (obj_xmin + xmin, obj_ymin + ymin,
                           obj_xmax + xmin, obj_ymax + ymin)

            # Validate spatial location
            zone = expected_zone.get(cls_name)
            if zone is not None and not is_within_zone(person_bbox, object_bbox, zone):
                continue

            if confidence >= thresholds.get(cls_name, 0.5):
                detected_objects.add(cls_name)
    return detected_objects

# Main function for video processing
def process_video(input_path, output_path):
    """Annotate every frame of a video with the smoothed PPE status of each person.

    Args:
        input_path: Video to read.
        output_path: Where the annotated video is written (mp4v codec, same
            size and frame rate as the input).
    """
    cap = cv2.VideoCapture(input_path)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)

    out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))
    frame_count = 0

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Detect persons
            persons = process_person_detections(frame, person_model)

            # NOTE(review): detection_history is shared by all persons, so with
            # several people in a frame their detections are mixed into the
            # same history.
            for person in persons:
                # Detect accessories for each person
                person_bbox = person['bbox']
                detected_objects = process_accessories(person_bbox, frame, accessory_model)

                # Temporal smoothing and conflict resolution
                smoothed_states = {}
                for obj in detection_history.keys():
                    is_detected = obj in detected_objects
                    smoothed_states[obj] = smooth_detections(is_detected, obj)
                    if smoothed_states[obj] and obj == 'helmet' and 'hairnet' in detected_objects:
                        # A confirmed helmet wins over a hair net seen this frame
                        detected_objects.remove('hairnet')
                if smoothed_states.get('helmet'):
                    smoothed_states['hairnet'] = False

                # Draw detections from the smoothed states. The labels were
                # previously driven by the raw per-frame detections, which left
                # the smoothing without any visible effect.
                xmin, ymin, xmax, ymax = person_bbox
                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (255, 0, 0), 2)
                for i, (obj, present) in enumerate(smoothed_states.items()):
                    color = (0, 255, 0) if present else (0, 0, 255)
                    label = f"{obj}: {'using' if present else 'removed'}"
                    cv2.putText(frame, label, (xmin, ymin - 20 * (i + 1)),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)

            out.write(frame)
            frame_count += 1
            if frame_count % 30 == 0:
                print(f"Processed {frame_count} frames.")
    finally:
        # Release the handles even if a frame fails, so the output file is closed
        cap.release()
        out.release()



# Run the pipeline: read the source clip and write the annotated copy
input_video, output_video = "/content/orj_1.mp4", "2_output_video.mp4"
process_video(input_video, output_video)


# Display video in a notebook
def display_video(path, width=800):
    """Wrap ``path`` in an HTML5 <video> element of the given pixel width."""
    return HTML(f"""
    <video width="{width}" height="auto" controls>
        <source src="{path}" type="video/mp4">
    </video>
    """)
# Last expression of the cell, so the returned HTML object is what the notebook renders
display_video(output_video)


[1;30;43mGörüntülenen çıkış son 5000 satıra kısaltıldı.[0m
0: 640x384 2 persons, 7.8ms
Speed: 2.2ms preprocess, 7.8ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 384)

0: 640x192 (no detections), 8.4ms
Speed: 1.1ms preprocess, 8.4ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 192)

0: 640x320 (no detections), 8.0ms
Speed: 1.4ms preprocess, 8.0ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 320)

0: 640x384 2 persons, 7.7ms
Speed: 1.8ms preprocess, 7.7ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 384)

0: 640x224 (no detections), 8.1ms
Speed: 1.2ms preprocess, 8.1ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 224)

0: 640x320 (no detections), 9.4ms
Speed: 1.3ms preprocess, 9.4ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 320)

0: 640x384 1 person, 8.0ms
Speed: 2.3ms preprocess, 8.0ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 384)

0: 640x224 (no detections), 10.1ms
Speed:

# Merging datasets from Roboflow and training



In [None]:
!pip install roboflow

import os
from getpass import getpass

from roboflow import Roboflow

# The API key is a credential and must not be stored in the notebook: read it
# from the ROBOFLOW_API_KEY environment variable, or prompt when it is unset.
# The key that was committed here should be treated as leaked and revoked.
roboflow_api_key = os.environ.get("ROBOFLOW_API_KEY") or getpass("Roboflow API key: ")

rf = Roboflow(api_key=roboflow_api_key)
project = rf.workspace("yt-hk546").project("ppe-detection-yfmym-gkuys")
version = project.version(1)
dataset = version.download("yolov5")


Collecting roboflow
  Downloading roboflow-1.1.49-py3-none-any.whl.metadata (9.7 kB)
Collecting idna==3.7 (from roboflow)
  Downloading idna-3.7-py3-none-any.whl.metadata (9.9 kB)
Collecting python-dotenv (from roboflow)
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Collecting filetype (from roboflow)
  Downloading filetype-1.2.0-py2.py3-none-any.whl.metadata (6.5 kB)
Downloading roboflow-1.1.49-py3-none-any.whl (80 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m80.9/80.9 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading idna-3.7-py3-none-any.whl (66 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.8/66.8 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading filetype-1.2.0-py2.py3-none-any.whl (19 kB)
Downloading python_dotenv-1.0.1-py3-none-any.whl (19 kB)
Installing collected packages: filetype, python-dotenv, idna, roboflow
  Attempting uninstall: idna
    Found existing installation: idna 3.10
    

Downloading Dataset Version Zip in PPE-Detection-1 to yolov5pytorch:: 100%|██████████| 127298/127298 [00:02<00:00, 49483.09it/s]





Extracting Dataset Version Zip to PPE-Detection-1 in yolov5pytorch:: 100%|██████████| 10670/10670 [00:01<00:00, 7371.78it/s]


In [None]:
!pip install roboflow

import os
from getpass import getpass

from roboflow import Roboflow

# The API key is a credential and must not be stored in the notebook: read it
# from the ROBOFLOW_API_KEY environment variable, or prompt when it is unset.
# The key that was committed here should be treated as leaked and revoked.
roboflow_api_key = os.environ.get("ROBOFLOW_API_KEY") or getpass("Roboflow API key: ")

rf = Roboflow(api_key=roboflow_api_key)
project = rf.workspace("yt-hk546").project("annotated_pics-xp3st-xod00")
version = project.version(2)
dataset = version.download("yolov5")


loading Roboflow workspace...
loading Roboflow project...


Downloading Dataset Version Zip in Annotated_pics-2 to yolov5pytorch:: 100%|██████████| 268568/268568 [00:04<00:00, 59003.61it/s]





Extracting Dataset Version Zip to Annotated_pics-2 in yolov5pytorch:: 100%|██████████| 29394/29394 [00:03<00:00, 8393.00it/s]


In [None]:
#clean datasets
import yaml

# Read the class list of the first dataset from its data.yaml
dataset1_path = 'PPE-Detection-1'
data1_yaml_path = f'{dataset1_path}/data.yaml'

with open(data1_yaml_path, 'r') as f:
    data1 = yaml.safe_load(f)

class_names1 = data1['names']
print(class_names1)

['boots', 'gloves', 'helmet', 'human', 'vest']


In [None]:
import os

desired_classes1 = ['human', 'helmet', 'vest']

# A class id is the position of its name in class_names1; collect the ids to drop.
undesired_class_ids1 = [i for i, cls in enumerate(class_names1) if cls not in desired_classes1]

# NOTE: this loop rewrites and deletes files in place and interprets class ids
# with the dataset's ORIGINAL numbering. Run it once on a fresh download,
# before the ids in these label files are remapped.
for split in ['train', 'valid', 'test']:
    labels_dir = f"{dataset1_path}/{split}/labels"
    images_dir = f"{dataset1_path}/{split}/images"
    if not os.path.isdir(labels_dir):
        # A missing split is skipped instead of raising FileNotFoundError.
        continue

    # Index images by file stem so the image belonging to a label file is
    # found whatever its extension is (.jpg, .png, .jpeg, ...).
    images_by_stem = {}
    if os.path.isdir(images_dir):
        for image_file in os.listdir(images_dir):
            images_by_stem[os.path.splitext(image_file)[0]] = image_file

    for label_file in os.listdir(labels_dir):
        if not label_file.endswith('.txt'):
            # Anything that is not a label file (e.g. a cache file) is left alone.
            continue
        label_path = f"{labels_dir}/{label_file}"

        # Keep only annotations of desired classes; blank lines are dropped
        # rather than crashing on line.split()[0].
        with open(label_path, 'r') as f:
            new_lines = [
                line for line in f
                if line.strip() and int(line.split()[0]) not in undesired_class_ids1
            ]

        if new_lines:
            with open(label_path, 'w') as f:
                f.writelines(new_lines)
        else:
            # No desired object left: remove the label and its image (if any)
            # so the sample disappears from the dataset entirely.
            os.remove(label_path)
            image_file = images_by_stem.get(os.path.splitext(label_file)[0])
            if image_file is not None:
                os.remove(f"{images_dir}/{image_file}")

In [None]:
# Read the class list of the second dataset from its data.yaml.
dataset2_path = 'Annotated_pics-2'

with open(dataset2_path + '/data.yaml', mode='r') as config_file:
    data2 = yaml.safe_load(config_file)

class_names2 = data2['names']
print(class_names2)

['gloves', 'hair-net', 'mask', 'no-gloves', 'no-hair-net', 'no-mask']


In [None]:
desired_classes2 = ['hair-net', 'mask']

# A class id is the position of its name in class_names2; collect the ids to drop.
undesired_class_ids2 = [i for i, cls in enumerate(class_names2) if cls not in desired_classes2]

# NOTE: this loop rewrites and deletes files in place and interprets class ids
# with the dataset's ORIGINAL numbering. Run it once on a fresh download,
# before the ids in these label files are remapped.
for split in ['train', 'valid', 'test']:
    labels_dir = f"{dataset2_path}/{split}/labels"
    images_dir = f"{dataset2_path}/{split}/images"
    if not os.path.isdir(labels_dir):
        # A missing split is skipped instead of raising FileNotFoundError.
        continue

    # Index images by file stem so the image belonging to a label file is
    # found whatever its extension is (.jpg, .png, .jpeg, ...).
    images_by_stem = {}
    if os.path.isdir(images_dir):
        for image_file in os.listdir(images_dir):
            images_by_stem[os.path.splitext(image_file)[0]] = image_file

    for label_file in os.listdir(labels_dir):
        if not label_file.endswith('.txt'):
            # Anything that is not a label file (e.g. a cache file) is left alone.
            continue
        label_path = f"{labels_dir}/{label_file}"

        # Keep only annotations of desired classes; blank lines are dropped
        # rather than crashing on line.split()[0].
        with open(label_path, 'r') as f:
            new_lines = [
                line for line in f
                if line.strip() and int(line.split()[0]) not in undesired_class_ids2
            ]

        if new_lines:
            with open(label_path, 'w') as f:
                f.writelines(new_lines)
        else:
            # No desired object left: remove the label and its image (if any)
            # so the sample disappears from the dataset entirely.
            os.remove(label_path)
            image_file = images_by_stem.get(os.path.splitext(label_file)[0])
            if image_file is not None:
                os.remove(f"{images_dir}/{image_file}")

In [None]:
# Class list of the merged dataset; a class's position here is its new id.
new_class_names = ['human', 'helmet', 'vest', 'hair-net', 'mask']

# Marker file written into a dataset root once its labels use the new ids.
# Remapping is not idempotent (old and new id ranges overlap), so a second
# pass over the same files would drop or mis-map annotations.
_REMAP_MARKER = '.class_ids_remapped'


def _build_id_mapping(old_names, kept_names, merged_names):
    """Return {old_id: new_id} (both as strings) for every kept class.

    old_names:    class names of the source dataset, indexed by old class id.
    kept_names:   names of the classes that survive the merge.
    merged_names: class names of the merged dataset, indexed by new class id.
    Raises ValueError if a kept class is missing from merged_names.
    """
    return {
        str(old_id): str(merged_names.index(name))
        for old_id, name in enumerate(old_names)
        if name in kept_names
    }


def _remap_label_ids(dataset_path, old_to_new_ids):
    """Rewrite, in place, the class id of every label line under dataset_path.

    Lines whose class id is not a key of old_to_new_ids are dropped, as are
    blank lines. Missing splits are skipped. A marker file is written when the
    dataset is done; if it is already present the dataset is left untouched.
    Raises FileNotFoundError if dataset_path is not a directory.
    """
    if not os.path.isdir(dataset_path):
        raise FileNotFoundError(f"dataset directory not found: {dataset_path}")

    marker_path = f"{dataset_path}/{_REMAP_MARKER}"
    if os.path.exists(marker_path):
        print(f"{dataset_path}: class ids already remapped, skipping")
        return

    for split in ['train', 'valid', 'test']:
        labels_dir = f"{dataset_path}/{split}/labels"
        if not os.path.isdir(labels_dir):
            continue
        for label_file in os.listdir(labels_dir):
            if not label_file.endswith('.txt'):
                continue
            label_path = f"{labels_dir}/{label_file}"
            with open(label_path, 'r') as f:
                lines = f.readlines()
            with open(label_path, 'w') as f:
                for line in lines:
                    parts = line.split()
                    # `parts` is empty for a blank line; skip it instead of
                    # failing on parts[0].
                    if parts and parts[0] in old_to_new_ids:
                        parts[0] = old_to_new_ids[parts[0]]
                        f.write(' '.join(parts) + '\n')

    # Record the applied mapping so a re-run of this cell becomes a no-op.
    with open(marker_path, 'w') as f:
        for old_id, new_id in old_to_new_ids.items():
            f.write(f"{old_id} {new_id}\n")


old_to_new_ids1 = _build_id_mapping(class_names1, desired_classes1, new_class_names)
_remap_label_ids(dataset1_path, old_to_new_ids1)

old_to_new_ids2 = _build_id_mapping(class_names2, desired_classes2, new_class_names)
_remap_label_ids(dataset2_path, old_to_new_ids2)


In [None]:
# Merge both cleaned datasets into one train/valid/test directory tree.
import shutil

merged_dataset_path = 'Merged_Dataset'

# NOTE(review): the training cell reads
# '/content/datasets/Merged_Dataset/data.yaml'. This cell writes to the
# relative path 'Merged_Dataset' and does not create a data.yaml -- confirm
# where the merged dataset is moved and where its data.yaml comes from.

# destination file -> dataset it was copied from in this run, used to report
# same-named files coming from different datasets.
copied_from = {}

for split in ['train', 'valid', 'test']:
    images_dir = f"{merged_dataset_path}/{split}/images"
    labels_dir = f"{merged_dataset_path}/{split}/labels"
    os.makedirs(images_dir, exist_ok=True)
    os.makedirs(labels_dir, exist_ok=True)

    for source_path in (dataset1_path, dataset2_path):
        src_images_dir = f"{source_path}/{split}/images"
        src_labels_dir = f"{source_path}/{split}/labels"
        if not os.path.exists(src_images_dir):
            # A dataset without this split contributes nothing to it.
            continue

        for src_dir, dest_dir in ((src_images_dir, images_dir), (src_labels_dir, labels_dir)):
            if not os.path.isdir(src_dir):
                # Images without a labels directory: nothing to list here.
                continue
            for file_name in os.listdir(src_dir):
                dest_file = f"{dest_dir}/{file_name}"
                first_source = copied_from.setdefault(dest_file, source_path)
                if first_source != source_path:
                    # shutil.copy overwrites silently; make the clash visible.
                    print(f"WARNING: {dest_file} from {first_source} overwritten by {source_path}")
                shutil.copy(f"{src_dir}/{file_name}", dest_dir)

In [None]:
# Pack the datasets folder into Merged_Dataset.zip so it can be downloaded.
import shutil

dataset_path = '/content/datasets'
# Last expression of the cell: the path of the created archive is displayed.
shutil.make_archive(base_name='Merged_Dataset', format='zip', root_dir=dataset_path)


'/content/Merged_Dataset.zip'

In [None]:
#train
!pip install ultralytics
from ultralytics import YOLO
import itertools

# Hyperparameter grid; every combination is trained once (2*2*2*2 = 16 runs).
param_grid = {
    'lr0': [1e-3, 1e-4],
    'momentum': [0.9, 0.95],
    'weight_decay': [0.0005, 0.0001],
    'batch': [16, 32],
}

keys = param_grid.keys()
values = param_grid.values()
param_combinations = [dict(zip(keys, v)) for v in itertools.product(*values)]

# One entry per run: the parameters, the validation mAP@0.5 and the run directory.
results = []

for params in param_combinations:
    print(f"Training with parameters: {params}")

    # Start every run from the same pretrained checkpoint so runs are comparable.
    model = YOLO('yolov8n.pt')

    model.train(
        data='/content/datasets/Merged_Dataset/data.yaml',
        epochs=50,
        imgsz=640,
        # With the default optimizer='auto' Ultralytics ignores lr0 and
        # momentum and chooses its own values (the training log reports
        # "ignoring 'lr0=0.001' and 'momentum=0.9'"), which makes half of the
        # grid meaningless. Naming the optimizer makes the grid values apply;
        # AdamW is what 'auto' selected for this dataset.
        optimizer='AdamW',
        lr0=params['lr0'],
        momentum=params['momentum'],
        weight_decay=params['weight_decay'],
        batch=params['batch'],
        patience=5,  # early stopping: give up after 5 epochs without improvement
        verbose=False
    )

    metrics = model.val()

    # mAP at IoU 0.5 on the validation split; 0 if the key is absent.
    val_map = metrics.results_dict.get('metrics/mAP50(B)', 0)

    # Remember where this run's weights were written so the best run can be
    # located afterwards.
    run_dir = getattr(getattr(model, 'trainer', None), 'save_dir', None)

    results.append({
        'params': params,
        'val_map': val_map,
        'save_dir': str(run_dir) if run_dir is not None else None
    })


results_sorted = sorted(results, key=lambda x: x['val_map'], reverse=True)

best_result = results_sorted[0]
print(f"best hyperparametreler: {best_result['params']}")
print(f"best mAP value: {best_result['val_map']}")
print(f"best run directory: {best_result['save_dir']}")


Collecting ultralytics
  Downloading ultralytics-8.3.40-py3-none-any.whl.metadata (35 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.12-py3-none-any.whl.metadata (9.4 kB)
Downloading ultralytics-8.3.40-py3-none-any.whl (898 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m898.5/898.5 kB[0m [31m23.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics_thop-2.0.12-py3-none-any.whl (26 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.3.40 ultralytics-thop-2.0.12
Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
Training with parameters: {'lr0': 0.001, 'momentum': 0.9, 'weight_decay': 0.0005, 'batch': 16}
Downl

100%|██████████| 6.25M/6.25M [00:00<00:00, 104MB/s]


Ultralytics 8.3.40 🚀 Python-3.10.12 torch-2.5.1+cu121 CUDA:0 (NVIDIA L4, 22700MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=/content/datasets/Merged_Dataset/data.yaml, epochs=50, time=None, patience=5, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train, exist_ok=False, pretrained=True, optimizer=auto, verbose=False, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=Tru

100%|██████████| 755k/755k [00:00<00:00, 22.0MB/s]


Overriding model.yaml nc=80 with nc=5

                   from  n    params  module                                       arguments                     
  0                  -1  1       464  ultralytics.nn.modules.conv.Conv             [3, 16, 3, 2]                 
  1                  -1  1      4672  ultralytics.nn.modules.conv.Conv             [16, 32, 3, 2]                
  2                  -1  1      7360  ultralytics.nn.modules.block.C2f             [32, 32, 1, True]             
  3                  -1  1     18560  ultralytics.nn.modules.conv.Conv             [32, 64, 3, 2]                
  4                  -1  2     49664  ultralytics.nn.modules.block.C2f             [64, 64, 2, True]             
  5                  -1  1     73984  ultralytics.nn.modules.conv.Conv             [64, 128, 3, 2]               
  6                  -1  2    197632  ultralytics.nn.modules.block.C2f             [128, 128, 2, True]           
  7                  -1  1    295424  ultralytics

100%|██████████| 5.35M/5.35M [00:00<00:00, 87.6MB/s]


[34m[1mAMP: [0mchecks passed ✅


[34m[1mtrain: [0mScanning /content/datasets/Merged_Dataset/train/labels... 12128 images, 0 backgrounds, 0 corrupt: 100%|██████████| 12128/12128 [00:10<00:00, 1162.20it/s]


[34m[1mtrain: [0mNew cache created: /content/datasets/Merged_Dataset/train/labels.cache
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))


A new version of Albumentations is available: 1.4.21 (you have 1.4.20). Upgrade using: pip install -U albumentations. To disable automatic update checks, set the environment variable NO_ALBUMENTATIONS_UPDATE to 1.
[34m[1mval: [0mScanning /content/datasets/Merged_Dataset/valid/labels... 1134 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1134/1134 [00:01<00:00, 974.76it/s]

[34m[1mval: [0mNew cache created: /content/datasets/Merged_Dataset/valid/labels.cache





Plotting labels to runs/detect/train/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.001' and 'momentum=0.9' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.001111, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to [1mruns/detect/train[0m
Starting training for 50 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/50      2.21G      1.368      2.129      1.487         57        640: 100%|██████████| 758/758 [01:20<00:00,  9.47it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:06<00:00,  5.58it/s]


                   all       1134       1706       0.71      0.618      0.675      0.416

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/50      2.13G      1.308      1.365      1.408         53        640: 100%|██████████| 758/758 [01:15<00:00, 10.02it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:05<00:00,  7.16it/s]

                   all       1134       1706       0.79      0.578      0.665      0.413






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/50      2.13G      1.279      1.232      1.386         50        640: 100%|██████████| 758/758 [01:13<00:00, 10.32it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:05<00:00,  7.16it/s]

                   all       1134       1706      0.801      0.673      0.748      0.465






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/50      2.13G      1.254      1.147      1.368         44        640: 100%|██████████| 758/758 [01:12<00:00, 10.42it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:04<00:00,  7.22it/s]

                   all       1134       1706      0.816      0.675      0.762      0.487






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/50      2.12G        1.2      1.069      1.339         47        640: 100%|██████████| 758/758 [01:13<00:00, 10.36it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:04<00:00,  7.22it/s]

                   all       1134       1706      0.814      0.735      0.803      0.514






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/50      2.11G       1.17       1.01      1.311         54        640: 100%|██████████| 758/758 [01:12<00:00, 10.42it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:04<00:00,  7.21it/s]

                   all       1134       1706      0.831      0.723      0.812      0.523






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/50      2.07G      1.143      0.968      1.296         50        640: 100%|██████████| 758/758 [01:13<00:00, 10.35it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:05<00:00,  7.13it/s]

                   all       1134       1706      0.792      0.738        0.8      0.515






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/50      2.13G      1.124     0.9482      1.282         41        640: 100%|██████████| 758/758 [01:12<00:00, 10.43it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:04<00:00,  7.20it/s]

                   all       1134       1706      0.864      0.749      0.825      0.542






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/50      2.13G       1.11     0.9067      1.268         41        640: 100%|██████████| 758/758 [01:12<00:00, 10.41it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:04<00:00,  7.31it/s]

                   all       1134       1706      0.814       0.79      0.853      0.558






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/50      2.11G      1.095     0.8861      1.259         46        640: 100%|██████████| 758/758 [01:12<00:00, 10.41it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:04<00:00,  7.38it/s]

                   all       1134       1706      0.825      0.791      0.844       0.55






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      11/50      2.11G       1.08     0.8644       1.25         57        640: 100%|██████████| 758/758 [01:13<00:00, 10.36it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:04<00:00,  7.29it/s]

                   all       1134       1706      0.842      0.766      0.846      0.563






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      12/50      2.07G      1.066     0.8539       1.24         42        640: 100%|██████████| 758/758 [01:13<00:00, 10.36it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:04<00:00,  7.22it/s]

                   all       1134       1706      0.853      0.795      0.856      0.571






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      13/50      2.13G       1.06     0.8289      1.234         44        640: 100%|██████████| 758/758 [01:13<00:00, 10.34it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:04<00:00,  7.29it/s]

                   all       1134       1706      0.854      0.817      0.874       0.57






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      14/50      2.13G      1.052     0.8186      1.228         61        640: 100%|██████████| 758/758 [01:12<00:00, 10.39it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:04<00:00,  7.26it/s]

                   all       1134       1706       0.87      0.808      0.877      0.583






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      15/50      2.14G      1.034     0.8008      1.224         57        640: 100%|██████████| 758/758 [01:13<00:00, 10.37it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:04<00:00,  7.30it/s]

                   all       1134       1706       0.88      0.818      0.885      0.594






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      16/50      2.13G      1.028     0.7841      1.216         44        640: 100%|██████████| 758/758 [01:12<00:00, 10.43it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:04<00:00,  7.35it/s]

                   all       1134       1706      0.844      0.802      0.868      0.586






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      17/50      2.13G       1.02     0.7709      1.208         43        640: 100%|██████████| 758/758 [01:13<00:00, 10.36it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:04<00:00,  7.24it/s]

                   all       1134       1706       0.85       0.81      0.872      0.584






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      18/50      2.11G      1.015     0.7635      1.209         30        640: 100%|██████████| 758/758 [01:12<00:00, 10.43it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:04<00:00,  7.30it/s]

                   all       1134       1706      0.852      0.829      0.883      0.603






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      19/50      2.11G      1.007     0.7536      1.199         35        640: 100%|██████████| 758/758 [01:13<00:00, 10.33it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:05<00:00,  7.19it/s]

                   all       1134       1706      0.843      0.848      0.896       0.61






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      20/50      2.07G     0.9933     0.7329      1.191         47        640: 100%|██████████| 758/758 [01:12<00:00, 10.41it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:04<00:00,  7.35it/s]

                   all       1134       1706      0.877      0.838      0.898      0.612






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      21/50      2.13G     0.9863     0.7211      1.191         43        640: 100%|██████████| 758/758 [01:12<00:00, 10.42it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:04<00:00,  7.27it/s]

                   all       1134       1706      0.858      0.837        0.9      0.612






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      22/50      2.13G     0.9803     0.7162      1.179         39        640: 100%|██████████| 758/758 [01:12<00:00, 10.39it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:04<00:00,  7.31it/s]

                   all       1134       1706      0.859      0.851      0.896       0.62






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      23/50      2.11G     0.9813     0.7035      1.181         42        640: 100%|██████████| 758/758 [01:13<00:00, 10.36it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:04<00:00,  7.23it/s]

                   all       1134       1706      0.885      0.841      0.897      0.616






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      24/50      2.07G     0.9665     0.6942      1.171         38        640: 100%|██████████| 758/758 [01:13<00:00, 10.37it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:04<00:00,  7.33it/s]

                   all       1134       1706      0.882      0.848      0.906      0.619






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      25/50      2.13G     0.9668     0.6839      1.172         41        640: 100%|██████████| 758/758 [01:13<00:00, 10.38it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:04<00:00,  7.26it/s]

                   all       1134       1706      0.906      0.847      0.902      0.629






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      26/50      2.12G     0.9567     0.6816       1.17         40        640: 100%|██████████| 758/758 [01:12<00:00, 10.41it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:05<00:00,  7.11it/s]

                   all       1134       1706      0.903      0.843      0.914      0.636






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      27/50      2.12G     0.9415     0.6643       1.16         31        640: 100%|██████████| 758/758 [01:12<00:00, 10.42it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:04<00:00,  7.28it/s]

                   all       1134       1706      0.887      0.847      0.904      0.629






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      28/50      2.11G     0.9433     0.6544      1.159         49        640: 100%|██████████| 758/758 [01:12<00:00, 10.39it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:04<00:00,  7.30it/s]

                   all       1134       1706      0.902      0.853      0.909      0.629






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      29/50      2.13G     0.9387     0.6511      1.154         40        640: 100%|██████████| 758/758 [01:12<00:00, 10.39it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:04<00:00,  7.30it/s]

                   all       1134       1706      0.892      0.857      0.912      0.636






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      30/50      2.11G     0.9337     0.6361      1.149         32        640: 100%|██████████| 758/758 [01:12<00:00, 10.40it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:04<00:00,  7.32it/s]

                   all       1134       1706      0.903      0.851      0.912      0.638






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      31/50      2.12G     0.9197     0.6321      1.142         44        640: 100%|██████████| 758/758 [01:13<00:00, 10.36it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:05<00:00,  7.18it/s]

                   all       1134       1706      0.906      0.859      0.913      0.639






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      32/50      2.11G     0.9159     0.6215      1.141         40        640: 100%|██████████| 758/758 [01:12<00:00, 10.46it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:04<00:00,  7.32it/s]

                   all       1134       1706      0.895      0.874       0.92      0.639






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      33/50      2.13G     0.9093     0.6231      1.136         42        640: 100%|██████████| 758/758 [01:12<00:00, 10.48it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:04<00:00,  7.29it/s]

                   all       1134       1706      0.896      0.867       0.92      0.646






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      34/50      2.13G     0.9051     0.6108      1.135         43        640: 100%|██████████| 758/758 [01:12<00:00, 10.42it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:05<00:00,  7.12it/s]

                   all       1134       1706      0.909      0.866      0.925       0.65






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      35/50      2.12G     0.8935     0.5959      1.126         51        640: 100%|██████████| 758/758 [01:12<00:00, 10.44it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:05<00:00,  7.17it/s]

                   all       1134       1706      0.913       0.87      0.923      0.652






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      36/50      2.13G     0.8933     0.5941      1.128         45        640: 100%|██████████| 758/758 [01:13<00:00, 10.37it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:04<00:00,  7.27it/s]

                   all       1134       1706      0.911      0.867       0.92      0.651






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      37/50      2.11G     0.8846     0.5932      1.125         40        640: 100%|██████████| 758/758 [01:12<00:00, 10.40it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:04<00:00,  7.20it/s]

                   all       1134       1706      0.908      0.879       0.92      0.652






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      38/50      2.13G     0.8817     0.5832      1.119         40        640: 100%|██████████| 758/758 [01:12<00:00, 10.48it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:04<00:00,  7.30it/s]

                   all       1134       1706      0.918      0.871      0.923      0.652






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      39/50      2.11G     0.8781     0.5777      1.118         42        640: 100%|██████████| 758/758 [01:12<00:00, 10.41it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:05<00:00,  7.18it/s]

                   all       1134       1706      0.925      0.873      0.921      0.656






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      40/50      2.13G     0.8741     0.5775      1.116         51        640: 100%|██████████| 758/758 [01:12<00:00, 10.42it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:05<00:00,  7.17it/s]

                   all       1134       1706      0.915      0.882      0.925      0.655





Closing dataloader mosaic
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      41/50      2.23G     0.8952     0.4871      1.144         24        640: 100%|██████████| 758/758 [01:13<00:00, 10.38it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:04<00:00,  7.28it/s]

                   all       1134       1706      0.906      0.866       0.92      0.657






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      42/50      2.07G     0.8785     0.4761      1.134         22        640: 100%|██████████| 758/758 [01:13<00:00, 10.37it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:05<00:00,  7.16it/s]

                   all       1134       1706      0.903      0.884      0.918      0.654






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      43/50      2.07G     0.8663     0.4644      1.127         22        640: 100%|██████████| 758/758 [01:12<00:00, 10.51it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:04<00:00,  7.32it/s]

                   all       1134       1706      0.911      0.876      0.916      0.659






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      44/50      2.07G     0.8526     0.4575      1.118         24        640: 100%|██████████| 758/758 [01:12<00:00, 10.42it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:05<00:00,  7.14it/s]

                   all       1134       1706      0.911      0.886      0.916       0.66






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      45/50      2.07G     0.8458     0.4482      1.111         20        640: 100%|██████████| 758/758 [01:12<00:00, 10.50it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:04<00:00,  7.38it/s]

                   all       1134       1706      0.912      0.872      0.918      0.662






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      46/50      2.07G     0.8356     0.4395      1.103         28        640: 100%|██████████| 758/758 [01:12<00:00, 10.47it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:04<00:00,  7.26it/s]

                   all       1134       1706      0.917      0.873      0.918      0.661






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      47/50      2.07G     0.8254     0.4348      1.097         23        640: 100%|██████████| 758/758 [01:12<00:00, 10.49it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:04<00:00,  7.35it/s]

                   all       1134       1706      0.909      0.876       0.92      0.664






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      48/50      2.07G     0.8209     0.4282      1.094         27        640: 100%|██████████| 758/758 [01:12<00:00, 10.39it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:04<00:00,  7.39it/s]

                   all       1134       1706      0.907      0.878       0.92      0.663






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      49/50      2.07G     0.8097     0.4222      1.087         22        640: 100%|██████████| 758/758 [01:12<00:00, 10.46it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:05<00:00,  7.03it/s]

                   all       1134       1706      0.908      0.881      0.919      0.662






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      50/50      2.07G     0.8108     0.4178      1.084         23        640: 100%|██████████| 758/758 [01:12<00:00, 10.48it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:04<00:00,  7.33it/s]

                   all       1134       1706      0.908      0.879      0.919      0.662






50 epochs completed in 1.095 hours.
Optimizer stripped from runs/detect/train/weights/last.pt, 6.2MB
Optimizer stripped from runs/detect/train/weights/best.pt, 6.2MB

Validating runs/detect/train/weights/best.pt...
Ultralytics 8.3.40 🚀 Python-3.10.12 torch-2.5.1+cu121 CUDA:0 (NVIDIA L4, 22700MiB)
Model summary (fused): 168 layers, 3,006,623 parameters, 0 gradients, 8.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:05<00:00,  6.09it/s]


                   all       1134       1706      0.914      0.875      0.919      0.664
Speed: 0.2ms preprocess, 0.9ms inference, 0.0ms loss, 1.0ms postprocess per image
Results saved to [1mruns/detect/train[0m


AttributeError: 'DetMetrics' object has no attribute 'get'. See valid attributes below.

    Utility class for computing detection metrics such as precision, recall, and mean average precision (mAP) of an
    object detection model.

    Args:
        save_dir (Path): A path to the directory where the output plots will be saved. Defaults to current directory.
        plot (bool): A flag that indicates whether to plot precision-recall curves for each class. Defaults to False.
        on_plot (func): An optional callback to pass plots path and data when they are rendered. Defaults to None.
        names (dict of str): A dict of strings that represents the names of the classes. Defaults to an empty tuple.

    Attributes:
        save_dir (Path): A path to the directory where the output plots will be saved.
        plot (bool): A flag that indicates whether to plot the precision-recall curves for each class.
        on_plot (func): An optional callback to pass plots path and data when they are rendered.
        names (dict of str): A dict of strings that represents the names of the classes.
        box (Metric): An instance of the Metric class for storing the results of the detection metrics.
        speed (dict): A dictionary for storing the execution time of different parts of the detection process.

    Methods:
        process(tp, conf, pred_cls, target_cls): Updates the metric results with the latest batch of predictions.
        keys: Returns a list of keys for accessing the computed detection metrics.
        mean_results: Returns a list of mean values for the computed detection metrics.
        class_result(i): Returns a list of values for the computed detection metrics for a specific class.
        maps: Returns a dictionary of mean average precision (mAP) values for different IoU thresholds.
        fitness: Computes the fitness score based on the computed detection metrics.
        ap_class_index: Returns a list of class indices sorted by their average precision (AP) values.
        results_dict: Returns a dictionary that maps detection metric keys to their computed values.
        curves: TODO
        curves_results: TODO
    

In [None]:
# Validate the current model and read its mAP@0.5 from the metrics dictionary
# (falls back to 0 when the key is absent).
metrics = model.val()
val_map = metrics.results_dict.get('metrics/mAP50(B)', 0)

# Record this run next to the hyperparameters that produced it.
results.append(dict(params=params, val_map=val_map))

# Order all recorded runs by validation mAP, best first, and report the winner.
results_sorted = sorted(results, key=lambda run: run['val_map'], reverse=True)
best_result = results_sorted[0]

print(f"best hyperparametreler: {best_result['params']}")
print(f"best mAP value: {best_result['val_map']}")

Ultralytics 8.3.40 🚀 Python-3.10.12 torch-2.5.1+cu121 CUDA:0 (NVIDIA L4, 22700MiB)
Model summary (fused): 168 layers, 3,006,623 parameters, 0 gradients, 8.1 GFLOPs


[34m[1mval: [0mScanning /content/datasets/Merged_Dataset/valid/labels.cache... 1134 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1134/1134 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 71/71 [00:07<00:00,  9.51it/s]


                   all       1134       1706      0.909      0.877       0.92      0.664
                 human        300        344      0.927      0.922      0.949      0.885
                helmet         68         88      0.848      0.784      0.848      0.531
                  vest        347        420      0.942      0.936      0.964      0.834
              hair-net        368        386      0.938      0.908      0.952      0.595
                  mask        408        468      0.887      0.833      0.889      0.474
Speed: 0.2ms preprocess, 1.8ms inference, 0.0ms loss, 1.3ms postprocess per image
Results saved to [1mruns/detect/val[0m
best hyperparametreler: {'lr0': 0.001, 'momentum': 0.9, 'weight_decay': 0.0005, 'batch': 16}
best mAP value: 0.9203706164163228


best hyperparametreler: {'lr0': 0.001, 'momentum': 0.9, 'weight_decay': 0.0005, 'batch': 16}


best mAP value: 0.9203706164163228

In [None]:
# Train a fresh YOLOv8-nano model on the merged dataset using the
# hyperparameters reported as best by the validation cell
# (lr0=0.001, momentum=0.9, weight_decay=0.0005, batch=16).
model = YOLO('yolov8n.pt')

model.train(
    data='/content/datasets/Merged_Dataset/data.yaml',
    epochs=60,
    imgsz=640,
    # With the default optimizer='auto', Ultralytics ignores lr0 and momentum
    # (the training log reported "ignoring 'lr0=0.001' and 'momentum=0.9'" and
    # ran SGD(lr=0.01)). Pinning the optimizer makes the values below take
    # effect; SGD is the optimizer 'auto' selected for this run.
    optimizer='SGD',
    lr0=0.001,
    momentum=0.9,
    weight_decay=0.0005,
    batch=16,
    #class_weights=[1, 1, 1, 2, 1],
    patience=4,  # early-stopping patience, in epochs
    verbose=True
)

Ultralytics 8.3.40 🚀 Python-3.10.12 torch-2.5.1+cu121 CUDA:0 (NVIDIA L4, 22700MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=/content/datasets/Merged_Dataset/data.yaml, epochs=60, time=None, patience=4, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train8, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=Tru

[34m[1mtrain: [0mScanning /content/datasets/Merged_Dataset/train/labels.cache... 12128 images, 0 backgrounds, 0 corrupt: 100%|██████████| 12128/12128 [00:00<?, ?it/s]

[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))



[34m[1mval: [0mScanning /content/datasets/Merged_Dataset/valid/labels.cache... 1134 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1134/1134 [00:00<?, ?it/s]


Plotting labels to runs/detect/train8/labels.jpg... 
[Errno 2] No such file or directory: '/content/runs/detect/train8/labels_correlogram.jpg'
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.001' and 'momentum=0.9' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m SGD(lr=0.01, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to [1mruns/detect/train8[0m
Starting training for 60 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/60      2.09G      1.644      3.976      1.707         36        640:   0%|          | 2/758 [00:00<01:32,  8.19it/s]Exception in thread Thread-116 (plot_images):
Traceback (most recent call last):
  File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.10/threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/local/lib/python3.10/dist-packages/ultralytics/utils/plotting.py", line 1169, in plot_images
    annotator.im.save(fname)  # save
  File "/usr/local/lib/python3.10/dist-packages/PIL/Image.py", line 2600, in save
    fp = builtins.open(filename, "w+b")
FileNotFoundError: [Errno 2] No such file or directory: '/content/runs/detect/train8/train_batch0.jpg'
Exception in thread Thread-117 (plot_images):
Traceback (most recent call last):
  File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.10/threading.py", line 953, in run


                   all       1134       1706       0.59      0.559      0.585      0.362


FileNotFoundError: [Errno 2] No such file or directory: 'runs/detect/train8/results.csv'

In [None]:
#test on video
# Runs the trained detector over every frame of a video, draws the detections
# and writes the annotated frames to an mp4 file.
from ultralytics import YOLO
import cv2

model = YOLO('runs/detect/train/weights/best.pt')

conf_threshold = 0.5
frame_size = (360, 640)  # (width, height) used for both inference and the output video

video_path = "orj_1.mp4"
output_path = "output_vide4o.mp4"

cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # Fail loudly instead of silently producing an empty output file.
    raise FileNotFoundError(f"Could not open input video: {video_path}")

# Source dimensions; informational only, the output is always written at frame_size.
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Keep the frame rate as a float: truncating e.g. 29.97 to 29 changes the
# playback speed/duration of the output. Fall back to 30 when the container
# does not report a usable value (0 would produce an unplayable file).
fps = cap.get(cv2.CAP_PROP_FPS)
if not fps or fps <= 0:
    fps = 30.0

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, fourcc, fps, frame_size)
if not out.isOpened():
    cap.release()
    raise RuntimeError(f"Could not open output video for writing: {output_path}")

frames_total = 0
frames_with_detections = 0

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame_resized = cv2.resize(frame, frame_size)

        # verbose=False suppresses the per-frame Ultralytics log lines, which
        # otherwise flood the cell output for long videos. The result is bound
        # to its own name so the `results` list of hyperparameter runs used by
        # the validation cell is not overwritten.
        detections = model.predict(frame_resized, conf=conf_threshold, verbose=False)[0]

        frames_total += 1
        if len(detections.boxes) > 0:
            frames_with_detections += 1

        annotated_frame = detections.plot()
        out.write(annotated_frame)
finally:
    # Always release the capture and finalize the output file, even if
    # inference fails part-way through.
    cap.release()
    out.release()

print(f"Frames processed: {frames_total}, frames with detections: {frames_with_detections}")
print(f"Processed video saved at {output_path}")

[1;30;43mGörüntülenen çıkış son 5000 satıra kısaltıldı.[0m
0: 640x384 1 mask, 7.9ms
Speed: 1.4ms preprocess, 7.9ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 384)
Detections found: 1

0: 640x384 1 mask, 7.9ms
Speed: 1.8ms preprocess, 7.9ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 384)
Detections found: 1

0: 640x384 1 mask, 8.4ms
Speed: 1.8ms preprocess, 8.4ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 384)
Detections found: 1

0: 640x384 1 mask, 8.1ms
Speed: 1.6ms preprocess, 8.1ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 384)
Detections found: 1

0: 640x384 1 mask, 7.4ms
Speed: 2.0ms preprocess, 7.4ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 384)
Detections found: 1

0: 640x384 1 mask, 10.3ms
Speed: 1.9ms preprocess, 10.3ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 384)
Detections found: 1

0: 640x384 1 mask, 7.7ms
Speed: 1.7ms preprocess, 7.7ms inference, 1.3ms postprocess 