In [2]:
import cv2
import os
import numpy as np
import torch
from ultralytics import YOLO
from deep_sort_realtime.deepsort_tracker import DeepSort
from collections import defaultdict, deque
from math import sqrt
import json

In [2]:
#buat video youtube ke m3u8
import yt_dlp

VIDEO_URL = "https://www.youtube.com/watch?v=muijHPW82vI"

ydl_opts = {
    'quiet': True,
    'skip_download': True,
    'force_generic_extractor': False,
    'format': 'best[ext=mp4]',
    'simulate': True,
    'forceurl': True,
    'forcejson': True,
}

with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    info = ydl.extract_info(VIDEO_URL, download=False)
    m3u8_url = info['url']
    print("Resolved URL:", m3u8_url)


Resolved URL: https://manifest.googlevideo.com/api/manifest/hls_playlist/expire/1754082386/ei/8teMaPTSBq2RssUPoM-40A4/ip/103.19.109.10/id/muijHPW82vI.3/itag/96/source/yt_live_broadcast/requiressl/yes/ratebypass/yes/live/1/sgoap/gir%3Dyes%3Bitag%3D140/sgovp/gir%3Dyes%3Bitag%3D137/rqh/1/hls_chunk_host/rr2---sn-oxujpup2xgn5q5-jb3e.googlevideo.com/xpc/EgVo2aDSNQ%3D%3D/playlist_duration/30/manifest_duration/30/bui/AY1jyLOOcrNeWajyVBpsQN4R46RyPd7YAV1qDmmdP-wfbLcB5BbX6PCYQBvxpcTCMtAaUWYWksbfxAz8/spc/l3OVKVBmqVkBy3fN_OTYz2ma/vprv/1/playlist_type/DVR/initcwndbps/1242500/met/1754060787,/mh/EU/mm/44/mn/sn-oxujpup2xgn5q5-jb3e/ms/lva/mv/m/mvi/2/pl/24/rms/lva,lva/dover/11/pacing/0/keepalive/yes/fexp/51355912/mt/1754060430/sparams/expire,ei,ip,id,itag,source,requiressl,ratebypass,live,sgoap,sgovp,rqh,xpc,playlist_duration,manifest_duration,bui,spc,vprv,playlist_type/sig/AJfQdSswRQIgYpPPOAmtBbcd5wbFR_AoBsoVrrg-hfwiKkcQkCkAI08CIQDrEW8HSrYUeAM3rUEDzAs5rqyCbZ4vsRTelYDeGVyJJQ%3D%3D/lsparams/hls_chunk_host

In [6]:
def point_in_polygon(x, y, polygon):
    return cv2.pointPolygonTest(np.array(polygon, np.int32), (int(x), int(y)), False) >= 0

def boxes_overlap(person_box, vehicle_box):
    px_center = (person_box[0] + person_box[2]) / 2
    py_center = (person_box[1] + person_box[3]) / 2
    
    vx1, vy1, vx2, vy2 = vehicle_box
    
    if vx1 < px_center < vx2 and vy1 < py_center < vy2:
        return True
    return False

def realtime_event_detection(stream_url, zona_json):
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = YOLO('yolo11n.pt')
    model.to(device)
    label_map = model.names
    tracker = DeepSort(max_age=30)
    
    with open(zona_json, 'r') as f:
        red_zone_polygon = json.load(f)

    cap = cv2.VideoCapture(stream_url)
    if not cap.isOpened():
        print(f"Error opening video source: {stream_url}")
        return

    vehicle_frame_buffers = defaultdict(lambda: deque(maxlen=120)) 
    vehicle_positions = {} 
    saved_events = set()

    output_dir = "event_dataset"
    os.makedirs(output_dir, exist_ok=True)

    while True:
        ret, frame = cap.read()
        if not ret or frame is None:
            break

        results = model(frame, verbose=False)[0]
        detections = []
        for box in results.boxes:
            conf = float(box.conf[0])
            if conf > 0.45: 
                cls_id = int(box.cls[0])
                if label_map.get(cls_id) in ["car", "truck", "bus", "person"]:
                    xyxy = box.xyxy[0].cpu().numpy()
                    x1, y1, x2, y2 = xyxy
                    w, h = x2 - x1, y2 - y1
                    detections.append(([x1, y1, w, h], conf, cls_id))
        
        tracks = tracker.update_tracks(detections, frame=frame)

        current_vehicles = {}
        current_people = {}

        for track in tracks:
            if not track.is_confirmed():
                continue

            track_id = track.track_id
            ltrb = track.to_ltrb()
            cls_id = track.det_class
            label = label_map.get(cls_id, 'unknown')

            x1, y1, x2, y2 = map(int, ltrb)
            color = (0, 255, 0) if "car" in label else (255, 100, 100)
            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
            label_text = f"ID:{track_id}"
            cv2.putText(frame, label_text, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2)
            

            if label in ["car", "truck", "bus"]:
                vehicle_frame_buffers[track_id].append(frame.copy())
                cx = int((x1 + x2) / 2)
                cy = int((y1 + y2) / 2)
                vehicle_positions[track_id] = (cx, cy)
                current_vehicles[track_id] = {'box': ltrb}
            
            elif label == "person":
                current_people[track_id] = {'box': ltrb}

        #deteksi overlap
        for p_id, p_data in current_people.items():
            for v_id, v_data in current_vehicles.items():
                if v_id in saved_events:
                    continue
                
                # Cek kalo mobil ada di zona ilegal
                v_cx, v_cy = vehicle_positions[v_id]
                if not point_in_polygon(v_cx, v_cy, red_zone_polygon):
                    continue

                # kalo box orang dan mobil overlap
                if boxes_overlap(p_data['box'], v_data['box']):
                    print(f" Orang {p_id} overlap sama vehicle {v_id}.")
                    
                    # Ambil 120 frame sebelum 
                    sequence_to_save = list(vehicle_frame_buffers[v_id])
                    
                    if len(sequence_to_save) > 0:
                        seq_dir = os.path.join(output_dir, f"event_{int(v_id):04d}")
                        os.makedirs(seq_dir, exist_ok=True)
                        
                        vx1, vy1, vx2, vy2 = map(int, v_data['box'])

                        margin_w = int((vx2 - vx1) * 0.30) 
                        margin_h = int((vy2 - vy1) * 0.30)

                        h_img, w_img, _ = frame.shape
                        x1_crop = max(0, vx1 - margin_w)
                        y1_crop = max(0, vy1 - margin_h)
                        x2_crop = min(w_img, vx2 + margin_w)
                        y2_crop = min(h_img, vy2 + margin_h)
                        
                        #simplify ke 16 frames yang merepresentasikan semua clip
                        total_frames_in_buffer = len(sequence_to_save)
                        target_frames = 16
                        
                        if total_frames_in_buffer >= target_frames:
                            interval = total_frames_in_buffer // target_frames
                            sampled_indices = [i * interval for i in range(target_frames)]
                        else:
                            sampled_indices = range(total_frames_in_buffer)

                        for i, frame_index in enumerate(sampled_indices):
                            img_frame = sequence_to_save[frame_index]
                            
                            crop = img_frame[y1_crop:y2_crop, x1_crop:x2_crop]
                            if crop.shape[0] > 0 and crop.shape[1] > 0:
                                crop_resized = cv2.resize(crop, (224, 224))
                                cv2.imwrite(os.path.join(seq_dir, f"frame_{i:02d}.jpg"), crop_resized)
                        
                        print(f"Saved {len(sequence_to_save)} frames to {seq_dir}")
                        saved_events.add(v_id)
                    
                    break 
            
        cv2.polylines(frame, [np.array(red_zone_polygon, dtype=np.int32)], isClosed=True, color=(0, 0, 255), thickness=2)
        cv2.imshow('Deteksi Event', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()

In [7]:
data_video = "test.mp4" 
data_zona = "zona_vietnam2.json"
realtime_event_detection(data_video, data_zona)

 Orang 13 overlap sama vehicle 4.
Saved 120 frames to event_dataset/event_0004


KeyboardInterrupt: 