In [1]:
import cv2
import json
import numpy as np
import torch
from ultralytics import YOLO
from deep_sort_realtime.deepsort_tracker import DeepSort
from collections import defaultdict, deque
import os
from math import sqrt

In [2]:
# Resolve a YouTube watch page into a direct media (m3u8) URL that can be opened as a stream.
import yt_dlp

VIDEO_URL = "https://www.youtube.com/watch?v=muijHPW82vI"

# Metadata-only extraction: download is skipped and only the resolved info is kept.
ydl_opts = dict(
    quiet=True,
    skip_download=True,
    force_generic_extractor=False,
    format='best[ext=mp4]',
    simulate=True,
    forceurl=True,
    forcejson=True,
)

with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    info = ydl.extract_info(VIDEO_URL, download=False)

# The 'url' entry of the extracted info is the address used later as the video source.
m3u8_url = info['url']
print("Resolved URL:", m3u8_url)


Resolved URL: https://manifest.googlevideo.com/api/manifest/hls_playlist/expire/1754489394/ei/0g2TaKPSAZfc29gP3LzvqAM/ip/202.138.248.119/id/muijHPW82vI.3/itag/96/source/yt_live_broadcast/requiressl/yes/ratebypass/yes/live/1/sgoap/gir%3Dyes%3Bitag%3D140/sgovp/gir%3Dyes%3Bitag%3D137/rqh/1/hls_chunk_host/rr3---sn-2ugxh5a5-cqvl.googlevideo.com/xpc/EgVo2aDSNQ%3D%3D/playlist_duration/30/manifest_duration/30/bui/AY1jyLNp6h_N83qLX2JkXuUP_iFnv_bbsTlMiSuCarSzUwds8C3vJEK9TDszqYBFazS5MULGx6q-1sU4/spc/l3OVKd-fC17WkcPjcCkh964O/vprv/1/playlist_type/DVR/initcwndbps/885000/met/1754467795,/mh/EU/mm/44/mn/sn-2ugxh5a5-cqvl/ms/lva/mv/m/mvi/3/pcm2cms/yes/pl/24/rms/lva,lva/dover/11/pacing/0/keepalive/yes/fexp/51355912,51548755/mt/1754467477/sparams/expire,ei,ip,id,itag,source,requiressl,ratebypass,live,sgoap,sgovp,rqh,xpc,playlist_duration,manifest_duration,bui,spc,vprv,playlist_type/sig/AJfQdSswRAIgF5JQxOaua0s6pi3LDfX7ms5vFQ48on7FkB-ql7e-n9oCICxGGwicHX4pZX2Z2Eh8ZI1awO_Vbc2EaCWE4cA7TSXS/lsparams/hls_chunk_ho

In [3]:
class SmartZoneLoader:
    """Loads zone polygons from JSON files and rescales them to the current video's resolution."""

    def load_zone_with_scaling(self, zone_json_path, current_video_path):
        """Load a zone polygon and scale it to the resolution of `current_video_path`.

        The JSON file may contain either a bare list (returned unchanged) or a dict with
        a 'polygon' entry and an optional 'video_metadata' dict holding the 'width' and
        'height' the polygon was defined at.

        Args:
            zone_json_path: Path of the zone JSON file.
            current_video_path: Source passed to cv2.VideoCapture; its first frame
                determines the target resolution.

        Returns:
            The polygon exactly as stored when no scaling is needed or possible (bare-list
            file, first frame unreadable, or resolution already matching); a list of
            (x, y) integer tuples when the polygon was rescaled; an empty list if any
            error occurs (the error is printed, not raised).
        """
        try:
            with open(zone_json_path, 'r') as f:
                zone_data = json.load(f)

            # A bare list carries no resolution metadata, so it cannot be rescaled.
            if isinstance(zone_data, list):
                return zone_data

            original_polygon = zone_data['polygon']
            original_metadata = zone_data.get('video_metadata', {})

            cap = cv2.VideoCapture(current_video_path)
            try:
                ret, frame = cap.read()
            finally:
                # Always release the capture, even if read() raises; otherwise the
                # handle leaks because the outer handler swallows the exception.
                cap.release()

            if not ret:
                return original_polygon

            current_height, current_width = frame.shape[:2]

            # Missing metadata defaults to the current size, i.e. "no scaling needed".
            original_width = original_metadata.get('width', current_width)
            original_height = original_metadata.get('height', current_height)

            if original_width == current_width and original_height == current_height:
                return original_polygon

            width_scale = current_width / original_width
            height_scale = current_height / original_height

            scaled_polygon = [
                (int(x * width_scale), int(y * height_scale))
                for x, y in original_polygon
            ]

            print(f"Zona discale dari {original_width}x{original_height} ke {current_width}x{current_height}")
            return scaled_polygon

        except Exception as e:
            # Best-effort loader: report the problem and let the caller skip this zone.
            print(f"Error loading zone: {e}")
            return []

class EnhancedEventDetector:
    """Geometry helpers plus per-track bookkeeping containers for event detection."""

    def __init__(self):
        """Create the empty history containers."""

        def _new_trajectory():
            # Each trajectory keeps only its 10 most recent entries.
            return deque(maxlen=10)

        self.person_vehicle_interactions = defaultdict(dict)
        self.vehicle_person_history = defaultdict(list)
        self.person_trajectory_history = defaultdict(_new_trajectory)

    def point_in_polygon(self, x, y, polygon):
        """Return True when the integer-truncated point (x, y) is inside or on `polygon`."""
        contour = np.array(polygon, np.int32)
        point = (int(x), int(y))
        # measureDist=False: only the sign of the result is meaningful (>= 0 is inside/on edge).
        signed_side = cv2.pointPolygonTest(contour, point, False)
        return signed_side >= 0

    def calculate_intersection_area(self, box1, box2):
        """Return the overlap of two (x1, y1, x2, y2) boxes as a fraction of the smaller box.

        Despite the name, the result is a ratio: intersection area divided by the
        smaller of the two box areas. Returns 0 when the boxes do not overlap or
        when the smaller box has no area.
        """
        left = max(box1[0], box2[0])
        top = max(box1[1], box2[1])
        right = min(box1[2], box2[2])
        bottom = min(box1[3], box2[3])

        if right <= left or bottom <= top:
            return 0

        def _box_area(box):
            return (box[2] - box[0]) * (box[3] - box[1])

        overlap = (right - left) * (bottom - top)
        smaller_area = min(_box_area(box1), _box_area(box2))

        if smaller_area > 0:
            return overlap / smaller_area
        return 0


In [None]:
def _sample_frame_indices(total_frames, target_frames):
    """Pick at most `target_frames` evenly strided indices from a buffer of `total_frames` frames."""
    if total_frames >= target_frames:
        interval = total_frames // target_frames
        return [i * interval for i in range(target_frames)]
    return list(range(total_frames))


def _save_vehicle_crops(frames, v_box, seq_dir, frame_width, frame_height, target_frames=16):
    """Write 224x224 crops of the vehicle box (plus a 30% margin) from sampled frames; return the count written."""
    x1, y1, x2, y2 = map(int, v_box)
    margin_x = int(0.3 * (x2 - x1))
    margin_y = int(0.3 * (y2 - y1))
    x1_crop = max(0, x1 - margin_x)
    y1_crop = max(0, y1 - margin_y)
    x2_crop = min(frame_width, x2 + margin_x)
    y2_crop = min(frame_height, y2 + margin_y)

    os.makedirs(seq_dir, exist_ok=True)
    written = 0
    for i, idx in enumerate(_sample_frame_indices(len(frames), target_frames)):
        crop = frames[idx][y1_crop:y2_crop, x1_crop:x2_crop]
        if crop.size == 0:
            # An empty slice (box outside the frame or degenerate) would make
            # cv2.resize raise; skip it instead of aborting the whole run.
            continue
        crop = cv2.resize(crop, (224, 224))
        cv2.imwrite(os.path.join(seq_dir, f"frame_{i:03d}.jpg"), crop)
        written += 1
    return written


def realtime_event_detection_enhanced(stream_url, zona_json_paths):
    """Detect and record person/vehicle overlap events inside red zones of a video source.

    Runs YOLO detection and DeepSort tracking on every frame of `stream_url`. Vehicles
    (car/truck/bus) whose box centre lies inside a red-zone polygon have their frames
    buffered (last 100). When a tracked person's box overlaps such a vehicle's box by
    more than 20% of the smaller box for 90 counted frames, up to 16 buffered frames
    are cropped around the vehicle and written under "event_dataset_enhanced". Each
    vehicle is saved at most once until reset.

    An annotated preview window is shown; press 'q' to quit or 'r' to clear the saved
    events, vehicle states and overlap counters.

    Args:
        stream_url: Source accepted by cv2.VideoCapture (file path or stream URL).
        zona_json_paths: Iterable of zone JSON paths passed to SmartZoneLoader.

    Returns:
        None. Progress and results are printed; crops are written to disk.
    """
    # Setup GPU/CPU
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    if device == 'cuda':
        print("Using GPU for processing.")

    model = YOLO('yolov8n.pt')
    model.to(device)
    label_map = model.names
    tracker = DeepSort(max_age=30, n_init=3)

    zone_loader = SmartZoneLoader()
    red_zone_polygons = []
    for zona_path in zona_json_paths:
        scaled_zone = zone_loader.load_zone_with_scaling(zona_path, stream_url)
        if scaled_zone:
            red_zone_polygons.append(scaled_zone)
    if not red_zone_polygons:
        print("Tidak ada zona yang valid!")
        return

    detector = EnhancedEventDetector()
    cap = cv2.VideoCapture(stream_url)
    if not cap.isOpened():
        print(f"Error opening video source: {stream_url}")
        cap.release()
        return

    tracked_labels = {"car", "truck", "bus", "person", "motorcycle"}
    vehicle_labels = {"car", "truck", "bus"}

    vehicle_frame_buffers = defaultdict(lambda: deque(maxlen=100))
    vehicle_states = defaultdict(dict)
    intersection_counter = defaultdict(int)
    saved_events = set()

    output_dir = "event_dataset_enhanced"
    os.makedirs(output_dir, exist_ok=True)

    frame_count = 0
    required_intersection_frames = 90

    # try/finally guarantees the capture and preview window are released even when
    # the loop raises (including a kernel interrupt while running in a notebook).
    try:
        while True:
            ret, frame = cap.read()
            if not ret or frame is None:
                break

            frame_count += 1
            h_img, w_img = frame.shape[:2]

            results = model(frame, verbose=False)[0]
            detections = []
            for box in results.boxes:
                conf = float(box.conf[0])
                if conf > 0.5:
                    cls_id = int(box.cls[0])
                    if label_map.get(cls_id) in tracked_labels:
                        x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
                        # DeepSort is fed (left, top, width, height) boxes.
                        detections.append(([x1, y1, x2 - x1, y2 - y1], conf, cls_id))

            tracks = tracker.update_tracks(detections, frame=frame)
            current_vehicles = {}
            current_people = {}

            # One un-annotated snapshot per frame, shared by every in-zone vehicle's
            # buffer, instead of a separate full-frame copy per vehicle. It is taken
            # before any drawing and never mutated afterwards.
            clean_frame = None

            for track in tracks:
                if not track.is_confirmed():
                    continue
                track_id = track.track_id
                ltrb = track.to_ltrb()
                label = label_map.get(track.det_class, 'unknown')

                if label in vehicle_labels:
                    v_cx = (ltrb[0] + ltrb[2]) / 2
                    v_cy = (ltrb[1] + ltrb[3]) / 2
                    in_red_zone = any(detector.point_in_polygon(v_cx, v_cy, p) for p in red_zone_polygons)
                    if in_red_zone:
                        if clean_frame is None:
                            clean_frame = frame.copy()
                        vehicle_frame_buffers[track_id].append(clean_frame)
                        current_vehicles[track_id] = {'box': ltrb, 'label': label}
                    else:
                        # Leaving the zone discards the history collected so far.
                        vehicle_frame_buffers.pop(track_id, None)
                elif label == "person":
                    current_people[track_id] = {'box': ltrb}

            # Free buffers of tracks the tracker no longer returns; they are never
            # read again and each can hold up to 100 full frames.
            live_ids = {t.track_id for t in tracks}
            for stale_id in [tid for tid in vehicle_frame_buffers if tid not in live_ids]:
                del vehicle_frame_buffers[stale_id]

            event_captured_this_frame = False
            for v_id, v_data in current_vehicles.items():
                if v_id in saved_events:
                    continue

                v_box = v_data['box']
                for p_id, p_data in current_people.items():
                    intersection_ratio = detector.calculate_intersection_area(p_data['box'], v_box)
                    pair_key = (v_id, p_id)

                    if intersection_ratio > 0.20:
                        intersection_counter[pair_key] += 1
                    else:
                        intersection_counter[pair_key] = 0

                    if intersection_counter[pair_key] < required_intersection_frames:
                        continue

                    print(f"EVENT DETECTED. Orang {p_id} keluar dari mobil {v_id} dalem RED ZONE.")
                    sequence_to_save = list(vehicle_frame_buffers[v_id])
                    if not sequence_to_save:
                        continue

                    seq_dir = os.path.join(output_dir, f"event_v{int(v_id):03d}_p{int(p_id):03d}_{frame_count}")
                    # NOTE: every buffered frame is cropped with the vehicle's current box.
                    saved_count = _save_vehicle_crops(sequence_to_save, v_box, seq_dir, w_img, h_img)

                    print(f"✅ Saved {saved_count} cropped frames to {seq_dir}")
                    saved_events.add(v_id)
                    vehicle_states[v_id]['status'] = 'event_saved'
                    intersection_counter[pair_key] = 0
                    event_captured_this_frame = True
                    break
                # At most one event is saved per frame.
                if event_captured_this_frame:
                    break

            # Visualise zones and tracking
            for i, poly in enumerate(red_zone_polygons):
                cv2.polylines(frame, [np.array(poly, dtype=np.int32)], True, (0, 0, 255), 3)
                if len(poly) > 0:
                    cv2.putText(frame, f"RED ZONE {i+1}", (poly[0][0], poly[0][1]-10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,0,255), 2)

            for track in tracks:
                if not track.is_confirmed():
                    continue
                track_id = track.track_id
                x1, y1, x2, y2 = map(int, track.to_ltrb())
                label = label_map.get(track.det_class, 'unknown')
                status = vehicle_states.get(track_id, {}).get('status')
                color = (0, 255, 0)
                if status == 'event_saved':
                    color = (255, 0, 255)
                elif label == 'person':
                    color = (255, 100, 100)
                cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
                det_conf = track.get_det_conf()
                label_text = f"ID:{track_id} {label} {det_conf:.2f}" if det_conf is not None else f"ID:{track_id} {label}"
                if status:
                    label_text += " [EVENT SAVED]"
                text_size = cv2.getTextSize(label_text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)[0]
                cv2.rectangle(frame, (x1, y1-25), (x1 + text_size[0], y1), color, -1)
                cv2.putText(frame, label_text, (x1, y1-5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0,0,0), 2)

            # Status overlay
            cv2.rectangle(frame, (10, 10), (400, 120), (0, 0, 0), -1)
            cv2.putText(frame, f"Frame: {frame_count}", (20, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255,255,255), 1)
            cv2.putText(frame, f"Vehicles tracked: {len(current_vehicles)}", (20, 50), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255,255,255), 1)
            cv2.putText(frame, f"People tracked: {len(current_people)}", (20, 70), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255,255,255), 1)
            cv2.putText(frame, f"Events saved: {len(saved_events)}", (20, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0,255,0), 1)
            cv2.putText(frame, f"Device: {device.upper()}", (20, 110), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255,255,0), 1)

            cv2.imshow('Deteksi Parkir V2', frame)
            pressed_key = cv2.waitKey(1) & 0xFF
            if pressed_key == ord('q'):
                break
            elif pressed_key == ord('r'):
                saved_events.clear()
                vehicle_states.clear()
                intersection_counter.clear()
                print("Reset saved events and vehicle states.")
    finally:
        cap.release()
        cv2.destroyAllWindows()

    print(f"\nSelesai deteksi. Total events saved: {len(saved_events)}")


In [32]:

def run_detection(video_path, zone_json_paths):
    """Run the red-zone event detection on `video_path`.

    `zone_json_paths` may be a single path string or a collection of paths; a single
    string is wrapped in a list before being handed to the detector.
    """
    zone_paths = [zone_json_paths] if isinstance(zone_json_paths, str) else zone_json_paths
    realtime_event_detection_enhanced(video_path, zone_paths)

In [33]:
# Video source for this run. NOTE(review): absolute, machine-specific path.
VIDEO_PATH = "C:/Users/andre/Documents/Binus/Lomba/compfest/vietnam7.mp4"
# Alternative source: the stream URL resolved in the yt_dlp cell.
# VIDEO_PATH = m3u8_url

# Zone definition files; each one is loaded as a separate red zone.
ZONE_JSON = [
    "zona_enhanced.json",
    "zona_vietnam1.json",
]

print("Mulai deteksi")
run_detection(VIDEO_PATH, ZONE_JSON)

Mulai deteksi
Using GPU for processing.
Zona discale dari 1536x864 ke 1920x1080
EVENT DETECTED. Orang 42 keluar dari mobil 1 dalem RED ZONE.
✅ Saved 16 cropped frames to event_dataset_enhanced\event_v001_p042_373

Selesai deteksi. Total events saved: 1
