In [4]:
import cv2
import os
import numpy as np
import torch
from ultralytics import YOLO
from deep_sort_realtime.deepsort_tracker import DeepSort
from collections import defaultdict
from math import sqrt

In [3]:
#buat video youtube ke m3u8
import yt_dlp

VIDEO_URL = "https://www.youtube.com/watch?v=muijHPW82vI"

ydl_opts = {
    'quiet': True,
    'skip_download': True,
    'force_generic_extractor': False,
    'format': 'best[ext=mp4]',
    'simulate': True,
    'forceurl': True,
    'forcejson': True,
}

with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    info = ydl.extract_info(VIDEO_URL, download=False)
    m3u8_url = info['url']
    print("Resolved URL:", m3u8_url)


Resolved URL: https://manifest.googlevideo.com/api/manifest/hls_playlist/expire/1754061540/ei/hIaMaKreHZWc4t4PzNLpmAs/ip/202.138.248.119/id/muijHPW82vI.3/itag/96/source/yt_live_broadcast/requiressl/yes/ratebypass/yes/live/1/sgoap/gir%3Dyes%3Bitag%3D140/sgovp/gir%3Dyes%3Bitag%3D137/rqh/1/hls_chunk_host/rr3---sn-2ugxh5a5-cqvl.googlevideo.com/xpc/EgVo2aDSNQ%3D%3D/playlist_duration/30/manifest_duration/30/bui/AY1jyLOqGvY94QbKcT7NH2innxMiuP-pNUHQNQRJ7CH5e3n1l9YgnyfU1rnXlavtO1HkFICSHn2grbNq/spc/l3OVKQtdpv-89nqhoouhBylL/vprv/1/playlist_type/DVR/initcwndbps/740000/met/1754039942,/mh/EU/mm/44/mn/sn-2ugxh5a5-cqvl/ms/lva/mv/m/mvi/3/pl/24/rms/lva,lva/dover/11/pacing/0/keepalive/yes/fexp/51355912/mt/1754039791/sparams/expire,ei,ip,id,itag,source,requiressl,ratebypass,live,sgoap,sgovp,rqh,xpc,playlist_duration,manifest_duration,bui,spc,vprv,playlist_type/sig/AJfQdSswRQIgfvAPOmH8fS6x5SiVZsIyzA_kQap0fyPsbQ5ASMa-j4cCIQD6wAphC3Tq9gno1vrWKD6KK0nLqD2sGe_rH87NX2noqw%3D%3D/lsparams/hls_chunk_host,initcwndbp

In [None]:
from collections import defaultdict
import json

frame_buffer = defaultdict(list)
saved_tracks = set()

# Cell 2: Polygon helper
def point_in_polygon(x, y, polygon):
    return cv2.pointPolygonTest(np.array(polygon, np.int32), (int(x), int(y)), False) >= 0

# Cell 3: Main detection with red zone
def realtime_detection_with_redzone(stream_url, zona_json):
    colors = {
        "car": (255, 0, 0),
        "truck": (255, 0, 0),
        "bus": (255, 0, 0),
        "motorcycle": (0, 255, 0),
        "bicycle": (0, 255, 0),
        "person": (0, 0, 255),
    }
    duration = {}
    updated_position = {}
    frame_buffers = defaultdict(list)
    saved_tracks = set()

    os.makedirs("behavior_dataset", exist_ok=True)

    with open(zona_json, 'r') as f:
        red_zone_polygon = json.load(f)

    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    model = YOLO('yolo11n.pt')
    model.to(device)
    label_map = model.names

    tracker = DeepSort(max_age=30)

    cap = cv2.VideoCapture(stream_url)
    if not cap.isOpened():
        print("Error opening video stream.")
        return

    frame_counter = 0
    while True:
        ret, frame = cap.read()
        if not ret or frame is None:
            print("Stream ended or invalid frame.")
            break
        frame_counter += 1

        results = model(frame)[0]
        detections = []

        for box in results.boxes:
            cls_id = int(box.cls[0])
            conf = float(box.conf[0])
            xyxy = box.xyxy[0].cpu().numpy()
            x1, y1, x2, y2 = xyxy
            w, h = x2 - x1, y2 - y1
            x_center, y_center = x1, y1

            detections.append(([x_center, y_center, w, h], conf, cls_id))

        try:
            tracks = tracker.update_tracks(detections, frame=frame)
        except Exception as e:
            print("Tracking error:", e)
            continue

        for track in tracks:
            if not track.is_confirmed():
                continue

            track_id = track.track_id
            ltrb = track.to_ltrb()
            x1, y1, x2, y2 = map(int, ltrb)

            cls_id = track.det_class
            cls_conf = track.det_conf
            label = label_map.get(cls_id, 'unknown')
            if cls_conf is None or label not in colors:
                continue

            cx = int((x1 + x2) / 2) # center x
            cy = int((y1 + y2) / 2) # center y

            in_red_zone = point_in_polygon(cx, cy, red_zone_polygon)

            label_text = f"{label} {track_id} {cls_conf:.2f}"
            cv2.rectangle(frame, (x1, y1), (x2, y2), colors[label], 2)
            cv2.putText(frame, label_text, (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, colors[label], 2)
            cv2.circle(frame, (cx, cy), 5, colors[label], -1)

            if in_red_zone:
                if track_id not in duration:
                    duration[track_id] = 0
                    if track_id not in updated_position:
                        updated_position[track_id] = (cx, cy)
                else:
                    # check if the vehicle is static or not (use euclidean distance)
                    if frame_counter % 30 == 0:
                        if sqrt((cx - updated_position[track_id][0]) ** 2 + (cy - updated_position[track_id][1]) ** 2) < 10:
                            duration[track_id] += 1
                        else:
                            duration[track_id] = 0
                        updated_position[track_id] = (cx, cy)
                # === CROP DAN SIMPAN SEQUENCE ===
                if duration[track_id] >= 5 and track_id not in saved_tracks:
                    margin = 20
                    h_img, w_img, _ = frame.shape
                    x1_crop = max(0, x1 - margin)
                    y1_crop = max(0, y1 - margin)
                    x2_crop = min(w_img, x2 + margin)
                    y2_crop = min(h_img, y2 + margin)
                    crop = frame[y1_crop:y2_crop, x1_crop:x2_crop]
                    crop = cv2.resize(crop, (224, 224))
                    frame_buffers[track_id].append(crop)

                    if len(frame_buffers[track_id]) == 30:
                        seq_dir = f"behavior_dataset/seq_{int(track_id):04d}"
                        os.makedirs(seq_dir, exist_ok=True)
                        for i, img in enumerate(frame_buffers[track_id]):
                            cv2.imwrite(os.path.join(seq_dir, f"frame_{i:02d}.jpg"), img)
                        print(f"Saved sequence: {seq_dir}")
                        saved_tracks.add(track_id)

                cv2.putText(frame, f"Time {duration[track_id] // 10}s", (x1, y2 + 20), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
                        
        cv2.polylines(frame, [np.array(red_zone_polygon, dtype=np.int32)], isClosed=True, color=(0, 0, 255), thickness=2)

        cv2.imshow(f'Deteksi CCTV Real-time - ParkLens AI - {device.upper()}', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()

data_video = m3u8_url
data_zona = "zona_vietnam2.json"
realtime_detection_with_redzone(data_video, data_zona)

  self.model.load_state_dict(torch.load(model_wts_path))



0: 384x640 2 persons, 3 cars, 1 motorcycle, 49.8ms
Speed: 9.9ms preprocess, 49.8ms inference, 20.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 3 cars, 1 motorcycle, 18.3ms
Speed: 3.0ms preprocess, 18.3ms inference, 2.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 3 cars, 1 motorcycle, 28.1ms
Speed: 3.1ms preprocess, 28.1ms inference, 3.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 3 cars, 3 motorcycles, 21.0ms
Speed: 2.9ms preprocess, 21.0ms inference, 2.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 4 cars, 1 motorcycle, 21.0ms
Speed: 2.9ms preprocess, 21.0ms inference, 2.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 4 cars, 2 motorcycles, 15.7ms
Speed: 3.0ms preprocess, 15.7ms inference, 2.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 4 cars, 4 motorcycles, 64.8ms
Speed: 4.1ms preprocess, 64.8ms inference, 3.5ms pos

ValueError: Unknown format code 'd' for object of type 'str'