In [2]:
# YOLOv8s-TestingEnv.ipynb
import os

# Must be set BEFORE torch / numpy / OpenCV are imported: those imports load
# the OpenMP runtime, and the variable has to be in the environment by the time
# that runtime initialises. Setting it afterwards may be too late to avoid the
# duplicate-library crash seen on some Windows setups.
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

import time
from datetime import datetime

import cv2
import numpy as np
import torch
from matplotlib import pyplot as plt
from ultralytics import YOLO

torch.set_num_threads(1)                      # keeps CPU thread count sane for Jupyter

# show bounding-boxes only when confidence >= confMin
confMin = 0.45

# #Click-to-Focus settings
# FOCUS_SECONDS = 1.0          # how long to keep focus after a click
# FOCUS_SIZE_FRAC = 0.18       # ROI box width/height as a fraction of min(frame_w, frame_h)
# _click_until_ts = 0.0
# _click_xy = None
# _click_log = []              # <<< NEW: collect click logs

# # Mouse callback
# def _mouse_cb(event, x, y, flags, param):
#     global _click_until_ts, _click_xy
#     if event == cv2.EVENT_LBUTTONDOWN:
#         _click_xy = (x, y)
#         _click_until_ts = time.time() + FOCUS_SECONDS
#         ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
#         _click_log.append((ts, x, y))   # store; we’ll flush to file each frame
#         print(f"[click] {ts}  x={x} y={y}")
#         # visual feedback: small crosshair will be drawn in the loop
#         # (no blocking here)


# Load the trained weights (from YOLOv8s-Trainable).
model = YOLO("D:/Anaconda-Projects/yolo_env/runs/train/drone_detector_50epoch/weights/best.pt")

# Video clip the tracker is run on.
video_path = "D:/Anaconda-Projects/yolo_env/Drone-Dataset/archive/15MAY22 UAV Videos 4k/20220121_110507_VIS_H264.MOV"

# Every run writes into its own timestamped folder below the trained model's
# Metrics-Testing directory, so earlier sessions are never overwritten.
metrics_dir = "D:/Anaconda-Projects/yolo_env/runs/train/drone_detector_50epoch/Metrics-Testing"
timestamp = datetime.now().strftime("%m%d%Y_%H%M%S")
session_dir = os.path.join(metrics_dir, "metrics_" + timestamp)
os.makedirs(session_dir, exist_ok=True)
#click_log_path = os.path.join(session_dir, "click_log.txt")

# Open the video file and stop here if it cannot be read.
vid = cv2.VideoCapture(video_path)
if not vid.isOpened():
    vid.release()
    # Raise SystemExit instead of calling exit(): exit() is an interactive-shell
    # helper that is not guaranteed to exist in every interpreter, and inside a
    # Jupyter kernel it appears to shut the whole kernel down (discarding the
    # loaded model) rather than just stopping this cell -- TODO confirm.
    raise SystemExit(f"Failed to open video: {video_path}")

# Get video properties for saving output
# fps = int(cap.get(cv2.CAP_PROP_FPS))
# width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
# height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Define codec and create VideoWriter
# output_video_path = os.path.join(output_dir, f"tracked_output_{timestamp}.mp4")
# fourcc = cv2.VideoWriter_fourcc(*'XVID')
# out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))
    
print("Running YOLOv8 on your video video. Press 'Q' or close window to exit.")

# Resizable preview window, opened at 960x540.
cv2.namedWindow("YOLOv8 Drone Tracking", cv2.WINDOW_NORMAL)
cv2.resizeWindow("YOLOv8 Drone Tracking", 960, 540)

# Running totals, updated once per processed frame.
frame_count = total_detections = 0

# The annotated preview goes into the session folder. The writer itself is
# created lazily inside the loop, once the frame size and FPS are known.
out = None
preview_of_video = os.path.join(session_dir, "preview_of_video" + timestamp + ".mov")

# Reference point for the runtime / average-FPS statistics.
start_time = time.time()

# --- Run configuration -------------------------------------------------------
# Defined once so that the arguments given to model.track() and the values
# written to the metrics report cannot drift apart.
tracker_used = "bytetrack.yaml"      # Ultralytics ships 'bytetrack.yaml' and 'botsort.yaml'
conf_threshold = confMin             # detections below this confidence are dropped
# Fall back to the CPU when CUDA is unavailable instead of failing on the first frame.
device_used = "cuda:0" if torch.cuda.is_available() else "cpu"
img_size = 640                       # inference size; matches the 384x640 shape in the run log
window_name = "YOLOv8 Drone Tracking"  # title of the preview window created earlier

# Keyword arguments that are identical for every frame; built once, outside the loop.
track_kwargs = dict(
    tracker=tracker_used,      # tracker configuration YAML (selects the tracking algorithm)
    persist=True,              # keep tracker state (track IDs) between per-frame calls
    conf=conf_threshold,       # minimum detection confidence (0.0-1.0)
    imgsz=img_size,            # smaller = faster but less accurate
    #iou=0.7,                  # IoU threshold for non-max suppression (0.0-1.0)
    device=device_used,        # e.g. 'cpu', 'cuda:0', 'cuda:1'
    verbose=True,              # print the per-frame speed/detection log
    show=False,                # do not let Ultralytics open its own window; we call cv2.imshow
    save=False,                # do not let Ultralytics save annotated output itself
    # Output location used by Ultralytics when one of its save options is enabled.
    project="D:/Anaconda-Projects/yolo_env/runs/train/drone_detector",
    name="drone_tracking",
    exist_ok=False,            # False: do not reuse an existing run folder of the same name
    visualize=False,           # NOTE(review): per Ultralytics docs this toggles feature-map visualisation
    stream=False,              # return a list of results rather than a generator
    max_det=300,               # maximum number of detections per image
    classes=None,              # None = keep all classes; otherwise a list of class IDs
    save_crop=False,           # do not save cropped detections
    save_txt=False,            # do not save label *.txt files
    line_width=2,              # box line width for Ultralytics' own show/save rendering
    show_labels=False,         # presumably only affects Ultralytics' own show/save rendering -- verify
)

try:
    while vid.isOpened():
        ret, frame = vid.read()
        if not ret:
            break  # end of stream or read failure
        h, w = frame.shape[:2]

        # Lazily create the preview writer once the frame size and FPS are known.
        if out is None:
            fps = vid.get(cv2.CAP_PROP_FPS)
            if not fps > 0:
                fps = 60.0  # fallback if source FPS is missing, zero, negative or NaN
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # works on most OpenCV builds for .mov
            out = cv2.VideoWriter(preview_of_video, fourcc, fps, (w, h))
            if not out.isOpened():
                # Without this check a codec/path problem silently produces no preview file.
                print(f"Warning: could not open preview writer: {preview_of_video}")

        # Build focused frame if click is active (click-to-focus is currently disabled)
        #use_focus = time.time() < _click_until_ts and _click_xy is not None
        # if use_focus:
        #     fx, fy = _click_xy
        #     side = int(min(w, h) * FOCUS_SIZE_FRAC)
        #     x0 = max(fx - side // 2, 0)
        #     y0 = max(fy - side // 2, 0)
        #     x1 = min(x0 + side, w)
        #     y1 = min(y0 + side, h)
        #     # keep only ROI content; black everywhere else (same size image)
        #     focused = np.zeros_like(frame)
        #     focused[y0:y1, x0:x1] = frame[y0:y1, x0:x1]
        #     source_img = focused
        # else:
        source_img = frame

        # Run tracking on this single frame.
        results = model.track(source=source_img, **track_kwargs)

        # Extra safety: filter any boxes below the threshold
        # (some trackers can pass along extra boxes).
        r = results[0]
        if r.boxes is not None and len(r.boxes) > 0:
            r.boxes = r.boxes[r.boxes.conf >= conf_threshold]

        # # Draw on the ORIGINAL frame (needed only when click-to-focus is enabled)
        # annotated = frame.copy()
        # if r.boxes is not None and len(r.boxes) > 0:
        #     names = r.names if hasattr(r, "names") else {}
        #     for b in r.boxes:
        #         x1, y1, x2, y2 = map(int, b.xyxy[0].tolist())
        #         conf = float(b.conf[0])
        #         cls_id = int(b.cls[0]) if b.cls is not None else -1
        #         label = f"{names.get(cls_id, 'obj')} {conf:.2f}"
        #         cv2.rectangle(annotated, (x1, y1), (x2, y2), (0, 255, 0), 2)
        #         cv2.putText(annotated, label, (x1, max(10, y1 - 6)),
        #                     cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1, cv2.LINE_AA)
        # # crosshair to show focus point (optional)
        # if use_focus and _click_xy is not None:
        #     fx, fy = _click_xy
        #     cv2.drawMarker(annotated, (fx, fy), (255, 255, 0),
        #                    markerType=cv2.MARKER_CROSS, markerSize=12, thickness=2)

        # Draw the (filtered) results and show them.
        annotated = r.plot()
        cv2.imshow(window_name, annotated)

        # Write the annotated frame to the preview video.
        if out.isOpened():
            out.write(annotated)

        # # Write any new click entries to file (non-blocking)
        # if _click_log:
        #     with open(click_log_path, "a") as f:
        #         while _click_log:
        #             ts, cx, cy = _click_log.pop(0)
        #             roi_txt = f"{roi_box}" if roi_box else "None"
        #             f.write(f"{ts}, click=({cx},{cy}), roi={roi_txt}\n")

        # Collect metrics.
        frame_count += 1
        total_detections += len(r.boxes) if r.boxes is not None else 0

        # Break on 'Q' press or window close.
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q') or cv2.getWindowProperty(window_name, cv2.WND_PROP_VISIBLE) < 1:
            break
except Exception as e:
    # Best effort: report the failure but still fall through to the cleanup and
    # metrics report below. The exception type is included because many
    # OpenCV/torch errors have uninformative messages.
    print(f"An error occurred: {type(e).__name__}: {e}")

finally:
    # Cleanup
    vid.release()
    preview_written = out is not None and out.isOpened()
    if out is not None:
        out.release()
    cv2.destroyAllWindows()

    end_time = time.time()

    # Compute runtime stats (guarded against an empty / instantly failing run).
    total_time = end_time - start_time
    avg_fps = frame_count / total_time if total_time > 0 else 0
    avg_detections = total_detections / frame_count if frame_count > 0 else 0

    # ckpt_path may be missing or None; never let the report itself raise here.
    ckpt_path = getattr(model, "ckpt_path", None)
    model_path_txt = ckpt_path if ckpt_path else "N/A"
    model_name_txt = os.path.basename(ckpt_path) if ckpt_path else "best.pt"

    # Save metrics
    metrics_file = os.path.join(session_dir, "metrics-of-video.txt")
    report_lines = [
        f"Video: {os.path.basename(video_path)}",
        f"Frames processed: {frame_count}",
        f"Total detections: {total_detections}",
        f"Average detections per frame: {avg_detections:.2f}",
        f"Average FPS: {avg_fps:.2f}",
        f"Total runtime (s): {total_time:.2f}",
        f"Model path: {model_path_txt}",
        f"Model name: {model_name_txt}",
        f"Tracker used: {os.path.basename(tracker_used)}",
        f"Confidence threshold: {conf_threshold}",
        f"Device: {device_used}",
        f"Image size: {img_size}",
        f"Timestamp: {timestamp}",
        #f"ClickLog: {click_log_path}",
    ]
    with open(metrics_file, "w", encoding="utf-8") as f:
        f.write("\n".join(report_lines) + "\n")

    print("\n Tracking complete!")
    print(f"Metrics saved to: {metrics_file}")
    if preview_written:
        print(f"Preview video saved to: {preview_of_video}")
    else:
        print(f"Preview video was NOT written: {preview_of_video}")
    #print(f"Click log saved to: {click_log_path}")



# Define video writer
#fourcc = cv2.VideoWriter_fourcc(*'XVID')
#out = cv2.VideoWriter('D:/Anaconda-Projects/yolo_env/runs/train/drone_detector/YOLOv8s-outputTesting.mp4', 
#                      fourcc, 
#                     30.0, 
#                      (int(cap.get(3)), int(cap.get(4))))
        # Load your trained model
        #model = YOLO("D:/Anaconda-Projects/yolo_env/runs/train/drone_detector/weights/best.pt")
        
        # Run detection
        # results = model.predict(
        #     source="Drone-Dataset/archive/15MAY22 UAV Videos 4k/20220121_101736_VIS_H264.MOV",
        #     conf=0.25,
        #     show=True
        # )
    
# annotated = results[0].plot()
# resized = cv2.resize(annotated, (960, 540))
# cv2.imshow("YOLOv8s Drone Tracking", resized)
    
# Press 'q' or close window to exit safely
# key = cv2.waitKey(1) & 0xFF
# if key == ord('q') or cv2.getWindowProperty("YOLOv8s Drone Tracking", cv2.WND_PROP_VISIBLE) < 1:
#     break


#plt.imshow(results[0].plot()) # draws bounding boxes
#plt.axis('off')
#plt.show()
#results[0].boxes.data


Running YOLOv8 on your video video. Press 'Q' or close window to exit.

0: 384x640 (no detections), 10.0ms
Speed: 35.1ms preprocess, 10.0ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 8.6ms
Speed: 2.0ms preprocess, 8.6ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 8.3ms
Speed: 1.7ms preprocess, 8.3ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 8.7ms
Speed: 1.7ms preprocess, 8.7ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 9.2ms
Speed: 1.8ms preprocess, 9.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 8.7ms
Speed: 1.9ms preprocess, 8.7ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 10.1ms
Speed: 1.9ms preprocess, 10.1ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384