# UAV Detection and Tracking

Multi-Object Tracking (MOT) is a core visual ability that humans poses
to perform kinetic tasks and coordinate other tasks. The AI community
has recognized the importance of MOT via a series of
[competitions](https://motchallenge.net).

In this assignment, the object class is `drone` and the ability to track
this object will be demonstrated using [Kalman
Filters](https://en.wikipedia.org/wiki/Kalman_filter). The assignment
will give you the opportunity to apply probabilistic reasoning in the
physical security application space.

## Task 1: Setup your development environment and store the test videos locally (10 points)

In [1]:
!pip install ultralytics
!nvidia-smi
from ultralytics import YOLO
import ultralytics
import os
import cv2
ultralytics.checks()

#model = YOLO('yolov8n.pt')

Ultralytics YOLOv8.0.212 🚀 Python-3.10.12 torch-2.1.0+cu118 CPU (Intel Xeon 2.20GHz)
Setup complete ✅ (2 CPUs, 12.7 GB RAM, 26.9/107.7 GB disk)


In [2]:
!pip install pytube
from pytube import YouTube
import os

VIDEO_FOLDER = "droneVids"
vidLink1 = YouTube("https://www.youtube.com/watch?v=DhmZ6W1UAv4")
vidLink2 = YouTube("https://www.youtube.com/watch?v=YrydHPwRelI")

if not os.path.exists(VIDEO_FOLDER):
    os.makedirs(VIDEO_FOLDER)

stream1 = vidLink1.streams.get_by_itag(22)
stream2 = vidLink2.streams.get_by_itag(22)

stream1.download("droneVids")
stream2.download("droneVids")

import cv2

#this variable downloads a frame every downloadOnWhats frame
#think animating on 15s, or animating on 4s
#this is to prevent neural networking literally tens of
#thousands of images
"""downloadOnWhats = 15

for filename in os.listdir("droneVids"):
    f = os.path.join("droneVids", filename)
    if os.path.isfile(f):
        print(f)
    capture = cv2.VideoCapture(f)
    fDir, nothin = os.path.splitext(f)
    if not os.path.exists(fDir):
            os.makedirs(fDir)
    frameNr = 0
    count = 1
    formNr = "{:05d}".format(count)
    while (True):
        success, frame = capture.read()
        if(frameNr % downloadOnWhats == 0):
            if success:
                cv2.imwrite(f'{fDir}/frame_{formNr}.jpg', frame)
                count += 1
            else:
                break
        frameNr = frameNr+1
        formNr = "{:05d}".format(count)

    capture.release()"""

Collecting pytube
  Downloading pytube-15.0.0-py3-none-any.whl (57 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/57.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.6/57.6 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pytube
Successfully installed pytube-15.0.0


'downloadOnWhats = 15\n\nfor filename in os.listdir("droneVids"):\n    f = os.path.join("droneVids", filename)\n    if os.path.isfile(f):\n        print(f)\n    capture = cv2.VideoCapture(f)\n    fDir, nothin = os.path.splitext(f)\n    if not os.path.exists(fDir):\n            os.makedirs(fDir)\n    frameNr = 0\n    count = 1\n    formNr = "{:05d}".format(count)\n    while (True):\n        success, frame = capture.read()\n        if(frameNr % downloadOnWhats == 0):\n            if success:\n                cv2.imwrite(f\'{fDir}/frame_{formNr}.jpg\', frame)\n                count += 1\n            else:\n                break\n        frameNr = frameNr+1\n        formNr = "{:05d}".format(count)\n\n    capture.release()'

In [3]:
from google.colab import drive
drive.mount('/content/gdrive')

import zipfile
with zipfile.ZipFile("/content/gdrive/MyDrive/DroneDataset.zip", "r") as zip_ref:
  zip_ref.extractall("/content/DroneDataset")


Mounted at /content/gdrive


## Task 1: Drone Object Detection (40 points)

You need to research can use any dataset that can be used to detect the
class `drone` such as the drones used for the test videos. Please be
careful to distinguish between the datasets that detect objects *from*
drones to datasets that detect *the* drones. Your object detector must
use a deep learning model but you can use an existing object detector
model architecture.

Split the videos into frames and use each frame to present the drone
detections you got. Store all images that you had detections in a folder
called `detections`. Write your code in such a way that a number of
videos can be processed from a directory and not just these two.

In [5]:
!yolo task=detect mode=train model=yolov8s.pt data=/content/DroneDataset/drone_dataset/data.yaml epochs=3 plots=True

Ultralytics YOLOv8.0.212 🚀 Python-3.10.12 torch-2.1.0+cu118 CPU (Intel Xeon 2.20GHz)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8s.pt, data=/content/DroneDataset/drone_dataset/data.yaml, epochs=3, patience=50, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train2, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, vid_stride=1, stream_buffer=False, line_width=None, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, boxes=True, format=t

In [7]:
model = YOLO('/content/runs/detect/train2/weights/best.pt')

resultGenerator = model(source, save=True, stream=True)

NameError: ignored

In [None]:
from google.colab.patches import cv2_imshow

if not os.path.exists("detections"):
    os.makedirs("detections")

# Load the YOLOv8 model
model = YOLO('/content/runs/detect/train2/weights/best.pt')

for filename in os.listdir('/content/droneVids'):
    video_path = os.path.join('/content/droneVids', filename)

    if os.path.isfile(video_path):
        dirName, nothin = os.path.splitext(filename)
        downloadDir = os.path.join('/content/detections', dirName)
        print(video_path)
        if not os.path.exists(downloadDir):
            os.makedirs(downloadDir)
        cap = cv2.VideoCapture(video_path)


        frameNr = 0
        # Loop through the video frames
        while cap.isOpened():
            # Read a frame from the video
            success, frame = cap.read()
            frameNr += 1
            if success:
                # Run YOLOv8 inference on the frame
                results = (model(frame))[0]

                for classes in results.boxes:
                    if(classes.cls == 0 and classes.conf >= 0.3):
                        # Visualize the results on the frame
                        annotated_frame = results.plot()

                        # Writes frame to detections
                        formNr = "{:05d}".format(frameNr)
                        cv2.imwrite(f'{downloadDir}/frame_{formNr}.jpg', annotated_frame)

                        # Display the annotated frame
                        #cv2_imshow(annotated_frame)
                        break


                # Break the loop if 'q' is pressed
                if cv2.waitKey(1) & 0xFF == ord("q"):
                    break
            else:
                # Break the loop if the end of the video is reached
                break

        # Release the video capture object and close the display window
        cap.release()
        cv2.destroyAllWindows()

In [None]:
!zip -r /content/detections.zip /content/detections

from google.colab import files
files.download("/content/detections.zip")

In [44]:
#This script is for deleting entire folders worth of frames
#Do not run this script if going through this ipynb as intended

"""fDir = '/content/detections/Drone tracking 2'
for filename in os.listdir(fDir):
    f = os.path.join(fDir, filename)
    if os.path.isfile(f):
        os.remove(f)"""

## Task 2: Kalman Filter (50 points)

Use the \`filterpy\`\` library to implement a Kalman filter that will
track the drone in the video. You will need to use the detections from
the previous task to initialize the Kalman filter.

You need to deliver a number of short videos with each video containing
**only** the frames where the drone is present in the test video and its
2D trajectory shown as a line that connects the pixels that the tracker
indicated. You can use the `ffmpeg` command line tool and OpenCV to
superpose the bounding box of the drone on the video as well as plot its
trajectory.

In [38]:
from google.colab.patches import cv2_imshow

from filterpy.kalman import KalmanFilter
from filterpy.common import Q_discrete_white_noise

In [24]:
if not os.path.exists("kalmanDetections"):
    os.makedirs("kalmanDetections")

# Load the YOLOv8 model
model = YOLO('/content/runs/detect/train2/weights/best.pt')

for filename in os.listdir('/content/droneVids'):
    video_path = os.path.join('/content/droneVids', filename)

    if os.path.isfile(video_path):
        dirName, nothin = os.path.splitext(filename)
        downloadDir = os.path.join('/content/kalmanDetections', dirName)
        print(video_path)
        if not os.path.exists(downloadDir):
            os.makedirs(downloadDir)
        cap = cv2.VideoCapture(video_path)

        frameNr = 0
        # Loop through the video frames
        while cap.isOpened():
            # Read a frame from the video
            success, frame = cap.read()
            frameNr += 1
            if success:
                # Run YOLOv8 inference on the frame
                results = (model(frame))[0]

                for classes in results.boxes:
                    if(classes.cls == 0 and classes.conf >= 0.3):
                        # Visualize the results on the frame
                        annotated_frame = results.plot()

                        #grab my bounding box
                        x1, y1, x2, y2 = classes.xyxy[0]
                        x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)

                        # Writes frame to detections
                        formNr = "{:05d}".format(frameNr)
                        cv2.imwrite(f'{downloadDir}/frame_{formNr}.jpg', annotated_frame)

                        # Display the annotated frame
                        #cv2_imshow(annotated_frame)
                        break


                # Break the loop if 'q' is pressed
                if cv2.waitKey(1) & 0xFF == ord("q"):
                    break
            else:
                # Break the loop if the end of the video is reached
                break

        # Release the video capture object and close the display window
        cap.release()
        cv2.destroyAllWindows()

/content/droneVids/Drone tracking 2.mp4


NameError: ignored