In [2]:
import cv2
from ultralytics import YOLO
import numpy as np
import math
from numpy import random
import os
from IPython.display import Video
from utils import *
from PIL import Image
from tqdm import tqdm
from IPython import display
from collections import deque

device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [2]:
video_path = "video_test_dataset/1/made_1.mp4"
Video(video_path, width=640)

In [3]:
model = YOLO("weights/detect_large.pt")

video_path = "video_test_dataset/1/made_5.mp4"
output_dir = "cropped_video_dataset"
detect_conf_threshold = 0.7

#def create_zoomed_in_hoop_video(video_path, detect_conf_threshold=0.7, output_dir = "cropped_video_dataset", display_result = False):                 
cap, fps, frame_width, frame_height = get_video_info(video_path)
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) 


video_name = video_path.split("/")[-1]
video_name = video_name.split(".")[0] 

center_buffer = deque(maxlen=10)

if not os.path.exists(output_dir):
    os.makedirs(output_dir)

already_intialized_video = False 
last_known_center = None



for i in tqdm(range(total_frames)):
    ret, img = cap.read()
    hoop_detected = False
    if ret:
        results = model(img, stream = False, device = device, conf = detect_conf_threshold, verbose = False)
        
        for r in results:
            boxes = r.boxes

            for box in boxes:
                x1, y1, x2, y2 = box.xyxy[0]
                x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2) # convert to int values
                confidence = box.conf[0]
                predicted_class = model.names[int(box.cls)]
                
                # If "basketball-hoops" is detected, make a prediction with cls_model
                if predicted_class == "hoop" and confidence > detect_conf_threshold:
                    hoop_detected = True
                    center_x, center_y = (x1 + x2) // 2, (y1 + y2) // 2
                    last_known_center = (center_x, center_y)
                    
                    box_width = x2 - x1
                    box_height = y2 - y1
                    x1 = int(x1 - box_width / 2)
                    y1 = int(y1 - box_height / 2)
                    x2 = int(x2 + box_width / 2)
                    y2 = int(y2 + box_height / 2)
                    
                    
                    if not already_intialized_video:
                        already_intialized_video = True
                        crop_width = x2 - x1
                        crop_height = y2 - y1
                        new_vid_fps = fps
                        new_vid_codec = cv2.VideoWriter_fourcc(*'vp09')
                        new_vid_name = video_name + "_cropped.mp4"
                        new_vid_path = os.path.join(output_dir, new_vid_name) if output_dir is not None else new_vid_name
                        new_vid = cv2.VideoWriter(new_vid_path, new_vid_codec, new_vid_fps, (crop_width, crop_height))

                    else:
                        center_x, center_y = (x1 + x2) // 2, (y1 + y2) // 2
                        center_buffer.append((center_x, center_y))  # Add current center to the buffer

                        # Calculate smoothed center
                        avg_center_x, avg_center_y = map(int, np.mean(center_buffer, axis=0))

                        # Rest of your cropping logic, but use avg_center_x and avg_center_y for cropping
                        x1_crop = max(avg_center_x - crop_width // 2, 0)
                        y1_crop = max(avg_center_y - crop_height // 2, 0)
                        x2_crop = min(x1_crop + crop_width, img.shape[1])
                        y2_crop = min(y1_crop + crop_height, img.shape[0])

                        # Adjust the crop area if it exceeds the image size
                        x1_crop = max(x2_crop - crop_width, 0)
                        y1_crop = max(y2_crop - crop_height, 0)
                        
                        img_crop = img[y1_crop:y2_crop, x1_crop:x2_crop]
                        new_vid.write(img_crop)
                        
    if not hoop_detected and last_known_center is not None:
        # Use the last known center for cropping if hoop is not detected
        center_x, center_y = last_known_center
        center_buffer.append(last_known_center)  # Update center buffer with the last known center

        if already_intialized_video and not img is None:
            # Calculate smoothed center
            avg_center_x, avg_center_y = map(int, np.mean(center_buffer, axis=0))            
            x1_crop = max(avg_center_x - crop_width // 2, 0)
            y1_crop = max(avg_center_y - crop_height // 2, 0)
            x2_crop = min(x1_crop + crop_width, img.shape[1])
            y2_crop = min(y1_crop + crop_height, img.shape[0])

            # Adjust the crop area if it exceeds the image size
            x1_crop = max(x2_crop - crop_width, 0)
            y1_crop = max(y2_crop - crop_height, 0)
            
            img_crop = img[y1_crop:y2_crop, x1_crop:x2_crop]
            new_vid.write(img_crop)    
                


    elif not ret:
        break
    
if already_intialized_video:
    new_vid.release()
cap.release()

    
    #return new_vid_path


In [9]:
root_data_dir = "video_test_dataset"
output_root_dir = "cropped_video_dataset"

classes = ["0", "1"]

for cls in classes:
    root_dir = os.path.join(root_data_dir, cls)
    output_dir = os.path.join(output_root_dir, cls)
    for video_name in os.listdir(root_dir):
        video_path = os.path.join(root_dir, video_name)
        create_zoomed_in_hoop_video(video_path, detect_conf_threshold, output_dir, display_result = False)

100%|██████████| 161/161 [00:05<00:00, 29.53it/s]
100%|██████████| 85/85 [00:02<00:00, 39.75it/s]
100%|██████████| 158/158 [00:01<00:00, 87.58it/s]
100%|██████████| 108/108 [00:03<00:00, 29.54it/s]
100%|██████████| 135/135 [00:03<00:00, 36.10it/s]
100%|██████████| 104/104 [00:02<00:00, 44.65it/s]
100%|██████████| 155/155 [00:04<00:00, 32.10it/s]
100%|██████████| 78/78 [00:01<00:00, 43.49it/s]
100%|██████████| 91/91 [00:07<00:00, 12.24it/s]
100%|██████████| 157/157 [00:02<00:00, 65.68it/s]
100%|██████████| 108/108 [00:08<00:00, 12.10it/s]
100%|██████████| 190/190 [00:01<00:00, 116.92it/s]
100%|██████████| 90/90 [00:07<00:00, 11.69it/s]
100%|██████████| 86/86 [00:02<00:00, 29.53it/s]
100%|██████████| 79/79 [00:06<00:00, 12.43it/s]
  0%|          | 0/125 [00:00<?, ?it/s]


UnboundLocalError: local variable 'last_known_center' referenced before assignment

In [17]:
video_path = "video_test_dataset/1/made_5.mp4"
output_dir = "cropped_video_dataset"

cap, fps, frame_width, frame_height = get_video_info(video_path)
out = cv2.VideoWriter('output.mp4',cv2.VideoWriter_fourcc(*"vp09"), fps, (frame_width,frame_height))

while cap.isOpened():
    ret, frame = cap.read()
    if ret:
        result = model.track(frame, persist = True, device = "cuda")
        
        annotated_frame = result[0].plot()
        out.write(annotated_frame)
    else:
        break
    
cap.release()
out.release()

In [18]:
Video("output.mp4", width=640)