In [23]:
# Read a video file and display it frame by frame

import cv2
# Open the first input video. `path` is also read by later cells that do not
# define their own.
path = "input_videos/27-11-2024-21-02.mp4"
cap = cv2.VideoCapture(path)

# Read-ahead loop: show each decoded frame until the stream is exhausted
# (read() reports failure) or the user presses 'q' in the window.
quit_key = ord("q")
ok, frame = cap.read()
while ok:
    cv2.imshow("frame", frame)
    if cv2.waitKey(1) & 0xFF == quit_key:
        break
    ok, frame = cap.read()

# Free the capture handle and close the preview window.
cap.release()
cv2.destroyAllWindows()


In [None]:
# Add detected people to the video frames using YOLOv8

from ultralytics import YOLO
import cv2

# Load the YOLOv8 detection model and open the video.
# NOTE: `path` is not defined in this cell; it comes from an earlier cell.
model = YOLO("yolov8n.pt")
cap = cv2.VideoCapture(path)

# Fail with an exception rather than exit(): inside a notebook exit() shuts
# the kernel down and discards every variable, while an exception only stops
# this cell.
if not cap.isOpened():
    raise RuntimeError("Error: Could not open video.")

try:
    # Loop through the video frames
    while cap.isOpened():
        success, frame = cap.read()
        if not success:
            print("End of video or failed to read frame.")
            break

        # Run YOLOv8 inference on the frame; [0] selects the result for this
        # single image.
        results = model(frame)[0]
        id_name_dict = results.names

        # Draw a blue (BGR) rectangle around every detection labelled "person".
        for box in results.boxes:
            object_cls_id = int(box.cls[0])  # class id as int to index the names dict
            if id_name_dict[object_cls_id] == "person":
                x1, y1, x2, y2 = (int(v) for v in box.xyxy[0].tolist())
                cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)

        # Alternative kept from the original notebook (draws boxes and labels):
        # annotated_frame = results.plot()

        # Display the annotated frame
        cv2.imshow("YOLOv8 Inference", frame)

        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Always release the capture and close the window, even if inference
    # raises or the cell is interrupted.
    cap.release()
    cv2.destroyAllWindows()

In [29]:
# Detect the ball using frame differencing

import cv2
# Motion detection by differencing consecutive grayscale frames.
# NOTE: `path` is not defined in this cell; it comes from an earlier cell.
cap = cv2.VideoCapture(path)

# The first frame seeds the "previous frame" of the difference. Check the read
# explicitly: on failure the frame is None and cvtColor would raise an opaque
# OpenCV assertion error.
ret, old_frame = cap.read()
if not ret:
    cap.release()
    raise RuntimeError(f"Could not read the first frame of {path!r}.")
old_gray = cv2.cvtColor(old_frame, cv2.COLOR_BGR2GRAY)
thresh = 30  # absolute gray-level differences above this become 255 in the mask

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        frame_diff = cv2.absdiff(old_gray, frame_gray)

        # Binary motion mask: 255 where the change exceeds `thresh`, else 0.
        _, thresholded = cv2.threshold(frame_diff, thresh, 255, cv2.THRESH_BINARY)
        cv2.imshow("original", frame)
        cv2.imshow("thresholded", thresholded)

        # waitKey(0) blocks until a key is pressed: 'q' quits, any other key
        # steps to the next frame.
        if cv2.waitKey(0) & 0xFF == ord("q"):
            break
        old_gray = frame_gray
finally:
    # Release the capture and close the windows even on error or interrupt.
    cap.release()
    cv2.destroyAllWindows()

In [None]:
# Remove people to detect only the ball

import cv2
from ultralytics import YOLO
from itertools import chain

# Frame differencing on the second video, with YOLO "person" boxes blanked
# out so that the remaining motion blobs are ball candidates.
path = "input_videos/27-11-2024-21-02-second.mp4"
model = YOLO("yolov8n.pt")
cap = cv2.VideoCapture(path)

# Raise instead of exit(): inside a notebook exit() shuts the kernel down.
if not cap.isOpened():
    raise RuntimeError("Error: Could not open video.")

# The first frame seeds both the previous gray image and the previous boxes.
success, old_frame = cap.read()
if not success:
    cap.release()
    raise RuntimeError("Error: Could not read the first video frame.")
old_gray = cv2.cvtColor(old_frame, cv2.COLOR_BGR2GRAY)
thresh = 30     # gray-level difference above which a pixel counts as motion
min_area = 100  # contours whose area is not above this are ignored

results = model(old_frame)[0]
id_name_dict = results.names
old_boxes = results.boxes

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        frame_diff = cv2.absdiff(old_gray, frame_gray)

        _, thresholded = cv2.threshold(frame_diff, thresh, 255, cv2.THRESH_BINARY)
        results = model(frame)[0]
        id_name_dict = results.names

        # Blank person boxes from the previous AND the current frame: the
        # difference image is computed between those two frames, so both sets
        # of boxes are cleared from it.
        for box in chain(old_boxes, results.boxes):
            object_cls_id = int(box.cls[0])  # class id as int to index the names dict
            if id_name_dict[object_cls_id] == "person":
                x1, y1, x2, y2 = (int(v) for v in box.xyxy[0].tolist())
                cv2.rectangle(frame, (x1, y1), (x2, y2), 0, -1)  # filled black box
                thresholded[y1:y2, x1:x2] = 0

        # Optional noise clean-up left disabled by the author:
        # thresholded = cv2.erode(thresholded, None, iterations=4)
        # thresholded = cv2.dilate(thresholded, None, iterations=4)

        # Draw a green box around every remaining motion blob large enough to
        # be a ball candidate.
        contours, _ = cv2.findContours(thresholded, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        for contour in contours:
            area = cv2.contourArea(contour)
            if area > min_area:
                x, y, w, h = cv2.boundingRect(contour)
                cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)

        cv2.imshow("original", frame)
        cv2.imshow("thresholded", thresholded)

        # waitKey(0) blocks until a key is pressed: 'q' quits, any other key
        # steps to the next frame.
        if cv2.waitKey(0) & 0xFF == ord("q"):
            break
        old_gray = frame_gray
        old_boxes = results.boxes
finally:
    # Release the capture and close the windows even on error or interrupt.
    cap.release()
    cv2.destroyAllWindows()


In [None]:
# YOLO segmentation

import cv2
import numpy as np
from ultralytics import YOLO


# Segmentation model and input video for the overlay demo.
model = YOLO("yolov8s-seg.pt")  # 'yolov8n-seg.pt' is a smaller alternative
path = "input_videos/27-11-2024-21-02-second.mp4"
cap = cv2.VideoCapture(path)

# Frame size as reported by the capture; used to resize the masks to the frame.
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Saving the result is left disabled: it would need a cv2.VideoWriter created
# here, out.write(frame) once per frame and out.release() at the end.

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    for result in model(frame):
        masks = result.masks
        if masks is not None:
            for mask, cls in zip(masks.data, result.boxes.cls):
                # Only class 0 ('person') is tinted.
                if int(cls) != 0:
                    continue

                # Mask tensor -> NumPy, resized to the frame, binarised to 0/255.
                binary = cv2.resize(mask.cpu().numpy(), (frame_width, frame_height))
                binary = (binary > 0.5).astype(np.uint8) * 255

                # Put the mask in channel 0 (blue in BGR) of an empty image and
                # add it at half weight on top of the unchanged frame.
                overlay = np.zeros_like(frame, dtype=np.uint8)
                overlay[:, :, 0] = binary
                frame = cv2.addWeighted(frame, 1, overlay, 0.5, 0)

    # Show the tinted frame; 'q' stops playback.
    cv2.imshow("YOLO Segmentation", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release resources
cap.release()
cv2.destroyAllWindows()


In [None]:
# Use YOLO segmentation model to remove people from the video frames and detect ball

import cv2
import numpy as np
from ultralytics import YOLO
from itertools import chain

# Load the YOLOv8 segmentation model and open the second video.
path = "input_videos/27-11-2024-21-02-second.mp4"
model = YOLO("yolov8n-seg.pt")  # Segmentation model instead of detection
cap = cv2.VideoCapture(path)

# Raise instead of exit(): inside a notebook exit() shuts the kernel down.
if not cap.isOpened():
    raise RuntimeError("Error: Could not open video.")

# Read the first frame and convert to grayscale; it seeds the frame difference.
success, old_frame = cap.read()
if not success:
    cap.release()
    raise RuntimeError("Error: Could not read the first video frame.")
old_gray = cv2.cvtColor(old_frame, cv2.COLOR_BGR2GRAY)

# Motion detection parameters
thresh = 30   # gray-level difference above which a pixel counts as motion
min_area = 3  # contours whose area is not above this are ignored

# Detections on the previous frame (initially the first frame)
old_results = model(old_frame)[0]

# Structuring element used to grow each mask so it also covers the object's
# border (larger = more expansion). Loop-invariant, so it is built once.
kernel = np.ones((20, 20), np.uint8)

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Convert current frame to grayscale and difference it with the previous one
        frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        frame_diff = cv2.absdiff(old_gray, frame_gray)

        # Threshold the difference image
        _, thresholded = cv2.threshold(frame_diff, thresh, 255, cv2.THRESH_BINARY)

        # Run YOLO segmentation
        results = model(frame)[0]

        # Gather (mask, class) pairs from the current and the previous frame.
        # `masks` can be None for either result; such a result contributes
        # nothing. Handling each result separately avoids the AttributeError
        # raised when only one of them is None, and frames without any masks
        # are still processed and displayed instead of being skipped.
        detections = []
        for res in (results, old_results):
            if res.masks is not None:
                detections.extend(zip(res.masks.data, res.boxes.cls))

        for mask, cls in detections:
            if int(cls) not in (0, 38):  # 0 = 'person'  38 = 'tennis racket'
                continue

            mask = mask.cpu().numpy()  # Convert to NumPy array
            mask = cv2.resize(mask, (frame.shape[1], frame.shape[0]))  # Resize to frame size
            mask = (mask > 0.5).astype(np.uint8) * 255  # Convert to binary mask
            mask = cv2.dilate(mask, kernel, iterations=1)  # Expand the mask

            # Remove the object from the displayed frame and from the motion mask
            frame[mask > 0] = 0
            thresholded[mask > 0] = 0

        # Find contours and draw bounding boxes around the remaining motion blobs
        contours, _ = cv2.findContours(thresholded, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        for contour in contours:
            area = cv2.contourArea(contour)
            if area > min_area:
                x, y, w, h = cv2.boundingRect(contour)
                cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)  # Ball candidate

        # Show frames
        cv2.imshow("Original", frame)
        cv2.imshow("Thresholded", thresholded)

        # waitKey(0) blocks until a key is pressed: 'q' quits, any other key
        # steps to the next frame.
        if cv2.waitKey(0) & 0xFF == ord("q"):
            break

        # Update previous frame
        old_gray = frame_gray
        old_results = results
finally:
    # Release the capture and close the windows even on error or interrupt.
    cap.release()
    cv2.destroyAllWindows()


In [None]:
# Check if the two videos are synchronized.
# Consider that one video is 20fps, the other is 10fps

import cv2
import numpy as np

# Load the two videos
path1 = "input_videos/27-11-2024-21-02.mp4"
path2 = "input_videos/27-11-2024-21-02-second.mp4"
cap1 = cv2.VideoCapture(path1)
cap2 = cv2.VideoCapture(path2)

# Raise instead of exit(): inside a notebook exit() shuts the kernel down.
# Both captures are released first so neither handle is leaked.
if not cap1.isOpened() or not cap2.isOpened():
    cap1.release()
    cap2.release()
    raise RuntimeError("Error: Could not open video.")

try:
    # Loop through the video frames
    while cap1.isOpened() and cap2.isOpened():
        # The first video is 20 fps and the second 10 fps, so one frame of the
        # first video is read and thrown away for every displayed pair.
        cap1.read()
        ret1, frame1 = cap1.read()
        ret2, frame2 = cap2.read()

        # Stop as soon as either video runs out of frames
        if not ret1 or not ret2:
            print("End of video or failed to read frame.")
            break

        # Display the frames side by side
        frame = np.hstack((frame1, frame2))
        cv2.imshow("Synchronized Videos", frame)

        # waitKey(0) blocks until a key is pressed: 'q' quits, any other key
        # steps to the next pair of frames.
        if cv2.waitKey(0) & 0xFF == ord("q"):
            break
finally:
    # Release the video capture objects and close the display window, even
    # on error or interrupt.
    cap1.release()
    cap2.release()
    cv2.destroyAllWindows()



# RESULT: THEY ARE NOT SYNCHRONIZED (dropping every other frame of the 20 fps video is not enough to align the two videos)

End of video or failed to read frame.


In [7]:
# Manually synchronize the two videos

import cv2
import numpy as np
# Open both videos for a side-by-side comparison with a manual frame offset.
path1 = "input_videos/27-11-2024-21-02.mp4"
path2 = "input_videos/27-11-2024-21-02-second.mp4"
cap1 = cv2.VideoCapture(path1)
cap2 = cv2.VideoCapture(path2)

# Raise instead of exit(): inside a notebook exit() shuts the kernel down.
# Both captures are released first so neither handle is leaked.
if not cap1.isOpened() or not cap2.isOpened():
    cap1.release()
    cap2.release()
    raise RuntimeError("Error: Could not open video.")

# Number of leading frames of the first video to drop so that it lines up
# with the second one (manually chosen value).
sync_offset = 3

try:
    # Skip the first n frames of the first video to synchronize
    for _ in range(sync_offset):
        cap1.read()

    while cap1.isOpened() and cap2.isOpened():
        # The first video is 20 fps and the second 10 fps, so one frame of the
        # first video is read and thrown away for every displayed pair.
        cap1.read()
        ret1, frame1 = cap1.read()
        ret2, frame2 = cap2.read()

        if not ret1 or not ret2:
            print("End of video or failed to read frame.")
            break

        # Display the frames side by side
        frame = np.hstack((frame1, frame2))
        cv2.imshow("Synchronized Videos", frame)

        # waitKey(0) blocks until a key is pressed: 'q' quits, any other key
        # steps to the next pair of frames.
        if cv2.waitKey(0) & 0xFF == ord("q"):
            break
finally:
    # Release both captures and close the window, even on error or interrupt.
    cap1.release()
    cap2.release()
    cv2.destroyAllWindows()

In [None]:
# Save possible positions to a file (from first video)

import cv2
import numpy as np
from ultralytics import YOLO
from itertools import chain
import csv

# Load the YOLOv8 segmentation model and open the first video.
path = "input_videos/27-11-2024-21-02.mp4"
model = YOLO("yolov8n-seg.pt")  # Segmentation model instead of detection
cap = cv2.VideoCapture(path)
output_file = "balls1.csv"

# Raise instead of exit(): inside a notebook exit() shuts the kernel down.
if not cap.isOpened():
    raise RuntimeError("Error: Could not open video.")

# Read the first frame and convert to grayscale; it seeds the frame difference.
success, old_frame = cap.read()
if not success:
    cap.release()
    raise RuntimeError("Error: Could not read the first video frame.")
old_gray = cv2.cvtColor(old_frame, cv2.COLOR_BGR2GRAY)

# Motion detection parameters
thresh = 30   # gray-level difference above which a pixel counts as motion
min_area = 3  # contours whose area is not above this are ignored

# Detections on the previous frame (initially the first frame)
old_results = model(old_frame)[0]

# Index of the row written to the CSV; counts processed (kept) frames only.
frame_num = 0

# Structuring element used to grow each mask so it also covers the object's
# border (larger = more expansion). Loop-invariant, so it is built once.
kernel = np.ones((20, 20), np.uint8)

# Skip the first n frames of the first video to synchronize.
# Fixed: the original read from `cap1`, a capture belonging to another cell,
# so this cell's video was never actually advanced.
# NOTE(review): with the first frame already consumed above, row k here is
# video-1 frame 5+2k, while the pairing used in the manual synchronization
# cell would give 6+2k for the matching row of the second video - confirm the
# intended offset.
for _ in range(3):
    cap.read()

try:
    # Mode 'w' creates the file, erasing it if it already exists; it is
    # opened once and one row is written per processed frame.
    with open(output_file, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)

        while cap.isOpened():
            # Discard half of the frames since 20fps vs 10fps
            cap.read()
            ret, frame = cap.read()
            if not ret:
                break

            # Convert current frame to grayscale and difference it with the previous one
            frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            frame_diff = cv2.absdiff(old_gray, frame_gray)

            # Threshold the difference image
            _, thresholded = cv2.threshold(frame_diff, thresh, 255, cv2.THRESH_BINARY)

            # Run YOLO segmentation
            results = model(frame)[0]

            # Gather (mask, class) pairs from the current and the previous
            # frame. `masks` can be None for either result; such a result
            # contributes nothing. Handling each result separately avoids the
            # AttributeError raised when only one of them is None, and frames
            # without masks still produce a row, so `frame_num` keeps
            # counting every processed frame.
            detections = []
            for res in (results, old_results):
                if res.masks is not None:
                    detections.extend(zip(res.masks.data, res.boxes.cls))

            for mask, cls in detections:
                if int(cls) not in (0, 38):  # 0 = 'person'  38 = 'tennis racket'
                    continue

                mask = mask.cpu().numpy()  # Convert to NumPy array
                mask = cv2.resize(mask, (frame.shape[1], frame.shape[0]))  # Resize to frame size
                mask = (mask > 0.5).astype(np.uint8) * 255  # Convert to binary mask
                mask = cv2.dilate(mask, kernel, iterations=1)  # Expand the mask

                # Remove the object from the displayed frame and from the motion mask
                frame[mask > 0] = 0
                thresholded[mask > 0] = 0

            # Centre of the bounding box of every remaining motion blob
            balls_positions = []
            contours, _ = cv2.findContours(thresholded, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
            for contour in contours:
                area = cv2.contourArea(contour)
                if area > min_area:
                    x, y, w, h = cv2.boundingRect(contour)
                    cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)  # Ball candidate
                    balls_positions.append(np.float32([x+w/2, y+h/2]))

            # Show frames
            cv2.imshow("Original", frame)
            cv2.imshow("Thresholded", thresholded)

            # One row per frame: the frame number followed by one cell per
            # candidate. Each candidate is a NumPy array, which csv.writer
            # stores as its string form (unchanged from the original format).
            writer.writerow([frame_num, *balls_positions])

            # Press 'q' to exit
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break

            # Update previous frame
            frame_num += 1
            old_gray = frame_gray
            old_results = results
finally:
    # Release the capture and close the windows even on error or interrupt.
    cap.release()
    cv2.destroyAllWindows()


In [None]:
# Same thing for the second video

import cv2
import numpy as np
from ultralytics import YOLO
from itertools import chain
import csv

# Load the YOLOv8 segmentation model and open the second video.
path = "input_videos/27-11-2024-21-02-second.mp4"
model = YOLO("yolov8n-seg.pt")  # Segmentation model instead of detection
cap = cv2.VideoCapture(path)
output_file = "balls2.csv"

# Raise instead of exit(): inside a notebook exit() shuts the kernel down.
if not cap.isOpened():
    raise RuntimeError("Error: Could not open video.")

# Read the first frame and convert to grayscale; it seeds the frame difference.
success, old_frame = cap.read()
if not success:
    cap.release()
    raise RuntimeError("Error: Could not read the first video frame.")
old_gray = cv2.cvtColor(old_frame, cv2.COLOR_BGR2GRAY)

# Motion detection parameters
thresh = 30   # gray-level difference above which a pixel counts as motion
min_area = 3  # contours whose area is not above this are ignored

# Detections on the previous frame (initially the first frame)
old_results = model(old_frame)[0]

# Index of the row written to the CSV; counts processed frames.
frame_num = 0

# Structuring element used to grow each mask so it also covers the object's
# border (larger = more expansion). Loop-invariant, so it is built once.
kernel = np.ones((20, 20), np.uint8)

try:
    # Mode 'w' creates the file, erasing it if it already exists; it is
    # opened once and one row is written per processed frame.
    with open(output_file, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)

        while cap.isOpened():
            # Every frame is used here: unlike the first video, no frames
            # are discarded.
            ret, frame = cap.read()
            if not ret:
                break

            # Convert current frame to grayscale and difference it with the previous one
            frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            frame_diff = cv2.absdiff(old_gray, frame_gray)

            # Threshold the difference image
            _, thresholded = cv2.threshold(frame_diff, thresh, 255, cv2.THRESH_BINARY)

            # Run YOLO segmentation
            results = model(frame)[0]

            # Gather (mask, class) pairs from the current and the previous
            # frame. `masks` can be None for either result; such a result
            # contributes nothing. Handling each result separately avoids the
            # AttributeError raised when only one of them is None, and frames
            # without masks still produce a row, so `frame_num` keeps
            # counting every processed frame.
            detections = []
            for res in (results, old_results):
                if res.masks is not None:
                    detections.extend(zip(res.masks.data, res.boxes.cls))

            for mask, cls in detections:
                if int(cls) not in (0, 38):  # 0 = 'person'  38 = 'tennis racket'
                    continue

                mask = mask.cpu().numpy()  # Convert to NumPy array
                mask = cv2.resize(mask, (frame.shape[1], frame.shape[0]))  # Resize to frame size
                mask = (mask > 0.5).astype(np.uint8) * 255  # Convert to binary mask
                mask = cv2.dilate(mask, kernel, iterations=1)  # Expand the mask

                # Remove the object from the displayed frame and from the motion mask
                frame[mask > 0] = 0
                thresholded[mask > 0] = 0

            # Centre of the bounding box of every remaining motion blob
            balls_positions = []
            contours, _ = cv2.findContours(thresholded, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
            for contour in contours:
                area = cv2.contourArea(contour)
                if area > min_area:
                    x, y, w, h = cv2.boundingRect(contour)
                    cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)  # Ball candidate
                    balls_positions.append(np.float32([x+w/2, y+h/2]))

            # Show frames
            cv2.imshow("Original", frame)
            cv2.imshow("Thresholded", thresholded)

            # One row per frame: the frame number followed by one cell per
            # candidate. Each candidate is a NumPy array, which csv.writer
            # stores as its string form (unchanged from the original format).
            writer.writerow([frame_num, *balls_positions])

            # Press 'q' to exit
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break

            # Update previous frame
            frame_num += 1
            old_gray = frame_gray
            old_results = results
finally:
    # Release the capture and close the windows even on error or interrupt.
    cap.release()
    cv2.destroyAllWindows()
