# Tennis Player Tracking with Court Verification

This Jupyter Notebook implements a system for detecting and tracking tennis players in a video, estimating the distance they cover, and verifying if the current view shows a valid tennis court.

## 1. Setup and Imports

This section installs the necessary libraries and imports them into the environment.

In [1]:
## 1. Setup and Imports
!pip install ultralytics opencv-python deep_sort_realtime pandas




[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import cv2
import numpy as np
from ultralytics import YOLO
from deep_sort_realtime.deepsort_tracker import DeepSort
from collections import defaultdict
from skimage.metrics import structural_similarity as ssim
from google.colab.patches import cv2_imshow 

ModuleNotFoundError: No module named 'skimage'

## 2. Configuration

Define various parameters for video paths, model selection, and spatial conversion.

In [3]:
# === Configuration ===
VIDEO_PATH = '/content/tennis.mp4' # Update this path to your video file
OUTPUT_PATH = 'output_annotated.mp4'
YOLO_MODEL = 'yolov8n.pt' # Or 'yolov8m.pt', 'yolov8l.pt', etc.
PIXELS_PER_COURT_WIDTH = 720 # Reference pixel width of the court at a known perspective
COURT_WIDTH_METERS = 8.23 # Standard tennis court width
PIXEL_TO_METER = COURT_WIDTH_METERS / PIXELS_PER_COURT_WIDTH # Conversion factor
MIN_FRAMES_FOR_PLAYER = 20 # Minimum frames for a track to be considered a player

## 3. Court Corner Reference 

Manually measured corner coordinates from the first frame. This section is for reference and is not actively used in the current tracking logic, but can be useful for court transformation later.

In [4]:
# === Court Corner Reference (manually measured from first frame) ===
court_corners_ref = np.array([
    [100, 200],    # top-left
    [1180, 200],   # top-right
    [1180, 620],   # bottom-right
    [100, 620]     # bottom-left
], dtype="float32")

## 4. Helper Functions

Define utility functions for calculations, drawing, and court geometry analysis.

In [5]:
# === Helper Functions ===
def euclidean(p1, p2):
    """Calculates the Euclidean distance between two points."""
    return np.linalg.norm(np.array(p1) - np.array(p2))

def draw_text(frame, text, pos, font_scale=0.6, thickness=1, color=(255, 255, 255), bg=(0, 0, 0)):
    """Draws text on a video frame with an optional background."""
    (tw, th), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, font_scale, thickness)
    x, y = pos
    cv2.rectangle(frame, (x, y - th - 4), (x + tw + 4, y + 4), bg, -1)
    cv2.putText(frame, text, (x + 2, y - 2), cv2.FONT_HERSHEY_SIMPLEX, font_scale, color, thickness)

def is_valid_court_geometry(frame, min_lines=5):
    """
    Analyzes a frame to check for characteristics of a tennis court geometry
    by detecting horizontal and vertical lines.
    """
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150, apertureSize=3)

    # Use HoughLinesP to detect lines
    lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=100, minLineLength=100, maxGap=20)
    if lines is None:
        return False

    vertical = 0
    horizontal = 0
    for line in lines:
        x1, y1, x2, y2 = line[0]
        # Calculate angle of the line
        angle = np.abs(np.arctan2(y2 - y1, x2 - x1))
        # Classify lines as horizontal or vertical based on angle
        if angle < np.pi / 12:      # approximately horizontal (within 15 degrees)
            horizontal += 1
        elif angle > np.pi / 3:     # approximately vertical (greater than 60 degrees)
            vertical += 1

    # Return True if sufficient horizontal and vertical lines are found
    return horizontal >= min_lines and vertical >= min_lines

def is_game_view(original_frame, current_frame):
    """Determines if the current frame is a valid game view based on court geometry."""
    # Currently relies solely on current frame geometry analysis
    return is_valid_court_geometry(current_frame)

## 5. Initialization

Initialize the YOLO model, Deep SORT tracker, video capture, and video writer. Also, set up data structures for tracking.

In [6]:
# === Initialization ===
model = YOLO(YOLO_MODEL) # Load the YOLO model
tracker = DeepSort(max_age=30) # Initialize Deep SORT tracker

cap = cv2.VideoCapture(VIDEO_PATH) # Open the video file
if not cap.isOpened():
    print(f"Error: Could not open video file {VIDEO_PATH}")
    exit() # Exit if video cannot be opened

# Get video properties
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)

# Define the codec and create VideoWriter object
fourcc = cv2.VideoWriter_fourcc(*'mp4v') # Or 'XVID', 'MJPG' depending on desired output format
out = cv2.VideoWriter(OUTPUT_PATH, fourcc, fps, (width, height))

# Data structures to store tracking information
positions = defaultdict(list) # Stores historical (x, y) positions for each track ID
distances = defaultdict(float) # Stores accumulated distance for each track ID
player_labels = {} # Stores assigned labels ('Player A', 'Player B') for track IDs
scene_active = True # Flag to indicate if the current scene is a valid game view

# Read the first frame as a reference (currently used only by is_game_view signature)
ret, original_scene = cap.read()
if not ret:
    print("Error: Could not read first frame.")
    cap.release()
    exit()

Error: Could not open video file /content/tennis.mp4
Error: Could not read first frame.


## 6. Main Video Processing Loop

Iterate through video frames, perform object detection and tracking, calculate distances, and annotate the frame.

In [1]:
# === Main Loop ===
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        print("End of video or error reading frame.")
        break # Break the loop if no frame is read

    # Check if the current frame represents a valid game view
    is_valid = is_game_view(original_scene, frame)

    if not is_valid:
        # If not a valid game angle, display a message and pause tracking logic
        draw_text(frame, "NON-GAME ANGLE - PAUSED", (10, 30), font_scale=0.7, color=(0, 0, 255), bg=(255, 255, 255))
        scene_active = False
        out.write(frame) # Still write the frame to the output
        cv2_imshow(frame) # Display the frame (especially for Colab)
        # Check for key press (e.g., 'q' to quit)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        continue # Skip tracking logic for this frame

    # If transitioning from an inactive scene to an active one, reset tracking data
    if not scene_active:
        print("[INFO] Returning to game view - Resetting tracker.")
        tracker = DeepSort(max_age=30) # Re-initialize tracker
        positions.clear() # Clear stored positions
        distances.clear() # Clear accumulated distances
        player_labels.clear() # Clear player labels
        scene_active = True

    # Perform object detection using YOLO
    results = model(frame)[0]
    detections = []
    # Filter detections to include only 'person' with high confidence
    for r in results.boxes.data.tolist():
        x1, y1, x2, y2, conf, cls = r
        # Class ID 0 is typically 'person' in COCO dataset
        if int(cls) == 0 and conf > 0.5:
            # Format detection for Deep SORT: (x, y, w, h), confidence, class_name
            detections.append(([x1, y1, x2 - x1, y2 - y1], conf, 'player'))

    # Update the Deep SORT tracker with current detections
    tracks = tracker.update_tracks(detections, frame=frame)
    y_centers = {} # To store y-coordinates for player labeling

    # Process each confirmed track
    for track in tracks:
        # Only consider confirmed tracks (seen for enough frames)
        if not track.is_confirmed():
            continue

        track_id = track.track_id # Get the unique ID for this track
        l, t, r, b = track.to_ltrb() # Get the bounding box in (left, top, right, bottom) format
        cx, cy = int((l + r) / 2), int((t + b) / 2) # Calculate the center coordinates

        # Store the current position
        positions[track_id].append((cx, cy))

        # Only start calculating distance and labeling after minimum frames
        if len(positions[track_id]) < MIN_FRAMES_FOR_PLAYER:
            continue

        # Calculate and accumulate distance if there's a previous position
        if len(positions[track_id]) > 1:
            d_pix = euclidean(positions[track_id][-1], positions[track_id][-2]) # Distance in pixels between last two points
            distances[track_id] += d_pix * PIXEL_TO_METER # Convert to meters and add to total

        y_centers[track_id] = cy # Store y-center for labeling

        # Draw bounding box and trajectory on the frame
        cv2.rectangle(frame, (int(l), int(t)), (int(r), int(b)), (0, 255, 0), 2) # Green rectangle
        for i in range(1, len(positions[track_id])):
            cv2.line(frame, positions[track_id][i - 1], positions[track_id][i], (0, 0, 255), 2) # Red trajectory line

    # Assign Player A/B labels based on vertical position if at least two players are tracked
    if len(y_centers) >= 2:
        # Sort track IDs by their y-center (smaller y is higher on the screen)
        sorted_ids = sorted(y_centers.items(), key=lambda x: x[1])
        player_labels[sorted_ids[0][0]] = "Player A" # Player A is higher
        player_labels[sorted_ids[1][0]] = "Player B" # Player B is lower

    # Draw player labels and distances on the frame
    y_offset = 30
    for tid, label in player_labels.items():
        dist = distances[tid] # Get total distance for this player
        # Draw label and total distance in the top-left corner
        draw_text(frame, f"{label}: {dist:.2f} m", (10, y_offset))
        y_offset += 30 # Move down for the next label

        # Draw label, ID, and distance near the player's bounding box
        if tid in positions and positions[tid]: # Ensure track exists and has positions
             draw_text(frame, f"{label} (ID {tid}) | {dist:.2f} m", (positions[tid][-1][0] + 10, positions[tid][-1][1] - 10))


    # Write the annotated frame to the output video file
    out.write(frame)

    # Display the frame
    cv2_imshow(frame) # For Colab compatibility (replace with cv2.imshow for local execution)

    # Check for key press (e.g., 'q' to quit)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# === Cleanup ===
print("\n[INFO] Processing finished.")
cap.release() # Release the video capture object
out.release() # Release the video writer object
cv2.destroyAllWindows() # Close any OpenCV windows

# === Final Output ===
print("\n[INFO] Final Distances:")
for tid, label in player_labels.items():
    # Ensure the track ID still exists in distances (might not if tracking was reset)
    if tid in distances:
        print(f"{label} (ID {tid}): {distances[tid]:.2f} meters")
    else:
        print(f"{label} (ID {tid}): Data unavailable (tracking reset).")

NameError: name 'cap' is not defined