In [17]:
import numpy as np
import cv2

# --- CONFIGURATION ---
# Actual ball diameter in meters (e.g., Volleyball is ~0.21m)
BALL_REAL_DIAMETER = 0.21 
NET_HEIGHT = 2.43
# Define your real-world coordinates for your labels (X, Y, Z)
# Z = 0 is the ground. Let's assume the net height is 2.43m.
world_points_map = {
    "BE_LEFT":       [0.0, 0.0, 0.0   ],
    "BA_LEFT":       [0.0, 6.0, 0.0   ],
    "M_LEFT":        [0.0, 9.0, 0.0   ],
    "TA_LEFT":       [0.0, 12.0, 0.0  ],
    "TE_LEFT":       [0.0, 18.0, 0.0  ],
    "NET_B_LEFT":    [0.0, 9.0, NET_HEIGHT-1],
    "NET_T_LEFT":    [0.0, 9.0, NET_HEIGHT], 
    "ANT_T_LEFT":    [0.0, 9.0, NET_HEIGHT+0.8],
    "BE_RIGHT":      [9.0, 0.0, 0.0   ],
    "BA_RIGHT":      [9.0, 6.0, 0.0   ],
    "M_RIGHT":       [9.0, 9.0, 0.0   ],
    "TA_RIGHT":      [9.0, 12.0, 0.0  ],
    "TE_RIGHT":      [9.0, 18.0, 0.0  ],
    "NET_B_RIGHT": [9.0, 9.0, NET_HEIGHT-1],
    "NET_T_RIGHT":   [9.0, 9.0, NET_HEIGHT],
    "ANT_T_RIGHT": [9.0, 9.0, NET_HEIGHT+0.8],
}
# BA_RIGHT,1467.015,754.13
# BA_LEFT,323.559,668.08
# M_RIGHT,1587.646,687.474
# M_RIGHT,575.877,634.642
# NET_T_RIGHT,1563.401,324.402
# NET_T_LEFT,573.488,369.349
# TA_RIGHT,1660.031,645.074
# NET_B_RIGHT,1576.133,478.926
# NET_B_LEFT,576.6,483.066
# Data from your CSV (name, img_x, img_y)
collected_data = [
    ("BA_RIGHT", 1467.015,754.13),
    ("BA_LEFT", 323.559,668.08),
    ("M_RIGHT", 1587.646,687.474),
    ("M_LEFT", 575.877,634.642),
    ("NET_T_RIGHT", 1563.401,324.402),
    ("NET_T_LEFT",573.488,369.349),
    ("TA_RIGHT",1660.031,645.074)
]
collected_data = [
    ("BE_RIGHT",903.709,861.078,),
    ("BA_RIGHT",1295.086,742.611,),
    ("M_RIGHT",1399.292,711.481,),
    ("BA_LEFT",450.494,688.182,),
    ("M_LEFT",585.012,671.832,),
    ("TA_RIGHT",1556.239,664.584,),
    ("NET_T_RIGHT",1395.733,446.422,),
    # ("NET_B_RIGHT",1399.577,551.74,),
    # ("ANT_T_RIGHT",1391.42,358.154,),
    # ("NET_B_LEFT",638.214,547.15,),
    ("NET_T_LEFT",639.631,462.495,),
    # ("ANT_T_LEFT" ,641.402,393.07,),
]
collected_data = [
    ("TE_LEFT",465.565,519.4940),
    ("TA_LEFT",321.796,601.733),
    ("BA_LEFT",1.132,781.967),
    ("M_RIGHT",1420.885,636.213),
    ("TE_RIGHT",1184.41,501.596),
    ("TA_RIGHT",1312.372,575.302),
    ("M_LEFT",193.865,671.834),
    ("BA_RIGHT",1585.02,728.349),
    ("NET_T_LEFT" ,164.882,319.697),
    ("NET_T_RIGHT",1428.694,302.609),
]
collected_data = [
    ("BE_LEFT",1248.824,876.159),
   ("BA_LEFT",812.639,776.17),
   ("M_LEFT",686.566,748.637),
   ("BA_RIGHT",1607.148,696.992),
   ("TE_LEFT",512.393,710.208),
   ("TA_LEFT",610.407,733.987),
   ("M_RIGHT",1423.955,687.176),
   ("NET_T_RIGHT",1413.493,480.536),
   ("NET_T_LEFT",678.291,493.482),

]
collected_data = [
    ("BA_LEFT",991.204,1278.692),
   ("BA_RIGHT",2812.985,1281.593),
   ("M_LEFT",1165.259,1014.708),
   ("M_RIGHT",2644.731,1014.708),
   ("TA_LEFT",1271.05,831.604),
   ("TA_RIGHT",2534.906,834.014),
   ("TE_RIGHT",2390.327,589.436),
   ("TE_LEFT",1420.447,589.436),
   ("NET_T_RIGHT",2712.014,644.857),
   ("NET_T_LEFT",1098.642,642.059),
]

In [2]:
def calibrate_camera(data, world_map, img_w=1920, img_h=1080,focal_length = None):
    image_pts = []
    object_pts = []
    
    # Only use points that exist in BOTH your CSV data and your world_map
    for name, ix, iy in data:
        if name in world_map:
            image_pts.append([ix, iy])
            object_pts.append(world_map[name])
        else:
            print(f"Warning: Point '{name}' in data but not defined in world_map. Skipping.")
            
    if len(image_pts) < 4:
        raise ValueError(f"Need at least 4 points to calibrate. Found {len(image_pts)}.")

    image_pts = np.array(image_pts, dtype=np.float32)
    object_pts = np.array(object_pts, dtype=np.float32)

    # Intrinsics: Use a focal length guess. 
    # For many wide-angle cameras, focal_length â‰ˆ image_width
    if(focal_length==None):
        focal_length = img_w
    center = (img_w / 2, img_h / 2)
    
    camera_matrix = np.array([
        [focal_length, 0, center[0]],
        [0, focal_length, center[1]],
        [0, 0, 1]
    ], dtype=np.float32)
    
    dist_coeffs = np.zeros((4, 1)) 

    # solvePnP finds the camera position/rotation relative to the court
    success, rvec, tvec = cv2.solvePnP(
        object_pts, 
        image_pts, 
        camera_matrix, 
        dist_coeffs, 
        flags=cv2.SOLVEPNP_ITERATIVE
    )
    
    if not success:
        print("Calibration failed!")
        return None, None, None

    return camera_matrix, rvec, tvec

In [3]:
def get_ball_height(ball_x, ball_y, ball_w_px, camera_matrix, rvec, tvec):
    """
    Calculates the 3D position and height of the ball.
    Uses the pixel width to determine distance (Z depth from camera).
    """
    f_x = camera_matrix[0, 0]
    
    # 1. Calculate distance from camera using the apparent width
    # distance = (focal_length * real_width) / width_in_pixels
    distance_from_cam = (f_x * BALL_REAL_DIAMETER) / ball_w_px
    
    # 2. Get Normalized Image Coordinates
    cx, cy = camera_matrix[0, 2], camera_matrix[1, 2]
    x_norm = (ball_x - cx) / f_x
    y_norm = (ball_y - cy) / camera_matrix[1, 1]
    
    # 3. Ball position in Camera Space
    P_cam = np.array([x_norm * distance_from_cam, 
                      y_norm * distance_from_cam, 
                      distance_from_cam])
    
    # 4. Transform Camera Space to World Space
    R, _ = cv2.Rodrigues(rvec)
    # P_world = R_inv * (P_cam - T)
    P_world = np.dot(R.T, (P_cam - tvec.flatten()))
    
    return P_world # Return the Z-coordinate (Height)

def verify_calibration(camera_matrix, rvec, tvec, world_map):
    # Convert world points to a numpy array
    names = list(world_map.keys())
    pts_3d = np.array([world_map[n] for n in names], dtype=np.float32)
    
    # Project 3D points to 2D image plane
    img_pts, _ = cv2.projectPoints(pts_3d, rvec, tvec, camera_matrix, np.zeros((4,1)))
    
    for i, name in enumerate(names):
        print(f"Point {name}: World {world_map[name]} -> Image {img_pts[i].ravel()}")
    return img_pts

In [4]:
def get_net_homography(camera_matrix, rvec, tvec):
    R, _ = cv2.Rodrigues(rvec)
    
    # Normally for ground (Z=0), we take columns 0 and 1 of R.
    # For the Net (Y=9), the plane is defined by X and Z.
    # So we take column 0 (X) and column 2 (Z).
    
    # We also need to account for the fact that Y is not 0, it's 9.
    # The translation becomes: t_effective = (R * [0, 9, 0]^T) + tvec
    y_offset_world = np.array([0, 9.0, 0], dtype=np.float32).reshape(3, 1)
    t_effective = np.dot(R, y_offset_world) + tvec
    
    # Combine Column 0 (X), Column 2 (Z), and the effective translation
    rt_vertical = np.column_stack((R[:, 0], R[:, 2], t_effective))
    
    # H = K * [r1 r3 t_eff]
    h_net = np.dot(camera_matrix, rt_vertical)
    
    return np.linalg.inv(h_net)

In [5]:
def image_to_net_coords(u, v, h_net_inv):
    """Convert pixel (u, v) to world (X, Z) on the net plane."""
    pixel_pt = np.array([u, v, 1.0], dtype=np.float32)
    world_pt = np.dot(h_net_inv, pixel_pt)
    
    world_pt /= world_pt[2] # Normalize
    return world_pt[0], world_pt[1] # Returns X and Z (Height)

In [6]:
def image_to_world_ground(u, v, h_inv):
    """Convert pixel (u, v) to world (X, Y) assuming it's on the ground."""
    pixel_pt = np.array([u, v, 1.0], dtype=np.float32)
    world_pt = np.dot(h_inv, pixel_pt)
    
    # Normalize by the third coordinate (scale factor)
    world_pt /= world_pt[2]
    return world_pt[0], world_pt[1]

In [7]:
def get_ground_homography(camera_matrix, rvec, tvec):
    """
    Creates a Homography matrix that maps image pixels (u, v) 
    directly to ground coordinates (X, Y, 0).
    """
    R, _ = cv2.Rodrigues(rvec)
    
    # The transformation matrix from World to Camera is [R | t]
    # We only care about the mapping to the Z=0 plane
    # So we take columns 1, 2, and 4 of the projection matrix
    rt_extrinsic = np.column_stack((R[:, 0], R[:, 1], tvec))
    
    # H = K * [r1 r2 t]
    homography = np.dot(camera_matrix, rt_extrinsic)
    
    # We want the inverse to go from Image -> World
    h_inv = np.linalg.inv(homography)
    return h_inv

In [8]:
def draw_homography_overlay(image, cam_mtx, rvec, tvec):
    overlay = image.copy()
    
    # 1. FLOOR OVERLAY (Green)
    # Define the 4 corners of the court floor in World Space (X, Y, Z=0)
    floor_corners = np.array([
        [0, 0, 0], [9, 0, 0], [9, 18, 0], [0, 18, 0]
    ], dtype=np.float32)
    
    # Project floor to image
    floor_img_pts, _ = cv2.projectPoints(floor_corners, rvec, tvec, cam_mtx, np.zeros((4,1)))
    floor_img_pts = floor_img_pts.reshape(-1, 2).astype(np.int32)
    
    # Draw floor polygon
    cv2.fillPoly(overlay, [floor_img_pts], (0, 255, 0)) # Green floor

    # 2. NET OVERLAY (Blue)
    # Define the 4 corners of the net plane in World Space (X, Y=9, Z)
    net_corners = np.array([
        [0, 9, 0], [9, 9, 0], [9, 9, NET_HEIGHT], [0, 9, NET_HEIGHT]
    ], dtype=np.float32)
    
    # Project net to image
    net_img_pts, _ = cv2.projectPoints(net_corners, rvec, tvec, cam_mtx, np.zeros((4,1)))
    net_img_pts = net_img_pts.reshape(-1, 2).astype(np.int32)
    
    # Draw net polygon
    cv2.fillPoly(overlay, [net_img_pts], (255, 0, 0)) # Blue net

    # 3. Blend overlay with original image
    alpha = 0.3  # Transparency factor
    return cv2.addWeighted(overlay, alpha, image, 1 - alpha, 0)

In [20]:
# --- OPTIMIZATION STEP ---


cam_mtx, rvec, tvec = calibrate_camera(collected_data, world_points_map, img_w=3840, img_h=2160)

if cam_mtx is not None:
    # 1. Project the Net Top Left to see if it matches the image
    net_tl_world = np.array([world_points_map["NET_T_LEFT"]], dtype=np.float32)
    projected_pt, _ = cv2.projectPoints(net_tl_world, rvec, tvec, cam_mtx, np.zeros((4,1)))
    print(f"Actual Image Pt: [573.488,369.349]")
    print(f"Projected 3D Net Pt: {projected_pt.ravel()}")
    print(f"Error is{[573.488,369.349] - projected_pt.ravel()} ")

    # 2. Get the full 3D position of the "detected" net center
    # Note: net_w here acts as the 'real diameter' scale
    ball_x = (1110.581 + 410.468) / 2
    ball_y = (236.174 + 265.529) / 2
    ball_w_px = 19
    h_inv = get_ground_homography(cam_mtx,rvec,tvec)
    nh_inv = get_net_homography(cam_mtx,rvec,tvec)
    world_p = image_to_world_ground(1467.015,754.13,h_inv)
    world_p2 = image_to_net_coords(1563.401,324.402,nh_inv)
    # Using your function (ensure BALL_REAL_DIAMETER is set to net width, e.g., 9.0m)
    # We temporarily override the diameter constant for the net calculation
    P_world = get_ball_height(ball_x, ball_y, ball_w_px, cam_mtx, rvec, tvec)
    print(f"the Ground Point is at: {world_p}")
    print(f"the Net Point is at: {world_p2}")
    print(f"--- 3D World Position of Net Center ---")
    print(f"X: {P_world[0]:.2f}m, Y: {P_world[1]:.2f}m, Z (Height): {P_world[2]:.2f}m")
    img = cv2.imread(r"C:\Users\morde\Desktop\volleyball\raw footage\netball_imgs\net_ball1_frame_1450.jpg") 
if img is not None:
    # Resize image to match the img_w/img_h used in calibration if necessary
    img = cv2.resize(img, (3840, 2160))
    
    # Generate the visualization
    result_img = draw_homography_overlay(img, cam_mtx, rvec, tvec)
    
    # Add dots for your collected_data points for verification
    for name, ix, iy in collected_data:
        cv2.circle(result_img, (int(ix), int(iy)), 5, (0, 0, 255), -1)
        cv2.putText(result_img, name, (int(ix)+10, int(iy)), 
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

    cv2.imshow("Calibration Overlay", result_img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

Actual Image Pt: [573.488,369.349]
Projected 3D Net Pt: [1113.3717   638.76025]
Error is[-539.8837041  -269.41125391] 
the Ground Point is at: (1.3894238188747292, 13.85355593430862)
the Net Point is at: (2.623439923072273, 4.238237195560694)
--- 3D World Position of Net Center ---
X: -8.17m, Y: 30.53m, Z (Height): -0.10m


In [11]:
import pandas as pd
import numpy as np

def process_ball_data_by_track(input_path, output_path, cam_mtx, rvec, tvec, window_size=5, fps=30):
    """
    Processes multi-track detections:
    - Preserves all original columns.
    - Groups by 'track_id' to prevent cross-track smoothing.
    - Uses Median filter for outlier rejection.
    """
    # 1. Load data
    df = pd.read_csv(input_path)
    
    # Ensure data is sorted by track and then frame for rolling window
    df = df.sort_values(by=['track_id', 'frame']).reset_index(drop=True)
    
    # 2. Pre-calculate the average size (Diameter)
    df['avg_size'] = (df['w'] + df['h']) / 2
    
    # 3. Apply Smoothing PER TRACK
    # We use groupby('track_id') so the window resets for every new ball/player
    grouped = df.groupby('track_id', group_keys=False)

    def smooth_group(group):
        # Rolling Median to kill outliers, followed by a small Mean to smooth quantization
        for col in ['avg_size', 'cx', 'cy']:
            median_val = group[col].rolling(window=window_size, center=True, min_periods=1).median()
            group[f'smoothed_{col}'] = median_val.rolling(window=3, center=True, min_periods=1).mean()
        return group

    df = grouped.apply(smooth_group)

    # 4. Convert 2D Smoothed points to 3D World Coordinates
    # (Applying the function row-by-row)
    def row_to_3d(row):
        # Ensure your get_ball_height function is defined globally
        return get_ball_height(
            row['smoothed_cx'], 
            row['smoothed_cy'], 
            row['smoothed_avg_size'], 
            cam_mtx, rvec, tvec
        )

    # Calculate 3D points
    world_points = df.apply(row_to_3d, axis=1)
    df[['world_x', 'world_y', 'world_z']] = pd.DataFrame(world_points.tolist(), index=df.index)

    # 5. Calculate Speed PER TRACK
    def calculate_speed(group):
        # Calculate diffs within the group
        dx = group['world_x'].diff()
        dy = group['world_y'].diff()
        dz = group['world_z'].diff()
        
        dist = np.sqrt(dx**2 + dy**2 + dz**2)
        raw_speed = dist * fps * 3.6
        
        # Apply median filter to speed to ignore single-frame tracking jumps
        group['speed_kph'] = raw_speed.rolling(window=window_size, center=True, min_periods=1).median()
        return group

    df = df.groupby('track_id', group_keys=False).apply(calculate_speed)

    # 6. Save (All original columns + new 3D/Speed columns)
    df.to_csv(output_path, index=False)
    print(f"Processed {len(df)} rows across {df['track_id'].nunique()} tracks.")

# --- Usage ---
# process_ball_data_by_track("detections.csv", "output_3d_multi_track.csv", cam_mtx, rvec, tvec)

In [21]:
# --- CONFIGURATION ---
INPUT_CSV = r"C:\Users\morde\Desktop\volleyball\raw footage\table_data\netball_set1_detections_cleaned.csv"      # Your input file with cx, cy, w, h
OUTPUT_CSV = "ball_coords_net_3d.csv"     # Where the 3D data will be saved
BALL_REAL_DIAMETER = 0.21   
# --- MAIN EXECUTION ---

# 1. Perform Calibration (from your snippet)
cam_mtx, rvec, tvec = calibrate_camera(collected_data, world_points_map, img_w=3840, img_h=2160)

if cam_mtx is not None:
    # 2. Run the processing pipeline
    process_ball_data_by_track(INPUT_CSV, OUTPUT_CSV, cam_mtx, rvec, tvec)
else:
    print("Error: Camera calibration failed. Check your collected_data.")

  df = grouped.apply(smooth_group)


Processed 15923 rows across 97 tracks.


  df = df.groupby('track_id', group_keys=False).apply(calculate_speed)
