In [1]:
pip install mediapipe

Collecting mediapipe
  Downloading mediapipe-0.10.21-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (9.7 kB)
Collecting sounddevice>=0.4.4 (from mediapipe)
  Downloading sounddevice-0.5.1-py3-none-any.whl.metadata (1.4 kB)
Downloading mediapipe-0.10.21-cp311-cp311-manylinux_2_28_x86_64.whl (35.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m35.6/35.6 MB[0m [31m9.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading sounddevice-0.5.1-py3-none-any.whl (32 kB)
Installing collected packages: sounddevice, mediapipe
Successfully installed mediapipe-0.10.21 sounddevice-0.5.1


In [None]:
def calculate_scaling_factor(self, virtual_points):
        """
        Function 2:
        - Calculates the scaling factor using two corresponding points.
        - Uses the first two detected virtual points and the first two physical points from P_A.
        Returns the scaling factor (cm per pixel).
        """
        if virtual_points.shape[0] < 2:
            raise ValueError("Not enough virtual points detected for scaling factor calculation.")
        virtual_p1 = np.array(virtual_points[0])
        virtual_p2 = np.array(virtual_points[1])
        physical_p1 = self.P_A[:, 0]  # First physical point (column)
        physical_p2 = self.P_A[:, 1]  # Second physical point
        virtual_distance = np.linalg.norm(virtual_p2 - virtual_p1)
        physical_distance = np.linalg.norm(physical_p2 - physical_p1)
        return physical_distance / virtual_distance


In [22]:
## to calculating scaling factor

import numpy as np


# Define the coordinates
virtual_point_7 = np.array([0, 1802, 1161])
virtual_point_8 = np.array([0, 660, 2343])


physical_point_14 = np.array([0,112, 194])
physical_point_15 = np.array([0,112,166])

# Calculate Euclidean distance in virtual coordinates (pixels)
virtual_distance = np.linalg.norm(virtual_point_7 - virtual_point_8)

# Calculate Euclidean distance in physical coordinates (cm)
physical_distance = np.linalg.norm(physical_point_14 - physical_point_15)

# Calculate the scaling factor
scaling_factor = physical_distance / virtual_distance

virtual_distance, physical_distance, scaling_factor

(1643.5595517047748, 28.0, 0.017036194381248385)

In [43]:
import numpy as np
import cv2
from google.colab.patches import cv2_imshow

###############################################
# CLASS 1: Processing the Static Poster (A)  #
###############################################
class PosterAProcessor:
    def __init__(self):
        # Physical coordinates for the 15 black dots on Poster A (given manually)
        # Each point is (0, y, z) – stored as columns in a 3x15 matrix.
        self.P_A = np.array([
            (0, 154, 222),
            (0, 154, 194),
            (0, 154, 166),
            (0, 147, 208),
            (0, 147, 180),
            (0, 133, 222),
            (0, 133, 208),
            (0, 133, 194),
            (0, 133, 180),
            (0, 133, 166),
            (0, 119, 208),
            (0, 119, 180),
            (0, 112, 222),
            (0, 112, 194),
            (0, 112, 166)
        ]).T  # Shape: (3, 15)
        # Default scale factor; in practice, you will compute this from two corresponding points.
        self.scale_factor_A = 0.017

    def detect_black_points(self, image_path):
        """
        Function 1:
        - Reads an input image.
        - Processes the image to detect the 15 black dots on Poster A.
        - Converts the detected centroids into the required format: (0, y, z)
        - Overlays red circles and blue numbering for visualization.
        Returns an array of virtual points (shape: (15, 3)).
        """
        # Load the input image
        image = cv2.imread(image_path)
        if image is None:
            raise ValueError("Error: Image not found or cannot be loaded.")

        # Convert the image to grayscale
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # Crop the region of interest (ROI) where the poster is likely located
        height, width = gray.shape
        roi = gray[int(height * 0.2):int(height * 0.8), int(width * 0.2):int(width * 0.8)]
        offset_y = int(height * 0.2)
        offset_x = int(width * 0.2)

        # Apply Gaussian blur to reduce noise
        blurred = cv2.GaussianBlur(roi, (5, 5), 0)

        # Threshold to isolate black points on the white poster
        _, binary = cv2.threshold(blurred, 60, 255, cv2.THRESH_BINARY_INV)

        # Detect contours from the binary image
        contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        # Prepare a list to store the coordinates of the detected black points
        # For Poster A, format as (0, y, z) where image x is interpreted as z.
        coordinates = []
        for contour in contours:
            area = cv2.contourArea(contour)
            if 100 < area < 1000:  # Filter out noise and overly large areas
                perimeter = cv2.arcLength(contour, True)
                approx = cv2.approxPolyDP(contour, 0.02 * perimeter, True)
                if len(approx) >= 4:
                    M = cv2.moments(contour)
                    if M["m00"] != 0:
                        cX = int(M["m10"] / M["m00"]) + offset_x
                        cY = int(M["m01"] / M["m00"]) + offset_y
                        # Format as (0, y, z)
                        coordinates.append((0, cY, cX))

        # Sort the coordinates by y then by z (top-to-bottom, left-to-right)
        coordinates.sort(key=lambda point: (point[1], point[2]))
        # Limit to 15 points
        coordinates = coordinates[:15]

        # Annotate points on the image for visualization
        for idx, (a, y, z) in enumerate(coordinates):
            cv2.circle(image, (z, y), 5, (0, 0, 255), -1)  # Red circle
            cv2.putText(image, f"{idx + 1}", (z - 10, y - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 2)

        output_path = image_path.replace(".png", "_processed.png")
        cv2.imwrite(output_path, image)
        print(f"Processed image saved at: {output_path}")
        for idx, (a, y, z) in enumerate(coordinates):
            print(f"Point {idx + 1}: (0, {y}, {z})")

        cv2_imshow(image)
        virtual_points = np.array(coordinates)
        return virtual_points

    def calculate_scaling_factor(self, virtual_points):
        """
        Function 2:
        - Calculates the scaling factor using two corresponding points.
        - Uses the first two detected virtual points and the first two physical points from P_A.
        Returns the scaling factor (physical units per pixel).
        """
        if virtual_points.shape[0] < 2:
            raise ValueError("Not enough virtual points detected for scaling factor calculation.")
        virtual_p1 = np.array(virtual_points[0])
        virtual_p2 = np.array(virtual_points[1])
        physical_p1 = self.P_A[:, 0]  # First physical point (column)
        physical_p2 = self.P_A[:, 1]  # Second physical point
        virtual_distance = np.linalg.norm(virtual_p2 - virtual_p1)
        physical_distance = np.linalg.norm(physical_p2 - physical_p1)
        return physical_distance / virtual_distance

    def compute_RT(self, virtual_points, scale_factor):
        """
        Function 3:
        - Converts virtual points to centimeters using the scaling factor.
        - Computes the centroids of the scaled virtual points (Q) and the physical points (P_A).
        - Constructs the covariance matrix and applies SVD.
        - Calculates the rotation matrix (R) and translation vector (T) such that Q ≈ R * P_A + T.
        Returns R and T.
        """
        Q = (virtual_points * scale_factor).T  # Shape: (3, 15)
        P = self.P_A  # Physical coordinates (3,15)
        Q_bar = np.mean(Q, axis=1, keepdims=True)
        P_bar = np.mean(P, axis=1, keepdims=True)
        Cov = (P - P_bar) @ (Q - Q_bar).T
        U, S, Vt = np.linalg.svd(Cov)
        R = Vt.T @ U.T
        if np.linalg.det(R) < 0:
            Vt[-1, :] *= -1
            R = Vt.T @ U.T
        T = Q_bar - R @ P_bar
        return R, T

    def visualize_transformation(self, image_path, R, T, scale_factor):
        """
        Function 4:
        - Uses R and T to transform the physical coordinates (P_A) into virtual coordinates.
        - Q_hat = (R @ P_A) + T; then converts Q_hat back to pixel units by dividing by scale_factor.
        - The output format is (0, y, x) for Poster A.
        - Overlays these red points on the original image for visualization.
        Returns the computed virtual coordinates from physical data.
        """
        Q_hat = (R @ self.P_A) + T  # Shape: (3, 15)
        Q_hat = Q_hat / scale_factor  # Convert back to pixel units
        virtual_from_physical = []
        for i in range(Q_hat.shape[1]):
            a = Q_hat[0, i]
            y = Q_hat[1, i]
            x = Q_hat[2, i]
            virtual_from_physical.append((0, y, x))
        virtual_from_physical = np.array(virtual_from_physical)

        image = cv2.imread(image_path)
        if image is None:
            raise ValueError(f"Error: Could not load image from {image_path}")
        for i, (a, y, x) in enumerate(virtual_from_physical):
            cv2.circle(image, (int(x), int(y)), 5, (0, 0, 255), -1)
            cv2.putText(image, str(i+1), (int(x)-10, int(y)-10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
        cv2_imshow(image)
        cv2.imwrite('posterA_transformation_visualization.jpg', image)
        return virtual_from_physical




In [49]:
# ----------------- USAGE EXAMPLE ----------------- #
# Provide the correct image path (adjust the file extension as needed)
image_path = '/content/poster_A.jpg'  # Replace with your actual image path

# Create an instance of PosterAProcessor
posterA = PosterAProcessor()

# Function 1: Detect 15 virtual black points on Poster A.
virtual_points = posterA.detect_black_points(image_path)
print("Detected Virtual Points (Poster A):")
print(virtual_points)

# Function 2: Calculate the scaling factor using the detected virtual points.
# scale_factor = posterA.calculate_scaling_factor(virtual_points)
# print("Calculated Scaling Factor:", scale_factor)

# Optionally, you can override the computed scale factor with a known value:
scale_factor = 0.083  #posterA.scale_factor_A

# Function 3: Compute R and T using the virtual points and scale factor.
R, T = posterA.compute_RT(virtual_points, scale_factor)
print("Computed R:")
print(R)
print("Computed T:")
print(T)

# Function 4: Visualize the transformation (mapping physical coordinates back to virtual coordinates).
virtual_from_physical = posterA.visualize_transformation(image_path, R, T, scale_factor)
print("Virtual Coordinates from Physical (Poster A):")
print(virtual_from_physical)

Output hidden; open in https://colab.research.google.com to view.

In [50]:


###############################################
# CLASS 2: Processing Person Joint Landmarks  #
###############################################
class PosterAJointProcessor(PosterAProcessor):
    def __init__(self):
        self.mp_pose = mp.solutions.pose
        self.pose = self.mp_pose.Pose(min_detection_confidence=0.5,
                                      min_tracking_confidence=0.5)

    def detect_and_visualize_joints(self, image_path):
        """
        Function 1:
        - Uses MediaPipe Pose to detect 7 joint locations from an image containing a person.
        - You can select specific landmark indices (for example, [11, 12, 23, 24, 25, 26, 28]).
        - Visualizes the detected joints by overlaying red circles and blue labels.
        Returns an array of 7 virtual joint coordinates in 2D pixel space (x, y).
        """
        image = cv2.imread(image_path)
        if image is None:
            raise ValueError("Error: Image not found or cannot be loaded.")
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        result = self.pose.process(image_rgb)

        desired_indices = [11, 12, 23, 24, 25, 26, 28]  # Modify as needed
        joints = []
        if result.pose_landmarks:
            for idx, landmark in enumerate(result.pose_landmarks.landmark):
                if idx in desired_indices:
                    x = int(landmark.x * image.shape[1])
                    y = int(landmark.y * image.shape[0])
                    joints.append((x, y))
                    cv2.circle(image, (x, y), 5, (0, 0, 255), -1)
                    cv2.putText(image, str(idx), (x-10, y-10),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 2)
        else:
            print("No landmarks detected.")
        cv2_imshow(image)
        cv2.imwrite('posterA_joints_detected.jpg', image)
        return np.array(joints)

    def map_joints_to_physical(self, virtual_joints, R, T, scale_factor):
        """
        Function 2 & 3:
        - Converts 2D virtual joint coordinates into 3D virtual points by adding a zero for the missing dimension,
          forming (0, y, x) for Poster A.
        - Then maps these 3D virtual points to 3D physical coordinates using the transformation parameters.
        Returns the physical 3D coordinates for the 7 joints.
        """
        # Augment 2D joints to 3D points: (0, y, x)
        virtual_3d = []
        for (x, y) in virtual_joints:
            virtual_3d.append((0, y, x))
        virtual_3d = np.array(virtual_3d)

        # Apply the transformation: physical = R_inv * ( (virtual_3d * scale_factor) - T )
        R_inv = np.linalg.inv(R)
        Q_scaled = virtual_3d * scale_factor
        Q_translated = Q_scaled.T - T
        physical_joints = (R_inv @ Q_translated).T
        return physical_joints

#############################
# USAGE EXAMPLE           #
#############################
# For Poster A (Static Poster Processing)
image_path_posterA = '/content/IMG-A1.jpg'  # Replace with your actual image path for Poster A

posterA_proc = PosterAProcessor()

# Function 1: Detect 15 virtual black dots on Poster A.
virtual_points = posterA_proc.detect_black_dots(image_path_posterA)
print("Detected Virtual Points (Poster A):\n", virtual_points)

# Function 2: Calculate scaling factor using the first two detected virtual points and corresponding physical points.
if virtual_points.shape[0] < 2:
    print("Not enough points detected for scaling factor; using default.")
    scale_factor = posterA_proc.scale_factor_A
else:
    scale_factor = posterA_proc.calculate_scaling_factor(virtual_points)
print("Calculated Scaling Factor:", scale_factor)

# Function 3: Compute R and T using SVD between the scaled virtual points and the known physical coordinates.
R, T = posterA_proc.compute_RT(virtual_points, scale_factor)
print("Computed R:\n", R)
print("Computed T:\n", T)

# Function 4: Visualize the transformation by mapping physical coordinates (P_A) into virtual coordinates.
virtual_from_physical = posterA_proc.visualize_transformation(image_path_posterA, R, T, scale_factor)
print("Virtual Coordinates from Physical (Poster A):\n", virtual_from_physical)

# For Poster A Joint Processing (Person with joints)
image_path_joints = '/content/IMG-A2.jpg'  # Replace with your image path containing a person
posterA_joint_proc = PosterAJointProcessor()

# Function 1 (Class 2): Detect and visualize 7 joint landmarks.
virtual_joints = posterA_joint_proc.detect_and_visualize_joints(image_path_joints)
print("Detected Virtual Joint Points:\n", virtual_joints)

# Functions 2 & 3 (Class 2): Map the detected virtual joints to 3D physical coordinates.
physical_joints = posterA_joint_proc.map_joints_to_physical(virtual_joints, R, T, scale_factor)
print("Mapped Physical Joint Coordinates:\n", physical_joints)


AttributeError: 'PosterAProcessor' object has no attribute 'detect_black_dots'

In [None]:
class PosterBProcessor:
    def __init__(self):
        self.mp_pose = mp.solutions.pose
        self.pose = self.mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5)
        self.mp_draw = mp.solutions.drawing_utils

    def detect_landmarks(self, image):
        """Detects landmarks in an image for Poster B."""
        if image is None:
            raise ValueError("Error: Image not found or cannot be loaded.")
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        result = self.pose.process(image_rgb)
        virtual_points = []
        if result.pose_landmarks:
            for idx, landmark in enumerate(result.pose_landmarks.landmark):
                x = int(landmark.x * image.shape[1])
                y = int(landmark.y * image.shape[0])
                virtual_points.append((x, y, 0))  # Format (x, y, 0)
        return np.array(virtual_points)

    def calculate_scaling_factor(self, virtual_p1, virtual_p2, physical_p1, physical_p2):
        """Calculates the scaling factor from virtual to physical coordinates."""
        virtual_distance = np.linalg.norm(virtual_p2 - virtual_p1)
        physical_distance = np.linalg.norm(physical_p2 - physical_p1)
        return physical_distance / virtual_distance


    P_B = np.array([
        (62, 154, 0),
        (90,154 , 0),
        (118, 154, 0),
        (76, 147, 0),
        (104, 147, 0),
        (62, 133, 0),
        (76, 133, 0),
        (90, 133, 0),
        (104, 133, 0),
        (118, 133, 0),
        (76, 119, 0),
        (104, 119, 0),
        (62, 112, 0),
        (90, 112, 0),
        (118, 112, 0)
    ]).T
    def calculate_RT(self, P_B):
        """Calculates R and T matrices for transformation."""
        R = np.identity(3)  # Placeholder
        T = np.zeros((3, 1))  # Placeholder
        return R, T



In [None]:
class CoordinateFusion:
    def convert_virtual_to_physical(self, R, T, Q, scale_factor):
        """Converts virtual points Q to physical coordinates P."""
        R_inv = np.linalg.inv(R)
        Q_scaled = Q * scale_factor
        Q_translated = Q_scaled.T - T
        P = (R_inv @ Q_translated).T
        return P

    def fuse_3D_coordinates(self, P1, P2):
        """Fuses coordinates from both posters into a single 3D representation."""
        return np.column_stack((P2[:, 0], P1[:, 1], P1[:, 2]))