In [16]:
import cv2
import numpy as np
import mediapipe as mp

# Initialize mediapipe pose class.
mp_pose = mp.solutions.pose

# Create a function to calculate Euclidean distance between two points.
def calculate_distance(point1, point2):
    """Return the Euclidean distance between two 2-D points.

    Only the first two components of each point are read (index 0 as x,
    index 1 as y); any further components are ignored.
    """
    dx = point1[0] - point2[0]
    dy = point1[1] - point2[1]
    return np.sqrt(dx ** 2 + dy ** 2)

# Load the reference image.
# NOTE(review): this is an absolute, machine-specific path; consider a
# project-relative path like the one used for the video below.
reference_image_path = r'c:\Users\Dhruv\Downloads\1_frame_779.jpg'
reference_image = cv2.imread(reference_image_path)
if reference_image is None:
    # cv2.imread returns None (it does not raise) when the file cannot be read.
    raise FileNotFoundError(f"Could not read reference image: {reference_image_path}")

# Create a Pose object. The same instance processes the reference image and
# every video frame below.
pose = mp_pose.Pose(static_image_mode=True, min_detection_confidence=0.3, model_complexity=2)

# Process the reference image (converted from OpenCV's BGR to RGB first).
reference_results = pose.process(cv2.cvtColor(reference_image, cv2.COLOR_BGR2RGB))

# Retrieve the landmarks from the reference image as (x, y) pairs. They are
# multiplied by the frame width/height below to obtain pixel positions.
reference_landmarks = []
if reference_results.pose_landmarks:
    reference_landmarks = [(lm.x, lm.y) for lm in reference_results.pose_landmarks.landmark]
if not reference_landmarks:
    # The skeleton overlay indexes into this list for every connection, so an
    # empty list would otherwise fail with an IndexError on the first frame.
    raise RuntimeError("No pose was detected in the reference image.")

# Define the input video file path.
input_video_path = r'dataset/videos/icb.mp4'  # Replace with your input video file path.
# input_video_path = r'dataset/videos/dataset_video.mp4'

# Open the input video file and fail loudly instead of silently showing nothing.
input_video = cv2.VideoCapture(input_video_path)
if not input_video.isOpened():
    raise IOError(f"Could not open input video: {input_video_path}")

# Get video properties.
frame_width = int(input_video.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(input_video.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = int(input_video.get(cv2.CAP_PROP_FPS))

# Distance (in the same un-scaled units as the landmark coordinates) below
# which a frame keypoint counts as matching the reference keypoint.
threshold = 0.1  # You can adjust this threshold as needed.

MATCH_COLOR = (0, 255, 0)        # green (BGR)
MISMATCH_COLOR = (0, 0, 255)     # red (BGR)
SKELETON_COLOR = (255, 255, 255)  # white

# try/finally guarantees the capture and the window are released even when the
# kernel is interrupted or a frame raises.
try:
    # Create a window to display the output video.
    cv2.namedWindow("Processed Video", cv2.WINDOW_NORMAL)

    # Iterate through the frames in the input video.
    while True:
        ret, frame = input_video.read()
        if not ret:
            break

        height, width = frame.shape[:2]

        # Process the frame to detect pose landmarks.
        frame_results = pose.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        # Retrieve the landmarks from the video frame (empty when no pose is found).
        frame_landmarks = []
        if frame_results.pose_landmarks:
            frame_landmarks = [(lm.x, lm.y) for lm in frame_results.pose_landmarks.landmark]

        # Create a copy of the frame for drawing.
        frame_copy = frame.copy()

        # Draw white lines between connected keypoints of the reference pose.
        for start_index, end_index in mp_pose.POSE_CONNECTIONS:
            start_x, start_y = reference_landmarks[start_index]
            end_x, end_y = reference_landmarks[end_index]
            cv2.line(
                frame_copy,
                (int(start_x * width), int(start_y * height)),
                (int(end_x * width), int(end_y * height)),
                SKELETON_COLOR,
                2,
            )

        # Compare each frame keypoint with the reference keypoint of the same
        # index; zip stops at the shorter list, so missing detections are skipped.
        for reference_point, frame_point in zip(reference_landmarks, frame_landmarks):
            distance = calculate_distance(reference_point, frame_point)
            color = MATCH_COLOR if distance < threshold else MISMATCH_COLOR
            center = (int(frame_point[0] * width), int(frame_point[1] * height))
            cv2.circle(frame_copy, center, 5, color, -1)

        # Display the modified frame with keypoints and white lines.
        cv2.imshow("Processed Video", frame_copy)

        # Check for user input to exit the loop.
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Release the video object and close the display window.
    input_video.release()
    cv2.destroyAllWindows()

rectangular frame

In [81]:
import cv2
import numpy as np

# Load the YOLOv4 network for person detection through OpenCV's DNN module,
# which reads the Darknet .weights/.cfg files directly.
net = cv2.dnn.readNet(r"yolov4.weights", r"yolov4.cfg")

# Set YOLO classes and layer names.
classes = ["person"]
layer_names = net.getUnconnectedOutLayersNames()

PERSON_CLASS_ID = 0         # "0" corresponds to the "person" class.
CONFIDENCE_THRESHOLD = 0.5  # Minimum class score for a detection to be kept.

# Initialize the webcam or another video source.
video = cv2.VideoCapture(0)  # 0 represents the default camera, you can change this to your desired video source.
if not video.isOpened():
    raise IOError("Could not open video source 0.")

# try/finally guarantees the camera and the window are released even when the
# kernel is interrupted or a frame raises.
try:
    # Initialize a window to display the live video feed.
    cv2.namedWindow("YOLO Person Detection", cv2.WINDOW_NORMAL)

    # Iterate through the live video frames.
    while True:
        ret, frame = video.read()
        if not ret:
            break

        frame_height, frame_width = frame.shape[:2]
        # Scales the box fields (center x, center y, width, height) to pixels.
        box_scale = np.array([frame_width, frame_height, frame_width, frame_height])

        # Perform YOLO object detection for persons.
        blob = cv2.dnn.blobFromImage(frame, 1/255.0, (416, 416), swapRB=True, crop=False)
        net.setInput(blob)
        detections = net.forward(layer_names)

        # All person boxes found in this frame, plus the most confident one.
        detected_persons = []
        best_person = None
        best_confidence = 0.0

        # Iterate through the detections of every output layer.
        for detection in detections:
            for obj in detection:
                scores = obj[5:]
                class_id = np.argmax(scores)
                confidence = scores[class_id]

                if class_id != PERSON_CLASS_ID or confidence <= CONFIDENCE_THRESHOLD:
                    continue

                # Extract bounding box coordinates (top-left corner + size).
                center_x, center_y, width, height = map(int, obj[0:4] * box_scale)
                x, y = int(center_x - width / 2), int(center_y - height / 2)

                person_box = (x, y, width, height)
                detected_persons.append(person_box)

                # The raw output order is arbitrary, so track the most
                # confident person instead of whichever box came first.
                if confidence > best_confidence:
                    best_confidence = confidence
                    best_person = person_box

        # Draw the tracked person.
        if best_person is not None:
            x, y, width, height = best_person
            cv2.rectangle(frame, (x, y), (x + width, y + height), (0, 255, 0), 2)

        # Display the frame with the detected person.
        cv2.imshow("YOLO Person Detection", frame)

        # Check for user input to exit the loop.
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Release the video source and close the window.
    video.release()
    cv2.destroyAllWindows()

dynamic approach

In [17]:
import cv2
import numpy as np
import mediapipe as mp

# Initialize mediapipe pose class.
mp_pose = mp.solutions.pose

# Create a function to calculate Euclidean distance between two points.
def calculate_distance(point1, point2):
    """Return the Euclidean distance between two 2-D points.

    Only the first two components of each point are read (index 0 as x,
    index 1 as y); any further components are ignored.
    """
    dx = point1[0] - point2[0]
    dy = point1[1] - point2[1]
    return np.sqrt(dx ** 2 + dy ** 2)

# Load the reference image.
# NOTE(review): this is an absolute, machine-specific path; consider a
# project-relative path like the one used for the video below.
reference_image_path = r'c:\Users\Dhruv\Downloads\1_frame_779.jpg'
reference_image = cv2.imread(reference_image_path)
if reference_image is None:
    # cv2.imread returns None (it does not raise) when the file cannot be read.
    raise FileNotFoundError(f"Could not read reference image: {reference_image_path}")

# Create a Pose object. The same instance processes the reference image and
# every video frame below.
pose = mp_pose.Pose(static_image_mode=True, min_detection_confidence=0.3, model_complexity=2)

# Process the reference image (converted from OpenCV's BGR to RGB first).
reference_results = pose.process(cv2.cvtColor(reference_image, cv2.COLOR_BGR2RGB))

# Retrieve the landmarks from the reference image as (x, y) pairs.
reference_landmarks = []
if reference_results.pose_landmarks:
    reference_landmarks = [(lm.x, lm.y) for lm in reference_results.pose_landmarks.landmark]
if not reference_landmarks:
    print("Warning: no pose detected in the reference image; frames will be shown without keypoints.")

# Define the input video file path.
input_video_path = r'dataset/videos/ice.mp4'

# Open the input video file and fail loudly instead of silently showing nothing.
input_video = cv2.VideoCapture(input_video_path)
if not input_video.isOpened():
    raise IOError(f"Could not open input video: {input_video_path}")

# Distance below which an offset-adjusted frame keypoint counts as matching.
threshold = 0.1

MATCH_COLOR = (0, 255, 0)     # green (BGR)
MISMATCH_COLOR = (0, 0, 255)  # red (BGR)

# try/finally guarantees the capture and the window are released even when the
# kernel is interrupted or a frame raises.
try:
    # Iterate through the frames in the input video.
    while True:
        ret, frame = input_video.read()
        if not ret:
            break

        height, width = frame.shape[:2]

        # Process the frame to detect pose landmarks.
        frame_results = pose.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        # Retrieve the landmarks from the video frame (empty when no pose is found).
        frame_landmarks = []
        if frame_results.pose_landmarks:
            frame_landmarks = [(lm.x, lm.y) for lm in frame_results.pose_landmarks.landmark]

        # Create a copy of the frame for drawing.
        frame_copy = frame.copy()

        # The offset only exists when both poses were detected, so the
        # comparison that depends on it lives inside the same guard.
        if frame_landmarks and reference_landmarks:
            # Translation that moves the frame's landmark 0 onto the reference's landmark 0.
            initial_offset = np.array(frame_landmarks[0]) - np.array(reference_landmarks[0])

            for reference_point, frame_point in zip(reference_landmarks, frame_landmarks):
                adjusted_frame_point = (
                    frame_point[0] - initial_offset[0],
                    frame_point[1] - initial_offset[1],
                )

                # Calculate the Euclidean distance between adjusted keypoints.
                distance = calculate_distance(reference_point, adjusted_frame_point)
                color = MATCH_COLOR if distance < threshold else MISMATCH_COLOR

                # NOTE(review): the circle is drawn at the offset-adjusted
                # position, not at the keypoint's detected position in the
                # frame - confirm this is the intended visualisation.
                center = (int(adjusted_frame_point[0] * width), int(adjusted_frame_point[1] * height))
                cv2.circle(frame_copy, center, 5, color, -1)

        # Display the modified frame with keypoints.
        cv2.imshow("Processed Video", frame_copy)

        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Release the video object and close the display window.
    input_video.release()
    cv2.destroyAllWindows()


perfect

In [18]:
import cv2
import numpy as np
import mediapipe as mp

# Initialize mediapipe pose class.
mp_pose = mp.solutions.pose

# Create a function to calculate Euclidean distance between two points.
def calculate_distance(point1, point2):
    """Return the Euclidean distance between two 2-D points.

    Only the first two components of each point are read (index 0 as x,
    index 1 as y); any further components are ignored.
    """
    dx = point1[0] - point2[0]
    dy = point1[1] - point2[1]
    return np.sqrt(dx ** 2 + dy ** 2)

# Load the reference image.
# reference_image = cv2.imread(r'c:\Users\Dhruv\Downloads\1_frame_779.jpg')
reference_image_path = r'dataset/amit_reference.jpg'
reference_image = cv2.imread(reference_image_path)
if reference_image is None:
    # cv2.imread returns None (it does not raise) when the file cannot be read.
    raise FileNotFoundError(f"Could not read reference image: {reference_image_path}")

# Create a Pose object. The same instance processes the reference image and
# every video frame below.
pose = mp_pose.Pose(static_image_mode=True, min_detection_confidence=0.3, model_complexity=2)

# Process the reference image (converted from OpenCV's BGR to RGB first).
reference_results = pose.process(cv2.cvtColor(reference_image, cv2.COLOR_BGR2RGB))

# Retrieve the landmarks from the reference image as (x, y) pairs.
reference_landmarks = []
if reference_results.pose_landmarks:
    reference_landmarks = [(lm.x, lm.y) for lm in reference_results.pose_landmarks.landmark]
if not reference_landmarks:
    print("Warning: no pose detected in the reference image; frames will be shown without keypoints.")

# Define the input video file path.
input_video_path = r'dataset/test.mp4'

# Open the input video file and fail loudly instead of silently showing nothing.
input_video = cv2.VideoCapture(input_video_path)
if not input_video.isOpened():
    raise IOError(f"Could not open input video: {input_video_path}")

# Distance below which a frame keypoint counts as matching the shifted reference.
threshold = 0.1

MATCH_COLOR = (0, 255, 0)     # green (BGR)
MISMATCH_COLOR = (0, 0, 255)  # red (BGR)

# try/finally guarantees the capture and the window are released even when the
# kernel is interrupted or a frame raises.
try:
    # Iterate through the frames in the input video.
    while True:
        ret, frame = input_video.read()
        if not ret:
            break

        height, width = frame.shape[:2]

        # Process the frame to detect pose landmarks.
        frame_results = pose.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        # Retrieve the landmarks from the video frame (empty when no pose is found).
        frame_landmarks = []
        if frame_results.pose_landmarks:
            frame_landmarks = [(lm.x, lm.y) for lm in frame_results.pose_landmarks.landmark]

        # Create a copy of the frame for drawing.
        frame_copy = frame.copy()

        # The offset only exists when both poses were detected, so the
        # comparison that depends on it lives inside the same guard.
        if frame_landmarks and reference_landmarks:
            # Translation between the reference's landmark 0 and the frame's
            # landmark 0; subtracting it moves the reference pose onto the
            # person in the frame.
            initial_offset = np.array(reference_landmarks[0]) - np.array(frame_landmarks[0])

            for reference_point, frame_point in zip(reference_landmarks, frame_landmarks):
                adjusted_reference_point = (
                    reference_point[0] - initial_offset[0],
                    reference_point[1] - initial_offset[1],
                )

                # Calculate the Euclidean distance between adjusted keypoints.
                distance = calculate_distance(adjusted_reference_point, frame_point)
                color = MATCH_COLOR if distance < threshold else MISMATCH_COLOR

                # Circles are drawn at the keypoint's detected position in the frame.
                center = (int(frame_point[0] * width), int(frame_point[1] * height))
                cv2.circle(frame_copy, center, 5, color, -1)

        # Display the modified frame with keypoints.
        cv2.imshow("Processed Video", frame_copy)

        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Release the video object and close the display window.
    input_video.release()
    cv2.destroyAllWindows()


correction

In [4]:
import cv2
import numpy as np
import mediapipe as mp

# Initialize mediapipe pose class.
mp_pose = mp.solutions.pose

# Create a function to calculate Euclidean distance between two points.
def calculate_distance(point1, point2):
    """Return the Euclidean distance between two 2-D points.

    Only the first two components of each point are read (index 0 as x,
    index 1 as y); any further components are ignored.
    """
    dx = point1[0] - point2[0]
    dy = point1[1] - point2[1]
    return np.sqrt(dx ** 2 + dy ** 2)

# Load the reference image.
# NOTE(review): this is an absolute, machine-specific path; consider a
# project-relative path like the one used for the video below.
reference_image_path = r'c:\Users\Dhruv\Downloads\1_frame_779.jpg'
reference_image = cv2.imread(reference_image_path)
if reference_image is None:
    # cv2.imread returns None (it does not raise) when the file cannot be read.
    raise FileNotFoundError(f"Could not read reference image: {reference_image_path}")

# Create a Pose object. The same instance processes the reference image and
# every video frame below.
pose = mp_pose.Pose(static_image_mode=True, min_detection_confidence=0.3, model_complexity=2)

# Process the reference image (converted from OpenCV's BGR to RGB first).
reference_results = pose.process(cv2.cvtColor(reference_image, cv2.COLOR_BGR2RGB))

# Retrieve the landmarks from the reference image as (x, y, keypoint_number).
reference_landmarks = []
if reference_results.pose_landmarks:
    reference_landmarks = [
        (lm.x, lm.y, i) for i, lm in enumerate(reference_results.pose_landmarks.landmark)
    ]
if not reference_landmarks:
    # Every frame keypoint is looked up in this list by its number, so an
    # empty list would otherwise fail with an IndexError on the first frame.
    raise RuntimeError("No pose was detected in the reference image.")

# Define the input video file path.
input_video_path = r'dataset/videos/ice.mp4'

# Open the input video file and fail loudly instead of silently showing nothing.
input_video = cv2.VideoCapture(input_video_path)
if not input_video.isOpened():
    raise IOError(f"Could not open input video: {input_video_path}")

# Define a dictionary to map condition names to the messages shown on screen.
keypoint_labels = {
    "Perfect": "Perfect! Keep Going :)",
    "HandsNotAtRightPosition": "Hands not at right position",
    "HandsNotAt90": "Hands not at 90 degree",
    "LegsNotTriangle": "Legs not triangle",
    "UnknownErrors": "Unknown errors",
    "LegDown": "Leg Down",
    "Idle": "idle, please perform asana",
    "BentRight": "bent right, straiten yourself",
    "BentLeft": "bent left, staiten yourself",
    "BentForward": "bent forward, straiten yourself"
}

# Shown when no pose is found in a frame, so an empty frame is not reported as "Perfect".
NO_POSE_LABEL = "No pose detected"

# Keypoint numbers that raise each error flag when they miss the reference.
# NOTE(review): in MediaPipe's numbering 12 is the right shoulder, 13 the left
# elbow, 24 the right hip and 25 the left knee - confirm these are the joints
# that were meant.
HANDS_NOT_AT_90_KEYPOINTS = {12, 13}
HANDS_NOT_JOINED_KEYPOINTS = {16, 17, 18, 19, 20, 21}
LEGS_NOT_TRIANGLE_KEYPOINTS = {24, 25}

# Distance below which a frame keypoint counts as matching the shifted reference.
threshold = 0.1

MATCH_COLOR = (0, 255, 0)     # green (BGR)
MISMATCH_COLOR = (0, 0, 255)  # red (BGR)

# try/finally guarantees the capture and the window are released even when the
# kernel is interrupted or a frame raises.
try:
    # Iterate through the frames in the input video.
    while True:
        ret, frame = input_video.read()
        if not ret:
            break

        height, width = frame.shape[:2]

        # Process the frame to detect pose landmarks.
        frame_results = pose.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        # Retrieve the landmarks from the video frame (empty when no pose is found).
        frame_landmarks = []
        if frame_results.pose_landmarks:
            frame_landmarks = [
                (lm.x, lm.y, i) for i, lm in enumerate(frame_results.pose_landmarks.landmark)
            ]

        # Create a copy of the frame for drawing.
        frame_copy = frame.copy()

        # Create flags to track the conditions.
        all_green = True
        hands_not_joined = False
        hands_not_at_90 = False
        legs_not_triangle = False

        if frame_landmarks:
            # Translation between the reference's landmark 0 and the frame's
            # landmark 0; subtracting it moves the reference pose onto the
            # person in the frame.
            initial_offset = np.array(reference_landmarks[0][:2]) - np.array(frame_landmarks[0][:2])

            for x, y, frame_keypoint_number in frame_landmarks:
                frame_point = (x, y)
                reference_x, reference_y = reference_landmarks[frame_keypoint_number][:2]
                adjusted_reference_point = (
                    reference_x - initial_offset[0],
                    reference_y - initial_offset[1],
                )

                # Calculate the Euclidean distance between adjusted keypoints.
                distance = calculate_distance(adjusted_reference_point, frame_point)
                center = (int(x * width), int(y * height))

                if distance < threshold:
                    cv2.circle(frame_copy, center, 5, MATCH_COLOR, -1)
                    continue

                # The keypoint misses the reference: record which error it belongs to.
                all_green = False
                if frame_keypoint_number in HANDS_NOT_AT_90_KEYPOINTS:
                    hands_not_at_90 = True
                if frame_keypoint_number in HANDS_NOT_JOINED_KEYPOINTS:
                    hands_not_joined = True
                if frame_keypoint_number in LEGS_NOT_TRIANGLE_KEYPOINTS:
                    legs_not_triangle = True
                cv2.circle(frame_copy, center, 5, MISMATCH_COLOR, -1)

        # Determine the label based on the conditions.
        if not frame_landmarks:
            label = NO_POSE_LABEL
        elif all_green:
            label = keypoint_labels["Perfect"]
        else:
            error_labels = []
            if hands_not_at_90:
                error_labels.append(keypoint_labels["HandsNotAt90"])
            if hands_not_joined:
                error_labels.append(keypoint_labels["HandsNotAtRightPosition"])
            if legs_not_triangle:
                error_labels.append(keypoint_labels["LegsNotTriangle"])

            # Separate labels on different lines; fall back to a generic message.
            label = "\n".join(error_labels) if error_labels else keypoint_labels["UnknownErrors"]

        # Display the label on the frame, one text row per line.
        for i, line in enumerate(label.split('\n')):
            cv2.putText(frame_copy, line, (10, 30 + i * 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

        # Display the modified frame with keypoints and label.
        cv2.imshow("Processed Video", frame_copy)

        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Release the video object and close the display window.
    input_video.release()
    cv2.destroyAllWindows()


detailed correction - only the bent left/right/forward detection is still wrong

In [9]:
import cv2
import numpy as np
import mediapipe as mp

# Initialize mediapipe pose class.
mp_pose = mp.solutions.pose

# Create a function to calculate Euclidean distance between two points.
def calculate_distance(point1, point2):
    """Return the Euclidean distance between two 2-D points.

    Only the first two components of each point are read (index 0 as x,
    index 1 as y); any further components are ignored.
    """
    dx = point1[0] - point2[0]
    dy = point1[1] - point2[1]
    return np.sqrt(dx ** 2 + dy ** 2)

# Load the reference image.
reference_image_path = r'dataset/amit_reference.jpg'
reference_image = cv2.imread(reference_image_path)
if reference_image is None:
    # cv2.imread returns None (it does not raise) when the file cannot be read.
    raise FileNotFoundError(f"Could not read reference image: {reference_image_path}")

# Create a Pose object. The same instance processes the reference image and
# every video frame below.
pose = mp_pose.Pose(static_image_mode=True, min_detection_confidence=0.3, model_complexity=2)

# Process the reference image (converted from OpenCV's BGR to RGB first).
reference_results = pose.process(cv2.cvtColor(reference_image, cv2.COLOR_BGR2RGB))

# Retrieve the landmarks from the reference image as (x, y, keypoint_number).
reference_landmarks = []
if reference_results.pose_landmarks:
    reference_landmarks = [
        (lm.x, lm.y, i) for i, lm in enumerate(reference_results.pose_landmarks.landmark)
    ]
if not reference_landmarks:
    # Every frame keypoint is looked up in this list by its number, so an
    # empty list would otherwise fail with an IndexError on the first frame.
    raise RuntimeError("No pose was detected in the reference image.")

# ********************************************************************************************************************
# input_video_path = r'dataset/videos/pcb.mp4'
# input_video_path = r'dataset/videos/icb.mp4'
# input_video_path = r'c:\Users\Dhruv\Downloads\WhatsApp Video 2023-08-22 at 18.04.54.mp4'
input_video_path = r'dataset/test.mp4'

# Open the input video file and fail loudly instead of silently showing nothing.
input_video = cv2.VideoCapture(input_video_path)
if not input_video.isOpened():
    raise IOError(f"Could not open input video: {input_video_path}")

# Define a dictionary to map condition names to the messages shown on screen.
keypoint_labels = {
    "Perfect": "Perfect! Keep Going :)",
    "HandsNotAtRightPosition": "Hands not at right position",
    "HandsNotAt90": "Hands not at 90 degree",
    "LegsNotTriangle": "Legs not triangle",
    "Hands_legs_wrong": "hands and leg both wrong",
    "LegDown": "Leg Down",
    "Idle": "Idle, please perform asana",
    "BentRight": "bent right, straiten yourself",
    "BentLeft": "bent left, staiten yourself",
    "BentForward": "bent forward, straiten yourself"
}

# Shown when no pose is found in a frame, so an empty frame is not reported as "Perfect".
NO_POSE_LABEL = "No pose detected"

# Keypoint numbers that raise each error flag when they miss the reference.
# NOTE(review): in MediaPipe's numbering 12 is the right shoulder and 13 the
# left elbow - confirm these groups cover the joints that were meant.
LEG_KEYPOINTS = {24, 25, 26, 27, 28, 29, 30, 31}
HANDS_NOT_AT_90_KEYPOINTS = {12, 13}
HAND_POSITION_KEYPOINTS = {16, 17, 18, 19, 20, 21}
FACE_KEYPOINTS = {1, 2, 3, 4, 5, 6, 7, 8, 9}

# Distance below which a frame keypoint counts as matching the shifted reference.
threshold = 0.1

MATCH_COLOR = (0, 255, 0)     # green (BGR)
MISMATCH_COLOR = (0, 0, 255)  # red (BGR)

# try/finally guarantees the capture and the window are released even when the
# kernel is interrupted or a frame raises.
try:
    # Iterate through the frames in the input video.
    while True:
        ret, frame = input_video.read()
        if not ret:
            break

        height, width = frame.shape[:2]

        # Process the frame to detect pose landmarks.
        frame_results = pose.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        # Retrieve the landmarks from the video frame (empty when no pose is found).
        frame_landmarks = []
        if frame_results.pose_landmarks:
            frame_landmarks = [
                (lm.x, lm.y, i) for i, lm in enumerate(frame_results.pose_landmarks.landmark)
            ]

        # Create a copy of the frame for drawing.
        frame_copy = frame.copy()

        # Create flags to track the conditions.
        leg_down = False
        hands_not_at_right_position = False
        hands_not_at_90 = False
        bent_right = False
        bent_left = False

        if frame_landmarks:
            # Translation between the reference's landmark 0 and the frame's
            # landmark 0; subtracting it moves the reference pose onto the
            # person in the frame.
            initial_offset = np.array(reference_landmarks[0][:2]) - np.array(frame_landmarks[0][:2])

            for x, y, frame_keypoint_number in frame_landmarks:
                frame_point = (x, y)
                reference_x, reference_y = reference_landmarks[frame_keypoint_number][:2]
                adjusted_reference_point = (
                    reference_x - initial_offset[0],
                    reference_y - initial_offset[1],
                )

                # Calculate the Euclidean distance between adjusted keypoints.
                distance = calculate_distance(adjusted_reference_point, frame_point)
                center = (int(x * width), int(y * height))

                if distance < threshold:
                    cv2.circle(frame_copy, center, 5, MATCH_COLOR, -1)
                    continue

                # The keypoint misses the reference: record which error it belongs to.
                if frame_keypoint_number in LEG_KEYPOINTS:
                    leg_down = True
                if frame_keypoint_number in HANDS_NOT_AT_90_KEYPOINTS:
                    hands_not_at_90 = True
                if frame_keypoint_number in HAND_POSITION_KEYPOINTS:
                    hands_not_at_right_position = True
                if frame_keypoint_number in FACE_KEYPOINTS:
                    # NOTE(review): the notebook caption for this cell says the
                    # bent left/right detection is still wrong; the heuristic
                    # is kept unchanged here.
                    if x < adjusted_reference_point[0]:
                        bent_right = True
                    elif x > adjusted_reference_point[0]:
                        bent_left = True
                cv2.circle(frame_copy, center, 5, MISMATCH_COLOR, -1)

        # Determine the label based on the conditions (first match wins).
        if not frame_landmarks:
            label = NO_POSE_LABEL
        elif leg_down:
            if not hands_not_at_right_position:
                label = keypoint_labels["LegDown"]
            elif hands_not_at_90:
                label = keypoint_labels["Hands_legs_wrong"]
            else:
                label = keypoint_labels["Idle"]
        elif bent_right:
            label = keypoint_labels["BentRight"]
        elif bent_left:
            label = keypoint_labels["BentLeft"]
        elif hands_not_at_90:
            label = keypoint_labels["HandsNotAt90"]
        elif hands_not_at_right_position:
            label = keypoint_labels["HandsNotAtRightPosition"]
        else:
            label = keypoint_labels["Perfect"]

        # Display the label on the frame, one text row per line.
        for i, line in enumerate(label.split('\n')):
            cv2.putText(frame_copy, line, (10, 30 + i * 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

        # Display the modified frame with keypoints and label.
        cv2.imshow("Processed Video", frame_copy)

        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Release the video object and close the display window.
    input_video.release()
    cv2.destroyAllWindows()


saving the video

In [14]:
import cv2
import numpy as np
import mediapipe as mp

# Initialize mediapipe pose class.
mp_pose = mp.solutions.pose

# Create a function to calculate Euclidean distance between two points.
def calculate_distance(point1, point2):
    """Return the Euclidean distance between two 2-D points.

    Only the first two components of each point are read (index 0 as x,
    index 1 as y); any further components are ignored.
    """
    dx = point1[0] - point2[0]
    dy = point1[1] - point2[1]
    return np.sqrt(dx ** 2 + dy ** 2)

# Load the reference image.
reference_image_path = r'dataset/amit_reference.jpg'
reference_image = cv2.imread(reference_image_path)
if reference_image is None:
    # cv2.imread returns None (it does not raise) when the file cannot be read.
    raise FileNotFoundError(f"Could not read reference image: {reference_image_path}")

# Create a Pose object. The same instance processes the reference image and
# every video frame below.
pose = mp_pose.Pose(static_image_mode=True, min_detection_confidence=0.3, model_complexity=2)

# Process the reference image (converted from OpenCV's BGR to RGB first).
reference_results = pose.process(cv2.cvtColor(reference_image, cv2.COLOR_BGR2RGB))

# Retrieve the landmarks from the reference image as (x, y, keypoint_number).
reference_landmarks = []
if reference_results.pose_landmarks:
    reference_landmarks = [
        (lm.x, lm.y, i) for i, lm in enumerate(reference_results.pose_landmarks.landmark)
    ]
if not reference_landmarks:
    # Every frame keypoint is looked up in this list by its number, so an
    # empty list would otherwise fail with an IndexError on the first frame.
    raise RuntimeError("No pose was detected in the reference image.")

# Define the input video path.
input_video_path = r'dataset/test.mp4'

# Open the input video file and fail loudly instead of writing an empty output.
input_video = cv2.VideoCapture(input_video_path)
if not input_video.isOpened():
    raise IOError(f"Could not open input video: {input_video_path}")

# Define the output video parameters.
output_video_path = 'output_video.mp4'  # Choose your desired output file name
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
# Keep the frame rate as a float: truncating e.g. 29.97 to 29 would make the
# saved video play at a different speed than the input.
fps = input_video.get(cv2.CAP_PROP_FPS)
if fps <= 0:
    # The container did not report a frame rate; fall back to a common default.
    fps = 30.0
frame_width = int(input_video.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(input_video.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Create a VideoWriter object to save the output video.
output_video = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

# Define a dictionary to map condition names to the messages shown on screen.
keypoint_labels = {
    "Perfect": "Perfect! Keep Going :)",
    "HandsNotAtRightPosition": "Hands not at the right \nposition",
    "HandsNotAt90": "Hands not at 90 degrees",
    "LegsNotTriangle": "Legs not in a triangle shape",
    "Hands_legs_wrong": "Hands and legs both at \nwrong positions",
    "LegDown": "Leg Down",
    "Idle": "Idle, please perform the \nasana",
    "BentRight": "Bent to the right, straighten yourself",
    "BentLeft": "Bent to the left, straighten yourself",
    "BentForward": "Bent forward, straighten yourself"
}

# Shown when no pose is found in a frame, so an empty frame is not reported as "Perfect".
NO_POSE_LABEL = "No pose detected"

# Keypoint numbers that raise each error flag when they miss the reference.
# NOTE(review): in MediaPipe's numbering 12 is the right shoulder and 13 the
# left elbow - confirm these groups cover the joints that were meant.
LEG_KEYPOINTS = {24, 25, 26, 27, 28, 29, 30, 31}
HANDS_NOT_AT_90_KEYPOINTS = {12, 13}
HAND_POSITION_KEYPOINTS = {16, 17, 18, 19, 20, 21}
FACE_KEYPOINTS = {1, 2, 3, 4, 5, 6, 7, 8, 9}

# Distance below which a frame keypoint counts as matching the shifted reference.
threshold = 0.1

MATCH_COLOR = (0, 255, 0)     # green (BGR)
MISMATCH_COLOR = (0, 0, 255)  # red (BGR)

# try/finally guarantees the capture, the writer and the window are released
# even when the kernel is interrupted; an unreleased writer can leave an
# unplayable output file.
try:
    if not output_video.isOpened():
        raise IOError(f"Could not open output video for writing: {output_video_path}")

    # Iterate through the frames in the input video.
    while True:
        ret, frame = input_video.read()
        if not ret:
            break

        height, width = frame.shape[:2]

        # Process the frame to detect pose landmarks.
        frame_results = pose.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        # Retrieve the landmarks from the video frame (empty when no pose is found).
        frame_landmarks = []
        if frame_results.pose_landmarks:
            frame_landmarks = [
                (lm.x, lm.y, i) for i, lm in enumerate(frame_results.pose_landmarks.landmark)
            ]

        # Create a copy of the frame for drawing.
        frame_copy = frame.copy()

        # Create flags to track the conditions.
        leg_down = False
        hands_not_at_right_position = False
        hands_not_at_90 = False
        bent_right = False
        bent_left = False

        if frame_landmarks:
            # Translation between the reference's landmark 0 and the frame's
            # landmark 0; subtracting it moves the reference pose onto the
            # person in the frame.
            initial_offset = np.array(reference_landmarks[0][:2]) - np.array(frame_landmarks[0][:2])

            for x, y, frame_keypoint_number in frame_landmarks:
                frame_point = (x, y)
                reference_x, reference_y = reference_landmarks[frame_keypoint_number][:2]
                adjusted_reference_point = (
                    reference_x - initial_offset[0],
                    reference_y - initial_offset[1],
                )

                # Calculate the Euclidean distance between adjusted keypoints.
                distance = calculate_distance(adjusted_reference_point, frame_point)
                center = (int(x * width), int(y * height))

                if distance < threshold:
                    cv2.circle(frame_copy, center, 5, MATCH_COLOR, -1)
                    continue

                # The keypoint misses the reference: record which error it belongs to.
                if frame_keypoint_number in LEG_KEYPOINTS:
                    leg_down = True
                if frame_keypoint_number in HANDS_NOT_AT_90_KEYPOINTS:
                    hands_not_at_90 = True
                if frame_keypoint_number in HAND_POSITION_KEYPOINTS:
                    hands_not_at_right_position = True
                if frame_keypoint_number in FACE_KEYPOINTS:
                    # NOTE(review): same left/right heuristic as the previous
                    # cell, whose caption says it is still wrong; kept unchanged.
                    if x < adjusted_reference_point[0]:
                        bent_right = True
                    elif x > adjusted_reference_point[0]:
                        bent_left = True
                cv2.circle(frame_copy, center, 5, MISMATCH_COLOR, -1)

        # Determine the label based on the conditions (first match wins).
        if not frame_landmarks:
            label = NO_POSE_LABEL
        elif leg_down:
            if not hands_not_at_right_position:
                label = keypoint_labels["LegDown"]
            elif hands_not_at_90:
                label = keypoint_labels["Hands_legs_wrong"]
            else:
                label = keypoint_labels["Idle"]
        elif bent_right:
            label = keypoint_labels["BentRight"]
        elif bent_left:
            label = keypoint_labels["BentLeft"]
        elif hands_not_at_90:
            label = keypoint_labels["HandsNotAt90"]
        elif hands_not_at_right_position:
            label = keypoint_labels["HandsNotAtRightPosition"]
        else:
            label = keypoint_labels["Perfect"]

        # Display the label on the frame, one text row per line.
        for i, line in enumerate(label.split('\n')):
            cv2.putText(frame_copy, line, (10, 30 + i * 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

        # Write the processed frame to the output video.
        output_video.write(frame_copy)

        cv2.imshow("Processed Video", frame_copy)

        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Release the video objects and close the display window.
    input_video.release()
    output_video.release()
    cv2.destroyAllWindows()


code that makes the bent left and right detection work correctly

In [12]:
import cv2
import numpy as np
import mediapipe as mp

mp_pose = mp.solutions.pose

def calculate_distance(point1, point2):
    """Return the Euclidean distance between two 2-D points.

    Only the first two components of each point are read (index 0 as x,
    index 1 as y); any further components are ignored.
    """
    dx = point1[0] - point2[0]
    dy = point1[1] - point2[1]
    return np.sqrt(dx ** 2 + dy ** 2)

# Load the reference image.
# NOTE(review): this is an absolute, machine-specific path; consider a
# project-relative path like the one used for the video below.
reference_image_path = r'c:\Users\Dhruv\Downloads\1_frame_779.jpg'
reference_image = cv2.imread(reference_image_path)
if reference_image is None:
    # cv2.imread returns None (it does not raise) when the file cannot be read.
    raise FileNotFoundError(f"Could not read reference image: {reference_image_path}")

# The same Pose instance processes the reference image and every video frame.
pose = mp_pose.Pose(static_image_mode=True, min_detection_confidence=0.3, model_complexity=2)
reference_results = pose.process(cv2.cvtColor(reference_image, cv2.COLOR_BGR2RGB))

# Reference landmarks as (x, y, keypoint_number).
reference_landmarks = []
if reference_results.pose_landmarks:
    reference_landmarks = [
        (lm.x, lm.y, i) for i, lm in enumerate(reference_results.pose_landmarks.landmark)
    ]
if not reference_landmarks:
    # Every frame keypoint is looked up in this list by its number, so an
    # empty list would otherwise fail with an IndexError on the first frame.
    raise RuntimeError("No pose was detected in the reference image.")

input_video_path = r'dataset/videos/pcd.mp4'
input_video = cv2.VideoCapture(input_video_path)
if not input_video.isOpened():
    raise IOError(f"Could not open input video: {input_video_path}")

# Condition names mapped to the messages shown on screen.
keypoint_labels = {
    "Perfect": "Perfect! Keep Going :)",
    "HandsNotAtRightPosition": "Hands not at right position",
    "HandsNotAt90": "Hands not at 90 degree",
    "LegsNotTriangle": "Legs not triangle",
    "Hands_legs_wrong": "Hands and leg both wrong",
    "LegDown": "Leg Down",
    "Idle": "Idle, please perform asana",
    "BentRight": "Bent right, straighten yourself",
    "BentLeft": "Bent left, straighten yourself",
    "BentForward": "Bent forward, straighten yourself"
}

# Shown when no pose is found in a frame, so an empty frame is not reported as "Perfect".
NO_POSE_LABEL = "No pose detected"

# Face keypoints use a looser matching threshold than the rest of the body.
face_keypoints_threshold = 0.2
threshold = 0.1

# Smallest vertical difference between the two shoulders that counts as a
# sideways bend. Without a tolerance the two float coordinates are practically
# never equal, so every frame would be labelled as bent.
# NOTE(review): 0.05 is a starting value - tune it on real footage.
shoulder_tilt_threshold = 0.05

# Shoulder indices taken from MediaPipe's own enum rather than hard-coded
# numbers (the previous literals 10 and 11 are mouth-right and left shoulder).
LEFT_SHOULDER_INDEX = mp_pose.PoseLandmark.LEFT_SHOULDER.value
RIGHT_SHOULDER_INDEX = mp_pose.PoseLandmark.RIGHT_SHOULDER.value

# Keypoint numbers that raise each error flag when they miss the reference.
# NOTE(review): in MediaPipe's numbering 12 is the right shoulder and 13 the
# left elbow - confirm these groups cover the joints that were meant.
LEG_KEYPOINTS = {24, 25, 26, 27, 28, 29, 30, 31}
HANDS_NOT_AT_90_KEYPOINTS = {12, 13}
HAND_POSITION_KEYPOINTS = {16, 17, 18, 19, 20, 21}
FACE_KEYPOINTS = {1, 2, 3, 4, 5, 6, 7, 8, 9}

MATCH_COLOR = (0, 255, 0)     # green (BGR)
MISMATCH_COLOR = (0, 0, 255)  # red (BGR)

# try/finally guarantees the capture and the window are released even when the
# kernel is interrupted or a frame raises.
try:
    while True:
        ret, frame = input_video.read()
        if not ret:
            break

        height, width = frame.shape[:2]

        frame_results = pose.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        # Frame landmarks as (x, y, keypoint_number); empty when no pose is found.
        frame_landmarks = []
        if frame_results.pose_landmarks:
            frame_landmarks = [
                (lm.x, lm.y, i) for i, lm in enumerate(frame_results.pose_landmarks.landmark)
            ]

        frame_copy = frame.copy()

        leg_down = False
        hands_not_at_right_position = False
        hands_not_at_90 = False
        bent_forward = False

        left_shoulder = None
        right_shoulder = None

        if frame_landmarks:
            # Translation between the reference's landmark 0 and the frame's
            # landmark 0; subtracting it moves the reference pose onto the
            # person in the frame.
            initial_offset = np.array(reference_landmarks[0][:2]) - np.array(frame_landmarks[0][:2])

            for x, y, frame_keypoint_number in frame_landmarks:
                frame_point = (x, y)
                if frame_keypoint_number == LEFT_SHOULDER_INDEX:
                    left_shoulder = frame_point
                elif frame_keypoint_number == RIGHT_SHOULDER_INDEX:
                    right_shoulder = frame_point

                reference_x, reference_y = reference_landmarks[frame_keypoint_number][:2]
                adjusted_reference_point = (
                    reference_x - initial_offset[0],
                    reference_y - initial_offset[1],
                )
                distance = calculate_distance(adjusted_reference_point, frame_point)
                center = (int(x * width), int(y * height))

                is_face_keypoint = frame_keypoint_number in FACE_KEYPOINTS
                allowed_distance = face_keypoints_threshold if is_face_keypoint else threshold

                if distance < allowed_distance:
                    cv2.circle(frame_copy, center, 5, MATCH_COLOR, -1)
                    continue

                # The keypoint misses the reference: record which error it belongs to.
                if is_face_keypoint:
                    bent_forward = True
                else:
                    if frame_keypoint_number in LEG_KEYPOINTS:
                        leg_down = True
                    if frame_keypoint_number in HANDS_NOT_AT_90_KEYPOINTS:
                        hands_not_at_90 = True
                    if frame_keypoint_number in HAND_POSITION_KEYPOINTS:
                        hands_not_at_right_position = True
                cv2.circle(frame_copy, center, 5, MISMATCH_COLOR, -1)

        # Positive when the right shoulder is lower in the image than the left
        # one (image y grows downwards).
        shoulder_tilt = 0.0
        if left_shoulder is not None and right_shoulder is not None:
            shoulder_tilt = right_shoulder[1] - left_shoulder[1]

        # Determine the label (first match wins).
        # NOTE(review): the tilt-sign to left/right mapping is carried over
        # unchanged from the previous comparison; confirm it on a mirrored and
        # an unmirrored video.
        if not frame_landmarks:
            label = NO_POSE_LABEL
        elif shoulder_tilt > shoulder_tilt_threshold:
            label = keypoint_labels["BentLeft"]
        elif shoulder_tilt < -shoulder_tilt_threshold:
            label = keypoint_labels["BentRight"]
        elif bent_forward:
            # The flag was computed before but never reported.
            label = keypoint_labels["BentForward"]
        elif leg_down:
            if not hands_not_at_right_position:
                label = keypoint_labels["LegDown"]
            elif hands_not_at_90:
                label = keypoint_labels["Hands_legs_wrong"]
            else:
                label = keypoint_labels["Idle"]
        elif hands_not_at_90:
            # Hand errors without a leg error are reported too, so "Perfect"
            # is never shown next to red hand keypoints.
            label = keypoint_labels["HandsNotAt90"]
        elif hands_not_at_right_position:
            label = keypoint_labels["HandsNotAtRightPosition"]
        else:
            label = keypoint_labels["Perfect"]

        # Display the label on the frame, one text row per line.
        for i, line in enumerate(label.split('\n')):
            cv2.putText(frame_copy, line, (10, 30 + i * 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

        cv2.imshow("Processed Video", frame_copy)

        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    input_video.release()
    cv2.destroyAllWindows()
