In [1]:
!pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.3.23-py3-none-any.whl.metadata (35 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.9-py3-none-any.whl.metadata (9.3 kB)
Downloading ultralytics-8.3.23-py3-none-any.whl (877 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m877.6/877.6 kB[0m [31m15.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics_thop-2.0.9-py3-none-any.whl (26 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.3.23 ultralytics-thop-2.0.9


In [2]:
!pip install gTTS

Collecting gTTS
  Downloading gTTS-2.5.3-py3-none-any.whl.metadata (4.1 kB)
Downloading gTTS-2.5.3-py3-none-any.whl (29 kB)
Installing collected packages: gTTS
Successfully installed gTTS-2.5.3


In [3]:
# NOTE(review): this upgrades scikit-learn to whatever release is current at run time.
# The .pkl artifacts loaded later with joblib are presumably scikit-learn estimators;
# confirm the version they were trained with and pin it here instead of upgrading blindly.
!pip install --upgrade scikit-learn



In [4]:
import cv2
import os
import pandas as pd
import numpy as np
from ultralytics import YOLO
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
import joblib  # For model saving and loading
import warnings
from gtts import gTTS
import shutil

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


In [5]:
# Mount Google Drive at /content/drive; the model artifacts and the test video
# referenced by later cells are read from paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [6]:
# Silence all Python warnings for the rest of the notebook.
warnings.filterwarnings("ignore")

# Label for each of the 17 pose keypoints, indexed by the keypoint's row position
# in the (17, 3) array returned by the pose model.
# NOTE(review): Ultralytics pose models are documented to follow COCO keypoint order,
# in which the LEFT part precedes the RIGHT one (index 1 = left eye, 5 = left shoulder, ...).
# The Left/Right labels below look mirrored relative to that - confirm before trusting
# side-specific feedback. The saved models were presumably trained with these column
# names, so renaming requires regenerating the training data and the models.
KEYPOINT_NAMES = [
    "Nose",            # 0
    "Right Eye",       # 1
    "Left Eye",        # 2
    "Right Ear",       # 3
    "Left Ear",        # 4
    "Right Shoulder",  # 5
    "Left Shoulder",   # 6
    "Right Elbow",     # 7
    "Left Elbow",      # 8
    "Right Wrist",     # 9
    "Left Wrist",      # 10
    "Right Hip",       # 11
    "Left Hip",        # 12
    "Right Knee",      # 13
    "Left Knee",       # 14
    "Right Ankle",     # 15
    "Left Ankle",      # 16
]

# Body keypoints used as model features: everything except the five head
# keypoints (nose, eyes, ears) at the front of KEYPOINT_NAMES.
keypoints_to_keep = KEYPOINT_NAMES[5:]

# Feature column names, interleaved as <name>_x, <name>_y for each kept keypoint.
keypoint_columns = [
    f'{name}_{axis}'
    for name in keypoints_to_keep
    for axis in ('x', 'y')
]

In [7]:
# Define the function that computes the new origin and coordinates normalization
def calculate_new_origin(keypoints_data):
    """Return the torso centre (x0, y0) used as the origin of the body-relative frame.

    The origin is the mean position of the shoulder and hip keypoints
    (rows 5, 6, 11 and 12 of ``keypoints_data``) that are usable. A keypoint is
    used only when both its x and its y are non-zero.

    Args:
        keypoints_data: indexable of keypoint rows whose first two entries are x and y.

    Returns:
        tuple: ``(x0, y0)``, the mean x and mean y of the usable torso keypoints.

    Raises:
        ValueError: if none of the four torso keypoints is usable.
    """
    # NOTE(review): a point is rejected when EITHER coordinate is 0, whereas
    # parse_keypoints_with_custom_origin treats a point as missing only when BOTH
    # are 0 - confirm which rule is intended.
    torso_rows = (5, 6, 11, 12)  # right shoulder, left shoulder, right hip, left hip

    usable = []
    for row in torso_rows:
        px, py = keypoints_data[row][:2]
        if px != 0 and py != 0:
            usable.append((px, py))

    if not usable:
        raise ValueError("The key points of right shoulder, left shoulder, right hip, and left hip are missing and the new origin cannot be calculated")

    count = len(usable)
    x0 = sum([px for px, _ in usable]) / count
    y0 = sum([py for _, py in usable]) / count
    return x0, y0

def get_min_max_of_new_coords(keypoints_data, x0, y0):
    """Return the bounding range of the keypoints once re-expressed around (x0, y0).

    Each keypoint with confidence > 0 is shifted so that (x0, y0) becomes the
    origin, and its y value is negated so that y grows upwards.

    Args:
        keypoints_data: iterable of ``(x, y, confidence)`` triples.
        x0: x coordinate of the new origin.
        y0: y coordinate of the new origin.

    Returns:
        tuple: ``(x_min, x_max, y_min, y_max)`` of the shifted coordinates.

    Raises:
        ValueError: raised by ``min()`` when no keypoint has confidence > 0.
    """
    # NOTE(review): validity here is "confidence > 0", while the caller treats a
    # keypoint as missing when its coordinates are (0, 0). A (0, 0) keypoint with
    # positive confidence therefore still widens the range - confirm this is intended.
    shifted = [
        (x - x0, -(y - y0))
        for x, y, conf in keypoints_data
        if conf > 0
    ]
    shifted_x = [point[0] for point in shifted]
    shifted_y = [point[1] for point in shifted]

    return min(shifted_x), max(shifted_x), min(shifted_y), max(shifted_y)

def parse_keypoints_with_custom_origin(results):
    """Convert pose results into torso-centred, min-max normalised keypoints.

    For every entry of ``results`` only the FIRST detected person is used. Its
    keypoints are re-expressed relative to the torso centre (see
    ``calculate_new_origin``), the y axis is flipped to grow upwards, and both
    axes are scaled by the range of the person's own valid keypoints.

    Entries are skipped, with a printed message, when they contain no detected
    person or when the torso origin cannot be computed. Previously a result
    without detections raised IndexError and aborted the whole video.

    Args:
        results: sequence of pose results; each item exposes ``.keypoints.data``,
            presumably shaped (num_people, 17, 3) with rows ``(x, y, confidence)``
            - TODO confirm against the Ultralytics version in use.

    Returns:
        list[dict]: one ``{"object_id": i, "keypoints": {...}}`` dict per usable
        result. ``keypoints`` maps each name in ``KEYPOINT_NAMES`` either to
        ``None`` (keypoint at (0, 0), treated as missing) or to a dict with
        ``name``, ``x``, ``y`` (normalised) and ``confidence``.
    """
    parsed_keypoints_list = []
    for i in range(len(results)):
        keypoints = results[i].keypoints
        if keypoints is None:
            print(f"No keypoints available for object {i}, skipping")
            continue

        detections = keypoints.data.cpu().numpy()
        # Guard against frames without a detected person: the array is then empty
        # along the person axis or along the keypoint axis, and indexing would fail.
        if detections.shape[0] == 0 or detections[0].shape[0] < len(KEYPOINT_NAMES):
            print(f"No person detected for object {i}, skipping")
            continue

        keypoints_data = detections[0]  # keypoints of the first person, (17, 3)

        # calculate the new origin coordinates
        try:
            x0, y0 = calculate_new_origin(keypoints_data)
        except ValueError as e:
            print(f"Failed to calculate new origin for object {i}: {e}")
            continue  # Skip this object

        # Range of the shifted coordinates, used for min-max normalisation
        x_min_new, x_max_new, y_min_new, y_max_new = get_min_max_of_new_coords(keypoints_data, x0, y0)
        x_range = x_max_new - x_min_new
        y_range = y_max_new - y_min_new

        keypoint_dict = {}
        for j, keypoint in enumerate(keypoints_data):
            x, y, conf = keypoint
            if x == 0 and y == 0:
                # (0, 0) marks a keypoint that was not detected
                keypoint_dict[KEYPOINT_NAMES[j]] = None
                continue

            # Coordinates in the new coordinate system, with the Y-axis growing from bottom to top
            x_new = x - x0
            y_new = -(y - y0)

            # A zero range would divide by zero; fall back to 0 in that case
            x_normalized = (x_new - x_min_new) / x_range if x_max_new != x_min_new else 0
            y_normalized = (y_new - y_min_new) / y_range if y_max_new != y_min_new else 0

            keypoint_dict[KEYPOINT_NAMES[j]] = {
                "name": KEYPOINT_NAMES[j],
                "x": x_normalized,
                "y": y_normalized,
                "confidence": conf
            }

        parsed_keypoints_list.append({"object_id": i, "keypoints": keypoint_dict})

    return parsed_keypoints_list

In [8]:
# Restore the trained artifacts from Google Drive, in this order: action classifier,
# action-stage classifier, label encoder, one-hot encoder, feature scaler.
# NOTE(review): joblib.load and pd.read_pickle unpickle arbitrary Python objects -
# only load files from a trusted source.
(
    knn_model,
    rf_model_sequence,
    label_encoder,
    onehot_encoder,
    scaler,
) = [
    joblib.load(artifact_path)
    for artifact_path in (
        '/content/drive/MyDrive/fitness/action_classification_model.pkl',
        '/content/drive/MyDrive/fitness/action_stage_model.pkl',
        '/content/drive/MyDrive/fitness/action_label_encoder.pkl',
        '/content/drive/MyDrive/fitness/action_onehot_encoder.pkl',
        '/content/drive/MyDrive/fitness/feature_scaler.pkl',
    )
]

# Reference keypoints of correctly performed actions; looked up later by
# action_name and sequence to compare against the analysed video.
standard_keypoints_mean = pd.read_pickle('/content/drive/MyDrive/fitness/standard_keypoints_mean.pkl')


In [9]:
# Text-to-Speech
def text_to_speech(text, filename="output.mp3"):
    """Synthesise ``text`` as English speech with gTTS and save it to ``filename``.

    Args:
        text: the sentence to speak.
        filename: path of the MP3 file to write (default ``"output.mp3"``).
    """
    gTTS(text=text, lang='en').save(filename)
    print(f"MP3 file saved as {filename}")

In [10]:
# Define a function to process a new video and output error types
def process_new_video(input_video_path):
    """Classify the exercise shown in a video and report one form error per frame window.

    Pipeline:
      1. Sample frames from the first 3 seconds and extract normalised pose keypoints.
      2. Predict the action with ``knn_model`` (the prediction of the first sampled
         frame is used for the whole video).
      3. Build three-frame windows per half-second sequence and predict the action
         stage of each window with ``rf_model_sequence``.
      4. Compare each window's middle frame with the matching reference keypoints
         in ``standard_keypoints_mean`` and label the first rule that is violated.
      5. Print the results, write one MP3 per window into ``mp3/`` and save the
         table to ``test_keypoints_error.xlsx``.

    Possible labels: "Left/Right hand too far left/right",
    "Left/Right arm not bent enough", "Left/Right arm too bent",
    "Left/Right leg not bent enough", "Left/Right leg too bent",
    "Waist misaligned", "No significant error detected",
    "Unable to match standard action".

    Args:
        input_video_path: path of the video file to analyse.

    Returns:
        list[str]: one label per three-frame window. An empty list is returned when
        no keypoints or no complete window could be extracted (this path used to
        return ``None``, which broke callers that take ``len()`` of the result).

    Raises:
        IOError: if the video cannot be opened.
        ValueError: if the video does not report a positive frame rate.
    """
    # Load YOLOv8 pose model
    model = YOLO('yolov8n-pose.pt')

    # Output the currently processing video file
    print(f"Processing video: {input_video_path}")

    # --- 1. Keypoint extraction -------------------------------------------------
    all_data = _collect_keypoint_rows(model, input_video_path)

    columns = ['action_name', 'standard_type', 'frame_index', 'sequence'] + [f'{k}_x' for k in KEYPOINT_NAMES] + [f'{k}_y' for k in KEYPOINT_NAMES]
    data = pd.DataFrame(all_data, columns=columns)

    if data.empty:
        # Without this guard the classifier below is called with zero samples and raises.
        print("No keypoints were detected in the sampled frames. The video cannot be analysed.")
        return []

    # Data cleaning and mean imputation. The cast turns None into NaN and keeps the
    # feature columns numeric even when a keypoint is missing in every frame.
    data[keypoint_columns] = data[keypoint_columns].astype(float)
    data[keypoint_columns] = data[keypoint_columns].fillna(data[keypoint_columns].mean())

    # --- 2. Action classification -----------------------------------------------
    y_pred_action_encoded = knn_model.predict(data[keypoint_columns])
    y_pred_action = label_encoder.inverse_transform(y_pred_action_encoded)
    data['predicted_action'] = y_pred_action

    # Assume the action name is the same for all frames
    data['action_name'] = y_pred_action[0]
    # The analysed video is always treated as a nonstandard performance
    data['standard_type'] = 'nonstandard'

    # --- 3. Action stage classification -----------------------------------------
    grouped_df_nonstandard = pd.DataFrame(_build_three_frame_windows(data))

    if grouped_df_nonstandard.empty:
        print("Unable to extract valid three-frame combination features from the video. The video may be too short or keypoint detection may have failed.")
        return []

    # One-hot encode action_name with the previously trained encoder and prepend
    # it to the numerical window features
    action_name_encoded_nonstandard = onehot_encoder.transform(grouped_df_nonstandard[['action_name']])
    X_nonstandard = np.hstack((action_name_encoded_nonstandard, np.vstack(grouped_df_nonstandard['features'].values)))

    # Standardize with the previously trained scaler, then predict the stage
    X_nonstandard_scaled = scaler.transform(X_nonstandard)
    grouped_df_nonstandard['predicted_sequence'] = rf_model_sequence.predict(X_nonstandard_scaled)

    # --- 4. Compare with the standard action ------------------------------------
    # Position of every feature column inside a flat keypoint vector
    keypoint_indices = {keypoint: idx for idx, keypoint in enumerate(keypoint_columns)}

    error_types = []
    window_iter = zip(
        grouped_df_nonstandard['action_name'],
        grouped_df_nonstandard['predicted_sequence'],
        grouped_df_nonstandard['second_frame_keypoints'],
    )
    for window_action, predicted_sequence, nonstandard_keypoints in window_iter:
        # Get the average keypoints for the corresponding standard action
        standard_row = standard_keypoints_mean[
            (standard_keypoints_mean['action_name'] == window_action) &
            (standard_keypoints_mean['sequence'] == predicted_sequence)
        ]

        if standard_row.empty:
            error_types.append('Unable to match standard action')
            continue

        standard_keypoints = np.asarray(standard_row.iloc[0]['second_frame_keypoints'], dtype=float)
        error_types.append(
            _classify_form_error(nonstandard_keypoints, standard_keypoints, keypoint_indices)
        )

    grouped_df_nonstandard['error_type'] = error_types

    # --- 5. Report --------------------------------------------------------------
    print(grouped_df_nonstandard[['action_name', 'sequence', 'predicted_sequence', 'error_type']])

    os.makedirs("mp3", exist_ok=True)

    # Text-to-Speech: one clip per window
    for row in grouped_df_nonstandard.itertuples():
        text_to_speech(row.error_type, f"mp3/output_sequence_{row.action_name}_{row.sequence + 1}_{row.error_type}.mp3")

    # Save results to Excel
    grouped_df_nonstandard.to_excel("test_keypoints_error.xlsx", index=False)
    print("Error type results have been saved to test_keypoints_error.xlsx")

    return error_types


def _collect_keypoint_rows(model, input_video_path, max_duration=3.0):
    """Run the pose model on sampled frames of the first ``max_duration`` seconds.

    Within every half-second window the frames at offsets 0, 2 and 4 are sampled.
    Returns a list of row dicts (metadata plus ``<keypoint>_x`` / ``<keypoint>_y``).
    """
    cap = cv2.VideoCapture(input_video_path)
    if not cap.isOpened():
        raise IOError(f"Could not open video file: {input_video_path}")

    rows = []
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not (fps > 0):
            raise ValueError(f"Video reports an invalid frame rate ({fps}): {input_video_path}")

        # At least 1 so that very low frame rates do not produce a zero step below
        frames_per_half_second = max(1, int(fps / 2))
        max_frame_to_process = int(fps * max_duration)

        # A set gives constant-time membership tests inside the read loop
        frame_indices = {
            window_start + offset
            for window_start in range(0, max_frame_to_process, frames_per_half_second)
            for offset in (0, 2, 4)  # the 1st, 3rd and 5th frame of each window
            if window_start + offset < max_frame_to_process
        }

        frame_number = 0
        # Stop at the analysed span instead of decoding the rest of the video
        while frame_number < max_frame_to_process:
            ret, frame = cap.read()
            if not ret:
                break

            if frame_number in frame_indices:
                results = model(frame)

                # Frames within the same half second share a sequence number
                sequence_number = frame_number // frames_per_half_second

                for obj in parse_keypoints_with_custom_origin(results):
                    keypoints = obj['keypoints']
                    row_data = {
                        'action_name': 'Unknown',         # replaced after classification
                        'standard_type': 'nonstandard',
                        'frame_index': frame_number,
                        'sequence': sequence_number,
                    }
                    for kp_name in KEYPOINT_NAMES:
                        kp_info = keypoints.get(kp_name, None)
                        row_data[f'{kp_name}_x'] = kp_info['x'] if kp_info else None
                        row_data[f'{kp_name}_y'] = kp_info['y'] if kp_info else None
                    rows.append(row_data)

            frame_number += 1
    finally:
        # Release the capture even if inference or parsing raises
        cap.release()

    return rows


def _build_three_frame_windows(data):
    """Build sliding three-frame feature windows inside each (action, type, sequence) group.

    Each window yields the middle frame's keypoints plus the differences of the
    first and third frame from the middle frame, concatenated in that order.
    """
    group_size = 3
    windows = []
    for name, group in data.groupby(['action_name', 'standard_type', 'sequence']):
        coords = group[keypoint_columns].to_numpy(dtype=float)
        # The range is empty for groups with fewer than three frames
        for start in range(0, len(coords) - group_size + 1):
            first_frame, second_frame, third_frame = coords[start:start + group_size]
            combined_features = np.hstack([
                second_frame,
                first_frame - second_frame,
                third_frame - second_frame,
            ])
            windows.append({
                'action_name': name[0],
                'standard_type': name[1],
                'sequence': name[2],
                'features': combined_features,
                'second_frame_keypoints': second_frame  # Save middle frame keypoints
            })
    return windows


def _joint_angle(a, b, c):
    """Return the angle at ``b`` formed by points ``a``, ``b``, ``c``, in degrees.

    Returns NaN when either arm of the angle has zero length; NaN compares as
    False against any threshold, so such joints are never flagged.
    """
    ba = a - b
    bc = c - b
    denominator = np.linalg.norm(ba) * np.linalg.norm(bc)
    if denominator == 0:
        return float('nan')
    # Clip to [-1, 1] to absorb floating-point error before arccos
    cosine_angle = np.clip(np.dot(ba, bc) / denominator, -1.0, 1.0)
    return np.degrees(np.arccos(cosine_angle))


def _classify_form_error(user_keypoints, standard_keypoints, keypoint_indices,
                         threshold_hand=0.1, threshold_angle=10, threshold_waist=0.05):
    """Return the first form error found when comparing a frame with its reference.

    Checks run in a fixed order and stop at the first violation:
    wrist x offset (left, right), elbow angle (left, right), knee angle
    (left, right), then shoulder/hip horizontal alignment.
    """
    def point(keypoints, name):
        # (x, y) pair of one keypoint inside the flat feature vector
        return keypoints[keypoint_indices[f'{name}_x']:keypoint_indices[f'{name}_y'] + 1]

    # 1. Horizontal wrist position relative to the reference (user - reference)
    differences = user_keypoints - standard_keypoints
    for side in ('Left', 'Right'):
        wrist_x_diff = differences[keypoint_indices[f'{side} Wrist_x']]
        if abs(wrist_x_diff) > threshold_hand:
            direction = 'left' if wrist_x_diff < 0 else 'right'
            return f'{side} hand too far {direction}'

    # 2. Elbow angles, then 3. knee angles. A larger angle than the reference
    #    means the limb is straighter, i.e. not bent enough.
    limb_checks = (
        ('arm', ('Shoulder', 'Elbow', 'Wrist')),
        ('leg', ('Hip', 'Knee', 'Ankle')),
    )
    for limb, (upper, middle, lower) in limb_checks:
        for side in ('Left', 'Right'):
            user_angle = _joint_angle(
                point(user_keypoints, f'{side} {upper}'),
                point(user_keypoints, f'{side} {middle}'),
                point(user_keypoints, f'{side} {lower}'),
            )
            standard_angle = _joint_angle(
                point(standard_keypoints, f'{side} {upper}'),
                point(standard_keypoints, f'{side} {middle}'),
                point(standard_keypoints, f'{side} {lower}'),
            )
            angle_diff = user_angle - standard_angle
            if abs(angle_diff) > threshold_angle:
                if angle_diff > 0:
                    return f'{side} {limb} not bent enough'
                return f'{side} {limb} too bent'

    # 4. Waist: horizontal distance between the shoulder midpoint and the hip
    #    midpoint of the analysed frame (the reference is not used for this check)
    shoulders_midpoint = (point(user_keypoints, 'Left Shoulder') + point(user_keypoints, 'Right Shoulder')) / 2
    hips_midpoint = (point(user_keypoints, 'Left Hip') + point(user_keypoints, 'Right Hip')) / 2
    horizontal_offset = abs(shoulders_midpoint[0] - hips_midpoint[0])

    if horizontal_offset > threshold_waist:
        return 'Waist misaligned'
    return 'No significant error detected'


In [22]:
def process_video_with_text(input_video_path, output_video_path, text_list):
    """Write a copy of the video with pose overlays and captions burned in.

    The video is split into ``len(text_list)`` equal spans of frames and the
    i-th text is shown during the i-th span. Each frame is run through the pose
    model, the plotted result is captioned and written to ``output_video_path``.

    The caption is drawn on the plotted frame, so it sits on top of the pose
    overlay. It used to be drawn on the raw frame after inference, which only
    showed up in the output if the result object shared memory with that frame.

    Args:
        input_video_path: path of the source video.
        output_video_path: path of the MP4 file to create.
        text_list: captions to display in order; ``None`` or an empty list
            produces a video with pose overlays only.

    Raises:
        IOError: if the source video cannot be opened.
    """
    # Load model
    model = YOLO('yolov8n-pose.pt')

    # Accept None (e.g. from a failed analysis) as "no captions"
    text_list = list(text_list) if text_list else []

    # Open video file
    cap = cv2.VideoCapture(input_video_path)
    if not cap.isOpened():
        raise IOError(f"Could not open video file: {input_video_path}")

    # Set font, font size, and color
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 2
    font_color = (0, 0, 255)  # Red text (BGR format)
    thickness = 2  # Thicker line for bold text
    position = (15, 60)  # Position of the text in the top-left corner

    out = None
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        # Number of frames each caption stays on screen. Clamped to at least 1:
        # the floor division yields 0 when there are more captions than frames
        # or when the frame count is reported as 0, which would divide by zero below.
        if text_list:
            interval = max(1, total_frames // len(text_list))
        else:
            interval = max(1, total_frames)

        # Create VideoWriter object to save the output video
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))

        # Process the video frame by frame
        frame_count = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Perform detection using the model
            results = model(frame)

            text_index = frame_count // interval
            for r in results:
                im_array = r.plot()  # frame with the detection results drawn on it
                if text_index < len(text_list):
                    cv2.putText(im_array, text_list[text_index], position, font,
                                font_scale, font_color, thickness, cv2.LINE_AA)
                # Write the processed frame to the output video
                out.write(im_array)

            frame_count += 1
    finally:
        # Release resources even if inference or writing fails
        cap.release()
        if out is not None:
            out.release()


In [23]:
from google.colab import files

# Specify the path for the input and output video files
# NOTE(review): the input is nonstandard04 but the output is named nonstandard05 -
# confirm the intended output name.
test_video_path = '/content/drive/My Drive/fitness/dataset_fitness/02 Barbell Bench Press/nonstandard04.mov'
output_video_path = 'output_nonstandard05.mp4'

# Analyse the video. process_new_video can return None when no usable frame
# windows are found; fall back to an empty list so the annotation step below,
# which takes len() of its captions, still runs.
error_types = process_new_video(test_video_path) or []

# Process the output video with text annotations
process_video_with_text(test_video_path, output_video_path, error_types)

# Download the processed output video to the local system
files.download(output_video_path)



Processing video: /content/drive/My Drive/fitness/dataset_fitness/02 Barbell Bench Press/nonstandard04.mov

0: 640x480 1 person, 168.0ms
Speed: 6.7ms preprocess, 168.0ms inference, 1.2ms postprocess per image at shape (1, 3, 640, 480)

0: 640x480 1 person, 164.2ms
Speed: 5.2ms preprocess, 164.2ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 480)

0: 640x480 1 person, 160.4ms
Speed: 5.4ms preprocess, 160.4ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 480)

0: 640x480 1 person, 185.2ms
Speed: 4.7ms preprocess, 185.2ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 480)

0: 640x480 1 person, 164.0ms
Speed: 4.6ms preprocess, 164.0ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 480)

0: 640x480 1 person, 155.4ms
Speed: 4.7ms preprocess, 155.4ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 480)

0: 640x480 1 person, 171.1ms
Speed: 5.6ms preprocess, 171.1ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 48

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [17]:
# NOTE: this cell relies on state from earlier cells - `files` is imported by the
# cell that runs the analysis, and the mp3/ folder is created by process_new_video -
# so it only works after those cells have been executed.
# List all files in the mp3 folder
mp3_files = os.listdir('mp3')
print(mp3_files)

# Compress the entire mp3 folder into a zip file (creates mp3_files.zip)
shutil.make_archive('mp3_files', 'zip', 'mp3')

# Download the compressed zip file
files.download('mp3_files.zip')

['output_sequence_Barbell Bench Press_6_Left hand too far right.mp3', 'output_sequence_Barbell Bench Press_2_Left hand too far right.mp3', 'output_sequence_Barbell Bench Press_1_Left hand too far right.mp3', 'output_sequence_Barbell Bench Press_4_Left hand too far right.mp3', 'output_sequence_Barbell Bench Press_5_Left hand too far right.mp3', 'output_sequence_Barbell Bench Press_3_Left hand too far right.mp3']


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>