In [2]:
import cv2
import numpy as np
import tensorflow as tf
import time

# Define the input dimensions for each model
model_dims = {
    'model1.h5': (299, 299),
    'model2.h5': (224, 224),
    'model3.h5': (224, 224),
    'model4.h5': (224, 224),
    'model5.h5': (224, 224)
}

# Load the five saved models
model1 = tf.keras.models.load_model(r'd:\Capstone\koi bhi\Top_5_Models\pose_classification_model_xception.h5')
model2 = tf.keras.models.load_model(r'd:\Capstone\koi bhi\Top_5_Models\pose_classification_model_vgg16.h5')
model3 = tf.keras.models.load_model(r'd:\Capstone\koi bhi\Top_5_Models\pose_classification_model_mobilenetv2.h5')
model4 = tf.keras.models.load_model(r'd:\Capstone\koi bhi\Top_5_Models\pose_classification_model_cnn.h5')
model5 = tf.keras.models.load_model(r'd:\Capstone\koi bhi\Top_5_Models\pose_classification_model_resnet50.h5')

# Function to preprocess the frame for prediction
def preprocess_frame(frame, img_width, img_height):
    resized_frame = cv2.resize(frame, (img_width, img_height))
    preprocessed_frame = np.expand_dims(resized_frame, axis=0)
    preprocessed_frame = preprocessed_frame / 255.0
    return preprocessed_frame

# Open the video file
video_path = (r'd:\Capstone\Yoga Capstone\videos1\correct\WhatsApp Video 2023-08-12 at 13.04.58.mp4') 
# video_path = (r'c:\Users\Lenovo\Desktop\Yoga Capstone\WhatsApp Video 2023-08-26 at 01.45.04.mp4')
# video_path = r'C:\Users\Lenovo\Desktop\Yoga Capstone\videos1\incorrect\hands not joined\icb.mp4'  # Replace with the path to your video file
cap = cv2.VideoCapture(video_path)
start_time = time.time()
# Variables to keep track of ensemble results and counters
total_frames = 0
ensemble_predictions = []
correct_frames = 0
partially_correct_frames = 0
incorrect_frames = 0
none_frames = 0

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Determine the input dimensions for the current model
    current_model_dims = model_dims['model1.h5']  # Default to the dimensions of model1
    current_model = model1  # Default to model1

    # Use the dimensions of the current model being processed
    if total_frames < len(model_dims):
        current_model_dims = model_dims[list(model_dims.keys())[total_frames]]
        current_model = [model1, model2, model3, model4, model5][total_frames]

    # Preprocess the frame for prediction using the appropriate dimensions
    preprocessed_frame = preprocess_frame(frame, *current_model_dims)

    # Make predictions on the frame using the current model
    predictions = current_model.predict(preprocessed_frame)

    # Get the predicted class index directly (assuming it's an integer)
    predicted_class_index = np.argmax(predictions)

    ensemble_predictions.append(predicted_class_index)

    # Update counters based on the predicted class
    total_frames += 1
    if predicted_class_index == 0:  # Assuming "Correct" class is represented by 0
        correct_frames += 1
    elif predicted_class_index == 3:  # Assuming "Partially Correct" class is represented by 3
        partially_correct_frames += 1
    elif predicted_class_index == 1:  # Assuming "Incorrect" class is represented by 1
        incorrect_frames += 1
    elif predicted_class_index == 2:  # Assuming "None" class is represented by 2
        none_frames += 1

    # Get the class label as a string
    class_labels = ["Correct", "Incorrect", "None", "Partially Correct"]
    predicted_class_label = class_labels[predicted_class_index]

    # Display the predicted class on the frame
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 1
    font_thickness = 2
    text_size = cv2.getTextSize(predicted_class_label, font, font_scale, font_thickness)[0]
    text_position = (10, 30)
    text_background_position = (10, 10)
    text_background_end_position = (10 + text_size[0] + 10, 10 + text_size[1] + 10)
    cv2.rectangle(frame, text_background_position, text_background_end_position, (0, 0, 255), cv2.FILLED)
    cv2.putText(frame, predicted_class_label, text_position, font, font_scale, (255, 255, 255), font_thickness)

    # Display the frame
    cv2.imshow('Ensemble Pose Classification', frame)

    # Exit the loop when 'q' key is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Calculate the scores based on ensemble predictions
step = 100 / total_frames
score1 = step * correct_frames
score2 = step / 2 * partially_correct_frames
total_score = score1 + score2
end_time = time.time()

# Calculate the time taken to process the video
processing_time = end_time - start_time
print(f"Total Frames: {total_frames}")
print(f"Correct Frames: {correct_frames}")
print(f"Partially Correct Frames: {partially_correct_frames}")
print(f"Incorrect Frames: {incorrect_frames}")
print(f"None Frames: {none_frames}")
print(f"Total Score: {total_score:.2f}")
print(f"Time taken to process the video: {processing_time:.2f} seconds")
# Release the video capture and close the display window
cap.release()
cv2.destroyAllWindows()









Total Frames: 685
Correct Frames: 48
Partially Correct Frames: 27
Incorrect Frames: 600
None Frames: 10
Total Score: 8.98
Time taken to process the video: 190.85 seconds
