# Yoga Pose Detection

The project focuses on creating a Yoga Pose Detection system. The dataset was created by me and comprises 62 subclasses/poses. To perform this task, we use two models: first ResNet50 and then EfficientNetB0. MobileNet and plain CNNs were also tested initially, but because of their poor accuracy during stage 1 they are not considered in this stage.

## Step 1 : Training with ResNet50

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50  # Using ResNet50
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
import os

# Define paths
train_dir = "/kaggle/input/yoga-pose-62/pose_62/train"
val_dir = "/kaggle/input/yoga-pose-62/pose_62/val"
test_dir = "/kaggle/input/yoga-pose-62/pose_62/test"

# Parameters
img_height = 160  # Reduced size for faster processing
img_width = 160   # Reduced size for faster processing
batch_size = 32   # Reduced batch size
num_classes = 62  # Number of yoga pose classes
epochs = 12       # Reduced number of epochs

# Data Augmentation and Preprocessing
# Only the training split is augmented; every split goes through the
# ResNet50 preprocessing function so inputs match the pretrained backbone.
train_datagen = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.resnet50.preprocess_input,
    rotation_range=20,  # Adjusted augmentation range
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=False,  # Disabled vertical flip
    fill_mode='nearest'
)

val_datagen = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.resnet50.preprocess_input
)

test_datagen = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.resnet50.preprocess_input
)

# Training batches must be shuffled: the directory iterator lists files class
# by class, so an unshuffled iterator feeds batches that each contain a single
# class, which badly destabilises gradient descent.
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=True
)

val_generator = val_datagen.flow_from_directory(
    val_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical'
)

# The test split keeps a fixed order (as in the EfficientNetB0 cells) so that
# batches stay aligned with `test_generator.filepaths`.
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False
)

# Building the Model with ResNet50
# ImageNet backbone without its classifier head; all layers are fine-tuned.
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(img_height, img_width, 3))
base_model.trainable = True

model = models.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(512, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(num_classes, activation='softmax')
])

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Callbacks
# NOTE: these are constructed but intentionally not passed to fit() below,
# so this run always trains for the full `epochs`.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
model_checkpoint = ModelCheckpoint('best_model.keras', save_best_only=True)
tensorboard_callback = TensorBoard(log_dir='./logs')

# Train the Model
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=epochs,
#     callbacks=[early_stopping, model_checkpoint, tensorboard_callback]
)

# Evaluate the Model
test_loss, test_accuracy = model.evaluate(test_generator)
print(f"Test Accuracy: {test_accuracy:.4f}")
print(f"Test Loss: {test_loss:.4f}")

# Save the Model
# model.save('yoga_pose_classifier.keras')


## Step 2: Training with EfficientNetB0

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
import tensorflow as tf
import os

# Dataset locations (Kaggle input mount)
train_dir = "/kaggle/input/yoga-pose-62/pose_62/train"
val_dir = "/kaggle/input/yoga-pose-62/pose_62/val"
test_dir = "/kaggle/input/yoga-pose-62/pose_62/test"

# Hyper-parameters
img_height, img_width = 160, 160
batch_size = 32
num_classes = 62
epochs = 10

# The same EfficientNet preprocessing function is applied to every split.
efficientnet_preprocess = tf.keras.applications.efficientnet.preprocess_input

# Only the training split receives random augmentation.
train_datagen = ImageDataGenerator(
    preprocessing_function=efficientnet_preprocess,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=False,
    fill_mode='nearest',
)
val_datagen = ImageDataGenerator(preprocessing_function=efficientnet_preprocess)
test_datagen = ImageDataGenerator(preprocessing_function=efficientnet_preprocess)

# Options shared by all three directory iterators.
flow_options = {
    'target_size': (img_height, img_width),
    'batch_size': batch_size,
    'class_mode': 'categorical',
}

train_generator = train_datagen.flow_from_directory(train_dir, **flow_options)
val_generator = val_datagen.flow_from_directory(val_dir, **flow_options)
# The test iterator is not shuffled; the prediction cell further down slices
# `test_generator.filepaths` by batch index and depends on that.
test_generator = test_datagen.flow_from_directory(test_dir, shuffle=False, **flow_options)

# ImageNet-pretrained EfficientNetB0 backbone (no top), fully trainable.
base_model = EfficientNetB0(
    weights='imagenet',
    include_top=False,
    input_shape=(img_height, img_width, 3),
)
base_model.trainable = True

# Classification head: pooled features -> 512-unit dense -> dropout -> softmax.
classifier_stack = [
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(512, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(num_classes, activation='softmax'),
]
EfficientNet_model = models.Sequential(classifier_stack)

adam_optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
EfficientNet_model.compile(
    optimizer=adam_optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Callbacks: stop on stalled val_loss, checkpoint the best model, log to TensorBoard.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
model_checkpoint = ModelCheckpoint('best_model.keras', save_best_only=True)
tensorboard_callback = TensorBoard(log_dir='./logs')
training_callbacks = [early_stopping, model_checkpoint, tensorboard_callback]

# Fit on the augmented training stream, validating after every epoch.
history = EfficientNet_model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=epochs,
    callbacks=training_callbacks,
)

# Report held-out performance.
test_loss, test_accuracy = EfficientNet_model.evaluate(test_generator)
print(f"Test Accuracy: {test_accuracy:.4f}")
print(f"Test Loss: {test_loss:.4f}")


In [None]:
# Persist the trained model under the file name that the prediction cell
# later passes to load_model().
EfficientNet_model.save('EfficientNetB0_model.h5')

In [None]:
from IPython.display import FileLink

# Show a link to the saved model file as the cell output.
# NOTE(review): assumes the notebook's working directory is /kaggle/working — confirm.
FileLink('/kaggle/working/EfficientNetB0_model.h5')


## Step 3: Plotting Accuracy and Loss Graphs

Since EfficientNetB0 gave better accuracy, we now visualize its results by plotting accuracy and loss curves that compare the training and validation sets.

In [None]:
# Plot accuracy and loss curves

import matplotlib.pyplot as plt

# One figure, two panels: accuracy on the left, loss on the right.
plt.figure(figsize=(12, 5))

# (train key, validation key, train legend, validation legend, title, y-label)
curve_specs = [
    ('accuracy', 'val_accuracy', 'Train Accuracy', 'Val Accuracy', 'Model Accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Train Loss', 'Val Loss', 'Model Loss', 'Loss'),
]

for panel, (train_key, val_key, train_legend, val_legend, panel_title, y_label) in enumerate(curve_specs, start=1):
    plt.subplot(1, 2, panel)
    plt.plot(history.history[train_key], label=train_legend)
    plt.plot(history.history[val_key], label=val_legend)
    plt.title(panel_title)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.legend()

plt.tight_layout()
plt.show()

In [None]:
# Dump the class-name -> index mapping of the test iterator for a manual sanity check.
print(test_generator.class_indices)  # Check this matches your actual classes

In [None]:
import os
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import load_model
from sklearn.metrics import accuracy_score

# Load the model written by the save cell above.
model = load_model('EfficientNetB0_model.h5')

# Reverse the class-name -> index mapping so indices map back to label names.
labels_map = {index: name for name, index in test_generator.class_indices.items()}

# `filepaths` is in directory order, so slicing it by batch position only
# matches the batch contents when the iterator does not shuffle.
if getattr(test_generator, 'shuffle', False):
    raise ValueError(
        "test_generator must be created with shuffle=False so that image "
        "paths line up with the predicted batches"
    )

# Per-image columns of the results table.
image_paths = []
predicted_labels = []
actual_labels = []
checks = []

# Index the iterator explicitly: it is addressed by batch number and
# len(test_generator) is the number of batches in one pass over the test set.
for batch_index in range(len(test_generator)):
    batch_images, batch_labels = test_generator[batch_index]

    # Paths for exactly the images in this batch (the last batch may be short).
    start = batch_index * test_generator.batch_size
    batch_image_paths = test_generator.filepaths[start:start + len(batch_images)]

    # Class probabilities -> predicted index; one-hot labels -> true index.
    batch_predictions = model.predict(batch_images)
    batch_predicted_names = [labels_map[idx] for idx in np.argmax(batch_predictions, axis=1)]
    batch_actual_names = [labels_map[idx] for idx in np.argmax(batch_labels, axis=1)]

    image_paths.extend(batch_image_paths)
    predicted_labels.extend(batch_predicted_names)
    actual_labels.extend(batch_actual_names)
    # Kept as the strings 'True'/'False' to preserve the existing table format.
    checks.extend(
        'True' if predicted == actual else 'False'
        for predicted, actual in zip(batch_predicted_names, batch_actual_names)
    )

# Create a DataFrame with the collected data
df = pd.DataFrame({
    'Image_Path': image_paths,
    'Predicted_Label': predicted_labels,
    'Actual_Label': actual_labels,
    'Check': checks
})

# Print or save the DataFrame to a CSV file
df.head()  # Show the first few rows
#df.to_csv('prediction_results.csv', index=False)  # Save to CSV file


In [None]:
# Write the per-image prediction table to CSV without the DataFrame index column.
df.to_csv('/kaggle/working/prediction_results.csv', index=False)

In [None]:
# Re-run evaluation of the in-memory EfficientNet model on the test iterator
# and print the two returned metrics (loss, accuracy).
test_loss, test_accuracy = EfficientNet_model.evaluate(test_generator)
print(f"Test Accuracy: {test_accuracy:.4f}")
print(f"Test Loss: {test_loss:.4f}")

# Dataset - 54 Poses

## ResNet50 Model

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50  # Using ResNet50
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
import os

# Define paths
train_dir = "/kaggle/input/yoga-pose-54/pose_54/train"
val_dir = "/kaggle/input/yoga-pose-54/pose_54/val"
test_dir = "/kaggle/input/yoga-pose-54/pose_54/test"

# Parameters
img_height = 160  # Reduced size for faster processing
img_width = 160   # Reduced size for faster processing
batch_size = 32   # Reduced batch size
num_classes = 54  # Number of yoga pose classes
epochs = 21       # Number of training epochs

# Data Augmentation and Preprocessing
# Only the training split is augmented; every split goes through the
# ResNet50 preprocessing function so inputs match the pretrained backbone.
train_datagen = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.resnet50.preprocess_input,
    rotation_range=20,  # Adjusted augmentation range
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=False,  # Disabled vertical flip
    fill_mode='nearest'
)

val_datagen = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.resnet50.preprocess_input
)

test_datagen = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.resnet50.preprocess_input
)

# Training batches must be shuffled: the directory iterator lists files class
# by class, so an unshuffled iterator feeds batches that each contain a single
# class, which badly destabilises gradient descent.
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=True
)

val_generator = val_datagen.flow_from_directory(
    val_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical'
)

# The test split keeps a fixed order (as in the EfficientNetB0 cells) so that
# batches stay aligned with `test_generator.filepaths`.
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False
)

# Building the Model with ResNet50
# ImageNet backbone without its classifier head; all layers are fine-tuned.
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(img_height, img_width, 3))
base_model.trainable = True

ResNet50_Model = models.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(512, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(num_classes, activation='softmax')
])

ResNet50_Model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Callbacks
# NOTE: these are constructed but intentionally not passed to fit() below,
# so this run always trains for the full `epochs`.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
model_checkpoint = ModelCheckpoint('best_model.keras', save_best_only=True)
tensorboard_callback = TensorBoard(log_dir='./logs')

# Train the Model
history = ResNet50_Model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=epochs,
#     callbacks=[early_stopping, model_checkpoint, tensorboard_callback]
)

# Evaluate the Model
test_loss, test_accuracy = ResNet50_Model.evaluate(test_generator)
print(f"Test Accuracy: {test_accuracy:.4f}")
print(f"Test Loss: {test_loss:.4f}")

# Save the Model
# ResNet50_Model.save('yoga_pose_classifier.keras')


## EfficientNetB0

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
import tensorflow as tf
import os

# Dataset locations (Kaggle input mount)
train_dir = "/kaggle/input/yoga-pose-54/pose_54/train"
val_dir = "/kaggle/input/yoga-pose-54/pose_54/val"
test_dir = "/kaggle/input/yoga-pose-54/pose_54/test"

# Hyper-parameters
img_height, img_width = 160, 160
batch_size = 32
num_classes = 54
epochs = 14

# The same EfficientNet preprocessing function is applied to every split.
efficientnet_preprocess = tf.keras.applications.efficientnet.preprocess_input

# Only the training split receives random augmentation.
train_datagen = ImageDataGenerator(
    preprocessing_function=efficientnet_preprocess,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=False,
    fill_mode='nearest',
)
val_datagen = ImageDataGenerator(preprocessing_function=efficientnet_preprocess)
test_datagen = ImageDataGenerator(preprocessing_function=efficientnet_preprocess)

# Options shared by all three directory iterators.
flow_options = {
    'target_size': (img_height, img_width),
    'batch_size': batch_size,
    'class_mode': 'categorical',
}

train_generator = train_datagen.flow_from_directory(train_dir, **flow_options)
val_generator = val_datagen.flow_from_directory(val_dir, **flow_options)
# The test iterator is the only one created with shuffling turned off.
test_generator = test_datagen.flow_from_directory(test_dir, shuffle=False, **flow_options)

# ImageNet-pretrained EfficientNetB0 backbone (no top), fully trainable.
base_model = EfficientNetB0(
    weights='imagenet',
    include_top=False,
    input_shape=(img_height, img_width, 3),
)
base_model.trainable = True

# Classification head: pooled features -> 512-unit dense -> dropout -> softmax.
classifier_stack = [
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(512, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(num_classes, activation='softmax'),
]
EfficientNet_model = models.Sequential(classifier_stack)

adam_optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
EfficientNet_model.compile(
    optimizer=adam_optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Callbacks: stop on stalled val_loss, checkpoint the best model, log to TensorBoard.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
model_checkpoint = ModelCheckpoint('best_model.keras', save_best_only=True)
tensorboard_callback = TensorBoard(log_dir='./logs')
training_callbacks = [early_stopping, model_checkpoint, tensorboard_callback]

# Fit on the augmented training stream, validating after every epoch.
history = EfficientNet_model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=epochs,
    callbacks=training_callbacks,
)

# Report held-out performance.
test_loss, test_accuracy = EfficientNet_model.evaluate(test_generator)
print(f"Test Accuracy: {test_accuracy:.4f}")
print(f"Test Loss: {test_loss:.4f}")


# Accuracy and Loss Curves 

In [None]:
# Plot accuracy and loss curves

import matplotlib.pyplot as plt

# One figure, two panels: accuracy on the left, loss on the right.
plt.figure(figsize=(12, 5))

# (train key, validation key, train legend, validation legend, title, y-label)
curve_specs = [
    ('accuracy', 'val_accuracy', 'Train Accuracy', 'Val Accuracy', 'Model Accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Train Loss', 'Val Loss', 'Model Loss', 'Loss'),
]

for panel, (train_key, val_key, train_legend, val_legend, panel_title, y_label) in enumerate(curve_specs, start=1):
    plt.subplot(1, 2, panel)
    plt.plot(history.history[train_key], label=train_legend)
    plt.plot(history.history[val_key], label=val_legend)
    plt.title(panel_title)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.legend()

plt.tight_layout()
plt.show()

# Image Prediction Tests

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import tensorflow as tf

def predict_and_plot_image(model_path, image_path, target_size, class_indices):
    """Classify one image file and display it with the predicted label.

    Args:
        model_path: Path of the saved Keras model to load.
        image_path: Path of the image to classify and display.
        target_size: Size passed to ``load_img`` when reading the image.
        class_indices: Mapping from class index to class label.

    Returns:
        The label looked up in ``class_indices`` for the highest-scoring class.
    """
    classifier = load_model(model_path)
    print(f"Model loaded from {model_path}")

    # Read at the requested size, apply EfficientNet preprocessing, then add
    # the leading batch axis expected by predict().
    pixels = img_to_array(load_img(image_path, target_size=target_size))
    model_input = np.expand_dims(
        tf.keras.applications.efficientnet.preprocess_input(pixels),
        axis=0,
    )
    print(f"Image loaded and preprocessed from {image_path}")

    scores = classifier.predict(model_input)
    best_index = np.argmax(scores)
    predicted_class_label = class_indices[best_index]
    print("Prediction completed.")

    # Display the image as stored on disk, not the resized model input.
    plt.figure(figsize=(6, 6))
    plt.imshow(plt.imread(image_path))
    plt.title(f"Predicted Pose: {predicted_class_label}")
    plt.axis('off')
    plt.show()

    return predicted_class_label


if __name__ == "__main__":
    # Single-image check of the saved classifier.
    target_size = (160, 160)
    image_path = "Ardha_chandrasana_test.png"
    model_path = "EfficientNet_54_Augmented.h5"

    # The generator maps label -> index; flip it to index -> label.
    name_to_index = train_generator.class_indices
    class_indices = dict(zip(name_to_index.values(), name_to_index.keys()))

    predicted_pose = predict_and_plot_image(model_path, image_path, target_size, class_indices)
    print("Predicted Yoga Pose:", predicted_pose)


In [None]:
if __name__ == "__main__":
    # Single-image check of the saved classifier.
    model_path = "EfficientNet_54_Augmented.h5"
    image_path = "Ardha_chandrasana_test.png"
    target_size = (160, 160)

    # The generator maps label -> index; flip it to index -> label.
    class_indices = {v: k for k, v in train_generator.class_indices.items()}

    # These calls belong inside the guard: they use names that are only
    # assigned when the guard is true.
    predicted_pose = predict_and_plot_image(model_path, image_path, target_size, class_indices)
    print("Predicted Yoga Pose:", predicted_pose)

# Video Tester Code

In [None]:
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import img_to_array
import mediapipe as mp

# Input/output locations for the video tester
train_dir = "path_to_train_dataset"
model_path = "EfficientNet_54_Augmented.h5"
video_path = "Dhanurasana_Tutorial.mp4"  # Change to your video path
output_path = "Dhanurasana_Tutorial_output.mp4"

# Restore the trained classifier
model = tf.keras.models.load_model(model_path)
print(f"Model loaded from {model_path}")

# Class labels: names of the sub-directories of train_dir, sorted alphabetically
pose_classes = sorted(
    entry
    for entry in os.listdir(train_dir)
    if os.path.isdir(os.path.join(train_dir, entry))
)

# Show a 1-based menu and ask which pose the video should be checked against
print("Available Poses:")
for number, pose_name in enumerate(pose_classes, start=1):
    print(f"{number}. {pose_name}")

target_pose = input("Enter the name of the pose you want to try: ")

# MediaPipe pose estimator, configured for a video stream rather than still images
mp_pose = mp.solutions.pose
pose = mp_pose.Pose(
    static_image_mode=False,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5,
)

# Preprocessing function for EfficientNet
def preprocess_image(image, img_height=160, img_width=160):
    """Turn one image array into a single-item batch for the classifier.

    The image is resized to ``(img_width, img_height)``, converted with
    ``img_to_array``, given a leading batch axis and passed through the
    EfficientNet ``preprocess_input`` function. No colour-channel
    reordering happens here; the caller decides the channel order.
    """
    resized = cv2.resize(image, (img_width, img_height))
    batch = np.expand_dims(img_to_array(resized), axis=0)
    return tf.keras.applications.efficientnet.preprocess_input(batch)

# Predict Pose Function
def check_pose(frame, target_pose):
    """Detect a person in a video frame and classify their yoga pose.

    Landmarks found by MediaPipe are drawn onto ``frame`` in place. The
    person's bounding box is taken from the landmark extents and the
    classifier runs on that region.

    Args:
        frame: BGR image as read by OpenCV; modified in place when
            landmarks are found.
        target_pose: Name of the pose the prediction is compared against.

    Returns:
        Tuple ``(is_correct, confidence, predicted_pose)``. When no person
        is found the result is ``(False, 0, "No Pose Detected")``; when the
        bounding box is empty it is ``(False, 0, "No ROI")``.
    """
    # MediaPipe works on RGB; cvtColor returns a new array, so this copy
    # stays free of the overlay drawn on `frame` below.
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = pose.process(frame_rgb)

    if not results.pose_landmarks:
        return False, 0, "No Pose Detected"

    # Draw landmarks on the frame (for the output video only)
    mp.solutions.drawing_utils.draw_landmarks(frame, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)

    # Bounding box from the landmark extents; landmark coordinates are
    # scaled by the frame size and clamped to the frame.
    height, width, _ = frame.shape
    landmarks = results.pose_landmarks.landmark
    xs = [lm.x for lm in landmarks]
    ys = [lm.y for lm in landmarks]

    x_min, y_min = max(0, int(min(xs) * width)), max(0, int(min(ys) * height))
    x_max, y_max = min(width, int(max(xs) * width)), min(height, int(max(ys) * height))

    # Crop from the clean RGB copy, not from `frame`: the classifier was
    # trained on RGB images loaded by Keras, so it should see neither BGR
    # channel order nor the drawn skeleton.
    roi = frame_rgb[y_min:y_max, x_min:x_max]
    if roi.size == 0:
        return False, 0, "No ROI"

    # Preprocess the ROI for model prediction
    processed_image = preprocess_image(roi)
    predictions = model.predict(processed_image)
    predicted_class = np.argmax(predictions)
    predicted_pose = pose_classes[predicted_class]
    confidence = predictions[0][predicted_class]

    is_correct = predicted_pose == target_pose
    return is_correct, confidence, predicted_pose

# Video Processing
# Annotate every frame of the input video with the predicted pose and whether
# it matches the requested target pose, then write the result to output_path.
if target_pose not in pose_classes:
    print("Invalid pose selected.")
else:
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        # Fail loudly instead of writing an empty file and reporting success.
        cap.release()
        raise OSError(f"Could not open video: {video_path}")

    # Video Writer Setup
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    # Keep fractional frame rates (e.g. 29.97) so output timing matches the
    # input; fall back to 30 when the container reports none.
    fps = cap.get(cv2.CAP_PROP_FPS)
    if not fps or fps <= 0:
        fps = 30.0
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))

    print(f"Processing video for pose: {target_pose}")

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # check_pose draws the landmarks onto `frame` in place.
            is_correct, confidence, predicted_pose = check_pose(frame, target_pose)

            frame_display = frame.copy()
            if predicted_pose != "No Pose Detected":
                cv2.putText(frame_display, f"Pose: {predicted_pose}", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)

                # Green text and frame when the pose matches, red text otherwise
                if is_correct:
                    cv2.putText(frame_display, f"Correct Pose - Confidence: {confidence:.2f}", (50, 100), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
                    height, width, _ = frame_display.shape
                    cv2.rectangle(frame_display, (50, 150), (width-50, height-50), (0, 255, 0), 2)
                else:
                    cv2.putText(frame_display, f"Incorrect Pose - Confidence: {confidence:.2f}", (50, 100), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)

            out.write(frame_display)
    finally:
        # Release both handles even if a frame fails, so the partial output
        # is finalised and the input file is unlocked.
        cap.release()
        out.release()

    print(f"Video saved as {output_path}")
