# 2.0 Data Augmentation & Feature Extraction

In [18]:
import os
from shutil import copyfile
import pandas as pd
import cv2
from PIL import Image

import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

from helpers.file_system_utils import *
from helpers.image_utils import *

## 2.1 Training Dataset

### 2.1.1 Image Mirroring

In [14]:
def is_image_file(filename):
    """Tell whether ``filename`` carries one of the supported image extensions.

    Only the name is inspected (case-insensitively); the file is never opened.

    Args:
        filename: File name or path as a string.

    Returns:
        True if the lower-cased name ends with .png, .jpg, .jpeg, .bmp, .gif
        or .tiff, otherwise False.
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.tiff')
    lowered_name = filename.lower()
    return lowered_name.endswith(image_extensions)

def flip_image_horizontally(image_path, save_path):
    """Write a left-right mirrored copy of an image.

    Args:
        image_path: Path of the image to read.
        save_path: Path the mirrored image is written to.

    Returns:
        None. The mirrored image is saved to ``save_path``.
    """
    # Open the source inside a context manager so its file handle is released
    # as soon as the mirrored copy exists; this function is called once per
    # file over whole directories, where unclosed handles would accumulate.
    with Image.open(image_path) as img:
        # transpose() returns a new image, so it stays usable after the
        # source is closed.
        flipped_img = img.transpose(Image.FLIP_LEFT_RIGHT)
    flipped_img.save(save_path)

def pose_image_mirror(root_dir):
    """Collect left/right pose images into ``root_dir`` and mirror each one.

    Step 1: every image file found in ``<root_dir>/lft`` and ``<root_dir>/rgt``
    is copied into ``root_dir`` as ``<base>-lft.png`` / ``<base>-rgt.png``,
    where ``<base>`` is the original file name without its extension.

    Step 2: every ``*-lft.png`` / ``*-rgt.png`` file then present in
    ``root_dir`` is flipped horizontally and written under the opposite
    suffix. Any other directory entry is ignored.

    Args:
        root_dir: Directory containing the ``lft`` and ``rgt`` sub-directories.
            A trailing path separator is optional.

    Returns:
        None. Progress counts are printed.
    """
    lft_suffix = '-lft.png'
    rgt_suffix = '-rgt.png'

    # --- Step 1: copy the side-specific images next to each other -----------
    copy_count = 0
    for side_dir, suffix in (('lft', lft_suffix), ('rgt', rgt_suffix)):
        # os.path.join works whether or not root_dir ends with a separator.
        dir_path = os.path.join(root_dir, side_dir)
        for filename in os.listdir(dir_path):
            if not is_image_file(filename):
                continue
            base_filename = os.path.splitext(filename)[0]
            source_path = os.path.join(dir_path, filename)
            save_path = os.path.join(root_dir, base_filename + suffix)
            # NOTE(review): this is a byte copy, so a non-PNG source keeps its
            # original encoding under a .png name -- confirm this is intended.
            copyfile(source_path, save_path)
            copy_count += 1
    print(f"Successfully copied {copy_count} images")

    # --- Step 2: mirror every suffixed image to the opposite side -----------
    # NOTE(review): if the same base name exists in both lft/ and rgt/, the
    # mirrored output overwrites the copy made in step 1 -- confirm the two
    # folders never share base names.
    mirrored_suffix_for = {lft_suffix: rgt_suffix, rgt_suffix: lft_suffix}
    flipped_horizontal_count = 0
    # os.listdir returns a list, so files created below are not revisited.
    for filename in os.listdir(root_dir):
        lowered_name = filename.lower()
        for suffix, mirrored_suffix in mirrored_suffix_for.items():
            if not lowered_name.endswith(suffix):
                continue
            # Strip the suffix by length: the match above is case-insensitive,
            # and only the trailing occurrence must be removed.
            base_filename = filename[:-len(suffix)]
            source_path = os.path.join(root_dir, filename)
            flipped_path = os.path.join(root_dir, base_filename + mirrored_suffix)
            flip_image_horizontally(source_path, flipped_path)
            flipped_horizontal_count += 1
            break

    print(f"Successfully flipped horizontally {flipped_horizontal_count} images")

### 2.1.2 Mediapipe Human Pose Estimation

In [15]:
def initialize_landmarker(model_path):
    """Build a MediaPipe pose landmarker from a model file.

    Args:
        model_path: Path passed to ``BaseOptions`` as ``model_asset_path``.

    Returns:
        The object produced by ``vision.PoseLandmarker.create_from_options``,
        configured with ``output_segmentation_masks=True``.
    """
    landmarker_options = vision.PoseLandmarkerOptions(
        base_options=python.BaseOptions(model_asset_path=model_path),
        output_segmentation_masks=True,
    )
    return vision.PoseLandmarker.create_from_options(landmarker_options)

def populate_pose_data_with_landmarks(pose_info, landmarks):
    """Flatten a sequence of landmarks into ``pose_info`` in place.

    For the landmark at position ``i`` (zero-padded to two digits) four keys
    are written: ``landmark_<i>_x``, ``_y``, ``_z`` and ``_v``, taken from the
    landmark's ``x``, ``y``, ``z`` and ``visibility`` attributes.

    Args:
        pose_info: Dictionary that receives the landmark entries (mutated).
        landmarks: Iterable of objects exposing ``x``, ``y``, ``z`` and
            ``visibility``.

    Returns:
        None.
    """
    # (key suffix, landmark attribute) pairs, in the order the keys are written.
    fields = (('x', 'x'), ('y', 'y'), ('z', 'z'), ('v', 'visibility'))
    for position, point in enumerate(landmarks):
        key_prefix = f'landmark_{position:02d}'
        for key_suffix, attribute in fields:
            pose_info[f'{key_prefix}_{key_suffix}'] = getattr(point, attribute)
        
def generate_pose_landmark_dictionary(source_dir, model_path, is_video=False):
    """Run pose estimation over the images in ``source_dir`` and save the results.

    The images returned by ``get_image_filenames(source_dir)`` are processed
    with a freshly created landmarker. The collected rows are written to
    ``<source_dir>/pose_data_raw.csv``, the paths collected as errors are
    handed to ``write_error_log``, and the annotated-image directory is printed.

    Args:
        source_dir: Directory holding the input images; also receives the CSV.
        model_path: Path of the pose-landmarker model file.
        is_video: When true, rows are built by ``batch_process_video_images``
            (second / frame number columns); otherwise by
            ``batch_process_static_images`` (pose name column).

    Returns:
        None.
    """
    annotated_dir = create_annotated_directory(source_dir)
    filenames = get_image_filenames(source_dir)

    process_batch = batch_process_video_images if is_video else batch_process_static_images

    # A new landmarker is created on every call. Using it as a context manager
    # closes it when processing ends (or fails) instead of leaving one open
    # landmarker behind per processed directory.
    with initialize_landmarker(model_path) as landmarker:
        pose_data, errors = process_batch(annotated_dir, filenames, landmarker)

    pose_data_df = pd.DataFrame(pose_data)
    pose_data_df.to_csv(os.path.join(source_dir, 'pose_data_raw.csv'), index=False)

    write_error_log(source_dir, errors)

    print(annotated_dir)

def batch_process_video_images(annotated_dir, filenames, landmarker):
    """Extract pose landmarks from frames that were decomposed from a video.

    Frames are processed in sorted path order. For each frame with at least
    one detected pose, an annotated copy is saved (scale 0.4) and one row per
    detected pose is collected.

    Args:
        annotated_dir: Directory passed to ``annotate_and_save_image``.
        filenames: Iterable of frame image paths.
        landmarker: Object exposing ``detect(mp_image)`` whose result has a
            ``pose_landmarks`` sequence.

    Returns:
        Tuple ``(pose_data, errors)``: ``pose_data`` is a list of dicts with
        ``image_filename``, ``secs``, ``frame_no`` and the flattened landmark
        values; ``errors`` lists the paths that could not be read or yielded
        no pose.
    """
    pose_data = []
    errors = []

    for image_file_path in sorted(filenames):
        image_bgr = cv2.imread(image_file_path, cv2.IMREAD_COLOR)
        if image_bgr is None:
            # cv2.imread returns None for a missing/unreadable file; record it
            # and keep going rather than failing the whole batch in cvtColor.
            errors.append(image_file_path)
            continue

        image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=image_rgb)

        image_filename, second, frame_no = parse_video_filename(image_file_path)
        detection_result = landmarker.detect(mp_image)

        if not detection_result.pose_landmarks:
            errors.append(image_file_path)
            continue

        annotate_and_save_image(annotated_dir, image_filename, detection_result, image_rgb, scale=0.4)

        for landmarks in detection_result.pose_landmarks:
            pose_info = {
                'image_filename': image_filename,
                'secs': second,
                'frame_no': frame_no
            }
            populate_pose_data_with_landmarks(pose_info, landmarks)
            # Append inside the loop so every detected pose is kept, not only
            # the last one of the frame.
            pose_data.append(pose_info)

    return pose_data, errors

def batch_process_static_images(annotated_dir, filenames, landmarker):
    """Extract pose landmarks from standalone images.

    Images are processed in sorted path order. For each image with at least
    one detected pose, an annotated copy is saved (scale 1) and one row per
    detected pose is collected. The pose name is the part of the file name
    before its first ``.``.

    Args:
        annotated_dir: Directory passed to ``annotate_and_save_image``.
        filenames: Iterable of image paths.
        landmarker: Object exposing ``detect(mp_image)`` whose result has a
            ``pose_landmarks`` sequence.

    Returns:
        Tuple ``(pose_data, errors)``: ``pose_data`` is a list of dicts with
        ``image_filename``, ``pose_name`` and the flattened landmark values;
        ``errors`` lists the paths that could not be read or yielded no pose.
    """
    pose_data = []
    errors = []

    for image_file_path in sorted(filenames):
        image_bgr = cv2.imread(image_file_path, cv2.IMREAD_COLOR)
        if image_bgr is None:
            # cv2.imread returns None for a missing/unreadable file; record it
            # and keep going rather than failing the whole batch in cvtColor.
            errors.append(image_file_path)
            continue

        image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=image_rgb)

        image_filename = os.path.basename(image_file_path)
        pose_name = image_filename.split('.')[0]

        detection_result = landmarker.detect(mp_image)

        if not detection_result.pose_landmarks:
            errors.append(image_file_path)
            continue

        annotate_and_save_image(annotated_dir, image_filename, detection_result, image_rgb, scale=1)

        for landmarks in detection_result.pose_landmarks:
            pose_info = {
                'image_filename': image_filename,
                'pose_name': pose_name
            }
            populate_pose_data_with_landmarks(pose_info, landmarks)
            # Append inside the loop so every detected pose is kept, not only
            # the last one of the image.
            pose_data.append(pose_info)

    return pose_data, errors

**Load Model**

In [16]:
# Pose-landmarker model file; passed as `model_path` to
# generate_pose_landmark_dictionary in the cells below.
model_path = 'models/hpe/pose_landmarker.task'

**Implementation for Benchmark Data (Static Images)**

In [17]:
# Extract landmarks for the static training images; the CSV and the annotated
# copies are written under this same directory.
dict_source_dir = 'data/training/'
generate_pose_landmark_dictionary(
    source_dir=dict_source_dir,
    model_path=model_path,
    is_video=False,
)

data/training/annotated


### 2.1.3 Pose Reconstruction using Rotation Matrix

See notebook (2-1-3-augmentation-rotation-matrix.ipynb) for details.

## 2.2 Test Dataset

### 2.2.1 Decompose Video to Image Frames

In [5]:
from helpers.video_utils import *

def decompose_video_to_frames(input_video_path, output_dir):
    """Split a video into image frames written under ``output_dir``.

    Args:
        input_video_path: Path of the video to decompose.
        output_dir: Directory handed to ``process_video_images`` for the frames.

    Returns:
        False when ``is_video_openable`` rejects the video (nothing is
        processed); True once the frames were processed and the count printed.
    """
    if is_video_openable(input_video_path):
        # Only the first of the three reported video properties is needed here.
        frame_rate, _, _ = get_video_properties(input_video_path)
        processed_total = process_video_images(input_video_path, output_dir, frame_rate)
        print(f"Processed {processed_total} frames from the video.")
        return True
    return False

In [7]:
# Decompose the three superman clips into frames.
# NOTE(review): these inputs live directly under data/test/superman/, while the
# outputs go to data/test/processed/superman/ -- confirm this input location is
# still the current one.
for clip_id in ('01', '02', '03'):
    input_video_path = f'data/test/superman/{clip_id}.mp4'
    output_dir = f'data/test/processed/superman/{clip_id}/'
    # decompose_video_to_frames returns False without printing anything when
    # the video cannot be opened, so surface that case explicitly.
    if not decompose_video_to_frames(input_video_path, output_dir):
        print(f"Could not open video: {input_video_path}")

Processed 247 frames from the video.
Processed 133 frames from the video.
Processed 96 frames from the video.


True

In [20]:
# Decompose every raw test clip (3 poses x 3 clips) into frames, in the same
# order as before: superman, crucifix, fireman; clips 01-03 within each pose.
for video_pose in ('superman', 'crucifix', 'fireman'):
    for clip_id in ('01', '02', '03'):
        input_video_path = f'data/test/raw_videos/{video_pose}/{clip_id}.mp4'
        output_dir = f'data/test/processed/{video_pose}/{clip_id}/'
        # decompose_video_to_frames returns False without printing anything
        # when the video cannot be opened, so surface that case explicitly.
        if not decompose_video_to_frames(input_video_path, output_dir):
            print(f"Could not open video: {input_video_path}")


Processed 247 frames from the video.
Processed 133 frames from the video.
Processed 96 frames from the video.
Processed 849 frames from the video.
Processed 342 frames from the video.
Processed 143 frames from the video.
Processed 96 frames from the video.
Processed 185 frames from the video.
Processed 123 frames from the video.


True

### 2.2.2 Mediapipe Human Pose Estimation (for Videos)

Body landmark features are extracted the same way as for the training data, but the descriptive columns differ: each row records the frame's second (`secs`) and frame number (`frame_no`) instead of a pose name.

In [21]:
# Extract pose landmarks from the frames of every processed test clip
# (3 poses x 3 clips), in the same order as before.
for video_pose in ('superman', 'crucifix', 'fireman'):
    for clip_id in ('01', '02', '03'):
        output_dir = f'data/test/processed/{video_pose}/{clip_id}/'
        generate_pose_landmark_dictionary(
            source_dir=output_dir,
            model_path=model_path,
            is_video=True,
        )



data/test/processed/superman/01/annotated
data/test/processed/superman/02/annotated
data/test/processed/superman/03/annotated
data/test/processed/crucifix/01/annotated
data/test/processed/crucifix/02/annotated
data/test/processed/crucifix/03/annotated
data/test/processed/fireman/01/annotated
data/test/processed/fireman/02/annotated
data/test/processed/fireman/03/annotated
