In [65]:
KEYPOINTS_NAMES = [
    "0 - nose",
    "1 - left eye (inner)",
    "2 - left eye",
    "3 - left eye (outer)",
    "4 - right eye (inner)",
    "5 - right eye",
    "6 - right eye (outer)",
    "7 - left ear",
    "8 - right ear",
    "9 - mouth (left)",
    "10 - mouth (right)",
    "11 - left shoulder",
    "12 - right shoulder",
    "13 - left elbow",
    "14 - right elbow",
    "15 - left wrist",
    "16 - right wrist",
    "17 - left pinky",
    "18 - right pinky",
    "19 - left index",
    "20 - right index",
    "21 - left thumb",
    "22 - right thumb",
    "23 - left hip",
    "24 - right hip",
    "25 - left knee",
    "26 - right knee",
    "27 - left ankle",
    "28 - right ankle",
    "29 - left heel",
    "30 - right heel",
    "31 - left foot index",
    "32 - right foot index"
]

In [66]:
import numpy as np
import os
from pathlib import Path
import mediapipe as mp
import cv2
import random
from PIL import Image
from IPython.display import display
from typing import Tuple

current_working_dir = os.getcwd()
GENERAL_DATA_PATH = Path(current_working_dir) / 'data'
DATA_PATH = GENERAL_DATA_PATH / 'yogaposes-aii22-challenge'

In [72]:
def get_image_numpy(image_file_name : str) -> np.ndarray:
    return np.array(Image.open(DATA_PATH / 'Train' / image_file_name))

def get_random_image_numpy() -> np.ndarray:
    images = os.listdir(DATA_PATH / 'Train')
    random_image_file_name = random.choice(images)
    return get_image_numpy(random_image_file_name)

class PoseExtractor():
    keypoints_names = KEYPOINTS_NAMES
    def __init__(self,
                 source_data_path : str,
                 destination_data_path : str,
                 save_file_name : str,
                 model_complexity : int,
                 n_samples : int
                 ):
        self.n_samples = n_samples
        self.source_data_path = source_data_path
        self.destination_data_path = destination_data_path
        self.save_file_name = save_file_name
        self.pose = mp.solutions.pose.Pose(
            static_image_mode=True,
            model_complexity=model_complexity,
            smooth_landmarks=True,
            enable_segmentation=False,
            smooth_segmentation=False,
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5
        )
    
    def extract_pose(self,
                     image: np.ndarray
                     ) -> Tuple:
        return np.hstack([np.array([landmark.x, landmark.y, landmark.z, landmark.visibility], dtype=np.float64) for landmark in self.pose.process(image).pose_landmarks.landmark])
    
    def extract_poses(self):
        for img in os.listdir(self.source_data_path):
            
            

In [76]:
pose_extractor = PoseExtractor(
    DATA_PATH,
    GENERAL_DATA_PATH,
    'train-poses.csv',
    1
)

image = get_image_numpy('01300.jpg')
landmarks = pose_extractor.extract_pose(image)
landmarks.shape
# for landmark in landmarks:
#     print(landmark)

(132,)

In [None]:
import os
import cv2
import mediapipe as mp
import numpy as np
import torch
from torchvision import transforms
from torch.utils.data import Dataset
from PIL import Image
from typing import Tuple, List, Union

class CustomDataset(Dataset):
    def __init__(self, data_dir: str):
        self.data_dir = data_dir
        self.image_files = os.listdir(data_dir)
        self.transform = transforms.Compose([
            transforms.ToTensor(),
        ])

    def get_label(self, img_name: str) -> int:
        # Extract the label from the image name (assuming it's the first digit)
        label = int(img_name[0])  # Convert the first character to an integer
        return label

    def __len__(self) -> int:
        return len(self.image_files)

    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, int]:
        img_name = os.path.join(self.data_dir, self.image_files[idx])
        image = cv2.imread(img_name)  # Read image using OpenCV
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB

        tensor_image = self.transform(image)
        label = self.get_label(self.image_files[idx])

        # Save the preprocessed image to the "NewTrain" folder
        save_path = os.path.join("NewTrain", f"preprocessed_{self.image_files[idx]}")
        if tensor_image.shape[0] == 1:
            # If it's a single-channel image, convert it to three channels before saving
            tensor_image_rgb = torch.cat([tensor_image] * 3, dim=0)
            cv2.imwrite(save_path, cv2.cvtColor(tensor_image_rgb.numpy().transpose(1, 2, 0), cv2.COLOR_RGB2BGR))
        else:
            # If it's already a three-channel image, save as is
            cv2.imwrite(save_path, cv2.cvtColor(tensor_image.numpy().transpose(1, 2, 0), cv2.COLOR_RGB2BGR))


        return tensor_image, label

# Example usage:
data_directory = "Train"
custom_dataset = CustomDataset(data_directory)

# Example loading one image
sample_image, label = custom_dataset[2]
file = os.listdir(data_directory)
print(file[2])
print(sample_image.shape)  # Check the shape (should be torch.Size([1, 224, 224]))
print("Label:", label)
print(sample_image)

min_value = torch.min(sample_image)
max_value = torch.max(sample_image)

print("Minimum value:", min_value.item())
print("Maximum value:", max_value.item())

# Convert the tensor to a NumPy array
numpy_image = sample_image.squeeze().numpy()

# Display the image
plt.imshow(numpy_image, cmap='gray')  # Use 'gray' colormap for single-channel images
plt.title("Image Title")
plt.show()

In [25]:
image_np = get_random_image_numpy()

pose = mp.solutions.pose.Pose(
            num_poses=1,
            min_detection_confidence=0.5
            )

results = pose.process(image_np)#cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

keypoints = results.pose_landmarks.landmark  # List of normalized keypoints (x, y, z)

feature_vector = np.array([kp.x for kp in keypoints] + [kp.y for kp in keypoints] + [kp.z for kp in keypoints])

TypeError: Pose.__init__() got an unexpected keyword argument 'num_poses'

In [14]:
keypoints[0]

x: 0.2824716
y: 0.43324316
z: 0.058916435
visibility: 0.9987006

In [10]:
len(feature_vector)

99