In [28]:
import numpy as np
import os
from pathlib import Path
import mediapipe as mp
import cv2
import random
from PIL import Image
from IPython.display import display
from typing import Tuple
import csv
import pickle

current_working_dir = os.getcwd()
GENERAL_DATA_PATH = Path(current_working_dir) / 'data'
DATA_PATH = GENERAL_DATA_PATH

with open(DATA_PATH / 'keypoints_names.pkl', 'rb') as file:
    KEYPOINTS_NAMES = pickle.load(file)

In [103]:
def get_image_numpy(image_file_name : str) -> np.ndarray:
    return np.array(Image.open(DATA_PATH / 'train' / image_file_name))

def get_random_image_numpy() -> np.ndarray:
    images = os.listdir(DATA_PATH / 'train')
    random_image_file_name = random.choice(images)
    return get_image_numpy(random_image_file_name)

class PoseExtractor():
    keypoints_names = KEYPOINTS_NAMES.copy()
    extraction_output_len = 132
    
    def __init__(
            self,
            source_data_path : str,
            destination_data_path : str,
            model_complexity : int,
            min_detection_confidence : float = 0.5
            ) -> None:
        self.source_data_path = Path(source_data_path)
        self.destination_data_path = Path(destination_data_path)
        self.pose = mp.solutions.pose.Pose(
            static_image_mode=True,
            model_complexity=model_complexity,
            smooth_landmarks=True,
            enable_segmentation=False,
            smooth_segmentation=False,
            min_detection_confidence=min_detection_confidence,
            min_tracking_confidence=0.5
        )
    
    @property
    def columns_names(self) -> np.ndarray:
        columns_names = []
        
        for keypoint_name in self.keypoints_names:
            columns_names.extend([f'{keypoint_name}_x', f'{keypoint_name}_y', f'{keypoint_name}_z', f'{keypoint_name}_visibility'])

        return columns_names

    def load_image_as_ndarray(
            self,
            image_file_name : str,
            train : bool
            ) -> np.ndarray:
        if train:
            dataset_type = 'train'
        else:
            dataset_type = 'test'

        return np.array(Image.open(self.source_data_path / dataset_type / image_file_name))

    def extract_pose(
            self,
            image: np.ndarray
            ) -> Tuple:
        extraction_res = self.pose.process(image).pose_landmarks
        
        if extraction_res is None:
            return [None for _ in range(4 * self.extraction_output_len)]

        return list(np.hstack([np.array([landmark.x, landmark.y, landmark.z, landmark.visibility], dtype=np.float64) for landmark in extraction_res.landmark]))
    
    def extract_poses_and_write_to_csv(
            self,
            save_file_name : str,
            sample : bool,
            n_samples : int,
            train : bool = True
            ) -> None:
        if train:
            images = os.listdir(self.source_data_path / 'train')
        else:
            images = os.listdir(self.source_data_path / 'test')
        
        if sample:
            images = random.sample(images, n_samples)
        
        with open(self.destination_data_path / save_file_name, 'w') as write_file:
            writer = csv.writer(write_file, delimiter=';')
            writer.writerow(self.columns_names)
            
            data_rows = []
            
            for image_filename in images:
                image = self.load_image_as_ndarray(image_filename, train)
                data_rows.append(self.extract_pose(image))
            
            writer.writerows(data_rows)   
            return data_rows

In [104]:
pose_extractor = PoseExtractor(
    source_data_path=DATA_PATH,
    destination_data_path=GENERAL_DATA_PATH,
    model_complexity=1
)

# image = get_image_numpy('01300.jpg')
# landmarks = pose_extractor.extract_pose(image)
# print(landmarks.shape, len(pose_extractor.columns_names[0]))
# for landmark in landmarks:
#     print(landmark)
# print(pose_extractor.columns_names)

In [105]:
data_rows = pose_extractor.extract_poses_and_write_to_csv(
    sample=True,
    n_samples=1,
    train=True,
    save_file_name='train-poses.csv',
    )

In [99]:
import csv
import numpy as np

# Example data to write to CSV file
data = [
    list(*np.random.rand(1, 3)),
    list(*np.random.rand(1, 3)),
    list(*np.random.rand(1, 3))
]

# Specify the file path
file_path = 'try-out.csv'

# Open the file in write mode
with open(file_path, 'w', newline='') as csvfile:
    # Create a CSV writer object
    csvwriter = csv.writer(csvfile)

    # Write multiple rows to the CSV file
    csvwriter.writerows(data)

print("Data has been written to", file_path)


Data has been written to try-out.csv


In [None]:
import os
import cv2
import mediapipe as mp
import numpy as np
import torch
from torchvision import transforms
from torch.utils.data import Dataset
from PIL import Image
from typing import Tuple, List, Union

class CustomDataset(Dataset):
    def __init__(self, data_dir: str):
        self.data_dir = data_dir
        self.image_files = os.listdir(data_dir)
        self.transform = transforms.Compose([
            transforms.ToTensor(),
        ])

    def get_label(self, img_name: str) -> int:
        # Extract the label from the image name (assuming it's the first digit)
        label = int(img_name[0])  # Convert the first character to an integer
        return label

    def __len__(self) -> int:
        return len(self.image_files)

    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, int]:
        img_name = os.path.join(self.data_dir, self.image_files[idx])
        image = cv2.imread(img_name)  # Read image using OpenCV
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB

        tensor_image = self.transform(image)
        label = self.get_label(self.image_files[idx])

        # Save the preprocessed image to the "NewTrain" folder
        save_path = os.path.join("NewTrain", f"preprocessed_{self.image_files[idx]}")
        if tensor_image.shape[0] == 1:
            # If it's a single-channel image, convert it to three channels before saving
            tensor_image_rgb = torch.cat([tensor_image] * 3, dim=0)
            cv2.imwrite(save_path, cv2.cvtColor(tensor_image_rgb.numpy().transpose(1, 2, 0), cv2.COLOR_RGB2BGR))
        else:
            # If it's already a three-channel image, save as is
            cv2.imwrite(save_path, cv2.cvtColor(tensor_image.numpy().transpose(1, 2, 0), cv2.COLOR_RGB2BGR))


        return tensor_image, label

# Example usage:
data_directory = "Train"
custom_dataset = CustomDataset(data_directory)

# Example loading one image
sample_image, label = custom_dataset[2]
file = os.listdir(data_directory)
print(file[2])
print(sample_image.shape)  # Check the shape (should be torch.Size([1, 224, 224]))
print("Label:", label)
print(sample_image)

min_value = torch.min(sample_image)
max_value = torch.max(sample_image)

print("Minimum value:", min_value.item())
print("Maximum value:", max_value.item())

# Convert the tensor to a NumPy array
numpy_image = sample_image.squeeze().numpy()

# Display the image
plt.imshow(numpy_image, cmap='gray')  # Use 'gray' colormap for single-channel images
plt.title("Image Title")
plt.show()

In [25]:
image_np = get_random_image_numpy()

pose = mp.solutions.pose.Pose(
            num_poses=1,
            min_detection_confidence=0.5
            )

results = pose.process(image_np)#cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

keypoints = results.pose_landmarks.landmark  # List of normalized keypoints (x, y, z)

feature_vector = np.array([kp.x for kp in keypoints] + [kp.y for kp in keypoints] + [kp.z for kp in keypoints])

TypeError: Pose.__init__() got an unexpected keyword argument 'num_poses'

In [14]:
keypoints[0]

x: 0.2824716
y: 0.43324316
z: 0.058916435
visibility: 0.9987006

In [10]:
len(feature_vector)

99