# Dataset

https://www.kaggle.com/datasets/shrutisaxena/yoga-pose-image-classification-dataset

In [11]:
# Mount Google Drive at /content/drive so the dataset paths used below resolve (Colab only).
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
import os
import shutil
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from PIL import Image


def is_image_file(filename):
    """Tell whether ``filename`` ends with one of the accepted image suffixes.

    The check is case-insensitive and looks only at the text of the name;
    the file itself is never opened.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.tiff')
    lowered = filename.lower()
    return lowered.endswith(image_suffixes)

def copy_and_convert_files(indices, psedo_labels, images, classes, destination):
    """Write the selected images as RGBA PNG files into per-class folders.

    Args:
        indices: Iterable of positions into ``images`` / ``psedo_labels``.
        psedo_labels: Sequence giving, for each image position, an index
            into ``classes``.
        images: Sequence of source image paths.
        classes: Sequence of class names, used as sub-folder names.
        destination: Root output folder. Each image is saved as
            ``<destination>/<class name>/<source stem>.png``.

    The per-class folder is created if it does not exist yet. When two
    sources handled by the same call would map to the same output path
    (same stem, different extension), the later one gets the original
    extension appended to its stem instead of overwriting the earlier file.
    Errors raised while opening or saving an image are not caught here.
    """
    prepared_dirs = set()   # class folders already ensured during this call
    written_paths = set()   # output paths already produced during this call

    for idx in tqdm(indices):
        img_path = images[idx]
        class_name = classes[psedo_labels[idx]]
        dest_dir = os.path.join(destination, class_name)

        # Create each class folder once rather than once per image.
        if dest_dir not in prepared_dirs:
            os.makedirs(dest_dir, exist_ok=True)
            prepared_dirs.add(dest_dir)

        stem, ext = os.path.splitext(os.path.basename(img_path))
        png_img_path = os.path.join(dest_dir, stem + '.png')

        # e.g. "1.jpg" and "1.png" would both become "1.png"; keep both.
        if png_img_path in written_paths:
            alt_stem = stem + '_' + ext.lstrip('.').lower()
            png_img_path = os.path.join(dest_dir, alt_stem + '.png')
            counter = 1
            while png_img_path in written_paths:
                png_img_path = os.path.join(dest_dir, '{}_{}.png'.format(alt_stem, counter))
                counter += 1
        written_paths.add(png_img_path)

        with Image.open(img_path) as img:
            converted_img = img.convert('RGBA')
            converted_img.save(png_img_path, 'PNG')

def split_train_and_test(dataset_path, train_path, test_path, test_size=0.2, random_seed=0):
    """Stratified split of a folder-per-class image dataset into two PNG copies.

    Every sub-folder of ``dataset_path`` is treated as one class. Image files
    (as decided by ``is_image_file``) are collected, split with
    ``train_test_split`` stratified on the class index, and written through
    ``copy_and_convert_files`` into ``train_path`` and ``test_path``.

    Args:
        dataset_path: Folder containing one sub-folder per class.
        train_path: Output root for the larger part of the split.
        test_path: Output root for the held-out part of the split.
        test_size: Passed to ``train_test_split`` as ``test_size``.
        random_seed: Passed to ``train_test_split`` as ``random_state``.

    Raises:
        ValueError: If no image file is found under ``dataset_path``.
    """
    # The output folders may live inside dataset_path; they must never be
    # mistaken for classes, e.g. when the function is run a second time.
    output_dirs = {os.path.abspath(train_path), os.path.abspath(test_path)}

    # os.listdir returns entries in arbitrary order, so sort to make the
    # split reproducible for a given seed. Skip anything that is not a folder.
    classes = []
    for entry in sorted(os.listdir(dataset_path)):
        entry_path = os.path.join(dataset_path, entry)
        if not os.path.isdir(entry_path):
            continue
        if os.path.abspath(entry_path) in output_dirs:
            continue
        classes.append(entry)

    images = []
    psedo_labels = []

    for class_idx, class_name in enumerate(classes):
        class_dir = os.path.join(dataset_path, class_name)
        for img_name in sorted(os.listdir(class_dir)):
            if is_image_file(img_name):
                images.append(os.path.join(class_dir, img_name))
                psedo_labels.append(class_idx)

    if not images:
        raise ValueError('No image files found under {!r}'.format(dataset_path))

    os.makedirs(train_path, exist_ok=True)
    os.makedirs(test_path, exist_ok=True)

    # Create every class folder up front so both outputs have the same layout.
    for class_name in classes:
        os.makedirs(os.path.join(train_path, class_name), exist_ok=True)
        os.makedirs(os.path.join(test_path, class_name), exist_ok=True)

    train_idx, test_idx = train_test_split(range(len(images)), test_size=test_size, stratify=psedo_labels, random_state=random_seed)
    copy_and_convert_files(train_idx, psedo_labels, images, classes, train_path)
    copy_and_convert_files(test_idx, psedo_labels, images, classes, test_path)

# Split the dataset into a train set and a test set

In [None]:
# Hold out 20% of the original images as the test set; the remaining 80% go to the train folder.
split_train_and_test(dataset_path='/content/drive/My Drive/dataset/yoga_pose/original',
                     train_path='/content/drive/My Drive/dataset/yoga_pose/train',
                     test_path='/content/drive/My Drive/dataset/yoga_pose/test',
                     test_size=0.2, random_seed=0)

# Split the training set into a train set and a validation set

In [6]:
# Split the first-stage train folder again: 80% to train/train, 20% to train/validation.
# NOTE: both output folders are written inside the input folder that is being split.
split_train_and_test(dataset_path='/content/drive/My Drive/dataset/yoga_pose/train',
                     train_path='/content/drive/My Drive/dataset/yoga_pose/train/train',
                     test_path='/content/drive/My Drive/dataset/yoga_pose/train/validation',
                     test_size=0.2, random_seed=0)

100%|██████████| 3835/3835 [28:30<00:00,  2.24it/s]
100%|██████████| 959/959 [06:50<00:00,  2.34it/s]


In [13]:
# Zip the contents of train/train into yoga_pose/train.zip.
shutil.make_archive('/content/drive/My Drive/dataset/yoga_pose/train', 'zip', '/content/drive/My Drive/dataset/yoga_pose/train/train')

'/content/drive/My Drive/dataset/yoga_pose/train.zip'

In [14]:
# Zip the contents of train/validation (not train/train) into yoga_pose/validation.zip.
shutil.make_archive('/content/drive/My Drive/dataset/yoga_pose/validation', 'zip', '/content/drive/My Drive/dataset/yoga_pose/train/validation')

'/content/drive/My Drive/dataset/yoga_pose/validation.zip'

In [15]:
# Zip the contents of the test folder into yoga_pose/test.zip.
shutil.make_archive('/content/drive/My Drive/dataset/yoga_pose/test', 'zip', '/content/drive/My Drive/dataset/yoga_pose/test')

'/content/drive/My Drive/dataset/yoga_pose/test.zip'