In [None]:
import os
import shutil
import random
from glob import glob
from tqdm import tqdm

To train on custom data, we need to organize the files in a specific format, and then we can run:  

python train.py --img 96 --batch 16 --epochs 3 --data ../genderDetectionCfg.yaml --cfg ./models/yolov5s.yaml --weights ../weights/yolov5s-seg.pt  
To check that the trained model works, run:  
python detect.py --weights runs/train/exp11/weights/best.pt --img 96 --conf 0.25 --source 0

In [None]:
# Class folders of the raw dataset: Training first, then Validation, with
# 'female' ahead of 'male' inside each split.
root_directories = [
    f'/mnt/c/Users/nodeX/Documents/SIH/datasets/GenderClassifficationDataset/{split_folder}/{gender_folder}/'
    for split_folder in ('Training', 'Validation')
    for gender_folder in ('female', 'male')
]

def renameFilesBecauseItsTheRightThingToDo(root_directories):
    """
    Renames all '.jpg' files under the given root directories to sequentially numbered files.

    Each root directory is walked recursively with os.walk. Inside every visited directory the
    '.jpg' files are taken in sorted (lexicographic) order and renamed in place to '1.jpg',
    '2.jpg', etc. A single counter is used, so the numbering continues across directories.

    The renaming is done in two passes per directory: every file is first moved to a unique
    temporary name and only then to its final number. Renaming directly to the final name can
    replace a file that has not been processed yet (for example '10.jpg' -> '2.jpg' while the
    original '2.jpg' is still waiting), which loses images when a directory already contains
    numbered files.

    Args:
        root_directories (list of str): A list of paths to the root directories where the renaming should occur.

    Returns:
        None: The function performs file renaming operations and prints the old and new filenames to the console.

    Example:
        >>> renameFilesBecauseItsTheRightThingToDo(['/path/to/dir1', '/path/to/dir2'])
        Renamed: /path/to/dir1/oldname.jpg to /path/to/dir1/1.jpg
        Renamed: /path/to/dir2/anothername.jpg to /path/to/dir2/2.jpg
    """
    import uuid  # local import: only needed for the temporary names below

    counter = 1
    for root_directory in root_directories:
        for subdir, _, files in os.walk(root_directory):
            jpg_files = sorted(file for file in files if file.endswith('.jpg'))
            if not jpg_files:
                continue

            # Pass 1: park every file under a name that cannot clash with '<n>.jpg'.
            # The temporary names keep the '.jpg' suffix, so an interrupted run can be repeated.
            tag = uuid.uuid4().hex
            staged = []
            for index, file in enumerate(jpg_files):
                old_file = os.path.join(subdir, file)
                temp_file = os.path.join(subdir, f"{tag}_{index}.jpg")
                os.rename(old_file, temp_file)
                staged.append((old_file, temp_file))

            # Pass 2: hand out the final numbers in the original sorted order.
            for old_file, temp_file in staged:
                new_file = os.path.join(subdir, f"{counter}.jpg")
                os.rename(temp_file, new_file)
                print(f"Renamed: {old_file} to {new_file}")
                counter += 1

In [None]:
# Input and output locations, relative to the notebook's working directory.
dataset_dir = '../datasets/GenderClassifficationDataset/'
output_dir = '../datasets/out/'

# Class name -> class id written into the label files.
classes = dict(male=0, female=1)

In [None]:
# Display the class names; this key order is the order in which the loops over `classes` iterate.
classes.keys()

In [None]:
# Number of entries in the raw female training folder.
len(os.listdir(dataset_dir + "/Training/female"))

In [None]:
# Print every class name with its id, once per split; no files are touched here.
for split in ['train', 'val']:
    for class_name, class_id in classes.items():
        print(class_name, class_id)

In [None]:
def create_dirs(base_dir):
    """
    Create the image folder layout under base_dir.

    Ensures 'images/<split>' exists for the splits 'train', 'val' and 'test', plus one
    'images/<split>/<class_name>' folder for every key of the module-level `classes`
    dict. Folders that already exist are left untouched.

    Args:
        base_dir (str): Directory under which the 'images' tree is created.

    Returns:
        None
    """
    images_root = os.path.join(base_dir, 'images')
    split_names = ('train', 'val', 'test')

    for split_name in split_names:
        os.makedirs(os.path.join(images_root, split_name), exist_ok=True)

    for class_name in classes:
        for split_name in split_names:
            os.makedirs(os.path.join(images_root, split_name, class_name), exist_ok=True)

In [None]:
# Build the images/<split>/<class> folders under output_dir before any files are copied.
create_dirs(output_dir)
def train_test_val_split(image_dir, label, label_id, split):
    """
    Copy every '.jpg' of one class from a source split folder into the output tree.

    Despite its name, this function does not split anything: it copies the images found in
    '<image_dir>/<label>/' to '<output_dir>/images/<split>/<label>/' and prints the
    destination folder. `output_dir` is the module-level output location.

    Args:
        image_dir (str): Source split folder that contains one sub-folder per class.
        label (str): Class name; used as the sub-folder name on both sides.
        label_id (int): Class id. Accepted for call compatibility; not used here.
        split (str): Name of the destination split folder (e.g. 'train' or 'val').

    Returns:
        None
    """
    destination = os.path.join(output_dir, "images", split, label)
    # Create the target on demand so the copy does not depend on the folders having been prepared.
    os.makedirs(destination, exist_ok=True)

    # Read from the folder the caller passed in rather than re-deriving it from a global.
    for image_path in glob(os.path.join(image_dir, label, '*.jpg')):
        shutil.copy(image_path, os.path.join(destination, os.path.basename(image_path)))
    print(destination)
# Copy each class of the raw Training / Validation folders into images/train and images/val.
splits = ["train", "val"]
for split in splits:
    # 'train' reads from the Training folder; any other split reads from Validation.
    split_source_dir = dataset_dir + ("Training" if split == "train" else "Validation")
    for class_name, class_id in classes.items():
        train_test_val_split(split_source_dir, class_name, class_id, split)

# Carve a test set out of the validation images: for each class, move 20% of the files
# from images/val/<class> to images/test/<class>. Only images are moved in this cell.
# The candidates are sorted and shuffled with a private, seeded RNG, so the same folder
# contents always yield the same selection instead of a fresh random pick on every run.
test_split_rng = random.Random(42)
for class_name in classes.keys():
    val_images = sorted(glob(os.path.join(output_dir, 'images', 'val', class_name, '*.jpg')))
    test_split_rng.shuffle(val_images)
    test_size = int(len(val_images) * 0.2)  # Using 20% of validation data for testing
    test_images = val_images[:test_size]
    for test_image in test_images:
        test_image_name = os.path.basename(test_image)
        shutil.move(test_image, os.path.join(output_dir, 'images', 'test', class_name, test_image_name))
        

In [None]:
# Create labels/<split> and labels/<split>/<class> for the train, val and test splits.
base_dir = "../datasets/out/"
for label_split in ('train', 'val', 'test'):
    os.makedirs(os.path.join(base_dir, 'labels', label_split), exist_ok=True)
for class_name in classes.keys():
    for label_split in ('train', 'val', 'test'):
        os.makedirs(os.path.join(base_dir, 'labels', label_split, class_name), exist_ok=True)

In [None]:
def create_labels(base_dir):
    """
    Write one placeholder YOLO label file per image.

    For every split ('train', 'val', 'test') and every class in the module-level `classes`
    dict, each '<base_dir>/<split>/<class_name>/*.jpg' image gets a text file with the same
    stem in '<parent of base_dir>/labels/<split>/<class_name>/'. Each file holds the single
    line '<class_id> 0.5 0.5 1.0 1.0' (dummy YOLO format: class_id x_center y_center width
    height). One summary line per split/class is printed.

    Args:
        base_dir (str): Root of the image tree, i.e. the folder that contains the split folders.

    Returns:
        None
    """
    # The labels tree sits next to the images tree; derive it from base_dir rather than a
    # fixed path so the function follows whatever image root it is given.
    labels_root = os.path.join(os.path.dirname(os.path.normpath(base_dir)), 'labels')

    for split in ("train", "val", "test"):
        for class_name, class_id in classes.items():
            image_paths = glob(os.path.join(base_dir, split, class_name, '*.jpg'))
            # Report a count instead of dumping every path into the cell output.
            print(f"{split}/{class_name}: {len(image_paths)} images")
            if not image_paths:
                continue

            label_dir = os.path.join(labels_root, split, class_name)
            os.makedirs(label_dir, exist_ok=True)
            for image_path in image_paths:
                # splitext swaps only the final extension; str.replace would also rewrite a
                # '.jpg' that happens to appear earlier in the file name.
                stem = os.path.splitext(os.path.basename(image_path))[0]
                with open(os.path.join(label_dir, stem + '.txt'), 'w') as label_file:
                    label_file.write(f"{class_id} 0.5 0.5 1.0 1.0\n")
# NOTE(review): this literal repeats output_dir + 'images'; keep the two in sync if output_dir changes.
create_labels("../datasets/out/images")

In [None]:
# NOTE(review): leftover fragment. `image_name` and `label_id` are not assigned at notebook
# top level in any cell visible here (they only appear as function locals / parameters), so
# this cell presumably raises NameError on a fresh kernel — confirm, then remove or rewrite.
label_path = os.path.join(output_dir, 'labels', split, image_name.replace('.jpg', '.txt'))
with open(label_path, 'w') as label_file:
    label_file.write(f"{label_id} 0.5 0.5 1.0 1.0\n")  # Dummy YOLO format: class_id x_center y_center width height
            
# NOTE(review): `test_image` is whatever value the last test-split loop left behind, and the
# move target has no class sub-folder, unlike the labels/<split>/<class>/ layout that
# create_labels writes to — confirm the intended destination.
test_label = test_image.replace('/images/val/', '/labels/val/').replace('.jpg', '.txt')
shutil.move(test_label, os.path.join(output_dir, 'labels', 'test', os.path.basename(test_label)))

In [None]:
# NOTE(review): leftover fragment. `image_name` is not assigned at notebook top level in any
# cell visible here, and `split` / `class_id` are only whatever earlier loops left behind, so
# this cell presumably fails on a fresh kernel — confirm, then remove or rewrite.
label_path = os.path.join(output_dir, 'labels', split, image_name.replace('.jpg', '.txt'))
        
        # Assuming one bounding box covering the whole image (change this logic as per your data)
with open(label_path, 'w') as label_file:
    label_file.write(f"{class_id} 0.5 0.5 1.0 1.0\n")  # Dummy YOLO format: class_id x_center y_center width height

In [None]:
# Move 20% of each class's validation images, together with their label files, into the
# test split.
# NOTE(review): the earlier copy/split cell already moves 20% of the validation images to
# images/test; running both shrinks the validation set twice — confirm which one should stay.
# The candidates are sorted and shuffled with a private, seeded RNG, so the same folder
# contents always yield the same selection.
test_split_rng = random.Random(42)
for class_name in classes.keys():
    val_images = sorted(glob(os.path.join(output_dir, 'images', 'val', class_name, '*.jpg')))
    test_split_rng.shuffle(val_images)
    test_size = int(len(val_images) * 0.2)  # Using 20% of validation data for testing
    test_images = val_images[:test_size]
    for test_image in test_images:
        test_image_name = os.path.basename(test_image)
        shutil.move(test_image, os.path.join(output_dir, 'images', 'test', class_name, test_image_name))
        # Labels are stored as labels/<split>/<class>/<stem>.txt, so the class folder is kept
        # on both the source and the destination side.
        test_label_name = os.path.splitext(test_image_name)[0] + '.txt'
        test_label = os.path.join(output_dir, 'labels', 'val', class_name, test_label_name)
        shutil.move(test_label, os.path.join(output_dir, 'labels', 'test', class_name, test_label_name))