In [None]:
import logging
# Raise the root logger's threshold to CRITICAL so that records of lower
# severity (DEBUG/INFO/WARNING/ERROR) are dropped by the root logger.
root_logger = logging.getLogger()
root_logger.setLevel(logging.CRITICAL)

In [None]:
from pylabel import importer

In [None]:
# Local folder holding the annotation files read by importer.ImportVOC
path_to_annotations = r"D:\Road_data\annotations"

# Local folder holding the images that the annotations refer to
# (handed to ImportVOC as `path_to_images`)
path_to_images = r"D:\Road_data\images"

# Load the annotations into a dataset object named "Road_data"
dataset = importer.ImportVOC(
    path=path_to_annotations,
    path_to_images=path_to_images,
    name="Road_data",
)

# Preview the first five rows of dataset.df as the cell output
dataset.df.head(5)

In [None]:
# Sanity check: render one annotated image to confirm the images can be found
from IPython.display import display

# NOTE(review): 67 is the value handed to ShowBoundingBoxes — presumably an
# image id from dataset.df; confirm it exists in this dataset.
annotated_preview = dataset.visualize.ShowBoundingBoxes(67)
display(annotated_preview)

In [None]:
# Summarise the imported dataset using the values exposed by dataset.analyze
analysis = dataset.analyze
print(f"Number of images: {analysis.num_images}")
print(f"Number of classes: {analysis.num_classes}")
print(f"Classes:{analysis.classes}")
print(f"Class counts:\n{analysis.class_counts}")

In [None]:
# Export the dataset through pylabel's YOLOv5 exporter using its default arguments.
# The call's return value is displayed as this cell's output.
# NOTE(review): the following cell reads label files from D:\training\labels —
# presumably where this export writes them; confirm the exporter's output path.
dataset.export.ExportToYoloV5()

In [None]:
import os
import shutil

def compare_folders(txt_folder, img_folder, output_folder):
    """Copy the image that belongs to each label file into ``output_folder``.

    A label file ``<stem>.txt`` in ``txt_folder`` is paired with the image in
    ``img_folder`` whose name is exactly ``<stem>`` plus a supported image
    extension (matched case-insensitively). Labels without a matching image
    are skipped silently.

    Args:
        txt_folder: Folder containing the ``.txt`` label files.
        img_folder: Folder containing the candidate image files.
        output_folder: Folder that receives the matched images; it is created
            (including parents) when missing.

    Returns:
        None. The function only copies files.
    """
    image_extensions = ('.jpg', '.jpeg', '.png', '.gif')  # Adjust the extensions as needed

    os.makedirs(output_folder, exist_ok=True)

    # Index the images by stem once instead of re-listing the image folder for
    # every label. Sorting makes the pick deterministic when several images
    # share a stem (e.g. a.jpg and a.png): the first name in sorted order wins.
    images_by_stem = {}
    for img_file in sorted(os.listdir(img_folder)):
        stem, extension = os.path.splitext(img_file)
        if extension.lower() in image_extensions:
            images_by_stem.setdefault(stem, img_file)

    for txt_file in os.listdir(txt_folder):
        if not txt_file.endswith('.txt'):
            continue
        base_name = os.path.splitext(txt_file)[0]
        # Exact stem lookup: a prefix test would pair "img1.txt" with
        # "img10.jpg" and could skip the real "img1.jpg" altogether.
        img_file = images_by_stem.get(base_name)
        if img_file is not None:
            shutil.copy(os.path.join(img_folder, img_file), output_folder)

if __name__ == "__main__":
    # Locations used to gather the images that have a label file.
    txt_folder = r"D:\training\labels"  # folder with the .txt label files
    img_folder = r"D:\Road_data\images"  # folder with the candidate images
    output_folder = r"D:\training\images"  # folder that receives the matched images

    compare_folders(
        txt_folder=txt_folder,
        img_folder=img_folder,
        output_folder=output_folder,
    )

In [None]:
import os
import random
import shutil

def _copy_pairs(image_names, image_folder, label_folder, dest_folder):
    """Copy each image and its same-stem ``.txt`` label into ``dest_folder``'s images/labels subfolders."""
    for image in image_names:
        label = os.path.splitext(image)[0] + '.txt'
        shutil.copy(os.path.join(image_folder, image), os.path.join(dest_folder, 'images', image))
        shutil.copy(os.path.join(label_folder, label), os.path.join(dest_folder, 'labels', label))


def split_data(image_folder, label_folder, train_folder, val_folder, split_ratio=0.2, seed=None):
    """Randomly split labelled images into train and validation folders.

    Only images that have a same-stem ``.txt`` file in ``label_folder`` are
    considered. Each selected image is copied to ``<dest>/images`` and its
    label to ``<dest>/labels``, where ``<dest>`` is ``train_folder`` or
    ``val_folder``.

    Args:
        image_folder: Folder containing the image files.
        label_folder: Folder containing the ``.txt`` label files.
        train_folder: Destination root for the training subset.
        val_folder: Destination root for the validation subset.
        split_ratio: Fraction (0..1) of the labelled images that goes to the
            validation subset; the remainder goes to training.
        seed: Optional seed for a reproducible shuffle. ``None`` (default)
            uses the global ``random`` state, so each call may differ.

    Raises:
        ValueError: If ``split_ratio`` is outside ``[0, 1]``.

    Note:
        Files already present in the destination folders are left in place.
        Re-running with a different shuffle can therefore mix train and
        validation images; a warning is issued when a destination is not empty.
    """
    import warnings  # local import: only this function needs it

    if not 0 <= split_ratio <= 1:
        raise ValueError(f"split_ratio must be between 0 and 1, got {split_ratio!r}")

    # Create <dest>/images and <dest>/labels (and parents). exist_ok keeps a
    # re-run or a pre-created folder from raising FileExistsError.
    for folder in (train_folder, val_folder):
        for subfolder in ('images', 'labels'):
            target = os.path.join(folder, subfolder)
            os.makedirs(target, exist_ok=True)
            if os.listdir(target):
                warnings.warn(
                    f"{target!r} already contains files; they are kept, so files "
                    "from an earlier split may end up mixed with this one",
                    stacklevel=2,
                )

    # List the labels once; a set gives O(1) membership tests in the filter below.
    label_names = set(os.listdir(label_folder))

    # Keep only images that have a label *before* splitting, so split_ratio
    # applies to the files that are actually copied. Sorting gives the seeded
    # shuffle a stable starting order regardless of directory listing order.
    image_files = sorted(
        f for f in os.listdir(image_folder)
        if f.lower().endswith(('.jpg', '.jpeg', '.png', '.gif'))
        and os.path.splitext(f)[0] + '.txt' in label_names
    )

    # A private generator keeps a seeded run from disturbing the global state.
    rng = random if seed is None else random.Random(seed)
    rng.shuffle(image_files)

    # The first `split_index` shuffled images form the validation subset.
    split_index = int(len(image_files) * split_ratio)
    _copy_pairs(image_files[split_index:], image_folder, label_folder, train_folder)
    _copy_pairs(image_files[:split_index], image_folder, label_folder, val_folder)

if __name__ == "__main__":
    # Inputs: the labelled images and their label files.
    image_folder = r"D:\training\images"  # folder with the images to split
    label_folder = r"D:\training\labels"  # folder with the matching .txt labels
    # Outputs: roots of the training and validation subsets.
    train_folder = r"D:\Hackathon_Dataset\train"  # receives the training subset
    val_folder = r"D:\Hackathon_Dataset\val"  # receives the validation subset

    split_data(
        image_folder=image_folder,
        label_folder=label_folder,
        train_folder=train_folder,
        val_folder=val_folder,
    )
