In [13]:
import Dataset as ds
import config as cfg

from sklearn.model_selection import train_test_split
import os
from tqdm.auto import tqdm

In [14]:
# Metadata file handed to both create_dataframe calls below
metadata_path = f'{cfg.BASE_DATASET}/metadata.json'

# Training split: image / label folders and the DataFrame built from them
train_images_path = f'{cfg.BASE_DATASET}/images/train'
train_labels_path = f'{cfg.BASE_DATASET}/labels/train'
train_df = ds.create_dataframe(train_images_path, train_labels_path, metadata_path)

# Validation split: same folder layout, under 'valid'
val_images_path = f'{cfg.BASE_DATASET}/images/valid'
val_labels_path = f'{cfg.BASE_DATASET}/labels/valid'
valid_df = ds.create_dataframe(val_images_path, val_labels_path, metadata_path)

In [15]:
dataset_dir = cfg.CLF_DATASET_DIR
class_names = cfg.CLF_CLASS_NAMES
dataset_name = 'test100'

# Size of the small sub-dataset and the seed used to draw it. With a fixed seed
# the stratified draw (and therefore the set of files copied below) is the same
# after every kernel restart; change SAMPLE_SEED to draw a different subset.
N_TRAIN_SAMPLES = 100
N_VALID_SAMPLES = 20
SAMPLE_SEED = 42

# train_test_split is used purely as a stratified sampler here: the "test" part
# is the sample that is kept, the remainder is discarded.
_, test_train_df = train_test_split(
    train_df,
    test_size=N_TRAIN_SAMPLES,  # Number of rows in the training sample
    stratify=train_df['ac'],  # Stratify on the 'ac' column
    random_state=SAMPLE_SEED,  # Ensures reproducibility
)

_, test_val_df = train_test_split(
    valid_df,
    test_size=N_VALID_SAMPLES,  # Number of rows in the validation sample
    stratify=valid_df['ac'],  # Stratify on the 'ac' column
    random_state=SAMPLE_SEED,  # Ensures reproducibility
)

ds.create_sub_dataset(dataset_name, test_train_df, test_val_df, class_names, dataset_dir)


Removing dataset if pre-existing
Copying training files:


Copying files:   0%|          | 0/200 [00:00<?, ?it/s]

Copying validation files:


Copying files:   0%|          | 0/40 [00:00<?, ?it/s]

Dataset 'test100' created at C:/github/Third-Year-Project/Intruder-Aircraft-Detection/datasets/Custom


In [16]:
def correct_dataset_labels(dataset_dir, train_df, val_df, class_names):
    """Rewrite the class index of every annotation in a sub-dataset's label files.

    For each row of ``train_df`` / ``val_df`` the file named
    ``os.path.basename(row['label_path'])`` is looked up under
    ``<dataset_dir>/labels/train`` (respectively ``<dataset_dir>/labels/valid``).
    In every non-blank line of that file the first whitespace-separated token
    is replaced by the position of ``row['ac']`` in ``class_names``; the other
    tokens are kept and re-joined with single spaces. Blank lines are dropped
    and every written line is newline-terminated. Files are modified in place.

    Args:
        dataset_dir: Root directory that contains 'labels/train' and
            'labels/valid'.
        train_df: Frame with 'label_path' and 'ac' columns describing the
            training label files.
        val_df: Same as ``train_df``, for the validation label files.
        class_names: Sequence of class names; an entry's position becomes the
            class index written to the files.

    Raises:
        ValueError: If an 'ac' value does not appear in ``class_names``. Both
            mappings are built before any file is touched, so nothing has been
            modified when this is raised.
    """
    # Assuming dataset_dir is the root that contains 'labels/train' and 'labels/valid'
    train_labels_path = dataset_dir + '/labels/train'
    val_labels_path = dataset_dir + '/labels/valid'

    # One-off name -> index lookup instead of a linear list.index() per row.
    # setdefault keeps the first position of a duplicated name, like list.index.
    class_to_index = {}
    for index, name in enumerate(class_names):
        class_to_index.setdefault(name, index)

    def build_label_mapping(df):
        """Map each label file name in df to the new class index for that file."""
        mapping = {}
        for label_path, class_name in zip(df['label_path'], df['ac']):
            if class_name not in class_to_index:
                raise ValueError(f"{class_name!r} is not in class_names")
            mapping[os.path.basename(label_path)] = class_to_index[class_name]
        return mapping

    train_label_mapping = build_label_mapping(train_df)
    val_label_mapping = build_label_mapping(val_df)

    def update_labels(labels_path, label_mapping, split_name):
        """Apply label_mapping to the files in labels_path, reporting missing ones."""
        progress_desc = f'Processing {split_name} labels in {os.path.basename(dataset_dir)}'
        for label_filename, new_class_index in tqdm(label_mapping.items(), desc=progress_desc):
            label_file_path = os.path.join(labels_path, label_filename)
            if not os.path.isfile(label_file_path):
                # Best effort: report and carry on with the remaining files.
                print(f"File not found: {label_file_path}")
                continue

            with open(label_file_path, 'r') as file:
                lines = file.readlines()

            updated_lines = []
            for line in lines:
                parts = line.split()
                if parts:
                    parts[0] = str(new_class_index)  # Update the class index
                    updated_lines.append(' '.join(parts) + '\n')

            with open(label_file_path, 'w') as file:
                file.writelines(updated_lines)

    # Update labels in both train and validation directories using respective mappings
    update_labels(train_labels_path, train_label_mapping, 'train')
    update_labels(val_labels_path, val_label_mapping, 'valid')

    print("Label correction completed.")



# Rewrite the class index in the label files under <dataset_dir>/<dataset_name>
# so that it is the position of each sampled row's 'ac' value in class_names.
correct_dataset_labels(f'{dataset_dir}/{dataset_name}', test_train_df, test_val_df, class_names)

Processing train labels in test100:   0%|          | 0/100 [00:00<?, ?it/s]

Processing valid labels in test100:   0%|          | 0/20 [00:00<?, ?it/s]

Label correction completed.
