In [4]:
import os

def _label_sort_key(label):
    """Order integer class IDs by value; non-integer tokens sort after them."""
    try:
        return (0, int(label), label)
    except ValueError:
        return (1, 0, label)


def find_unique_labels(label_dir):
    """Collect the distinct class IDs used by the label files in a directory.

    Every file directly inside ``label_dir`` whose name ends in ``.txt`` is
    read line by line, and the first whitespace-separated token of each
    non-blank line is taken as the class ID.

    Args:
        label_dir: Path of the directory holding the ``.txt`` label files.

    Returns:
        list[str]: The unique class IDs as strings, ordered by numeric value
        (``'2'`` before ``'10'``); tokens that are not integers come last.

    Raises:
        OSError: If ``label_dir`` or one of its label files cannot be read.
    """
    unique_labels = set()

    for filename in os.listdir(label_dir):
        if not filename.endswith('.txt'):
            continue
        with open(os.path.join(label_dir, filename), 'r') as f:
            for line in f:
                tokens = line.split()  # split() already ignores surrounding whitespace
                if tokens:  # skip blank lines
                    unique_labels.add(tokens[0])

    # A plain string sort would place '10' before '2'; order by numeric value instead.
    return sorted(unique_labels, key=_label_sort_key)

# Example usage: list the class IDs present in one label directory.
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
label_directory = "C:/Mansura/UTI-Revision2/ExternalValidation/A-clinical-1/data-directory/train/labels"
# Alternative target (DATA-UTI-LR train labels); uncomment to inspect it instead.
#label_directory = "C:/Mansura/UTI-Revision2/ExternalValidation/DATA-UTI-LR/Data/train/labels"
classes = find_unique_labels(label_directory)
print(f"Unique class labels found: {classes}")


Unique class labels found: ['0', '1']


Divide the external validation data into train, val, and test splits

In [None]:
import os
import random
import shutil

# Set seed for reproducibility (seeds the global `random` generator used by shuffle below)
random.seed(42)

# Paths
# NOTE(review): absolute, machine-specific paths; adjust before running elsewhere.
image_dir = 'C:/Mansura/UTI-Revision2/ExternalValidation/A-clinical-1/all-images/images'       # folder with 300 .jpg/.png images
label_dir = 'C:/Mansura/UTI-Revision2/ExternalValidation/A-clinical-1/all-images/labels'       # folder with 300 .txt files
output_base = 'C:/Mansura/UTI-Revision2/ExternalValidation/A-clinical-1/data-directory'     # folder where train/val/test folders will be created

# Split sizes: after shuffling, the first 60 images go to train, the next 30
# to val, and the following 210 to test (300 in total).
N_TRAIN, N_VAL, N_TEST = 60, 30, 210
N_EXPECTED = N_TRAIN + N_VAL + N_TEST

# Collect image files
image_files = [f for f in os.listdir(image_dir) if f.endswith(('.jpg', '.png'))]
image_files.sort()  # os.listdir order is arbitrary; sorting makes the seeded shuffle repeatable
random.shuffle(image_files)

# The slices below silently shrink or drop images when the count is off, so say so.
if len(image_files) != N_EXPECTED:
    print(f"WARNING: expected {N_EXPECTED} images but found {len(image_files)}; "
          "splits will be smaller than planned or surplus images will be left out.")

# Fail before copying anything if a selected image has no label file, so a
# missing label cannot leave the split directories half-populated.
missing_labels = [
    img for img in image_files[:N_EXPECTED]
    if not os.path.isfile(os.path.join(label_dir, os.path.splitext(img)[0] + '.txt'))
]
if missing_labels:
    raise FileNotFoundError(
        f"{len(missing_labels)} image(s) have no label file in {label_dir}, "
        f"e.g. {missing_labels[:5]}"
    )

# Split
train_imgs = image_files[:N_TRAIN]
val_imgs = image_files[N_TRAIN:N_TRAIN + N_VAL]
test_imgs = image_files[N_TRAIN + N_VAL:N_EXPECTED]

splits = {'train': train_imgs, 'val': val_imgs, 'test': test_imgs}

# Copy files
# Existing split folders are reused (exist_ok=True) and never cleared, so files
# copied by an earlier run with a different split stay in place.
for split, files in splits.items():
    split_img_dir = os.path.join(output_base, split, 'images')
    split_lbl_dir = os.path.join(output_base, split, 'labels')
    os.makedirs(split_img_dir, exist_ok=True)
    os.makedirs(split_lbl_dir, exist_ok=True)

    for img in files:
        # The label file shares the image's base name, with a .txt extension.
        label = os.path.splitext(img)[0] + '.txt'
        shutil.copy2(os.path.join(image_dir, img), os.path.join(split_img_dir, img))
        shutil.copy2(os.path.join(label_dir, label), os.path.join(split_lbl_dir, label))

print("✅ Data split into train, val, test successfully.")


✅ Data split into train, val, test successfully.


DATA-UTI-LR class remapping for external-validation (EV) training

In [7]:
import os
from pathlib import Path

# Dataset root path
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
root_dir = Path("C:/Mansura/UTI-Revision2/ExternalValidation/DATA-UTI-LR/Data")
# Split sub-directories under root_dir whose label files are processed.
splits = ['train', 'val', 'test']

# Default DATA-UTI-LR policy used when the caller passes no explicit mapping.
_DEFAULT_REMAP = {3: 2, 4: 3, 5: 3}    # epithn → epith; eryth, leuko → eryth_wbc
_DEFAULT_DROP = frozenset({0, 1, 6})   # mycete or other classes


def clean_and_remap_labels(filepath, remap=None, drop=None):
    """Rewrite one label file in place, renumbering or removing classes.

    Each non-blank line is split on whitespace and its first token is parsed
    as an integer class ID. Lines whose class is in ``drop`` are removed,
    lines whose class is in ``remap`` get the new ID, and all other lines are
    kept with their class unchanged. Surviving lines are re-joined with single
    spaces and written back separated by ``"\\n"`` (no trailing newline).

    WARNING: the file is overwritten, and with the default policy the
    operation is NOT idempotent: a second pass would turn the class 3 produced
    from 4/5 into class 2. Run it once per file.

    Args:
        filepath: Path (``str`` or ``pathlib.Path``) of the label file.
        remap: Optional ``{old_id: new_id}`` dict. Defaults to
            ``{3: 2, 4: 3, 5: 3}``.
        drop: Optional collection of class IDs to delete. Defaults to
            ``{0, 1, 6}``. ``drop`` takes precedence over ``remap``.

    Returns:
        None.

    Raises:
        ValueError: If the first token of a non-blank line is not an integer.
        OSError: If the file cannot be read or written.
    """
    if remap is None:
        remap = _DEFAULT_REMAP
    if drop is None:
        drop = _DEFAULT_DROP

    updated_lines = []
    with open(filepath, 'r') as f:
        for line in f:
            parts = line.split()
            if not parts:
                continue  # blank line
            cls = int(parts[0])
            if cls in drop:
                continue  # unwanted class: remove the annotation
            if cls in remap:
                parts[0] = str(remap[cls])
            updated_lines.append(" ".join(parts))

    # Always write the result, even when nothing survives: the file is then
    # left empty rather than deleted.
    with open(filepath, 'w') as f:
        f.write("\n".join(updated_lines))

# Apply to all label files
# WARNING: label files are rewritten in place. Run this cell once only: a second
# pass would remap the class 3 produced from 4/5 by the first pass to class 2.
for split in splits:
    label_dir = root_dir / split / "labels"
    for label_file in label_dir.glob("*.txt"):
        clean_and_remap_labels(label_file)

# Printed unconditionally, even if no label files were found.
print("✅ Labels remapped and unwanted classes removed.")


✅ Labels remapped and unwanted classes removed.


Map class labels 2 and 3 to 0 and 1

In [1]:
import os
import glob

# Define root directories for both datasets
# NOTE(review): absolute, machine-specific paths; adjust before running elsewhere.
datasets = [
    "C:/Mansura/UTI-Revision2/ExternalValidation/A-clinical-1/data-directory",            # Clinical data
    "C:/Mansura/UTI-Revision2/ExternalValidation/DATA-UTI-LR/Data"     # DATA-UTI-LR data
]

# Subfolders to check (each is expected to contain a 'labels' directory)
splits = ['train', 'val', 'test']

# Class remapping: old → new
# Any class ID that is not a key here is removed by update_labels.
class_map = {
    2: 0,
    3: 1
}

def update_labels(label_dir, mapping=None):
    """Renumber classes in every ``*.txt`` label file of a directory, in place.

    For each non-blank line the first whitespace-separated token is parsed as
    an integer class ID. Lines whose class is a key of ``mapping`` are kept
    with the class replaced by the mapped value; every other line is removed.
    Kept lines are re-joined with single spaces and written back separated by
    ``"\\n"`` (no trailing newline).

    WARNING: files are overwritten, and the operation is not idempotent when
    the mapped values are not themselves keys (as with ``{2: 0, 3: 1}``): a
    second run finds only unmapped classes and empties every file.

    Args:
        label_dir: Directory containing the ``.txt`` label files.
        mapping: Optional ``{old_id: new_id}`` dict. Defaults to the
            notebook-level ``class_map``.

    Returns:
        None.

    Raises:
        ValueError: If the first token of a non-blank line is not an integer.
        OSError: If a label file cannot be read or written.
    """
    if mapping is None:
        mapping = class_map  # fall back to the notebook-level default

    for label_file in glob.glob(os.path.join(label_dir, '*.txt')):
        new_lines = []
        with open(label_file, 'r') as f:
            for line in f:
                parts = line.split()
                if not parts:
                    continue  # blank line
                cls = int(parts[0])
                if cls not in mapping:
                    continue  # skip all other classes
                parts[0] = str(mapping[cls])
                new_lines.append(' '.join(parts))
        # Written even when new_lines is empty, which leaves an empty file.
        with open(label_file, 'w') as f:
            f.write('\n'.join(new_lines))

# Apply changes to each dataset and split
# WARNING: label files are rewritten in place. Run this cell once only:
# update_labels removes every class that is not a key of class_map, and after
# the first pass the files hold only the mapped values (0 and 1), so a second
# pass would empty them.
for dataset_path in datasets:
    for split in splits:
        label_path = os.path.join(dataset_path, split, 'labels')
        if os.path.exists(label_path):
            print(f"Processing: {label_path}")
            update_labels(label_path)
        else:
            # Report missing directories instead of skipping them silently.
            print(f"Skipping (not found): {label_path}")


Processing: C:/Mansura/UTI-Revision2/ExternalValidation/A-clinical-1/data-directory\train\labels
Processing: C:/Mansura/UTI-Revision2/ExternalValidation/A-clinical-1/data-directory\val\labels
Processing: C:/Mansura/UTI-Revision2/ExternalValidation/A-clinical-1/data-directory\test\labels
Processing: C:/Mansura/UTI-Revision2/ExternalValidation/DATA-UTI-LR/Data\train\labels
Processing: C:/Mansura/UTI-Revision2/ExternalValidation/DATA-UTI-LR/Data\val\labels
Processing: C:/Mansura/UTI-Revision2/ExternalValidation/DATA-UTI-LR/Data\test\labels
