In [None]:
import os
import numpy as np

# Specify the main folder path.
# Raw string so the Windows backslashes are taken literally; without the `r`
# prefix, `\Y` and `\D` are invalid escape sequences that Python warns about.
folder_path = r'E:\YOLO\Dataset_Repository'

# Get subfolders (non-recursive).
# The `with` block closes the directory iterator as soon as the listing is done.
# Sorting makes the order reproducible: os.scandir yields entries in arbitrary
# order, and later cells assign IDs based on the order of this list.
with os.scandir(folder_path) as entries:
    subfolders = sorted(entry.name for entry in entries if entry.is_dir())

# Convert to NumPy array
subfolders_array = np.array(subfolders)

# Display results
print("List of subfolders (Python list):")
print(subfolders)

print("\nList of subfolders (NumPy array):")
print(subfolders_array)


In [None]:
import re
import numpy as np
import pandas as pd
from difflib import get_close_matches

# -----------------------------
# STEP 1: Reference class mapping
# -----------------------------
# Class IDs are positional: the n-th name in the tuple (counting from 1) is
# stored under key n. Append new names at the end so existing IDs do not shift.
reference_classes = dict(enumerate((
    # IDs 1-5
    "A1-ExcavationSoil",
    "A2-Formwork",
    "A3-RebarReinforcement",
    "A4-PouredConcreteSlabs",
    "A5-PrecastConcreteBlock",
    # IDs 6-9
    "B1-WoodFraming",
    "B2-SteelFraming",
    "B3-Indoor_Sheathing",
    "B4-RoofFraming",
    # IDs 10-14
    "C1-ExteriorWall_Block",
    "C2-RoofingSystems",
    "C3-Windows_Opening",
    "C4-Doors",
    "C5-Waterproofing&Insulation",
    # IDs 15-18
    "D1-DrywallFraming",
    "D2-Ceilings",
    "D3-Flooring",
    "D4-WallFinishes",
    # IDs 19-22
    "E1-PlumbingRough-ins",
    "E2-PlumbingFixtures",
    "E3-HVACEquipment",
    "E4-MechanicalPiping&Insulation",
    # IDs 23-26
    "F1-Conduit&Wiring",
    "F2-ElectricalPanels&Breakers",
    "F3-LightingFixtures",
    "F4-SolarPanels&Batteries",
    # IDs 27-32
    "G1-Cabinetry&Millwork",
    "G2-Fireplaces&Chimneys",
    "G3-BathroomAccessories",
    "G4-ExteriorDecks&Railings",
    "G5-ConstructionEquipmentX",
    "G6-ConstructionStaffX",
    # ID 33 (a "B" class that sits after the "G" group)
    "B5-MassTimber",
), start=1))

# -----------------------------
# STEP 2: Subfolders from dataset
# -----------------------------
# The folder names are produced by the directory-scanning cell of this notebook.
# Fail fast with a readable message when that cell has not been run in this
# kernel, instead of relying on a no-op self-assignment to trip a bare NameError.
if "subfolders_array" not in globals():
    raise NameError(
        "subfolders_array is not defined; run the folder-scanning cell first."
    )

# -----------------------------
# STEP 3: Helper functions
# -----------------------------
def clean_name(name):
    """Return `name` reduced to its ASCII letters and digits, lower-cased.

    Every character outside a-z, A-Z and 0-9 (hyphens, underscores, '&',
    spaces, ...) is dropped, so differently punctuated spellings of the same
    label normalize to the same string.
    """
    alnum_only = re.sub(r'[^a-zA-Z0-9]', '', name)
    return alnum_only.lower()

# Normalized reference name -> class ID.
clean_reference = {clean_name(v): k for k, v in reference_classes.items()}
# Candidate list for fuzzy matching, built once instead of on every iteration.
reference_keys = list(clean_reference)

# Every folder starts unassigned (None). Pre-filling keeps the dict in the
# original folder order, which the later "assign remaining IDs" step relies on.
folder_to_id = {folder: None for folder in subfolders_array}
used_ids = set()

# Normalize each folder name once; both passes below reuse the result.
cleaned_folders = {folder: clean_name(folder) for folder in folder_to_id}

# -----------------------------
# STEP 4: Match folders with reference
# -----------------------------
# Pass 1 - exact matches on the normalized name. Resolving these first means a
# merely similar folder that happens to come earlier in the listing cannot take
# the ID of a folder whose name matches the reference exactly.
for folder, folder_clean in cleaned_folders.items():
    matched_id = clean_reference.get(folder_clean)
    if matched_id is not None and matched_id not in used_ids:
        folder_to_id[folder] = matched_id
        used_ids.add(matched_id)

# Pass 2 - fuzzy matches for folders still unassigned. Only the single best
# candidate at or above the 0.7 similarity cutoff is considered; if its ID is
# already taken the folder stays None and receives an ID in the next step.
for folder, folder_clean in cleaned_folders.items():
    if folder_to_id[folder] is not None:
        continue
    matches = get_close_matches(folder_clean, reference_keys, n=1, cutoff=0.7)
    if not matches:
        continue
    matched_id = clean_reference[matches[0]]
    if matched_id not in used_ids:  # Only assign if not used
        folder_to_id[folder] = matched_id
        used_ids.add(matched_id)

# -----------------------------
# STEP 5: Assign remaining IDs
# -----------------------------
# Folders left without an ID first consume the reference IDs nobody claimed
# (lowest first); once those run out they get fresh IDs above the reference range.
highest_reference_id = max(reference_classes.keys())
all_possible_ids = set(range(1, highest_reference_id + 1))
missing_ids = sorted(all_possible_ids - used_ids)
next_id = highest_reference_id + 1

# Snapshot the unassigned folders in dict order before filling them in.
unassigned_folders = [name for name, class_id in folder_to_id.items() if class_id is None]

for name in unassigned_folders:
    if not missing_ids:
        folder_to_id[name] = next_id
        next_id += 1
        continue
    folder_to_id[name] = missing_ids.pop(0)

# -----------------------------
# STEP 6: Create DataFrame
# -----------------------------
# One row per folder, built from (folder, class ID) pairs and ordered by class ID.
mapping_rows = list(folder_to_id.items())
df = pd.DataFrame(mapping_rows, columns=['Folder', 'Assigned_Class_ID'])
df = df.sort_values('Assigned_Class_ID')

display(df)


In [None]:
# -----------------------------
# STEP 7: Save to a text file
# -----------------------------
# Writes a header row followed by one tab-separated "Class_ID<TAB>Folder" line
# per folder, ordered by class ID.

output_file = "folder_class_mapping.txt"

# Explicit UTF-8: without it the encoding follows the platform locale, so folder
# names containing non-ASCII characters could fail to encode or be written
# differently from one machine to another.
with open(output_file, "w", encoding="utf-8") as f:
    f.write("Class_ID\tFolder\n")
    for folder, cid in sorted(folder_to_id.items(), key=lambda x: x[1]):
        f.write(f"{cid}\t{folder}\n")

print(f"Mapping saved successfully to '{output_file}'")
