In [6]:
import re

# Ordered list of vertebra labels; position in this list defines how a
# range such as "C2-T5" is expanded.
bone_list = [
    "C1", "C2", "C3", "C4", "C5", "C6", "C7",
    "T1", "T2", "T3", "T4", "T5", "T6", "T7", "T8", "T9", "T10", "T11", "T12",
    "L1", "L2", "L3", "L4", "L5",
    "S1", "S2", "S3", "S4", "S5",
]

# Map each bone to its index for easy look-up
bone_to_index = {bone: idx for idx, bone in enumerate(bone_list)}

# One label ("T7") or a hyphenated range ("T7-MidS3"); a "Mid" qualifier in
# front of the range end is ignored.  The trailing \b on the range end stops
# a longer token from being truncated (e.g. "C2-C10" being read as "C2-C1").
BONE_PATTERN = re.compile(
    r'\b(C\d|T\d{1,2}|L\d|S\d)\b'
    r'(?:-(?:Mid)?(C\d|T\d{1,2}|L\d|S\d)\b)?'
)


def extract_bones(text):
    """Return the bone labels mentioned in *text*, in anatomical order.

    Single labels are kept as-is and hyphenated ranges are expanded
    inclusively using the order of ``bone_list``.  Ranges may be written in
    either direction ("C2-T5" and "T5-C2" are equivalent).

    Tokens that look like a label but are not in ``bone_list`` (for example
    "C9" or "T13") are ignored instead of raising ``KeyError``; if only one
    end of a range is a known label, that label alone is kept.

    Args:
        text: Free-text report to scan.

    Returns:
        A list of unique labels from ``bone_list``, sorted by their position
        in that list.  Empty if no known label is found.
    """
    found_indices = set()
    for endpoints in BONE_PATTERN.findall(text):
        # The second group is "" when the match is not a range; that and any
        # unknown label are filtered out by the membership test.
        known = [bone_to_index[b] for b in endpoints if b in bone_to_index]
        if not known:
            continue
        # min/max makes the expansion independent of the written direction.
        found_indices.update(range(min(known), max(known) + 1))
    return [bone_list[idx] for idx in sorted(found_indices)]


# Define the report text
report = "Plan to treat C2-T5, and T7-MidS3 disease."

# Raw (start, end) tuples found in the report; kept at module level so the
# intermediate matches can still be inspected.
matches = BONE_PATTERN.findall(report)

# Unique bones covered by the report, in anatomical order
extracted_bones = extract_bones(report)

# Print the result as a comma-separated list
print("Extracted labels:", ', '.join(extracted_bones))


Extracted labels: C2, C3, C4, C5, C6, C7, T1, T2, T3, T4, T5, T7, T8, T9, T10, T11, T12, L1, L2, L3, L4, L5, S1, S2, S3
