In [3]:
import xml.etree.ElementTree as ET
from collections import defaultdict
import os

def parse_kml_folders(kml_file_path):
    """
    Parse a KML file and extract all tags found within folder tags.
    
    Args:
        kml_file_path (str): Path to the KML file
        
    Returns:
        dict: Dictionary with folder names as keys and lists of contained tags as values
    """
    try:
        # Parse the KML file
        tree = ET.parse(kml_file_path)
        root = tree.getroot()
        
        # KML files use namespaces, so we need to handle them
        # Common KML namespace
        namespace = {'kml': 'http://www.opengis.net/kml/2.2'}
        
        # If no namespace is found, try without namespace
        if root.tag.startswith('{'):
            # Extract namespace from root tag
            ns_end = root.tag.find('}')
            if ns_end != -1:
                namespace_uri = root.tag[1:ns_end]
                namespace = {'kml': namespace_uri}
        
        folder_contents = {}
        
        # Find all folder elements
        folders = root.findall('.//kml:Folder', namespace) or root.findall('.//Folder')
        
        for folder in folders:
            # Get folder name
            folder_name = None
            name_elem = folder.find('kml:name', namespace) or folder.find('name')
            if name_elem is not None:
                folder_name = name_elem.text
            else:
                folder_name = "Unnamed Folder"
            
            # Get all child elements (tags) within this folder recursively
            folder_tags = []
            
            def extract_tags_recursive(element, level=0):
                tag_name = element.tag
                # Remove namespace prefix if present
                if '}' in tag_name:
                    tag_name = tag_name.split('}')[1]
                
                # Store tag info
                tag_info = {
                    'tag': tag_name,
                    'text': element.text.strip() if element.text else None,
                    'attributes': element.attrib if element.attrib else {},
                    'level': level,
                    'children': []
                }
                
                # Process child elements
                for child in element:
                    child_info = extract_tags_recursive(child, level + 1)
                    tag_info['children'].append(child_info)
                
                return tag_info
            
            # Extract all tags from the folder
            for child in folder:
                tag_info = extract_tags_recursive(child)
                folder_tags.append(tag_info)
            
            folder_contents[folder_name] = folder_tags
        
        return folder_contents
        
    except ET.ParseError as e:
        print(f"Error parsing KML file: {e}")
        return {}
    except FileNotFoundError:
        print(f"File not found: {kml_file_path}")
        return {}
    except Exception as e:
        print(f"Unexpected error: {e}")
        return {}

def get_all_unique_tags_in_folders(kml_file_path):
    """
    Get all unique tag names found within folder tags (including nested tags).
    
    Args:
        kml_file_path (str): Path to the KML file
        
    Returns:
        set: Set of unique tag names found in folders
    """
    folder_contents = parse_kml_folders(kml_file_path)
    unique_tags = set()
    
    def collect_tags_recursive(tag_info):
        unique_tags.add(tag_info['tag'])
        for child in tag_info['children']:
            collect_tags_recursive(child)
    
    for folder_name, tags in folder_contents.items():
        for tag_info in tags:
            collect_tags_recursive(tag_info)
    
    return unique_tags

def print_folder_contents(kml_file_path, show_details=True):
    """
    Print the contents of all folders in a KML file.
    
    Args:
        kml_file_path (str): Path to the KML file
        show_details (bool): Whether to show tag details or just tag names
    """
    folder_contents = parse_kml_folders(kml_file_path)
    
    if not folder_contents:
        print("No folders found or error parsing file.")
        return
    
    def print_tag_recursive(tag_info, indent="  "):
        print(f"{indent}Tag: {tag_info['tag']}")
        if show_details:
            if tag_info['text']:
                print(f"{indent}  Text: {tag_info['text']}")
            if tag_info['attributes']:
                print(f"{indent}  Attributes: {tag_info['attributes']}")
        
        # Print children with increased indentation
        for child in tag_info['children']:
            print_tag_recursive(child, indent + "  ")
    
    for folder_name, tags in folder_contents.items():
        print(f"\n=== Folder: {folder_name} ===")
        
        if not tags:
            print("  (No tags found)")
            continue
            
        for tag_info in tags:
            print_tag_recursive(tag_info)

# Example usage
if __name__ == "__main__":
    # Example KML content to test with
    example_kml = '''<?xml version="1.0" encoding="UTF-8"?>
<kml xmlns="http://www.opengis.net/kml/2.2">
<Document>
    <Folder>
        <name>Beth-jeshimoth</name>
        <description><![CDATA[Num <a href="https://www.biblegateway.com/passage/?search=Num.33.49&version=ESV">33:49</a>, Josh <a href="https://www.biblegateway.com/passage/?search=Josh.12.3&version=ESV">12:3</a>, <a href="https://www.biblegateway.com/passage/?search=Josh.13.20&version=ESV">13:20</a>, Ezek <a href="https://www.biblegateway.com/passage/?search=Ezek.25.9&version=ESV">25:9</a>]]></description>
        <Placemark>
            <name>Beth-jeshimoth / Tell Azeimeh</name>
            <styleUrl>#landpoint</styleUrl>
            <Point>
                <coordinates>35.62125,31.782551</coordinates>
            </Point>
        </Placemark>
    </Folder>
</Document>
</kml>'''
    
    # Save example to file for testing
    with open("example.kml", "w", encoding="utf-8") as f:
        f.write(example_kml)
    
    # Replace with your KML file path
    kml_file =  "..\geo\doc.kml"
    
    # Check if file exists
    if os.path.exists(kml_file):
        print("=== All Folder Contents ===")
        print_folder_contents(kml_file)
        
        print("\n=== Unique Tags in Folders ===")
        unique_tags = get_all_unique_tags_in_folders(kml_file)
        for tag in sorted(unique_tags):
            print(f"- {tag}")
        
        print("\n=== Detailed Folder Analysis ===")
        folder_data = parse_kml_folders(kml_file)
        for folder, tags in folder_data.items():
            print(f"{folder}: {len(tags)} direct child tags")
            
            # Count all tags recursively
            def count_all_tags(tag_info):
                count = 1
                for child in tag_info['children']:
                    count += count_all_tags(child)
                return count
            
            total_tags = sum(count_all_tags(tag) for tag in tags)
            print(f"  Total tags (including nested): {total_tags}")
    else:
        print(f"Please provide a valid KML file path. '{kml_file}' not found.")
        
    # Clean up example file
    if os.path.exists("example.kml"):
        os.remove("example.kml")
        
    # Example of how to use with a specific file
    # folder_contents = parse_kml_folders("my_map.kml")
    # unique_tags = get_all_unique_tags_in_folders("my_map.kml")

=== All Folder Contents ===

=== Folder: Unnamed Folder ===
  Tag: name
    Text: Zuph
  Tag: description
    Text: 1Sam <a href="https://www.biblegateway.com/passage/?search=1Sam.9.5&amp;version=ESV">9:5</a>
  Tag: Placemark
    Tag: name
      Text: Zuph / about 10 km around Al Ram
    Tag: description
      Text: about 10 km around Ramathaim-zophim
    Tag: styleUrl
      Text: #region
    Tag: Polygon
      Tag: tesselate
        Text: 1
      Tag: outerBoundaryIs
        Tag: LinearRing
          Tag: coordinates
            Text: 35.23161,31.94427 35.22053,31.94378 35.20958,31.9423 35.19886,31.93987 35.18851,31.93649 35.17863,31.93221 35.16933,31.92708 35.16071,31.92115 35.15288,31.91449 35.1459,31.90717 35.13987,31.89927 35.13485,31.89088 35.13088,31.88209 35.12802,31.873 35.1263,31.8637 35.12573,31.8543 35.12632,31.8449 35.12807,31.8356 35.13094,31.82651 35.13492,31.81772 35.13996,31.80934 35.146,31.80145 35.15298,31.79414 35.16082,31.78749 35.16943,31.78157 35.17872,31.77645 3

In [4]:
folder_contents = parse_kml_folders( "..\geo\doc.kml")

In [5]:
folder_contents

{'Unnamed Folder': [{'tag': 'name',
   'text': 'Zuph',
   'attributes': {},
   'level': 0,
   'children': []},
  {'tag': 'description',
   'text': '1Sam <a href="https://www.biblegateway.com/passage/?search=1Sam.9.5&amp;version=ESV">9:5</a>',
   'attributes': {},
   'level': 0,
   'children': []},
  {'tag': 'Placemark',
   'text': '',
   'attributes': {},
   'level': 0,
   'children': [{'tag': 'name',
     'text': 'Zuph / about 10 km around Al Ram',
     'attributes': {},
     'level': 1,
     'children': []},
    {'tag': 'description',
     'text': 'about 10 km around Ramathaim-zophim',
     'attributes': {},
     'level': 1,
     'children': []},
    {'tag': 'styleUrl',
     'text': '#region',
     'attributes': {},
     'level': 1,
     'children': []},
    {'tag': 'Polygon',
     'text': '',
     'attributes': {},
     'level': 1,
     'children': [{'tag': 'tesselate',
       'text': '1',
       'attributes': {},
       'level': 2,
       'children': []},
      {'tag': 'outerBounda

In [6]:
unique_tags = get_all_unique_tags_in_folders( "..\geo\doc.kml")

In [7]:
unique_tags

{'LinearRing',
 'Placemark',
 'Polygon',
 'coordinates',
 'description',
 'name',
 'outerBoundaryIs',
 'styleUrl',
 'tesselate'}

In [None]:
 "..\geo\doc.kml"