In [None]:
import xml.etree.ElementTree as ET
from pathlib import Path
import csv

def txt(node, tag):
    """Return the stripped text of the first ``tag`` descendant of ``node``.

    Parameters
    ----------
    node : xml.etree.ElementTree.Element
        Element whose subtree is searched.
    tag : str
        Tag name to look for at any depth below ``node``.

    Returns
    -------
    str or None
        The element's text with surrounding whitespace removed, or ``None``
        when no such element exists or it carries no non-whitespace text.
    """
    element = node.find(f".//{tag}")
    if element is None or element.text is None:
        return None
    # Whitespace-only text (e.g. an element holding just a newline) counts as
    # missing, so callers get None rather than an empty string.
    return element.text.strip() or None

def txt_all(node, tag):
    """Return the stripped text of every ``tag`` descendant of ``node``.

    Parameters
    ----------
    node : xml.etree.ElementTree.Element
        Element whose subtree is searched.
    tag : str
        Tag name to look for at any depth below ``node``.

    Returns
    -------
    list of str
        One entry per matching element that has non-whitespace text, in
        document order. Empty list when nothing matches.
    """
    values = []
    for element in node.findall(f".//{tag}"):
        stripped = (element.text or "").strip()
        # Skip elements with no text or whitespace-only text so the result
        # never contains empty strings.
        if stripped:
            values.append(stripped)
    return values

def parse_fgdc(xml_path):
    """Parse an FGDC-style metadata XML file into a plain dictionary.

    Parameters
    ----------
    xml_path : str, path-like or file object
        Anything ``xml.etree.ElementTree.parse`` accepts.

    Returns
    -------
    dict
        Scalar entries hold the stripped text of the first matching element
        found anywhere in the document, or ``None`` when it is missing or
        empty. ``"std_parallels"`` is a list of strings. ``"attributes"`` is a
        list of dicts with keys ``name``, ``definition``, ``enums``,
        ``range_min``, ``range_max`` and ``udom``. All values are kept as
        strings; no numeric conversion is performed.

    Raises
    ------
    xml.etree.ElementTree.ParseError
        If the file is not well-formed XML (raised by ``ET.parse``).
    """
    root = ET.parse(xml_path).getroot()
    out = {}

    # 1) Identity / time
    out["title"] = txt(root, "title")
    out["pubdate"] = txt(root, "pubdate")        # YYYYMMDD or YYYY
    out["year"] = txt(root, "caldate")           # first caldate in the document (often a single year)
    out["abstract"] = txt(root, "abstract")
    out["purpose"] = txt(root, "purpose")

    # 2) Spatial domain (bounding coordinates)
    out["west"], out["east"] = txt(root, "westbc"), txt(root, "eastbc")
    out["south"], out["north"] = txt(root, "southbc"), txt(root, "northbc")

    # 3) CRS (planar + geodetic)
    out["projection_name"] = txt(root, "mapprojn")    # e.g., Albers Conical Equal Area
    out["std_parallels"] = txt_all(root, "stdparll")  # two values in Albers
    out["central_meridian"] = txt(root, "longcm")
    out["latitude_origin"] = txt(root, "latprjo")
    out["false_easting"] = txt(root, "feast")
    out["false_northing"] = txt(root, "fnorth")
    out["planar_units"] = txt(root, "plandu")         # e.g., meters
    out["horiz_datum"] = txt(root, "horizdn")         # e.g., NAD83
    out["ellipsoid"] = txt(root, "ellips")
    out["semimajor_axis"] = txt(root, "semiaxis")
    out["inv_flattening"] = txt(root, "denflat")

    # 4) Resolution / raster size
    out["x_res"] = txt(root, "absres")                # e.g., 30
    out["y_res"] = txt(root, "ordres")                # e.g., 30
    out["rows"] = txt(root, "rowcount")
    out["cols"] = txt(root, "colcount")

    # 5) Attribute schema (field name, definition, coded domains)
    attrs = []
    for attr in root.findall(".//eainfo//detailed//attr"):
        # Enumerated domain values: look each one up once and drop the
        # entries that have no value.
        enum_values = (
            txt(edom, "edomv") for edom in attr.findall(".//attrdomv//edom")
        )
        enums = [value for value in enum_values if value]
        attrs.append({
            "name": txt(attr, "attrlabl"),
            "definition": txt(attr, "attrdef"),
            # None (rather than an empty list) signals "no enumerated domain".
            "enums": enums if enums else None,
            # range domain
            "range_min": txt(attr, "rdommin"),
            "range_max": txt(attr, "rdommax"),
            # unrepresentable domain (free text)
            "udom": txt(attr, "udom"),
        })
    out["attributes"] = attrs

    return out

In [7]:
# Usage: parse one metadata file, print a few headline fields, then the full dict.
import glob
metadata = parse_fgdc(
    "Dataset/AnnualDisturbance_1999_present/LF2015_Dist_200_CONUS/General_Metadata/LC15_Dist_200.xml"
)
# Space-separated summary line: projection name, x resolution, horizontal datum.
print(*(metadata[key] for key in ("projection_name", "x_res", "horiz_datum")))
print(metadata)

Albers Conical Equal Area 30 North American Datum of 1983 (NAD 83)
{'title': 'LANDFIRE Remap Annual Disturbance CONUS 2015', 'pubdate': '20200731', 'year': '2016', 'abstract': "LANDFIRE's (LF) 2016 Remap (Remap) Annual Disturbance (Dist) product provides temporal and spatial information related to landscape change. Dist depicts areas that have experienced a disturbance within a given year of 4.5 hectares (11 acres) or larger, along with cause and severity. Information sources include national fire mapping programs such as Monitoring Trends in Burn Severity (MTBS), Burned Area Reflectance Classification (BARC), and Rapid Assessment of Vegetation Condition after Wildfire (RAVG), local user/agency contributed data (LF Events Geodatabase), and remotely sensed Landsat imagery. Composite Landsat image pairs from the current year, prior year, and following year are spectrally compared to determine where change occurred and its corresponding severity. Additionally, vegetation indices (Normaliz

In [8]:
import json

# Render the parsed metadata as indented JSON for a readable dump.
json_str = json.dumps(metadata, indent=2)
data = metadata  # `data` is an alias of `metadata`, not a copy
print(json_str)


{
  "title": "LANDFIRE Remap Annual Disturbance CONUS 2015",
  "pubdate": "20200731",
  "year": "2016",
  "abstract": "LANDFIRE's (LF) 2016 Remap (Remap) Annual Disturbance (Dist) product provides temporal and spatial information related to landscape change. Dist depicts areas that have experienced a disturbance within a given year of 4.5 hectares (11 acres) or larger, along with cause and severity. Information sources include national fire mapping programs such as Monitoring Trends in Burn Severity (MTBS), Burned Area Reflectance Classification (BARC), and Rapid Assessment of Vegetation Condition after Wildfire (RAVG), local user/agency contributed data (LF Events Geodatabase), and remotely sensed Landsat imagery. Composite Landsat image pairs from the current year, prior year, and following year are spectrally compared to determine where change occurred and its corresponding severity. Additionally, vegetation indices (Normalized Differenced Vegetation Index [NDVI] and Normalized Burn