# 1. Download LiDAR and DEM data from USGS

This function is to download DEM and LiDAR at the same time tiles that Atlanta's City Boundary only

In [None]:
import requests
import xml.etree.ElementTree as ET
import geopandas as gpd
from shapely.geometry import Polygon
import os
import re
import pandas as pd

### --- Configuration (User MUST update these paths) --- ###
TIFF_URL_LIST_FILE = r"C:\Users\HojungYu\Documents\GitHub\fast-thermal-comfort\data\0_file_download_links.txt" # This will be downloaded text file.
DOWNLOAD_DIR_DEM = r"C:\Users\HojungYu\Documents\GitHub\fast-thermal-comfort\data\DEM" # DEM directory
DOWNLOAD_DIR_LIDAR = r"C:\Users\HojungYu\Documents\GitHub\fast-thermal-comfort\data\LiDAR" # LiDAR directory 
ATLANTA_BOUNDARY_FILE = r"C:\Users\HojungYu\Documents\GitHub\fast-thermal-comfort\data\Atlanta_City_buffered.geojson" # Atlanta Boundary geojson. Used buffered version to make sure you have all tiles inside.
### ---------------------------------------------------- ###

In [7]:
# --- Helper Functions ---
def load_atlanta_boundary(boundary_filepath):
    """
    Loads Atlanta's city boundary using geopandas.
    Args:
        boundary_filepath (str): Path to the geospatial file for Atlanta's boundary.
    Returns:
        gpd.GeoDataFrame: GeoDataFrame containing Atlanta's boundary.
    """
    try:
        atlanta_gdf = gpd.read_file(boundary_filepath)
        if atlanta_gdf.crs is None or atlanta_gdf.crs.to_epsg() != 4326:
            print(f"Warning: Atlanta boundary CRS is {atlanta_gdf.crs}. Reprojecting to EPSG:4326.")
            atlanta_gdf = atlanta_gdf.to_crs(epsg=4326)
        print(f"Successfully loaded Atlanta boundary from: {boundary_filepath}")
        return atlanta_gdf
    except Exception as e:
        print(f"Could not load Atlanta boundary from '{boundary_filepath}': {e}")
        print("Using a simplified placeholder bounding box for Atlanta. Please provide a real file for accuracy.")
        west = -84.50
        south = 33.65
        east = -84.30
        north = 33.85
        atlanta_bbox_polygon = Polygon([(west, south), (east, south), (east, north), (west, north), (west, south)])
        return gpd.GeoDataFrame(
            {'geometry': [atlanta_bbox_polygon]},
            crs="EPSG:4326"
        )

def extract_id_from_url(url):
    """
    Extracts the ID (e.g., 'e0999n1337') from a TIFF URL.
    """
    match = re.search(r'_(e\d+n\d+)\.tif$', url)
    if match:
        return match.group(1)
    return None

def construct_xml_url(tif_url, tif_id):
    """
    Constructs the XML metadata URL from a TIFF URL and its ID.
    Assumes a consistent URL structure for metadata.
    """
    # Example TIFF URL: https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/OPR/Projects/GA_Statewide_2018_B18_DRRA/GA_Statewide_B2_2018/TIFF/USGS_OPR_GA_Statewide_2018_B18_DRRA_e0999n1337.tif
    # Example XML URL: https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/OPR/Projects/GA_Statewide_2018_B18_DRRA/GA_Statewide_B2_2018/metadata/USGS_OPR_GA_Statewide_2018_B18_DRRA_e1089n1369.xml
    # Note the 'B2_2018' vs 'B3_2018' and 'TIFF' vs 'metadata'
    base_url = tif_url.rsplit('/', 2)[0] # Get base up to 'GA_Statewide_B2_2018'
    xml_url = f"{base_url}/metadata/USGS_OPR_GA_Statewide_2018_B18_DRRA_{tif_id}.xml"
    return xml_url

def construct_laz_url(tif_url, tif_id):
    """
    Constructs the LAZ download URL from an ID.
    Assumes a consistent URL structure for LAZ files.
    Example TIF URL: https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/OPR/Projects/GA_Statewide_2018_B18_DRRA/GA_Statewide_B2_2018/TIFF/USGS_OPR_GA_Statewide_2018_B18_DRRA_{ID}.tif
    Example LAZ URL: https://rockyweb.usgs.gov/vdelivery/Datasets/Staged/Elevation/LPC/Projects/GA_Statewide_2018_B18_DRRA/GA_Statewide_B2_2018/LAZ/USGS_LPC_GA_Statewide_2018_B18_DRRA_{ID}.laz
    """
    # This URL structure is different from the TIFF/XML, so it's hardcoded based on the example.
    laz_base_url = tif_url.split('/')[7:9]
    laz_base_url_str = "/".join(laz_base_url) 
    laz_base_url_2 = tif_url.rsplit('/', 1)[1]
    laz_base_url_2=laz_base_url_2.replace("USGS_OPR_GA", "USGS_LPC_GA")
    laz_base_url_2=laz_base_url_2.replace("tif","laz")
    laz_url = f"https://rockyweb.usgs.gov/vdelivery/Datasets/Staged/Elevation/LPC/Projects/{laz_base_url_str}/LAZ/{laz_base_url_2}"
    return laz_url

def download_file(url, destination_folder):
    """
    Downloads a file from a given URL to a specified folder.
    Returns the path to the downloaded file, or None on failure.
    """
    # os.makedirs(destination_folder, exist_ok=True)
    local_filename = os.path.join(destination_folder, url.split('/')[-1])
    try:
        with requests.get(url, stream=True) as r:
            r.raise_for_status()
            with open(local_filename, 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
        # print(f"Downloaded: {local_filename}")
        return local_filename
    except requests.exceptions.RequestException as e:
        print(f"Error downloading {url}: {e}")
        return None

def parse_xml_bounding_box(xml_filepath):
    """
    Parses an XML file to extract bounding box coordinates.
    Returns a shapely Polygon representing the bounding box, or None if not found/parsed.
    """
    try:
        tree = ET.parse(xml_filepath)
        root = tree.getroot()

        # Define the namespace if present, or search without it

        def find_element_text(parent, tag_name):
            # Search for element with or without namespace
            element = parent.find(tag_name)
            if element is None:
                # Try with common namespaces
                for ns_prefix in ['', '{http://www.fgdc.gov/metadata/fgdc-std-001-1998.xsd}', '{http://www.isotc211.org/2005/gmd}']:
                    element = parent.find(f'{ns_prefix}{tag_name}')
                    if element is not None:
                        break
            return element.text if element is not None else None

        # Find the bounding box elements
        westbc = find_element_text(root.find('.//bounding'), 'westbc')
        eastbc = find_element_text(root.find('.//bounding'), 'eastbc')
        northbc = find_element_text(root.find('.//bounding'), 'northbc')
        southbc = find_element_text(root.find('.//bounding'), 'southbc')

        if all([westbc, eastbc, northbc, southbc]):
            west = float(westbc)
            east = float(eastbc)
            north = float(northbc)
            south = float(southbc)
            bbox_polygon = Polygon([(west, south), (east, south), (east, north), (west, north), (west, south)])
            return bbox_polygon
        else:
            print(f"Warning: Could not find all bounding box coordinates in {xml_filepath}")
            return None
    except ET.ParseError as e:
        print(f"Error parsing XML file {xml_filepath}: {e}")
        return None
    except ValueError as e:
        print(f"Error converting bounding box coordinates to float in {xml_filepath}: {e}")
        return None
    except AttributeError: # Happens if .//bounding is not found
        print(f"Warning: 'bounding' element not found in XML file {xml_filepath}.")
        return None

In [None]:
def main():
    # 1. Load Atlanta's boundary
    atlanta_boundary_gdf = load_atlanta_boundary(ATLANTA_BOUNDARY_FILE)
    if atlanta_boundary_gdf is None or atlanta_boundary_gdf.empty:
        print("Failed to load or create Atlanta boundary. Exiting.")
        return

    atlanta_union_geometry = atlanta_boundary_gdf.geometry.unary_union

    os.makedirs(DOWNLOAD_DIR_DEM, exist_ok=True)
    os.makedirs(DOWNLOAD_DIR_LIDAR, exist_ok=True)
    
    # 2. Process the list of URLs
    try:
        with open(TIFF_URL_LIST_FILE, 'r') as f:
            tiff_urls = [line.strip() for line in f if line.strip()]
    except FileNotFoundError:
        print(f"Error: TIFF URL list file not found at {TIFF_URL_LIST_FILE}")
        return

    print(f"\nProcessing {len(tiff_urls)} TIFF URLs...")

    for i, tif_url in enumerate(tiff_urls):
        tif_id = extract_id_from_url(tif_url)
        if not tif_id:
            print(f"Skipping: Could not extract ID from {tif_url}")
            continue
        xml_url = construct_xml_url(tif_url, tif_id)
        if not xml_url:
            print(f"Skipping: Could not construct XML URL for {tif_id}")
            continue

        xml_filepath = download_file(xml_url, DOWNLOAD_DIR_LIDAR)
        if not xml_filepath:
            print(f"Skipping: Failed to download XML for {tif_id}")
            continue
        data_bbox_polygon = parse_xml_bounding_box(xml_filepath)
        try:
            os.remove(xml_filepath)
            # print(f"Removed temporary XML file: {xml_filepath}")
        except OSError as e:
            print(f"Error removing temporary XML file {xml_filepath}: {e}")
        if data_bbox_polygon:
            data_bbox_geoseries = gpd.GeoSeries([data_bbox_polygon], crs="EPSG:4326")
            if data_bbox_geoseries.intersects(atlanta_union_geometry).any():
                # print(f"Bounding box for ID {tif_id} intersects with Atlanta's boundary.")

                # 3. Download TIFF and LAZ files
                laz_url = construct_laz_url(tif_url, tif_id)

                print(f"Initiating download for TIFF: {tif_url}")
                download_file(tif_url, DOWNLOAD_DIR_DEM)

                print(f"Initiating download for LAZ: {laz_url}")
                download_file(laz_url, DOWNLOAD_DIR_LIDAR)
            # else:
                # print(f"Bounding box for ID {tif_id} does NOT intersect with Atlanta's boundary. Skipping downloads.")
        else:
            print(f"Skipping: Could not get valid bounding box for ID {tif_id} from XML.")

In [None]:
if __name__== "__main__":
    main()

Successfully loaded Atlanta boundary from: C:\Users\HojungYu\Documents\GitHub\fast-thermal-comfort\data\Atlanta_City_buffered.geojson


  atlanta_union_geometry = atlanta_boundary_gdf.geometry.unary_union


Initiating download for TIFF: https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/OPR/Projects/GA_Statewide_2018_B18_DRRA/GA_Statewide_B3_2018/TIFF/USGS_OPR_GA_Statewide_2018_B18_DRRA_e1072n1251.tif
Initiating download for LAZ: https://rockyweb.usgs.gov/vdelivery/Datasets/Staged/Elevation/LPC/Projects/GA_Statewide_2018_B18_DRRA/GA_Statewide_B3_2018/LAZ/USGS_LPC_GA_Statewide_2018_B18_DRRA_e1072n1251.laz
