In [1]:
import os
import requests
import json

def github_raw_url(url):
    """Get the raw URL for a GitHub file.

    Args:
        url (str): The GitHub URL.
    Returns:
        str: The raw URL.
    """
    if isinstance(url, str) and url.startswith("https://github.com/") and "blob" in url:
        url = url.replace("github.com", "raw.githubusercontent.com").replace(
            "blob/", ""
        )
    return url

def download_file(
    url=None,
    output=None,
    quiet=False,
    proxy=None,
    speed=None,
    use_cookies=True,
    verify=True,
    id=None,
    fuzzy=False,
    resume=False,
    unzip=True,
    overwrite=False,
):
    """Download a file from URL, including Google Drive shared URL.

    Args:
        url (str, optional): Google Drive URL is also supported. Defaults to None.
        output (str, optional): Output filename. Default is basename of URL.
        quiet (bool, optional): Suppress terminal output. Default is False.
        proxy (str, optional): Proxy. Defaults to None.
        speed (float, optional): Download byte size per second (e.g., 256KB/s = 256 * 1024). Defaults to None.
        use_cookies (bool, optional): Flag to use cookies. Defaults to True.
        verify (bool | str, optional): Either a bool, in which case it controls whether the server's TLS certificate is verified, or a string, in which case it must be a path to a CA bundle to use. Default is True.. Defaults to True.
        id (str, optional): Google Drive's file ID. Defaults to None.
        fuzzy (bool, optional): Fuzzy extraction of Google Drive's file Id. Defaults to False.
        resume (bool, optional): Resume the download from existing tmp file if possible. Defaults to False.
        unzip (bool, optional): Unzip the file. Defaults to True.
        overwrite (bool, optional): Overwrite the file if it already exists. Defaults to False.

    Returns:
        str: The output file path.
    """

    import gdown

    if output is None:
        if isinstance(url, str) and url.startswith("http"):
            output = os.path.basename(url)

    if isinstance(url, str):
        if os.path.exists(os.path.abspath(output)) and (not overwrite):
            print(
                f"{output} already exists. Skip downloading. Set overwrite=True to overwrite."
            )
            return os.path.abspath(output)
        else:
            url = github_raw_url(url)

    if "https://drive.google.com/file/d/" in url:
        fuzzy = True

    output = gdown.download(
        url, output, quiet, proxy, speed, use_cookies, verify, id, fuzzy, resume
    )

    if unzip and output.endswith(".zip"):

        with zipfile.ZipFile(output, "r") as zip_ref:
            if not quiet:
                print("Extracting files...")
            zip_ref.extractall(os.path.dirname(output))

    return os.path.abspath(output)

class The_national_map_USGS():
    """
    The national map is a collection of topological datasets, maintained by the USGS. 

    It provides an API endpoint which can be used to find downloadable links for the products offered.
        - Full description of datasets available can retrieved.
          This consists of metadata such as detail description and publication dates.
        - A wide range of dataformats are availble

    More complete documentation for the API can be found at
        https://apps.nationalmap.gov/tnmaccess/#/
    """

    def __init__(self):
        self.api_endpoint = r'https://tnmaccess.nationalmap.gov/api/v1/'
        self.DS = self.datasets_full()
    
    def datasets_full(self) -> list:
        """
        Full description of datasets provided.
        Returns a JSON or empty list.
        """
        try:
            return requests.get(f'{self.api_endpoint}datasets?').json()
        except Exception:
            print('Failed to load metadata from The National Map API endpoint V1')
            return []

    @property
    def prodFormats(self) -> list:
        """
        Return all datatypes available in any of the collections. 
        Note that "All" is only peculiar to one dataset. 
        """
        return set(i['displayName'] for ds in self.DS for i in ds['formats'])

    @property
    def datasets(self) -> list:
        """
        Returns a list of dataset tags (most common human readable self description for specific datasets).
        """
        return set(y['sbDatasetTag'] for x in self.DS for y in x['tags'])

    def parse_region(region, geopandas_args={}):
        """
        Translate a Vector dataset to a polygon.

        Args:
            region (str | list): an URL|filepath to a vector dataset to a polygon
            geopandas_reader_args (dict, optional): A dictionary of arguments to pass to the geopandas.read_file() function. 
                Used for reading a region URL|filepath.
        """
        import geopandas as gpd

        if isinstance(region, str):
            if region.startswith("http"):
                region = github_raw_url(region)
                region = download_file(region)
            elif not os.path.exists(region):
                raise ValueError("region must be a path or a URL to a vector dataset.")

            roi = gpd.read_file(region, **geopandas_args)
            roi = roi.to_crs(epsg=4326)
        return roi.total_bounds
        
    def download_tiles(self, region=None, out_dir=None, download_args={}, geopandas_args={}, API={'max':10}):
        """
        Download the US National Elevation Datasets (NED) for a region.

        Args:
            region (str | list, optional): An URL|filepath to a vector dataset Or a list of bounds in the form of [minx, miny, maxx, maxy].
                Alternatively you could use API parameters such as polygon or bbox.
            out_dir (str, optional): The directory to download the files to. Defaults to None, which uses the current working directory.
            download_args (dict, optional): A dictionary of arguments to pass to the download_file function. Defaults to {}.
            geopandas_args (dict, optional): A dictionary of arguments to pass to the geopandas.read_file() function. 
                Used for reading a region URL|filepath.
            API (dict, optional): A dictionary of arguments to pass to the self.find_details() function. 
                Exposes most of the documented API.
                Defaults to {'max':10} to avoid accidental large downloads.

        Returns:
            None
        """
        if out_dir is None:
            out_dir = os.getcwd()
        else:
            out_dir = os.path.abspath(out_dir)
        
        tiles = self.find_tiles(region, return_type='list', geopandas_args=geopandas_args, API=API)
        T = len(tiles)
        errors = 0
        done = 0

        for i, link in enumerate(tiles):
            file_name = os.path.basename(link)
            out_name = os.path.join(out_dir, file_name)
            if i<5 or (i<50 and not(i%5)) or not(i%20):
                print(f"Downloading {i+1} of {T}: {file_name}")
            try:
                download_file(link, out_name, **download_args)
                done += 1
            except KeyboardInterrupt:
                print('Cancelled download')
                break
            except Exception:     
                errors += 1           
                print(f"Failed to download {i+1} of {T}: {file_name}")
                
        print(f"{done} Downloads completed, {errors} downloads failed")
        return 


    def find_tiles(self, region=None, return_type='list', geopandas_args={}, API={}):
        """
        Download the US National Elevation Datasets (NED) for a region.

        Args:
            region (str | list, optional): An URL|filepath to a vector dataset Or a list of bounds in the form of [minx, miny, maxx, maxy].
                Alternatively you could use API parameters such as polygon or bbox.
            out_dir (str, optional): The directory to download the files to. Defaults to None, which uses the current working directory.
            return_type (str): list | dict. Defaults to list. Changes the return output type and content.
            geopandas_args (dict, optional): A dictionary of arguments to pass to the geopandas.read_file() function. 
                Used for reading a region URL|filepath.
            API (dict, optional): A dictionary of arguments to pass to the self.find_details() function. 
                Exposes most of the documented API parameters. Defaults to {}.

        Returns:
            list: A list of download_urls to the found tiles. 
            dict: A dictionary containing the metadata of the found tiles.
        """
        assert region or API, 'Provide a region or use the API'

        if isinstance(region,str):
            API['polygon'] = self.parse_region(region, geopandas_args)
        if isinstance(region, list):
            API['bbox'] = region

        results = self.find_details(**API)
        if return_type == 'list':
            return [i["downloadURL"] for i in results.get('items')]
        return results

    def find_details(self, 
                   bbox:list[float] = None, 
                   polygon:list[tuple[float,float]] = None, 
                   datasets:list[str] | str = [], 
                   prodFormats:list[str] | str = [],
                   prodExtents:list[str] | str = [], 
                   q:str = None, 
                   dateType:str = None, 
                   start:str = None, 
                   end:str = None, 
                   offset:int = 0, 
                   max:int = None, 
                   outputFormat:str = 'JSON', 
                   polyType:str = None, 
                   polyCode:str = None, 
                   extentQuery:int = None) -> dict:
        """
        Possible search parameters (kwargs) support by API

        Parameter               Values                      
            Description
        ---------------------------------------------------------------------------------------------------    
        bbox                    'minx, miny, maxx, maxy'
            Geographic longitude/latitude values expressed in  decimal degrees in a comma-delimited list.
        polygon                 '[x,y x,y x,y x,y x,y]'       
            Polygon, longitude/latitude values expressed in decimal degrees in a space-delimited list.
        datasets                See: Datasets (Optional)       
            Comma-delimited list of valid dataset tag names (sbDatasetTag)
            From https://apps.nationalmap.gov/tnmaccess/#/product
        prodFormats             See: Product Formats (Optional)
            Comma-delimited list of dataset-specific formats
            From https://apps.nationalmap.gov/tnmaccess/#/product
        prodExtents             See: Product Extents (Optional)
            Comma-delimited list of dataset-specific extents
            From https://apps.nationalmap.gov/tnmaccess/#/product
        q                       free text 
            Text input which can be used to filter by product titles and text descriptions.
        dateType                dateCreated | lastUpdated | Publication 
            Type of date to search by.
        start                   'YYYY-MM-DD' 
            Start date
        end                     'YYYY-MM-DD' 
            End date (required if start date is provided)
        offset                  integer 
            Offset into paginated results - default=0
        max                     integer 
            Number of results returned
        outputFormat            JSON | CSV | pjson
            Default=JSON
        polyType                state | huc2 | huc4 | huc8 
            Well Known Polygon Type. Use this parameter to deliver data by state or HUC
            (hydrologic unit codes defined by the Watershed Boundary Dataset/WBD)
        polyCode                state FIPS code or huc number 
            Well Known Polygon Code. This value needs to coordinate with the polyType parameter.
        extentQuery             integer 
            A Polygon code in the science base system, typically from an uploaded shapefile
        """
       
        # call locals before creating new locals
        used_locals = {k:v for k,v in locals().items() if v and k != 'self'}

        # Parsing
    
        def convert_polygon(x):
            return ','.join(' '.join(map(str,point)) for point in x)
        if polygon:
            used_locals['polygon'] = convert_polygon(polygon)        
        
        # Fetch list seems broken in API ???, only takes list with 1 item or str.
        # Looks like list is evaluated as AND instead of OR.

        assert set(datasets).issubset(self.datasets) or datasets in self.datasets, f'Unknown datasets, must be elements of {self.datasets}'
        assert set(prodFormats).issubset(self.prodFormats) or prodFormats in self.prodFormats, f'Unknown prodFormats, must be element of {self.prodFormats}'

        # Validations handled better (f.e. psjon) by API endpoint error responses
        # 'JSON', 'CSV' (misses pjson)
        # 'dateCreated', 'lastUpdated', 'Publication'
        # start or end or dateType / YYYY-MM-DD
            
        # Fetch response

        response = requests.get(f'{self.api_endpoint}products?', params=used_locals)
        if response.status_code//100 == 2:
            return response.json()
        else:
            print(response.json())
        return {}

def download_tnm(region=None, out_dir=None, download_args={}, geopandas_args={}, API={}):
    """
    Download the US National Elevation Datasets (NED) for a region.

    Args:
        region (str | list, optional): An URL|filepath to a vector dataset Or a list of bounds in the form of [minx, miny, maxx, maxy].
            Alternatively you could use API parameters such as polygon or bbox.
        out_dir (str, optional): The directory to download the files to. Defaults to None, which uses the current working directory.
        download_args (dict, optional): A dictionary of arguments to pass to the download_file function. Defaults to {}.
        geopandas_args (dict, optional): A dictionary of arguments to pass to the geopandas.read_file() function. 
            Used for reading a region URL|filepath.
        API (dict, optional): A dictionary of arguments to pass to the The_national_map_USGS.find_details() function. 
            Exposes most of the documented API.
            Defaults to {'max':10}
            Using API={'q':'NED'} yields similar results to download_ned

    Returns:
        None
    """    
    assert region or API, 'Provide a region or use the API' 
    TNM = The_national_map_USGS()
    if not(API.get('max')):
        API['max'] = 10
    return TNM.download_tiles(region, out_dir, download_args, geopandas_args, API)

In [2]:
U = The_national_map_USGS()

https://tnmaccess.nationalmap.gov/api/v1/datasets?


In [3]:
paras = {
         'q':'National Elevation Dataset (NED) 1/3 arc-second',
         'polyCode': '01010002',
         'polyType': 'huc8'
        }

U.find_details(**paras)['items'][0]

{'title': 'USGS 1 Arc Second n47w069 20210611',
 'moreInfo': 'This tile of the 3D Elevation Program (3DEP) seamless products is 1 Arc Second resolution. 3DEP data serve as the elevation layer of The National Map, and provide basic elevation information for Earth science studies and mapping applications in the United States. Scientists and resource managers use 3DEP data for global change research, hydrologic modeling, resource monitoring, mapping and visualization, and many other applications. 3DEP data compose an elevation dataset that consists of seamless layers and a high resolution layer. Each of these layers consists of the best available raster elevation data of the conterminous United States, Alaska, Hawaii, territorial islands, Mexico and Canada. 3DEP data are updated continually as new [...]',
 'sourceId': '60d2c049d34e8409865288fc',
 'sourceName': 'ScienceBase',
 'sourceOriginId': None,
 'sourceOriginName': 'gda',
 'metaUrl': 'https://www.sciencebase.gov/catalog/item/60d2c049

In [4]:
U.find_details(start='2021-01-01',end='2022-01-01',q='NED',outputFormat='JSON')

{'errorMessage': "[BadRequest] 'date type is required when start date and enddate are provided' "}


{}

In [5]:
U.find_details(start='2021-01-01',end='2022-31-01',q='NED',outputFormat='JSON')

{'errorMessage': "[BadRequest] 'Value '2022-31-01' of property end is not a valid date (YYYY-MM-DD)' "}


{}

In [6]:
paras = {'prodFormats':'LAS,LAZ', 
         'datasets':['Lidar Point Cloud (LPC)'],
         'polygon': [
                     (-104.94262695312236, 41.52867510196275),
                     (-102.83325195312291, 41.52867510196275),
                     (-102.83325195312291, 40.45065268246805),
                     (-104.94262695312236, 40.45065268246805),
                     (-104.94262695312236, 41.52867510196275),
                   ]
        }

U.find_details(**paras)['items'][0]

{'title': 'USGS Lidar Point Cloud CO SoPlatteRiver-Lot1 2013 13TFE627477 LAS 2015',
 'moreInfo': 'Lidar (Light detection and ranging) discrete-return point cloud data are available in the American Society for Photogrammetry and Remote Sensing (ASPRS) LAS format. The LAS format is a standardized binary format for storing 3-dimensional point cloud data and point attributes along with header information and variable length records specific to the data. Millions of data points are stored as a 3-dimensional data cloud as a series of x (longitude), y (latitude) and z (elevation) points. A few older projects in this collection are in ASCII format. Please refer to http://www.asprs.org/Committee-General/LASer-LAS-File-Format-Exchange-Activities.html for additional information.',
 'sourceId': '5a801f46e4b00f54eb2a10dc',
 'sourceName': 'ScienceBase',
 'sourceOriginId': None,
 'sourceOriginName': 'gda',
 'metaUrl': 'https://www.sciencebase.gov/catalog/item/5a801f46e4b00f54eb2a10dc',
 'vendorMetaUr

In [7]:
p = {'polyCode':'01010002','polyType':'huc8','q':'NED'}
U.find_tiles(p)[:5]

['https://prd-tnm.s3.amazonaws.com/StagedProducts/NAIP/fl_2015/24080/m_2408002_ne_17_1_20150719_20160222.jp2',
 'https://prd-tnm.s3.amazonaws.com/StagedProducts/NAIP/fl_2015/24080/m_2408002_nw_17_1_20151212_20160222.jp2',
 'https://prd-tnm.s3.amazonaws.com/StagedProducts/NAIP/fl_2015/24080/m_2408002_se_17_1_20150719_20160222.jp2',
 'https://prd-tnm.s3.amazonaws.com/StagedProducts/NAIP/fl_2015/24080/m_2408002_sw_17_1_20151212_20160222.jp2',
 'https://prd-tnm.s3.amazonaws.com/StagedProducts/NAIP/fl_2015/24080/m_2408003_ne_17_1_20150719_20160222.jp2']

In [8]:
samples = U.find_details(**p)
samples['total'], samples.keys()

(3396,
 dict_keys(['total', 'items', 'errors', 'messages', 'sciencebaseQuery', 'filteredOut']))

In [9]:
U.download_tiles(API=p)

Downloading 1 of 50: USGS_1_n47w069_20210611.tif
C:\Users\Karel\Repos\leafmap\leafmap\examples\notebooks\USGS_1_n47w069_20210611.tif already exists. Skip downloading. Set overwrite=True to overwrite.
Downloading 2 of 50: USGS_1_n47w070_20210611.tif
C:\Users\Karel\Repos\leafmap\leafmap\examples\notebooks\USGS_1_n47w070_20210611.tif already exists. Skip downloading. Set overwrite=True to overwrite.
Downloading 3 of 50: USGS_1_n48w069_20210611.tif


Downloading...
From: https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/historical/n48w069/USGS_1_n48w069_20210611.tif
To: C:\Users\Karel\Repos\leafmap\leafmap\examples\notebooks\USGS_1_n48w069_20210611.tif
100%|█████████████████████████████████████████████████████████████████████████████| 52.2M/52.2M [00:19<00:00, 2.63MB/s]


Downloading 4 of 50: USGS_1_n48w070_20210611.tif


Downloading...
From: https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/historical/n48w070/USGS_1_n48w070_20210611.tif
To: C:\Users\Karel\Repos\leafmap\leafmap\examples\notebooks\USGS_1_n48w070_20210611.tif
100%|█████████████████████████████████████████████████████████████████████████████| 42.6M/42.6M [00:13<00:00, 3.05MB/s]


Downloading 5 of 50: USGS_1_n47w069_20190930.tif


Downloading...
From: https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/historical/n47w069/USGS_1_n47w069_20190930.tif
To: C:\Users\Karel\Repos\leafmap\leafmap\examples\notebooks\USGS_1_n47w069_20190930.tif
 13%|█████████▋                                                                   | 6.82M/54.5M [00:03<00:24, 1.98MB/s]

Cancelled download
4 Downloads completed, 0 downloads failed





In [10]:
download_tnm(API=p)

https://tnmaccess.nationalmap.gov/api/v1/datasets?
Downloading 1 of 10: USGS_1_n47w069_20210611.tif
C:\Users\Karel\Repos\leafmap\leafmap\examples\notebooks\USGS_1_n47w069_20210611.tif already exists. Skip downloading. Set overwrite=True to overwrite.
Downloading 2 of 10: USGS_1_n47w070_20210611.tif
C:\Users\Karel\Repos\leafmap\leafmap\examples\notebooks\USGS_1_n47w070_20210611.tif already exists. Skip downloading. Set overwrite=True to overwrite.
Downloading 3 of 10: USGS_1_n48w069_20210611.tif
C:\Users\Karel\Repos\leafmap\leafmap\examples\notebooks\USGS_1_n48w069_20210611.tif already exists. Skip downloading. Set overwrite=True to overwrite.
Downloading 4 of 10: USGS_1_n48w070_20210611.tif
C:\Users\Karel\Repos\leafmap\leafmap\examples\notebooks\USGS_1_n48w070_20210611.tif already exists. Skip downloading. Set overwrite=True to overwrite.
Downloading 5 of 10: USGS_1_n47w069_20190930.tif


Downloading...
From: https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1/TIFF/historical/n47w069/USGS_1_n47w069_20190930.tif
To: C:\Users\Karel\Repos\leafmap\leafmap\examples\notebooks\USGS_1_n47w069_20190930.tif
  7%|█████▏                                                                       | 3.67M/54.5M [00:02<00:40, 1.27MB/s]

Cancelled download
4 Downloads completed, 0 downloads failed



