In [1]:
import os
import requests 
import urllib3
from alive_progress import alive_bar
import concurrent

In [3]:
dataset= "National Elevation Dataset (NED) 1 arc-second"

us_west_bbox = "-100.336147,50.059513,-127.850165,29.983312"
oregon_bbox = "-124.566244,46.864746,-116.463504,41.991794"
# -124.566244 	41.991794 	-116.463504 	46.292035

# fetch from USGS national map
url = "https://tnmaccess.nationalmap.gov/api/v1/products?datasets={}&bbox={}".format(dataset, oregon_bbox)

response = requests.get(url)
data = response.json()

urls = []
download_bytes = 0

total_items = data['total']
n = len(data['items'])

while n < total_items:
    print("requesting next page of items to {}".format(n))
    url += '&offset=' + str(n)
    response = requests.get(url + "&offset={}".format(n))
    data = response.json()
    for item in data['items']:
        urls.append(item['downloadURL'])
        download_bytes += item['sizeInBytes']

    n += len(data['items'])


print('Total download size: {:.2f} GB'.format(download_bytes / 1e9))

requesting next page of items to 50
requesting next page of items to 100
Total download size: 0.89 GB


In [4]:
# create data/sources/ dir if it doesn't already exist
if not os.path.exists('data/sources'):
    os.makedirs('data/sources')

# filter out any urls that already exist as saved files
filtered_urls = [url for url in urls if not os.path.exists(os.path.join('data/sources', url.split('/')[-1]))]

print("Skipping downloading {} files that already exist".format(len(urls) - len(filtered_urls)))

def request(method, url):
    try:
        r = urllib3.request(method, url)
        return r
    except Exception as e:
        print('Failed to download', url, e)
        return None

# download the data in parallel and save each file to data/sources/
with alive_bar(len(filtered_urls), title="Downloading data...", force_tty=True) as bar:
    with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
        future_to_url = {executor.submit(request, "GET", url): url for url in filtered_urls}
        for future in concurrent.futures.as_completed(future_to_url):
            url = future_to_url[future]
            filename = url.split('/')[-1]
            try:
                filepath = os.path.join('data/sources', filename)
                if not os.path.exists(filepath):
                    data = future.result()
                    # save the data to filepath
                    with open(filepath, 'wb') as f:
                        f.write(data.data)
            except Exception as e:
                print('Failed to download', filename, e)
            bar()


Skipping downloading 0 files that already exist
Downloading data... |████████████████████████████████████████| 65/65 [100%] in 4
