### Import packages


In [1]:
import getpass
import io
import json
import math
import os
import pprint
import re
import shutil
import socket
import time
import zipfile
from statistics import mean
from xml.etree import ElementTree as ET

import requests
from requests.auth import HTTPBasicAuth

### Select data sets and determine version numbers

In [2]:
# Build the dictionary of data set parameters used by the access API command below.
# Each entry starts out holding only the data set ID (e.g. ATL07), also known as "short name".

short_names = (
    ('sea_ice_fb', 'ATL10'),
    ('sea_ice_height', 'ATL07'),
    ('ist', 'MOD29'),
)

# One independent parameter dictionary per data set, in the order listed above.
data_dict = {label: {'short_name': short_name} for label, short_name in short_names}

In [3]:
# Get json response from CMR collection metadata to grab version numbers and add to data_dict

cmr_collections_url = 'https://cmr.earthdata.nasa.gov/search/collections.json'

for dataset in data_dict.values():
    # Send only the short name. Later cells add keys such as 'bbox', 'email' and
    # 'coverage' to this dictionary, and passing the whole dictionary would send
    # those along too if this cell is ever re-run.
    response = requests.get(cmr_collections_url, params={'short_name': dataset['short_name']})

    # Stop here on an HTTP error instead of failing later on a missing 'feed' key.
    response.raise_for_status()
    results = json.loads(response.content)

    # Collect every 'version_id' in the metadata, dropping any that contain letters.
    versions = [entry['version_id'] for entry in results['feed']['entry']]
    versions = [version for version in versions if not any(c.isalpha() for c in version)]
    if not versions:
        raise ValueError('No numeric version_id found in CMR for ' + dataset['short_name'])

    # NOTE(review): version ids are strings, so max() compares them character by
    # character (e.g. '9' sorts above '10'). Confirm this picks the intended
    # version for every data set before relying on it.
    dataset['version'] = max(versions)

### Select time and area of interest

Data granules are returned based on a spatial bounding box and temporal range.

In [4]:
# Spatial bounding box as a 'W,S,E,N' string in decimal degrees

bounding_box = '140,72,153,80'

# Attach the bounding box to every data set's parameter dictionary
for dataset_params in data_dict.values():
    dataset_params['bounding_box'] = bounding_box

In [5]:
# Temporal range as a 'YYYY-MM-DDThh:mm:ssZ,YYYY-MM-DDThh:mm:ssZ' string (start,end)

temporal = '2019-03-23T00:00:00Z,2019-03-23T23:59:59Z'

# Attach the temporal range to every data set's parameter dictionary
for dataset_params in data_dict.values():
    dataset_params['temporal'] = temporal

### Determine how many granules exist over this time and area of interest, as well as the average size and total volume of those granules

In [6]:
# Query number of granules (paging over results) and report their sizes.
# The granule count of each data set is stored under its own 'gran_num' key.
granule_search_url = 'https://cmr.earthdata.nasa.gov/search/granules'
headers = {'Accept': 'application/json'}

for dataset in data_dict.values():
    params = {
        'short_name': dataset['short_name'],
        'version': dataset['version'],
        'bounding_box': bounding_box,
        'temporal': temporal,
        'page_size': 100,
        'page_num': 1
    }
    granules = []
    while True:
        response = requests.get(granule_search_url, params=params, headers=headers)
        # Stop on an HTTP error instead of failing later on a missing 'feed' key.
        response.raise_for_status()
        results = json.loads(response.content)

        page_entries = results['feed']['entry']
        if len(page_entries) == 0:
            # Out of results, so break out of loop
            break

        # Collect results and increment page_num
        granules.extend(page_entries)
        params['page_num'] += 1

    print('There are', len(granules), 'granules of', dataset['short_name'], 'version', dataset['version'], 'over my area and time of interest.')

    # Record the count for THIS data set only. Writing it to every data set would
    # leave them all holding the count of whichever data set was queried last.
    dataset['gran_num'] = len(granules)

    granule_sizes = [float(granule['granule_size']) for granule in granules]
    if granule_sizes:
        print(f'The average size of each granule is {mean(granule_sizes):.2f} MB and the total size of all {len(granules)} granules is {sum(granule_sizes):.2f} MB')
    else:
        # mean() raises on an empty list, so report the empty result explicitly.
        print('No granules were found, so no size statistics are available.')
    print()

There are 2 granules of ATL10 version 005 over my area and time of interest.
The average size of each granule is 168.34 MB and the total size of all 2 granules is 336.69 MB

There are 4 granules of ATL07 version 005 over my area and time of interest.
The average size of each granule is 320.07 MB and the total size of all 4 granules is 1280.29 MB

There are 13 granules of MOD29 version 61 over my area and time of interest.
The average size of each granule is 2.80 MB and the total size of all 13 granules is 36.40 MB



Note that subsetting, reformatting, or reprojecting can alter the size of the granules if those services are applied to your request.

### Select subsetting, reformatting, and reprojection options for each dataset

### Input Earthdata Login credentials

An Earthdata Login account is required to access data from the NSIDC DAAC. If you do not already have an Earthdata Login account, visit https://urs.earthdata.nasa.gov to register.

In [7]:
# Earthdata Login user name and the email associated with that Earthdata Login account.
# Edit both values for your own account.
uid, email = 'amy.steiker', 'amy.steiker@colorado.edu'

# Prompt for the Earthdata Login password so it is typed at run time, not stored in the notebook.
# NOTE(review): none of the cells visible here pass uid or pswd to an HTTP request.
# Confirm how the data requests are meant to authenticate.
pswd = getpass.getpass('Earthdata Login password: ')

Earthdata Login password:  ·········


The NSIDC DAAC supports customization services on many of our NASA Earthdata mission collections. See the Customize and Access Data notebook to query the subsetting, reformatting, and reprojection service options available for your data set of interest. Since we already know these options, we'll add our subsetting requests directly into our data dictionary. 

In [8]:
# Subsetting options added to each data set's parameter dictionary

subset_time = '2019-03-23T00:00:00,2019-03-23T23:59:59'

# ATL10 and ATL07: spatial and temporal subsetting
for dataset_key in ('sea_ice_fb', 'sea_ice_height'):
    data_dict[dataset_key].update({'bbox': bounding_box, 'time': subset_time})

# MOD29: spatial subsetting only (this cell sets no reprojection parameter)
data_dict['ist']['bbox'] = bounding_box

Now let's select a subset of variables. We'll use these primary variables of interest for the ICESat-2 ATL07 product:


In [9]:
# ATL07 variable subset
# Only the gt1l, gt2l and gt3l beams are requested (the "strong beams" per the original note).

atl07_beams = ('gt1l', 'gt2l', 'gt3l')
atl07_variables = (
    'delta_time',
    'latitude',
    'longitude',
    'heights/height_segment_confidence',
    'heights/height_segment_height',
    'heights/height_segment_quality',
    'heights/height_segment_surface_error_est',
)

# Comma-separated variable paths: all variables for the first beam, then the second, then the third.
data_dict['sea_ice_height']['coverage'] = ','.join(
    f'/{beam}/sea_ice_segments/{variable}'
    for beam in atl07_beams
    for variable in atl07_variables
)


### Select data access configurations

The data request can be accessed asynchronously or synchronously. The asynchronous option will allow concurrent requests to be queued and processed without the need for a continuous connection. Those requested orders will be delivered to the specified email address, or they can be accessed programmatically as shown below. Synchronous requests will automatically download the data as soon as processing is complete. For this tutorial, we will be selecting the asynchronous method. 

In [10]:
#Set NSIDC data access base URL
base_url = 'https://n5eil02u.ecs.nsidc.org/egi/request'

#Set the page size to the maximum for asynchronous requests
page_size = 2000

for dataset_key, dataset in data_dict.items():
    #Add email address
    dataset['email'] = email

    #Set the request mode to asynchronous
    dataset['request_mode'] = 'async'

    dataset['page_size'] = page_size

    # 'gran_num' is removed because later cells send every key of this dictionary
    # as a request parameter. pop() with a default keeps this cell re-runnable:
    # the old 'del' raised KeyError the second time the cell was executed.
    gran_num = dataset.pop('gran_num', None)
    if gran_num is not None:
        #Determine number of orders needed for requests over 2000 granules.
        dataset['page_num'] = math.ceil(gran_num / page_size)
    elif 'page_num' not in dataset:
        # Neither a granule count nor an earlier page count is available.
        raise KeyError(f"'gran_num' is missing for {dataset_key}; run the granule query cell first")

### Create the API endpoint 

Programmatic API requests are formatted as HTTPS URLs that contain key-value pairs specifying the service operations that we selected above. The following request can be executed via the command line, a web browser, or in Python as shown below.

In [11]:
# Build one request URL per order (page) of every data set, without modifying data_dict.
endpoint_list = []
for dataset in data_dict.values():
    total_pages = dataset['page_num']

    # Multi-page requests get one URL per page, numbered 1..total_pages.
    # Otherwise a single URL is built with page_num exactly as stored.
    page_numbers = range(1, total_pages + 1) if total_pages > 1 else [total_pages]

    for page in page_numbers:
        # Copy the parameters and override only page_num. The key order is kept,
        # and the stored page count stays intact for the request cell.
        page_params = dict(dataset, page_num=page)
        param_string = '&'.join(f'{key}={value}' for key, value in page_params.items())

        #API base URL + request parameters
        API_request = f'{base_url}?{param_string}'
        endpoint_list.append(API_request)

print("\n".join("\n"+s for s in endpoint_list))


https://n5eil02u.ecs.nsidc.org/egi/request?short_name=ATL10&version=005&bounding_box=140,72,153,80&temporal=2019-03-23T00:00:00Z,2019-03-23T23:59:59Z&bbox=140,72,153,80&time=2019-03-23T00:00:00,2019-03-23T23:59:59&email=amy.steiker@colorado.edu&request_mode=async&page_size=2000&page_num=1

https://n5eil02u.ecs.nsidc.org/egi/request?short_name=ATL07&version=005&bounding_box=140,72,153,80&temporal=2019-03-23T00:00:00Z,2019-03-23T23:59:59Z&bbox=140,72,153,80&time=2019-03-23T00:00:00,2019-03-23T23:59:59&coverage=/gt1l/sea_ice_segments/delta_time,/gt1l/sea_ice_segments/latitude,/gt1l/sea_ice_segments/longitude,/gt1l/sea_ice_segments/heights/height_segment_confidence,/gt1l/sea_ice_segments/heights/height_segment_height,/gt1l/sea_ice_segments/heights/height_segment_quality,/gt1l/sea_ice_segments/heights/height_segment_surface_error_est,/gt2l/sea_ice_segments/delta_time,/gt2l/sea_ice_segments/latitude,/gt2l/sea_ice_segments/longitude,/gt2l/sea_ice_segments/heights/height_segment_confidence,/g

### Request data

We will now download data using the Python requests library. The data will be downloaded directly to this notebook directory in a new Outputs folder. The progress of each order will be reported.

In [12]:
# Submit one order per page for every data set, poll until it finishes, then
# download and unzip the result into the Outputs folder.

def _first_text(root, xpath):
    """Return the text of the first element that xpath matches under root.

    Raises ValueError when nothing matches, so a malformed response is reported
    by name rather than as a bare IndexError.
    """
    matches = root.findall(xpath)
    if not matches:
        raise ValueError(f'No element matching {xpath!r} found in the response XML.')
    return matches[0].text


# Create an output folder if the folder does not already exist.
path = str(os.getcwd() + '/Outputs')
if not os.path.exists(path):
    os.mkdir(path)

# Create session so that cookies are kept between the requests below.
# NOTE(review): uid and pswd are not passed to any request in this cell.
# Authentication presumably comes from the environment (e.g. a ~/.netrc entry) - confirm.
session = requests.session()

# Request data service for each page number, and unzip outputs
for dataset in data_dict.values():
    total_pages = dataset['page_num']
    for page_val in range(1, total_pages + 1):
        print(dataset['short_name'], 'Order: ', page_val)

        # Send a copy of the parameters carrying THIS order's page number.
        # Otherwise every order of a multi-page request asks for the same page.
        order_params = dict(dataset, page_num=page_val)

        # For all requests other than spatial file upload, use get function
        request = session.get(base_url, params=order_params)
        print('Request HTTP response: ', request.status_code)

        # Raise bad request: Loop will stop for bad response code.
        request.raise_for_status()
        esir_root = ET.fromstring(request.content)

        # Look up order ID. The trailing '/' in the path appears to select the
        # children of <order>, the first of which holds the ID - confirm.
        orderID = _first_text(esir_root, "./order/")
        print('order ID: ', orderID)

        # Create status URL
        statusURL = base_url + '/' + orderID
        print('status URL: ', statusURL)

        # Find order status
        request_response = session.get(statusURL)
        print('HTTP response from order response URL: ', request_response.status_code)

        # Raise bad request: Loop will stop for bad response code.
        request_response.raise_for_status()

        # status_root always holds the most recent status document, so the error
        # report below works even when the polling loop never runs.
        status_root = ET.fromstring(request_response.content)
        status = _first_text(status_root, "./requestStatus/")
        print('Data request ', page_val, ' is submitting...')
        print('Initial request status is ', status)

        # Continue loop while request is still processing
        while status in ('pending', 'processing'):
            print('Status is not complete. Trying again.')
            time.sleep(10)
            loop_response = session.get(statusURL)

            # Raise bad request: Loop will stop for bad response code.
            loop_response.raise_for_status()
            status_root = ET.fromstring(loop_response.content)
            status = _first_text(status_root, "./requestStatus/")
            print('Retry request status is: ', status)

        # Order can either complete, complete_with_errors, or fail:
        # Provide complete_with_errors error message:
        if status in ('complete_with_errors', 'failed'):
            messagelist = [message.text for message in status_root.findall("./processInfo/")]
            print('error messages:')
            pprint.pprint(messagelist)

        # Download zipped order if status is complete or complete_with_errors
        if status in ('complete', 'complete_with_errors'):
            downloadURL = 'https://n5eil02u.ecs.nsidc.org/esir/' + orderID + '.zip'
            print('Zip download URL: ', downloadURL)
            print('Beginning download of zipped output...')
            zip_response = session.get(downloadURL)
            # Raise bad request: Loop will stop for bad response code.
            zip_response.raise_for_status()
            with zipfile.ZipFile(io.BytesIO(zip_response.content)) as z:
                z.extractall(path)
            print('Data request', page_val, 'is complete.')
        else:
            print('Request failed.')
    print()

ATL10 Order:  1
Request HTTP response:  201
order ID:  5000003709700
status URL:  https://n5eil02u.ecs.nsidc.org/egi/request/5000003709700
HTTP response from order response URL:  201
Data request  1  is submitting...
Initial request status is  processing
Status is not complete. Trying again.
Retry request status is:  complete
Zip download URL:  https://n5eil02u.ecs.nsidc.org/esir/5000003709700.zip
Beginning download of zipped output...
Data request 1 is complete.

ATL07 Order:  1
Request HTTP response:  201
order ID:  5000003709701
status URL:  https://n5eil02u.ecs.nsidc.org/egi/request/5000003709701
HTTP response from order response URL:  201
Data request  1  is submitting...
Initial request status is  processing
Status is not complete. Trying again.
Retry request status is:  complete
Zip download URL:  https://n5eil02u.ecs.nsidc.org/esir/5000003709701.zip
Beginning download of zipped output...
Data request 1 is complete.

MOD29 Order:  1
Request HTTP response:  201
order ID:  5000003

### Finally, we will clean up the Outputs folder by removing individual order folders:

In [13]:
# Clean up Outputs folder: move every downloaded file up into Outputs itself,
# then remove the per-order/per-granule folders that are left empty.

# Walk bottom-up so the deepest folders are emptied and removed first.
for root, dirs, files in os.walk(path, topdown=False):
    # Files that already sit directly in Outputs need no move.
    if root != path:
        for file in files:
            source = os.path.join(root, file)
            try:
                shutil.move(source, path)
            except OSError as error:
                # Typically a file of the same name already exists in Outputs.
                # Report it instead of silently leaving the file behind.
                print('Could not move', source, '-', error)
    for name in dirs:
        folder = os.path.join(root, name)
        try:
            os.rmdir(folder)
        except OSError as error:
            # A folder that still holds an unmoved file cannot be removed.
            # Keep it and carry on rather than aborting the clean-up.
            print('Could not remove', folder, '-', error)