In [1]:
# Clone https://github.com/nasa/eo-metadata-tools
import argparse
from os import path
import sys
sys.path.append(path.abspath('../eo-metadata-tools/CMR/python'))
import cmr.auth.token as t
import cmr.search.collection as coll
import cmr.search.common as scom
import cmr.util.network as net
import netCDF4 as nc4
"""import argparse
import cmr.auth.token as t
import cmr.search.collection as coll
import cmr.search.common as scom
import cmr.util.network as net
import netCDF4 as nc4"""

'import argparse\nimport cmr.auth.token as t\nimport cmr.search.collection as coll\nimport cmr.search.common as scom\nimport cmr.util.network as net\nimport netCDF4 as nc4'

In [2]:
def mySearch(env, search_type, query, show=False):
    ''' Constructs a CMR search URL, submits it, and returns the records.

        Inputs:
          env         -- CMR environment: 'PROD', 'UAT', or 'SIT'
          search_type -- search endpoint appended to the base URL,
                         e.g. 'collections' or 'granules.umm_json'
          query       -- dict of search parameters, expanded into the URL
                         by net.expand_query_to_parameters()
          show        -- optional; if True, prints the search URL and, on
                         failure, the error that was caught

        Returns the list stored under "items" in the response (an empty
        list when the response has no "items"), or False if submitting
        the request or reading the response raised an error.

        Raises KeyError if env is not one of the known environments.
    '''
    # CMR base URLs
    cmr_root = {'PROD': 'https://cmr.earthdata.nasa.gov',
                'UAT': 'https://cmr.uat.earthdata.nasa.gov',
                'SIT': 'https://cmr.sit.earthdata.nasa.gov'}
    # Fail with a readable message (still a KeyError) on an unknown env
    if env not in cmr_root:
        raise KeyError('Unknown CMR environment {!r}; expected one of {}'
                       .format(env, sorted(cmr_root)))
    # Construct the search URL using the inputs
    url = '{}/{}?{}'.format(cmr_root[env], search_type,
                            net.expand_query_to_parameters(query))
    if show:
        print(url)
    try:
        result = net.get(url, accept="application/vnd.nasa.cmr.umm_results+json")
        return result.get("items", [])
    except Exception as err:
        # Best-effort: callers test the result for truthiness. Catching
        # Exception (not a bare except) lets Ctrl-C still interrupt the run.
        if show:
            print('CMR search failed:', err)
        return False

In [None]:
def getCollMeta(cname, env, daac='GES_DISC'):
    ''' Looks up the metadata record(s) for one collection.
        cname -- native-id of the collection, written shortname_version
        env   -- one of SIT, UAT, or PROD
        daac  -- provider to search; 'GES_DISC' unless overridden
        Returns whatever mySearch returns for the 'collections' search.
    '''
    collQuery = {'native-id': cname, 'provider': daac}
    return mySearch(env, 'collections', collQuery)

 
def getCollId(coll, daac='GES_DISC'):
    ''' Pulls the concept ID (e.g. C1244165121-GES_DISC) out of a
        collection metadata record.
        coll -- collection metadata record with a "meta" section
        daac -- accepted but not used by this function
    '''
    metaSection = coll["meta"]
    return metaSection["concept-id"]


def getCollSvcs(coll, env, daac='GES_DISC'):
    ''' Searches for a collection's associated services.

        Inputs:
          coll -- collection metadata record; the service concept IDs are
                  read from coll['meta']['associations']['services']
          env  -- CMR environment passed through to mySearch
          daac -- provider to search; 'GES_DISC' by default

        Returns a list with one entry per service whose search returned
        records; each entry is the list of records mySearch returned for
        that service. Returns [] when the record has no service
        associations. Services whose search failed or matched nothing
        are skipped, so every entry can safely be indexed with [0].
    '''
    # A record without service associations is normal, not an error
    try:
        svcL = coll['meta']['associations']['services']
    except (KeyError, TypeError):
        return []

    svcRecs = []
    for svc in svcL or []:
        sitems = mySearch(env, 'services', {'provider':daac, 'concept_id':svc})
        # mySearch gives False on failure and [] on no match; keep neither
        if sitems:
            svcRecs.append(sitems)
    return svcRecs

def getCollVars(cid, env, pageSize=500):
    ''' Fetches variable records for a collection by running a
        'variables' search with the collection ID as the keyword.
        cid      -- collection concept ID
        env      -- CMR environment passed through to mySearch
        pageSize -- value sent as page_size (500 by default)
    '''
    varQuery = {'keyword': cid, 'page_size': pageSize}
    return mySearch(env, 'variables', varQuery)

        
def getCollGranules(sn, ver, env, pageSize=10, sortKey='-revision_date', daac='GES_DISC'):
    ''' Runs a granule search for one collection and returns whatever
        mySearch returns for it.
        sn, ver  -- collection short name and version
        env      -- CMR environment passed through to mySearch
        pageSize -- sent as page_size; limits the number of results
        sortKey  -- sent as sort_key; the default '-revision_date' is
                    meant to list the most recently updated granules first
        daac     -- provider to search; 'GES_DISC' by default
    '''
    granQuery = {
        'provider': daac,
        'short_name': sn,
        'version': ver,
        'sort_key': sortKey,
        'page_size': pageSize,
    }
    return mySearch(env, 'granules.umm_json', granQuery)


def getGranTimes(g):
    ''' Returns the (beginning, ending) date-times of a granule record,
        read from its umm TemporalExtent RangeDateTime section.
    '''
    timeRange = g['umm']['TemporalExtent']['RangeDateTime']
    return timeRange['BeginningDateTime'], timeRange['EndingDateTime']


def getOpendapUrl(g):
    ''' Returns the first OPeNDAP URL listed in a granule record.

        A related URL qualifies when its Type is 'USE SERVICE API', its
        Subtype is 'OPENDAP DATA', and its URL contains
        'earthdata.nasa.gov'. Returns None when no entry qualifies or
        the granule has no RelatedUrls.
    '''
    for url in g["umm"].get("RelatedUrls", []):
        # .get() because an entry may carry no Subtype at all
        if (url.get('Type') == 'USE SERVICE API' and
                url.get('Subtype') == 'OPENDAP DATA' and
                'earthdata.nasa.gov' in url.get('URL', '')):
            return url['URL']
    return None

        
def getDataUrl(g):
    ''' Returns (dataUrl, s3Url) for a granule record.

        dataUrl is the URL of the related URL whose Type is 'GET DATA';
        s3Url is the one whose Type is 'GET DATA VIA DIRECT ACCESS'.
        Either is None when the granule has no entry of that type. If a
        type appears more than once, the last entry wins.
    '''
    # Start from None so a granule missing either type does not raise
    dataUrl = None
    s3Url = None
    for url in g["umm"].get("RelatedUrls", []):
        urlType = url.get('Type')
        if urlType == 'GET DATA':
            dataUrl = url['URL']
        elif urlType == 'GET DATA VIA DIRECT ACCESS':
            s3Url = url['URL']
    return dataUrl, s3Url

In [None]:

# Collections to report on, each written as shortname_version
collNames = [
    'S5P_L2__AER_AI_HiR_2',
    'S5P_L2__CO_____HiR_2',
    'S5P_L2__CH4____HiR_2',
    'S5P_L2__AER_AI_1',
    'S5P_L2__AER_AI_HiR_1',
    'S5P_L2__AER_LH_1',
    'S5P_L2__AER_LH_2',
    'S5P_L2__AER_LH_HiR_1',
    'S5P_L2__AER_LH_HiR_2',
    'S5P_L2__CH4____1',
    'S5P_L2__CH4____2',
    'S5P_L2__CH4____HiR_1',
    'S5P_L2__CO_____1',
    'S5P_L2__CO_____HiR_1',
    'S5P_L2__CO_____2',
    'S5P_L2__HCHO___1',
    'S5P_L2__HCHO___HiR_1',
    'S5P_L2__HCHO___HiR_2',
]
#collNames = ['GPM_3IMERGHH_06']
#collNames = ['SNDRSNIML2CCPRET_2']
#collNames = ['M2I3NVAER_5.12.4', 'SNDRAQIML2CCPRET_2','GPM_3IMERGHH_06','M2T1NXSLV_5.12.4']

In [None]:
# For every collection in collNames, print its ID, associated services,
# a sample of its variables, and its most recent granules as found in UAT.
env = 'UAT'
print(env)
for cname in collNames:
    # split out short name and version (version follows the last underscore)
    nameParts = cname.rsplit('_',maxsplit=1)
    sname = nameParts[0]
    ver = nameParts[-1]

    # get collection metadata, extract collection ID
    cRecs = getCollMeta(cname, env)
    if cRecs:
        # we should only find one, but just in case...
        for cMeta in cRecs:
            #print(cMeta)  # for debugging or seeing the entire record
            cid = getCollId(cMeta)
            print(cMeta["meta"]["native-id"])
            print(cid) 

            # get services associated with the collection 
            cSvcs = getCollSvcs(cMeta, env)
            if cSvcs: 
                for svc in cSvcs:
                    # a failed or empty service search has no record to show
                    if not svc:
                        continue
                    print(' ',svc[0]['meta']['concept-id'], svc[0]['umm']['LongName'])
            else:
                print('No associated services')

            # get variables associated to the collection 
            maxVars = 15  # cap on how many variables are listed per collection
            cVars = getCollVars(cid, env, maxVars)
            if cVars:
                for v in cVars:
                    # show the umm-var id (V*_GES-DISC) and the name
                    print('  ',v["meta"]["concept-id"], v["umm"]["Name"])
            else:
                print('No associated variables')

            # get a few of the collection's granules
            cGrans = getCollGranules(sname, ver, env, pageSize=3)
            if cGrans: 
                for g in cGrans:
                    # show some metadata from the granules
                    dataUrl,s3Url = getDataUrl(g)
                    dapUrl  = getOpendapUrl(g)
                    beg,end = getGranTimes(g)
                    print('   ',g['meta']['concept-id'],beg,'-to-', end)
                    print('   ',dataUrl)
                    print('   ',s3Url)
                    if dapUrl: print('   ',dapUrl)
            else:
                print('No granules found')
    else:
        print(sname,ver,'was not found in CMR')
    print()


In [None]:
# For every collection in collNames, print its ID, associated services,
# a sample of its variables, and its most recent granules as found in PROD.
env = 'PROD'
print(env)
for cname in collNames:
    # split out short name and version (version follows the last underscore)
    nameParts = cname.rsplit('_',maxsplit=1)
    sname = nameParts[0]
    ver = nameParts[-1]

    # get collection metadata, extract collection ID
    cRecs = getCollMeta(cname, env)
    if cRecs:
        # we should only find one, but just in case...
        for cMeta in cRecs:
            #print(cMeta)  # for debugging or seeing the entire record
            cid = getCollId(cMeta)
            print(cMeta["meta"]["native-id"])
            print(cid) 

            # get services associated with the collection 
            cSvcs = getCollSvcs(cMeta, env)
            if cSvcs: 
                for svc in cSvcs:
                    # a failed or empty service search has no record to show
                    if not svc:
                        continue
                    print(' ',svc[0]['meta']['concept-id'], svc[0]['umm']['LongName'])
            else:
                print('No associated services')

            # get variables associated to the collection 
            maxVars = 15  # cap on how many variables are listed per collection
            cVars = getCollVars(cid, env, maxVars)
            if cVars:
                for v in cVars:
                    # show the umm-var id (V*_GES-DISC) and the name
                    print('  ',v["meta"]["concept-id"], v["umm"]["Name"])
            else:
                print('No associated variables')

            # get a few of the collection's granules
            cGrans = getCollGranules(sname, ver, env, pageSize=3)
            if cGrans: 
                for g in cGrans:
                    # show some metadata from the granules
                    # getDataUrl returns a (dataUrl, s3Url) pair; unpack it
                    # so each URL prints on its own line instead of a tuple
                    dataUrl,s3Url = getDataUrl(g)
                    dapUrl  = getOpendapUrl(g)
                    beg,end = getGranTimes(g)
                    print(dataUrl)
                    print(s3Url)
                    if dapUrl: print(dapUrl)
                    print(beg, ' -to- ', end)
            else:
                print('No granules found')
    else:
        print(sname,ver,'was not found in CMR')
    print()

In [None]:
# Shows OPeNDAP links from collection level metadata 
env = 'UAT'
# Base URL for the virtual-directory links; must match env above
cmr_root = 'https://cmr.uat.earthdata.nasa.gov'

dapColls = mySearch(env,'collections',
                    {'provider':'GES_DISC','has_opendap_url':True,'page_size':50})

if dapColls is False:
    # mySearch returns False when the request failed; len(False) would raise
    print('Collection search failed in',env)
else:
    print('At least',len(dapColls),'opendap-enabled collections in',env)
    # Loop variable is not named `coll`, which would overwrite the
    # cmr.search.collection module imported under that alias.
    for dapColl in dapColls: 
        opendapUrl = ''
        for url in dapColl['umm'].get('RelatedUrls', []):
            # .get() because an entry may carry no Subtype; last match wins
            if url.get('Type') == 'USE SERVICE API' and url.get('Subtype') == 'OPENDAP DATA': 
                opendapUrl = url['URL']
        cmrVirDir = cmr_root+'/virtual-directory/collections/'+dapColl['meta']['concept-id']+'/temporal'

        print(dapColl['meta']['native-id'])
        print('  ',opendapUrl)
        print('  ',cmrVirDir)