# HTTPS access to EOSDIS data stored in the Earthdata Cloud

In [None]:
from urllib import request, parse
from http.cookiejar import CookieJar
import getpass
import netrc
import requests
import json
import os

## Registration and authentication

The function below allows Python scripts to log into the Earthdata Login application programmatically. To avoid being prompted for credentials every time you run it, and to allow clients such as curl to log in as well, add the following to a .netrc (_netrc on Windows) file in your home directory:

machine urs.earthdata.nasa.gov
    login <your username>
    password <your password>
Make sure that this file is only readable by the current user or you will receive an error stating "netrc access too permissive."

$ chmod 0600 ~/.netrc

In [None]:
def setup_earthdata_login_auth(endpoint):
    """
    Install a global urllib opener that can log in to Earthdata Login.

    Credentials for ``endpoint`` are taken from the user's .netrc file.  When that
    file does not exist, or holds no entry for ``endpoint``, the user is prompted
    for a username and password instead.  The installed opener answers HTTP basic
    auth challenges from ``endpoint`` and keeps cookies between requests.

    Valid endpoints include:
        uat.urs.earthdata.nasa.gov - Earthdata Login UAT
        urs.earthdata.nasa.gov - Earthdata Login production
    """
    try:
        # authenticators() yields (login, account, password); the account is unused.
        username, _account, password = netrc.netrc().authenticators(endpoint)
    except (FileNotFoundError, TypeError):
        # FileNotFoundError: no .netrc file exists.
        # TypeError: no entry for the endpoint, so None was unpacked above.
        print('Please provide your Earthdata Login credentials to allow data access')
        print('Your credentials will only be passed to %s and will not be exposed in Jupyter' % endpoint)
        username = input('Username:')
        password = getpass.getpass()

    # Basic-auth handler: realm None makes the credentials the default for the endpoint.
    password_mgr = request.HTTPPasswordMgrWithDefaultRealm()
    password_mgr.add_password(None, endpoint, username, password)
    auth_handler = request.HTTPBasicAuthHandler(password_mgr)

    # Cookie handler: keeps cookies in memory between requests.
    cookie_handler = request.HTTPCookieProcessor(CookieJar())

    # Make the combined opener the default used by urllib.request.urlopen().
    request.install_opener(request.build_opener(auth_handler, cookie_handler))

Let's set up our EDL authentication against the production environment at urs.earthdata.nasa.gov.

In [None]:
# Install the authenticated opener for the production Earthdata Login host.
setup_earthdata_login_auth('urs.earthdata.nasa.gov')

## Data discovery via the Common Metadata Repository (CMR)
### Step 1: Collection/Dataset discovery.
We can use CMR to search for collections of interest from our cloud provider, POCLOUD.

In [None]:
# Ask CMR for the collections published by the POCLOUD provider.
response = requests.get(
    'https://cmr.earthdata.nasa.gov/search/collections.json',
    params={'provider': 'POCLOUD'},
)
# Fail on an HTTP error here rather than with a confusing JSON/KeyError below.
response.raise_for_status()
results = response.json()

entries = results["feed"]["entry"]
if not entries:
    raise LookupError("CMR returned no collections for provider POCLOUD")

# Keep the concept ID of the first collection returned.
concept_id = entries[0]["id"]
print(concept_id)

### Step 2: Granule/file discovery.
Using the unique identifier for the first collection returned, we can search for granules.

In [None]:
# Search CMR for granules using the concept ID selected above.
response = requests.get(
    'https://cmr.earthdata.nasa.gov/search/granules.json',
    params={'concept_id': concept_id},
)
# Fail on an HTTP error here rather than when the body is parsed or indexed.
response.raise_for_status()
results = response.json()

## Data download using CMR granule/file metadata
Each granule has a set of links. One of those links allows you to download the data via HTTPS.
That link has a rel type of 'http://esipfed.org/ns/fedsearch/1.1/data#'.

In [None]:
# Links of the first granule in the search results.
links = results["feed"]["entry"][0]["links"]

# Pick the first link whose rel marks it as the HTTPS data download.
# Using next() with a default avoids leaving `url` unbound (or stale from an
# earlier run of this cell) when no link matches.
url = next(
    (link['href'] for link in links
     if link['rel'] == "http://esipfed.org/ns/fedsearch/1.1/data#"),
    None,
)
if url is None:
    raise LookupError("No HTTPS data link found in the granule's links")
print("HTTPS URL for data: " + url)

Now we can make a request for the file in the PO.DAAC archive using the HTTPS protocol.

In [None]:
# NOTE(review): `requests` does not use the opener installed through
# urllib.request.install_opener(); it presumably authenticates here from
# ~/.netrc on its own. Credentials typed at the interactive prompt would then
# not be sent - confirm whether this should use request.urlopen(url) instead.
response = requests.get(url)
# Stop on an HTTP error (e.g. failed login) instead of reporting the size of
# an error page.
response.raise_for_status()

# Content-Length is absent for chunked responses; fall back to the number of
# bytes actually received.
content_length = response.headers.get("Content-Length")
if content_length is None:
    content_length = str(len(response.content))
print("Size of data: " + content_length + " bytes")