In [1]:
import requests
import getpass
import socket 
import json
import zipfile
import io
import math
import os 
import shutil
import time
import h5py
import re
import requests
import bs4
import subprocess
import numpy
import base64
# To read KML files with geopandas, we will need to enable KML support in fiona (disabled by default)
import pprint
from statistics import mean
from requests.auth import HTTPBasicAuth
from xml.etree import ElementTree as ET

In [2]:
try:
    from urllib.parse import urlparse
    from urllib.request import urlopen, Request, build_opener, HTTPCookieProcessor
    from urllib.error import HTTPError, URLError
except ImportError:
    from urlparse import urlparse
    from urllib2 import urlopen, Request, HTTPError, URLError, build_opener, HTTPCookieProcessor

## Get Token

In [10]:
uid = 'karans04'
pswd = getpass.getpass('Earthdata Login password: ')

Earthdata Login password: ········


In [11]:
token_api_url = 'https://cmr.earthdata.nasa.gov/legacy-services/rest/tokens'
hostname = socket.gethostname()
ip = socket.gethostbyname(hostname)

data = {
    'token': {
        'username': uid,
        'password': pswd,
        'client_id': 'NSIDC_client_id',
        'user_ip_address': ip
    }
}

headers={'Accept': 'application/json'}
response = requests.post(token_api_url, json=data, headers=headers)
print(response)
token = json.loads(response.content)['token']['id']
print(token)

<Response [422]>


KeyError: 'token'

## Set Parameters

In [None]:
#Set API request parameters
short_name = 'ATL03'
dataset_version = '002'
time_start = '2018-10-13T00:00:00Z'
page_size = 100
page_num = 1
request_mode = 'async'

# Bounding Box spatial parameter in 'W,S,E,N' format
# Input lower left longitude in decimal degrees
LL_lon = '-178.55'
# Input lower left latitude in decimal degrees
LL_lat = '-18.76'
# Input upper right longitude in decimal degrees
UR_lon = '-178.40'
# Input upper right latitude in decimal degrees
UR_lat = '-18.61'

#Bounding box subsetting (bbox) in same format as bounding_box
bounding_box = LL_lon + ',' + LL_lat + ',' + UR_lon + ',' + UR_lat
bbox = bounding_box

# bounding box input:
search_params = {
    'short_name': short_name,
    'version': dataset_version,
    'temporal': time_start,
    'page_size': 100,
    'page_num': 1,
    'bounding_box': bounding_box,
    }
download_params = {
    'short_name': short_name,
    'version': dataset_version,
    'temporal': time_start,
     #'time': time_var, 
    'bounding_box': bounding_box,
     'bbox': bbox, 
     #'format': reformat, 
     #'projection': projection, 
     #'projection_parameters': projection_parameters, 
     #'Coverage': coverage, 
    'page_size': 100,
     #'page_num': 1,
    'token': token,
    'request_mode': request_mode, 
     #'agent': agent, 
#     'email': email,
    }

#Set Earthdata search URL
granule_search_url = 'https://cmr.earthdata.nasa.gov/search/granules'

#Set NSIDC data access base URL
base_url = 'https://n5eil02u.ecs.nsidc.org/egi/request'

In [None]:
# Check available versions
params = {
    'short_name': short_name
}

cmr_collections_url = 'https://cmr.earthdata.nasa.gov/search/collections.json'
response = requests.get(cmr_collections_url, params=params)
results = json.loads(response.content)

# Find all instances of 'version_id' in metadata and print most recent version number

versions = [el['version_id'] for el in results['feed']['entry']]
latest_version = max(versions)
print('The available versions of ', short_name, ' are ', versions)

## Get dataset information

In [None]:
granules = []
headers={'Accept': 'application/json'}

while True:
    response = requests.get(granule_search_url, params=search_params, headers=headers)
    results = json.loads(response.content)

    if len(results['feed']['entry']) == 0:
        # Out of results, so break out of loop
        break

    # Collect results and increment page_num
    granules.extend(results['feed']['entry'])
    search_params['page_num'] += 1

print('There are', len(granules), 'granules of', short_name, 'version', dataset_version, 'over my area and time of interest.')

In [None]:
#print(*granules, sep = "\n") 
urls = []
for granule in granules:
    print(granule['links'][0]['href'])
    urls.append(granule['links'][0]['href'])
# Average/Total size of granules in MB
granule_sizes = [float(granule['granule_size']) for granule in granules]
print(sum(granule_sizes)/len(granules))
print(sum(granule_sizes))

## Get Data

In [None]:
#Create an output folder if the folder does not already exist.
datadir = str(os.path.realpath('.') + '/data')
if not os.path.exists(datadir):
    os.mkdir(datadir)
download_path = os.path.join(datadir,'is2')
if not os.path.exists(download_path):
    os.mkdir(download_path)
print(download_path)
current_dir = os.getcwd()

In [None]:
credentials = '{0}:{1}'.format(uid,pswd)
credentials = base64.b64encode(credentials.encode('ascii')).decode('ascii')

In [None]:
url_count = len(urls)
print('Downloading {0} files...'.format(url_count))
for index, url in enumerate(urls):
    filename = url.split('/')[-1]
    print('{0}/{1}: {2}'.format(str(index).zfill(len(str(url_count))),
                            url_count,
                            filename))
    if os.path.exists(download_path + '/' + filename):
            continue
    try:
        # In Python 3 we could eliminate the opener and just do 2 lines:
        # resp = requests.get(url, auth=(username, password))
        # open(filename, 'wb').write(resp.content)
        req = Request(url)
        if credentials:
            req.add_header('Authorization', 'Basic {0}'.format(credentials))
        opener = build_opener(HTTPCookieProcessor())
        data = opener.open(req).read()
        open(download_path + '/' + filename, 'wb').write(data)
    except HTTPError as e:
        print('HTTP error {0}, {1}'.format(e.code, e.reason))
    except URLError as e:
        print('URL error: {0}'.format(e.reason))
    except IOError:
        raise
    except KeyboardInterrupt:
        quit()