### Download UTIG radargrams from NSIDC

* 2009-2010 radargrams from HiCARS1 are here: https://n5eil01u.ecs.nsidc.org/ICEBRIDGE/IR1HI1B.001/
* 2010-2013 radargrams from HiCARS2 are here: https://n5eil01u.ecs.nsidc.org/ICEBRIDGE/IR2HI1B.001/

In [79]:
from bs4 import BeautifulSoup
import netrc  # Used to parse authentication token from ~/.netrc
import re
import requests

In [36]:
def credentials_from_netrc():
    hostname = 'urs.earthdata.nasa.gov'
    try:
        nn = netrc.netrc()
        username, _, token = nn.authenticators(hostname)        
        if username != "token":
            msg = "This function only supports logging in via authentication tokens." 
            print(msg)
            raise Exception(msg)
    except FileNotFoundError as ex:
        print("Can't authenticate -- .netrc file not found")
        raise(ex)
        
    return token

In [56]:
# Annoyingly, requests.get(...) gets a 401 response. 
# Looks like we need to be logged into NSIDC for this to work / provide a bearer token.

hicars1_url = "https://n5eil01u.ecs.nsidc.org/ICEBRIDGE/IR1HI1B.001/"
hicars2_url = "https://n5eil01u.ecs.nsidc.org/ICEBRIDGE/IR2HI1B.001/"

In [69]:
# This takes FOREVER. But it does work. Why so long?
token = credentials_from_netrc()
reqs = requests.get(hicars1_url, headers={'Authorization': 'Bearer {0}'.format(token)})
reqs


<Response [200]>

In [84]:
# Will "sessions" help at all?

In [89]:
ss = requests.sessions.Session()
ss.headers.update({'Authorization': 'Bearer {0}'.format(token)})

In [91]:
foo = ss.get(hicars1_url)

In [92]:
foo

<Response [200]>

In [78]:
soup = BeautifulSoup(reqs.text, 'html.parser')
flight_days = [link.get('href') for link in soup.find_all('a') 
        if re.match("[0-9]{4}.[0-9]{2}.[0-9]{2}", link.get('href')) is not None]
print(flight_days)
for flight_day in flight_days:
    print("Listing segments for {}".format(flight_day))
    flight_url = "{}/{}".format(hicars1_url, flight_day)
    resp = requests.get(flight_url, headers={'Authorization': 'Bearer {0}'.format(token)})
    print(resp)
    soup = BeautifulSoup(resp.text, 'html.parser')
    flight_files = [link.get('href') for link in soup.find_all('a')
                    if link.get('href').endswith('nc')]
    print(flight_files)
    break

['2009.01.02/', '2009.01.02/', '2009.01.06/', '2009.01.06/', '2009.01.07/', '2009.01.07/', '2009.01.08/', '2009.01.08/', '2009.01.11/', '2009.01.11/', '2009.01.12/', '2009.01.12/', '2009.01.13/', '2009.01.13/', '2009.01.18/', '2009.01.18/', '2009.01.19/', '2009.01.19/', '2009.01.20/', '2009.01.20/', '2009.01.21/', '2009.01.21/', '2009.01.22/', '2009.01.22/', '2009.01.23/', '2009.01.23/', '2009.01.26/', '2009.01.26/', '2009.01.28/', '2009.01.28/', '2009.01.29/', '2009.01.29/', '2009.01.30/', '2009.01.30/', '2009.01.31/', '2009.01.31/', '2009.02.01/', '2009.02.01/', '2009.11.09/', '2009.11.09/', '2009.11.10/', '2009.11.10/', '2009.11.11/', '2009.11.11/', '2009.11.16/', '2009.11.16/', '2009.11.17/', '2009.11.17/', '2009.11.18/', '2009.11.18/', '2009.11.20/', '2009.11.20/', '2009.11.25/', '2009.11.25/', '2009.11.26/', '2009.11.26/', '2009.11.27/', '2009.11.27/', '2009.12.01/', '2009.12.01/', '2009.12.02/', '2009.12.02/', '2009.12.03/', '2009.12.03/', '2009.12.07/', '2009.12.07/', '2009.12.

In [81]:
soup = BeautifulSoup(resp.text, 'html.parser')

In [82]:
flight_files = [link.get('href') for link in soup.find_all('a')
                    if link.get('href').endswith('nc')]

In [83]:
flight_files

['IR1HI1B_2009002_MCM_JKB1a_BYRD03a_000.nc',
 'IR1HI1B_2009002_MCM_JKB1a_BYRD03a_000.nc',
 'IR1HI1B_2009002_MCM_JKB1a_BYRD03a_001.nc',
 'IR1HI1B_2009002_MCM_JKB1a_BYRD03a_001.nc',
 'IR1HI1B_2009002_MCM_JKB1a_BYRD03a_002.nc',
 'IR1HI1B_2009002_MCM_JKB1a_BYRD03a_002.nc',
 'IR1HI1B_2009002_MCM_JKB1a_BYRD03b_001.nc',
 'IR1HI1B_2009002_MCM_JKB1a_BYRD03b_001.nc',
 'IR1HI1B_2009002_MCM_JKB1a_BYRD03b_002.nc',
 'IR1HI1B_2009002_MCM_JKB1a_BYRD03b_002.nc',
 'IR1HI1B_2009002_MCM_JKB1a_DGC02a_000.nc',
 'IR1HI1B_2009002_MCM_JKB1a_DGC02a_000.nc',
 'IR1HI1B_2009002_MCM_JKB1a_DGC02a_001.nc',
 'IR1HI1B_2009002_MCM_JKB1a_DGC02a_001.nc',
 'IR1HI1B_2009002_MCM_JKB1a_DGT02a_000.nc',
 'IR1HI1B_2009002_MCM_JKB1a_DGT02a_000.nc',
 'IR1HI1B_2009002_MCM_JKB1a_DGT02a_001.nc',
 'IR1HI1B_2009002_MCM_JKB1a_DGT02a_001.nc',
 'IR1HI1B_2009002_MCM_JKB1a_DGT03a_000.nc',
 'IR1HI1B_2009002_MCM_JKB1a_DGT03a_000.nc']