## Downloading NEXRAD data
NEXRAD scans are available as open data: https://www.ncdc.noaa.gov/data-access/radar-data/noaa-big-data-project
They are stored on AWS, organized by year, month, day, and radar location, with the time of each scan encoded in the file name.

Documentation: https://docs.opendata.aws/noaa-nexrad/readme.html
Code adapted from https://www1.ncdc.noaa.gov/pub/data/radar/bdp/scripts/

Website to get sunrise time: https://www.esrl.noaa.gov/gmd/grad/solcalc/sunrise.html

In [1]:
import sys
import urllib.request
from xml.dom import minidom
from sys import stdin
from subprocess import call
import datetime
import pandas as pd
from pandas.io.json import json_normalize
import json

sys.path.append('../')
from envir import config

In [2]:
# Days to process (inclusive on both ends), converted to strings so that each
# entry can be spliced directly into request URLs by the cells below.
_days_to_process = pd.date_range(start="2019/03/01", end="2019/03/30")
date_range = _days_to_process.astype(str)

In [3]:
def getText(nodelist):
    """Concatenate the character data of the text nodes in ``nodelist``.

    Only nodes whose ``nodeType`` equals ``TEXT_NODE`` contribute; every other
    node (elements, comments, ...) is ignored and is not descended into.

    Parameters
    ----------
    nodelist : iterable of DOM nodes
        Typically the ``childNodes`` of an element.

    Returns
    -------
    str
        The joined text, or an empty string when no text node is present.
    """
    return ''.join(
        child.data for child in nodelist if child.nodeType == child.TEXT_NODE
    )

In [4]:
# Build `sunrise_scans`: one row per day holding the radar file whose scan time
# is closest to that day's sunrise. Columns: date, filename, timediff (seconds).
site = "KOKX"
bucketURL = "http://noaa-nexrad-level2.s3.amazonaws.com"
sunriseURL = 'https://api.sunrise-sunset.org/json?lat=40.730959&lng=-73.9987294&date='


def _clock12_to_seconds(text):
    """Convert a 12-hour clock string such as "11:31:44 AM" to seconds since midnight."""
    clock, meridiem = text.split()
    hours, minutes, seconds = (int(part) for part in clock.split(':'))
    # 12 AM is hour 0 and 12 PM is hour 12; every other PM hour is shifted by 12.
    hours = hours % 12 + (12 if meridiem.upper() == 'PM' else 0)
    return hours * 3600 + minutes * 60 + seconds


def _hhmmss_to_seconds(stamp):
    """Convert an "HHMMSS" string such as "113144" to seconds since midnight."""
    return int(stamp[0:2]) * 3600 + int(stamp[2:4]) * 60 + int(stamp[4:6])


closest_per_day = []  # one single-row DataFrame per day, concatenated once below
for i in date_range:
    date = i.replace("-", "/")

    # get sunrise time
    # Both clocks are converted to seconds since midnight so that `timediff` is
    # a real duration; subtracting HHMMSS-encoded integers mis-ranks scans that
    # sit on opposite sides of a minute or hour boundary.
    # NOTE(review): the API is documented to answer in UTC, which is assumed to
    # be the clock used in the scan file names - confirm.
    with urllib.request.urlopen(sunriseURL + i) as response:
        sundoc = json.loads(response.read())
    sunrise = _clock12_to_seconds(sundoc['results']['sunrise'])

    # list the files stored under <year>/<month>/<day>/<site>
    dirListURL = bucketURL + "/?prefix=" + date + "/" + site
    with urllib.request.urlopen(dirListURL) as listing:
        xmldoc = minidom.parse(listing)
    itemlist = xmldoc.getElementsByTagName('Key')

    row_list = []
    for x in itemlist:
        file = getText(x.childNodes)
        # Keys look like 2019/03/01/KOKX/KOKX20190301_113144_V06: the second
        # "_"-separated field is the scan time as HHMMSS.
        # NOTE(review): every listed key is treated as a scan; if the bucket
        # also lists non-scan objects under this prefix, filter them here.
        filetime = _hhmmss_to_seconds(file.split("_")[1])
        row_list.append({'date': i,
                         'filename': file,
                         'timediff': abs(filetime - sunrise)})

    if not row_list:
        # Nothing listed for this day: skip it instead of failing on an empty frame.
        print("no scans listed for " + date + "/" + site + ", skipping")
        continue

    row = pd.DataFrame(row_list)
    closest_per_day.append(row.loc[[row["timediff"].idxmin()]])

# DataFrame.append was removed in pandas 2.0; concatenate the daily picks once.
if closest_per_day:
    sunrise_scans = pd.concat(closest_per_day)
else:
    sunrise_scans = pd.DataFrame(columns=['date', 'filename', 'timediff'])

In [5]:
# Download every selected scan from the public bucket. The local file is named
# after the object key with "/" turned into "_" and ".h5" appended, and is
# written under <config.dataFol>nexrad_downloads/.
for scan in sunrise_scans.filename:
    print(scan)  # progress: one object key per line
    url = ''.join(['https://noaa-nexrad-level2.s3.amazonaws.com/', scan])
    filename = "_".join(scan.split("/"))
    destination = config.dataFol+'nexrad_downloads/'+filename+'.h5'
    urllib.request.urlretrieve(url, destination)

2019/03/01/KOKX/KOKX20190301_113144_V06
2019/03/02/KOKX/KOKX20190302_112948_V06
2019/03/03/KOKX/KOKX20190303_112630_V06
2019/03/04/KOKX/KOKX20190304_112650_V06
2019/03/05/KOKX/KOKX20190305_112127_V06
2019/03/06/KOKX/KOKX20190306_112037_V06
2019/03/07/KOKX/KOKX20190307_111814_V06
2019/03/08/KOKX/KOKX20190308_112017_V06
2019/03/09/KOKX/KOKX20190309_111630_V06
2019/03/10/KOKX/KOKX20190310_111541_V06
2019/03/11/KOKX/KOKX20190311_111535_V06
2019/03/12/KOKX/KOKX20190312_110851_V06
2019/03/13/KOKX/KOKX20190313_111123_V06
2019/03/14/KOKX/KOKX20190314_110957_V06
2019/03/15/KOKX/KOKX20190315_110708_V06
2019/03/16/KOKX/KOKX20190316_110328_V06
2019/03/17/KOKX/KOKX20190317_110313_V06
2019/03/18/KOKX/KOKX20190318_111015_V06
2019/03/19/KOKX/KOKX20190319_110335_V06
2019/03/20/KOKX/KOKX20190320_105358_V06
2019/03/21/KOKX/KOKX20190321_105731_V06
2019/03/22/KOKX/KOKX20190322_105558_V06
2019/03/23/KOKX/KOKX20190323_105455_V06
2019/03/24/KOKX/KOKX20190324_105224_V06
2019/03/25/KOKX/KOKX20190325_104848_V06
