### Import packages

In [None]:
from xml.dom import minidom
from sys import stdin
from urllib import request
from subprocess import call
import numpy as np
import pandas as pd
import itertools
from tabulate import tabulate
import pyart
from sphere import RegionCoverer, Cell, LatLng, LatLngRect, CellId
from datetime import datetime, timedelta
import time

### initialize user-defined variables

In [None]:
# Disabled configuration: the triple-quoted block below is a bare string
# expression, so the assignments inside it never execute and date, site,
# bucketURL and dirListURL are NOT defined by this cell.
# NOTE(review): presumably disabled while the radar file is read from a local
# path instead of the S3 bucket -- confirm before re-enabling.
"""
date = "2020/01/01"
site = "KCRP"
bucketURL = "http://noaa-nexrad-level2.s3.amazonaws.com"
dirListURL = bucketURL+ "/?prefix=" + date + "/" + site
"""
# Placeholder statement; emits an empty line.
print("")

### list available files

In [None]:
# Disabled file listing: everything inside the triple-quoted block is a bare
# string expression and is never executed.
# The disabled code defines getText(), which joins the data of the TEXT_NODE
# entries in a node list, then parses the XML fetched from dirListURL and
# walks its 'Key' elements.
# NOTE(review): the disabled code reads dirListURL, which is not assigned
# anywhere in this cell, and its loop assigns `file` without using it --
# both need attention before this is re-enabled.
"""
def getText(nodelist):
    rc = []
    for node in nodelist:
        if node.nodeType == node.TEXT_NODE:
            rc.append(node.data)
    return ''.join(rc)

print ("listing files from %s" % dirListURL)

xmldoc = minidom.parse(request.urlopen(dirListURL))
itemlist = xmldoc.getElementsByTagName('Key')
print (len(itemlist) , "keys found...")

for x in itemlist:
    file = getText(x.childNodes)
"""
# Placeholder statement; emits an empty line.
print("")

### read NEXRAD radar file -- reading over HTTP is currently failing, so temporarily read from a local directory

In [None]:
# Source of the volume on the public NEXRAD Level II bucket:
#   https://s3.amazonaws.com/noaa-nexrad-level2/index.html#2020/01/01/KCRP/
#   https://noaa-nexrad-level2.s3.amazonaws.com/2020/01/01/KCRP/KCRP20200101_000431_V06
#
# Remote reads that were tried and are left disabled:
#   radar = pyart.io.read_nexrad_archive('https://noaa-nexrad-level2.s3.amazonaws.com/2020/01/01/KCRP/' + 'KCRP20200101_000431_V06')
#   radar = pyart.io.read('https://noaa-nexrad-level2.s3.amazonaws.com/2020/01/01/KCRP/' + 'KCRP20200101_000431_V06')

# Load the volume from a local copy of the same file instead.
local_volume_path = 'data/NEXRAD/KCRP20200101_000431_V06'
radar = pyart.io.read(local_volume_path)

### show radar info

In [None]:
# Call the radar object's info() method with its default arguments.
# NOTE(review): presumably prints a summary of the loaded volume -- see the
# Py-ART Radar documentation for the exact output.
radar.info()

### write station info

In [None]:
# Column labels for the one-row station summary, in output order.
station_info = [
    'StationName',
    'Product',
    'Pattern',
    'Latitude',
    'Longitude',
    'Altitude',
    'StartTime',
]
# Values matching station_info; starts empty and is filled in by a later cell.
station_row: list = []

### confirm station location
### process start datetime

In [None]:
# Identifying metadata recorded in the radar volume.
station_name = radar.metadata['instrument_name']
product = radar.metadata['original_container']
pattern = radar.metadata['vcp_pattern']

# First entry of each location array.
latitude0 = radar.latitude['data'][0]
longitude0 = radar.longitude['data'][0]
altitude0 = radar.altitude['data'][0]

# Parse the volume start timestamp embedded in the time units string.
# NOTE(review): the [14:34] slice assumes the units look like
# 'seconds since YYYY-MM-DDTHH:MM:SSZ' (a 14-character prefix followed by a
# 20-character timestamp) -- confirm for other file versions.
volume_start = datetime.strptime(radar.time['units'][14:34], '%Y-%m-%dT%H:%M:%SZ')

# Replace the row contents in place instead of extending, so re-running this
# cell does not append a duplicate set of values to station_row.
station_row[:] = [station_name, product, pattern, latitude0, longitude0, altitude0, volume_start]

print(station_info)
print(station_row)

In [None]:
# Print the radar object's sweep_mode attribute for visual inspection.
print(radar.sweep_mode)

### confirm number of rays and gates
# Ray attributes: azimuth, elevation, time
# Gate attributes: range
# Ray x Gate attributes: lat, lon, alt

In [None]:
# Print the ray count and the gate count reported by the radar object; the
# later cells multiply these two to get the expected number of samples.
print(radar.nrays)
print(radar.ngates)

### unfold lists of lists

In [None]:
def _flatten_rows(rows):
    """Return every element of each row of *rows* as one flat list, in row order."""
    return list(itertools.chain.from_iterable(rows))


# Per-gate coordinates, flattened to a single list each.
merged_lat = _flatten_rows(radar.gate_latitude['data'])
merged_lon = _flatten_rows(radar.gate_longitude['data'])
merged_alt = _flatten_rows(radar.gate_altitude['data'])

# Measured fields, flattened the same way so positions line up with the
# coordinate lists above.
merged_refl = _flatten_rows(radar.fields['reflectivity']['data'])
merged_velo = _flatten_rows(radar.fields['velocity']['data'])

### expand time field

In [None]:
# One timestamp per entry of radar.time['data']: volume start plus the
# entry's offset in seconds.
time_x1 = [
    volume_start + timedelta(seconds=offset_seconds)
    for offset_seconds in radar.time['data']
]

# Repeat each timestamp radar.ngates times so there is one timestamp per
# flattened sample.
time_xgates = list(
    itertools.chain.from_iterable(
        itertools.repeat(ray_time, radar.ngates) for ray_time in time_x1
    )
)

### visual checks -- TODO: turn these into real tests

In [None]:
# Expected sample count from the radar object's own dimensions.
nsamples = radar.nrays * radar.ngates

# Sizes measured on the extracted data.
gate_lat_rows = radar.gate_latitude['data']
ndata_rays = len(gate_lat_rows)
ndata_gates = len(gate_lat_rows[0])
ntime_rays = len(radar.time['data'])
ntime_samples = len(time_xgates)
ndata_samples = len(merged_refl)

# (label, expected size, measured size), reported in this order.
size_checks = (
    ('time', radar.nrays, ntime_rays),
    ('data', radar.nrays, ndata_rays),
    ('data', radar.ngates, ndata_gates),
    ('time', nsamples, ntime_samples),
    ('data', nsamples, ndata_samples),
)

for label, expected, measured in size_checks:
    if expected != measured:
        print(label + ' mismatch')
    else:
        print(label + ' match ' + str(expected))

### combine data into dataframe

In [None]:
# One column per flattened list; every list holds one entry per sample.
sample_columns = {
    'GateLat': merged_lat,
    'GateLon': merged_lon,
    'GateAlt': merged_alt,
    'GateTime': time_xgates,
    'Reflectivity': merged_refl,
    'Velocity': merged_velo,
}
samples = pd.DataFrame(sample_columns)

# Row count, then a sparse preview: every 100000th row of the first 5000000.
print(len(samples))
preview_rows = slice(0, 5000000, 100000)
print(samples[preview_rows])

### get cellid from lat/lon = 471sec
# get level 10 parent from cellid = 77sec
# get centroid of cellid = 413sec

In [None]:
# S2 level used when a later cell takes the parent of each sample's cell.
s2level = 10

# Build an S2 LatLng for every sample from its gate latitude/longitude.
# A previous run of this step took about 57 s.
start0 = time.time()
samples['S2LL'] = [
    LatLng.from_degrees(lat, lon)
    for lat, lon in zip(samples['GateLat'], samples['GateLon'])
]
end0 = time.time()
print(end0 - start0)

# Convert each LatLng to its CellId. from_lat_lng is called on the class
# directly: it is an alternate constructor, so creating an empty CellId() per
# row first (as before) only added one throwaway object per sample.
# The earlier per-row CellId() version of this step took about 426 s.
start1 = time.time()
samples['S2CellID'] = [CellId.from_lat_lng(lat_lng) for lat_lng in samples['S2LL']]
end1 = time.time()
print(end1 - start1)



In [None]:
# Map every sample's CellId to its ancestor cell at level s2level and time
# the pass. A previous run took about 67 s.
start4 = time.time()
region_cells = []
for cell_id in samples['S2CellID']:
    region_cells.append(cell_id.parent(s2level))
samples['S2Region'] = region_cells
end4 = time.time()
print(end4 - start4)

In [None]:
# Sparse preview: every 1000000th row of the first 11000000, rendered as a
# GitHub-style table with column names as headers and no index column.
sample_preview = samples[0:11000000:1000000]
preview_table = tabulate(
    sample_preview,
    headers='keys',
    tablefmt='github',
    showindex=False,
)
print(preview_table)

In [None]:
#samples.nunique()