In [5]:
"""
Example script that scrapes data from the IEM ASOS download service
"""
from __future__ import print_function
import json
import time
from tqdm.notebook import tqdm
import datetime

# Python 2 and 3: alternative 4
try:
    from urllib.request import urlopen
except ImportError:
    from urllib2 import urlopen

# Number of attempts to download data
MAX_ATTEMPTS = 6
# HTTPS here can be problematic for installs that don't have Lets Encrypt CA
SERVICE = "http://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?"


def download_data(uri):
    """Fetch the data from the IEM
    The IEM download service has some protections in place to keep the number
    of inbound requests in check.  This function implements an exponential
    backoff to keep individual downloads from erroring.
    Args:
      uri (string): URL to fetch
    Returns:
      string data
    """
    attempt = 0
    while attempt < MAX_ATTEMPTS:
        try:
            data = urlopen(uri, timeout=300).read().decode("utf-8")
            if data is not None and not data.startswith("ERROR"):
                return data
        except Exception as exp:
            print("download_data(%s) failed with %s" % (uri, exp))
            time.sleep(5)
        attempt += 1

    print("Exhausted attempts to download, returning empty data")
    return ""


def get_stations_from_filelist(filename):
    """Build a listing of stations from a simple file listing the stations.
    The file should simply have one station per line.
    """
    stations = []
    for line in open(filename):
        stations.append(line.strip())
    return stations


def get_stations_from_networks():
    """Build a station list by using a bunch of IEM networks."""
    stations = []
    states = """MA"""
    networks = []
    for state in states.split():
        networks.append("%s_ASOS" % (state,))

    for network in networks:
        # Get metadata
        uri = (
            "https://mesonet.agron.iastate.edu/geojson/network/%s.geojson"
        ) % (network,)
        data = urlopen(uri)
        jdict = json.load(data)
        for site in jdict["features"]:
            stations.append(site["properties"]["sid"])
    return stations


def download_alldata():
    """An alternative method that fetches all available data.
    Service supports up to 24 hours worth of data at a time."""
    # timestamps in UTC to request data for
    startts = datetime.datetime(2020, 1, 1)
    endts = datetime.datetime(2020, 12, 20)
    interval = datetime.timedelta(hours=24)

    service = SERVICE + "data=all&tz=Etc/UTC&format=comma&latlon=yes&"

    now = startts
    while now < endts:
        thisurl = service
        thisurl += now.strftime("year1=%Y&month1=%m&day1=%d&")
        thisurl += (now + interval).strftime("year2=%Y&month2=%m&day2=%d&")
        print("Downloading: %s" % (now,))
        data = download_data(thisurl)
        outfn = "%s.txt" % (now.strftime("%Y%m%d"),)
        with open(outfn, "w") as fh:
            fh.write(data)
        now += interval


def main():
    """Our main method"""
    # timestamps in UTC to request data for
    startts = datetime.datetime(2020, 1, 1)
    endts = datetime.datetime(2020, 12, 20)

    service = SERVICE + "data=all&tz=Etc/UTC&format=comma&latlon=yes&"

    service += startts.strftime("year1=%Y&month1=%m&day1=%d&")
    service += endts.strftime("year2=%Y&month2=%m&day2=%d&")

    # Two examples of how to specify a list of stations
    stations = ['12N', '22N', '33N', 'ABE', 'ACK', 'ACY', 'AFN', 'ALB', 'APG',
       'AQW', 'ASH', 'AVP', 'BAF', 'BDL', 'BDR', 'BED', 'BGM', 'BID',
       'BLM', 'BOS', 'BVY', 'CDW', 'CEF', 'CKZ', 'CQX', 'DOV', 'DXR',
       'DYL', 'EWB', 'EWR', 'FIT', 'FMH', 'FOK', 'FRG', 'FWN', 'GHG',
       'GON', 'HFD', 'HPN', 'HVN', 'HWV', 'HYA', 'HZL', 'IJD', 'ILG',
       'ISP', 'JFK', 'JPX', 'JRB', 'LDJ', 'LGA', 'LNS', 'LOM', 'LWM',
       'MGJ', 'MIV', 'MJX', 'MMK', 'MMU', 'MPO', 'MQE', 'MQS', 'MSV',
       'MTP', 'MVY', 'N03', 'NEL', 'NYC', 'OQN', 'OQU', 'ORE', 'ORH',
       'OWD', 'OXC', 'PHL', 'PNE', 'POU', 'PSF', 'PTW', 'PVC', 'PVD',
       'PYM', 'RDG', 'SCH', 'SFZ', 'SMQ', 'SNC', 'SWF', 'TAN', 'TEB',
       'TTN', 'UKT', 'UUU', 'VAY', 'W29', 'WRI', 'WST', 'WWD', 'XLL']
    # stations = get_stations_from_filelist("mystations.txt")
    for station in tqdm(stations):
        uri = "%s&station=%s" % (service, station)
        print("Downloading: %s" % (station,))
        data = download_data(uri)
        outfn = "%s_%s_%s.txt" % (
            station,
            startts.strftime("%Y%m%d%H%M"),
            endts.strftime("%Y%m%d%H%M"),
        )
        out = open(outfn, "w")
        out.write(data)
        out.close()


if __name__ == "__main__":
#     download_alldata()
    main()

  0%|          | 0/99 [00:00<?, ?it/s]

Downloading: 12N
Downloading: 22N
Downloading: 33N
Downloading: ABE
Downloading: ACK
Downloading: ACY
Downloading: AFN
Downloading: ALB
Downloading: APG
Downloading: AQW
Downloading: ASH
Downloading: AVP
Downloading: BAF
Downloading: BDL
Downloading: BDR
Downloading: BED
Downloading: BGM
Downloading: BID
Downloading: BLM
Downloading: BOS
Downloading: BVY
Downloading: CDW
Downloading: CEF
Downloading: CKZ
Downloading: CQX
Downloading: DOV
Downloading: DXR
Downloading: DYL
Downloading: EWB
Downloading: EWR
Downloading: FIT
Downloading: FMH
Downloading: FOK
Downloading: FRG
Downloading: FWN
Downloading: GHG
Downloading: GON
Downloading: HFD
Downloading: HPN
Downloading: HVN
Downloading: HWV
Downloading: HYA
Downloading: HZL
Downloading: IJD
Downloading: ILG
Downloading: ISP
Downloading: JFK
Downloading: JPX
Downloading: JRB
Downloading: LDJ
Downloading: LGA
Downloading: LNS
Downloading: LOM
Downloading: LWM
Downloading: MGJ
Downloading: MIV
Downloading: MJX
Downloading: MMK
Downloading: M

In [21]:
import numpy as np
import networkx as nx
import json
import matplotlib.pyplot as plt
import random
import os
import pydeck as pdk
from datetime import datetime
import matplotlib.pyplot as plt
from meteostat import Point, Daily, Hourly
import pyproj
import ipywidgets
from tqdm.notebook import tqdm
import datetime

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
JFK_LAT_LON = 40.645944, -73.784839
LL = 38.585021, -77.555894
UR = 42.634826, -70.954819

LL = 39.88494891-1, -75.33930212-1
UR = 41.888126+1, -72.73301048+3

DOWNTOWN_BOUNDING_BOX = [
    LL[1],
    LL[0],
    UR[1],
    UR[0],
]

In [2]:
def in_bounding_box(point):
    """Determine whether a point is in our downtown bounding box"""
    lng, lat = point
    in_lng_bounds = DOWNTOWN_BOUNDING_BOX[0] <= lng <= DOWNTOWN_BOUNDING_BOX[2]
    in_lat_bounds = DOWNTOWN_BOUNDING_BOX[1] <= lat <= DOWNTOWN_BOUNDING_BOX[3]
    return in_lng_bounds and in_lat_bounds


In [3]:
files = [file for file in os.listdir() if ".txt" in file]

In [58]:
addval_start_date = datetime.datetime(2020, 1, 1)
addval_end_date = datetime.datetime(2020, 12, 1)
addval_daterange = (pd.date_range(start=addval_start_date, end=addval_end_date, freq='10min')).to_frame(name = 'times')

In [None]:
df_array = []
for i in range(len(files)):
    new_df = pd.read_csv(files[i], parse_dates=['valid'], skiprows=5, low_memory=False)
    new_df = new_df.set_index("valid")
    df = pd.merge_asof(left=addval_daterange,right=new_df,right_index=True,left_index=True,direction='nearest')
    df_array.append(df)

df = pd.concat(df_array)

df = df.replace(['M',"CLR","VV ","FEW","SCT","BKN","OVC"],[0,0,0,0.1,0.3,0.8,1])
df["vsby"] = df["vsby"].astype(float)

Stations = np.unique(df.station)
Times = np.unique(df.times)

df = df.set_index(['times','station'])

In [60]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,lon,lat,tmpf,dwpf,relh,drct,sknt,p01i,alti,mslp,...,wxcodes,ice_accretion_1hr,ice_accretion_3hr,ice_accretion_6hr,peak_wind_gust,peak_wind_drct,peak_wind_time,feel,metar,snowdepth
times,station,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
2020-01-01 00:00:00,HFD,-72.6494,41.7367,0,0,0,200.00,2.00,0,29.60,0,...,0,0,0,0,0,0,0,0,KHFD 010000Z AUTO 20002KT 10SM FEW036 BKN055 O...,0
2020-01-01 00:10:00,HFD,-72.6494,41.7367,0,0,0,200.00,4.00,0,29.60,0,...,0,0,0,0,0,0,0,0,KHFD 010010Z AUTO 20004KT 10SM FEW036 FEW045 O...,0
2020-01-01 00:20:00,HFD,-72.6494,41.7367,0,0,0,200.00,5.00,0,29.60,0,...,0,0,0,0,0,0,0,0,KHFD 010015Z AUTO 20005KT 10SM FEW046 OVC090 0...,0
2020-01-01 00:30:00,HFD,-72.6494,41.7367,0,0,0,230.00,2.00,0,29.60,0,...,0,0,0,0,0,0,0,0,KHFD 010030Z AUTO 23002KT 10SM OVC090 04/02 A2...,0
2020-01-01 00:40:00,HFD,-72.6494,41.7367,0,0,0,230.00,2.00,0,29.60,0,...,0,0,0,0,0,0,0,0,KHFD 010035Z AUTO 23002KT 10SM OVC090 04/02 A2...,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-11-30 23:20:00,ISP,-73.1017,40.7939,63.00,63.00,100.00,160.00,16.00,0.09,29.46,0,...,0,0,0,0,38.00,180.00,2020-11-30 23:10,63.00,KISP 302334Z 16016G26KT 7SM SCT006 BKN029 OVC0...,0
2020-11-30 23:30:00,ISP,-73.1017,40.7939,63.00,63.00,100.00,160.00,16.00,0.09,29.46,0,...,0,0,0,0,38.00,180.00,2020-11-30 23:10,63.00,KISP 302334Z 16016G26KT 7SM SCT006 BKN029 OVC0...,0
2020-11-30 23:40:00,ISP,-73.1017,40.7939,63.00,63.00,100.00,160.00,16.00,0.09,29.46,0,...,0,0,0,0,38.00,180.00,2020-11-30 23:10,63.00,KISP 302334Z 16016G26KT 7SM SCT006 BKN029 OVC0...,0
2020-11-30 23:50:00,ISP,-73.1017,40.7939,63.00,62.10,96.88,160.00,18.00,0.09,29.45,997.30,...,-RA BR,0,0,0,38.00,180.00,2020-12-30 23:10,63.00,KISP 302356Z 16018G28KT 5SM -RA BR FEW006 BKN0...,0


In [61]:
df.loc[Times[0],Stations[0]]

lon                                                           -76.1688
lat                                                            39.4662
tmpf                                                             46.90
dwpf                                                             29.30
relh                                                             50.13
drct                                                            270.00
sknt                                                             10.00
p01i                                                               0.0
alti                                                             29.70
mslp                                                           1006.10
vsby                                                              10.0
gust                                                             18.00
skyc1                                                              0.8
skyc2                                                              1.0
skyc3 

In [62]:
Station_Index = {}
Times_Index = {}

for i, station in enumerate(Stations):
    Station_Index[station] = i
    
for i, time in enumerate(Times):
    Times_Index[time] = i

In [63]:
Node_Data = []
Y_data = []

n_nodes = len(Stations)
for time in tqdm(Times):
    
    Node_Lon_Lat = []
    Node_Color = []
    Size = []
    Vertiport = []
    Tooltip = []
    IFRs = []
    Cloud_Coverages = []
    Cloud_Alts = []
    East_Winds = []
    North_Winds = []
    Precips = []
    Lons = []
    Lats = []

    nData = np.zeros((n_nodes, 8))

    for i, station in enumerate(Stations):
        
        arr = df.loc[time,station]
        
        lon = float(arr["lon"])
        lat = float(arr["lat"])
        vsby = float(arr["vsby"])
        wind_dir = float(arr["drct"]) +180
        wind_speed = float(arr["sknt"]) + 4*np.random.normal()
        precip = float(arr["p01i"])
        
        e_wind = wind_speed*np.sin(np.deg2rad(wind_dir))
        n_wind = wind_speed*np.cos(np.deg2rad(wind_dir))
        
        if vsby < 0.1:
            vsby = 10
        
        IFR = vsby < 5
        
        skycond = float(arr[f'skyc1'] )
        skyalt = float(arr[f'skyl1'])
        
        if skyalt < 0.5:
                skyalt = 100000
                
        for j in range(4):
            
            skycondt = arr[f'skyc{j+1}'] 
            skyaltt = float(arr[f'skyl{j+1}'])
            skyaltt = float(skyaltt)
            
            if skyaltt < 0.5:
                skyaltt = 100000
            
            if (skycondt > 0.8) and (skyaltt < 2000):
                skycond = skycondt
                skyalt = skyaltt
                IFR = True
                
        nData[i,0] = float(IFR)
        nData[i,1] = skycond
        nData[i,2] = skyalt
        nData[i,3] = e_wind
        nData[i,4] = n_wind
        nData[i,5] = precip
        nData[i,6] = lon
        nData[i,7] = lat
        
    Node_Data.append(nData)
    



  0%|          | 0/48241 [00:00<?, ?it/s]

In [12]:
full_data_dict = {}
for i, data in enumerate(Node_Data):
    full_data_dict[i] = data.tolist()

with open("complete_graph_data.json", "w") as outfile:
    json.dump(full_data_dict, outfile)