In [1]:
"""
Example script that scrapes data from the IEM ASOS download service
"""
from __future__ import print_function
import json
import time
from tqdm.notebook import tqdm
import datetime

# Python 2 and 3: alternative 4
try:
    from urllib.request import urlopen
except ImportError:
    from urllib2 import urlopen

# Maximum number of times download_data() tries a single URL before giving up
MAX_ATTEMPTS = 6
# Base URL of the IEM ASOS request service; callers append query parameters.
# HTTPS here can be problematic for installs that don't have the
# Let's Encrypt CA, so plain HTTP is used.
SERVICE = "http://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?"


def download_data(uri):
    """Fetch the data from the IEM

    The IEM download service has some protections in place to keep the number
    of inbound requests in check.  This function retries a failed download up
    to MAX_ATTEMPTS times, waiting 5s, 10s, 20s, ... (exponential backoff)
    between attempts.  An attempt counts as failed when the request raises or
    when the response body starts with "ERROR".

    Args:
      uri (string): URL to fetch

    Returns:
      string data, or "" when every attempt failed
    """
    for attempt in range(MAX_ATTEMPTS):
        try:
            # The with-block closes the HTTP response even if read/decode fails.
            with urlopen(uri, timeout=300) as response:
                data = response.read().decode("utf-8")
            if not data.startswith("ERROR"):
                return data
            print("download_data(%s) got an ERROR response" % (uri,))
        except Exception as exp:
            # Best effort: any failure is reported and retried.
            print("download_data(%s) failed with %s" % (uri, exp))
        # Back off before the next try; no point sleeping after the last one.
        if attempt < MAX_ATTEMPTS - 1:
            time.sleep(5 * 2 ** attempt)

    print("Exhausted attempts to download, returning empty data")
    return ""


def get_stations_from_filelist(filename):
    """Build a listing of stations from a simple file listing the stations.

    The file should simply have one station per line.  Surrounding whitespace
    is stripped and blank lines are ignored, so a trailing newline does not
    produce an empty station id.

    Args:
      filename (string): path of the text file to read

    Returns:
      list of station id strings, in file order
    """
    # The with-block guarantees the file handle is closed.
    with open(filename) as fh:
        stripped = (line.strip() for line in fh)
        return [station for station in stripped if station]


def get_stations_from_networks(states="MA"):
    """Build a station list by using a bunch of IEM networks.

    One network named "<STATE>_ASOS" is queried per state, and the "sid"
    property of every feature in the returned GeoJSON is collected.

    Args:
      states: whitespace-separated string of state codes (e.g. "MA CT"), or
        an iterable of state codes.  Defaults to "MA".

    Returns:
      list of station ids, grouped network by network in request order
    """
    if isinstance(states, str):
        states = states.split()

    stations = []
    for state in states:
        network = "%s_ASOS" % (state,)
        # Get metadata
        uri = (
            "https://mesonet.agron.iastate.edu/geojson/network/%s.geojson"
        ) % (network,)
        # Close the response when done, and never block forever on a stalled
        # connection.
        with urlopen(uri, timeout=300) as response:
            jdict = json.load(response)
        stations.extend(site["properties"]["sid"] for site in jdict["features"])
    return stations


def download_alldata(startts=None, endts=None, outdir="Training_Data/Data"):
    """An alternative method that fetches all available data.
    Service supports up to 24 hours worth of data at a time.

    The range [startts, endts) is walked in 24-hour steps.  Each step is
    requested through download_data() and written to "<outdir>/<YYYYMMDD>.txt".
    A step whose download comes back empty is reported and skipped instead of
    leaving an empty file behind.

    Args:
      startts (datetime.datetime): first timestamp to request (UTC);
        defaults to 2020-01-01.
      endts (datetime.datetime): timestamp at which to stop (UTC);
        defaults to 2020-12-20.
      outdir (string): directory receiving the files; created if missing.
    """
    # Local import: the cell defining this function has no top-level `import os`.
    import os

    # timestamps in UTC to request data for
    if startts is None:
        startts = datetime.datetime(2020, 1, 1)
    if endts is None:
        endts = datetime.datetime(2020, 12, 20)
    interval = datetime.timedelta(hours=24)

    service = SERVICE + "data=all&tz=Etc/UTC&format=comma&latlon=yes&"

    # Writing would fail with FileNotFoundError if the directory were missing.
    os.makedirs(outdir, exist_ok=True)

    now = startts
    while now < endts:
        window_end = now + interval
        thisurl = (
            service
            + now.strftime("year1=%Y&month1=%m&day1=%d&")
            + window_end.strftime("year2=%Y&month2=%m&day2=%d&")
        )
        print("Downloading: %s" % (now,))
        data = download_data(thisurl)
        if data:
            outfn = os.path.join(outdir, "%s.txt" % (now.strftime("%Y%m%d"),))
            with open(outfn, "w") as fh:
                fh.write(data)
        else:
            # download_data() returns "" once its retries are exhausted.
            print("No data for %s, skipping" % (now,))
        now = window_end


def main():
    """Our main method

    Requests the 2019-01-01 .. 2019-12-20 record for every station in the
    hard-coded list (each id is prefixed with "K" in the request) and writes
    one file per station to "Training_Data/<station>_<start>_<end>.txt".
    Stations whose response is shorter than 1000 characters are reported and
    skipped.
    """
    # Local import: the cell defining this function has no top-level `import os`.
    import os

    # timestamps in UTC to request data for
    startts = datetime.datetime(2019, 1, 1)
    endts = datetime.datetime(2019, 12, 20)
    # Responses shorter than this are treated as carrying no usable data.
    min_response_chars = 1000

    service = SERVICE + "data=all&tz=Etc/UTC&format=comma&latlon=yes&"

    service += startts.strftime("year1=%Y&month1=%m&day1=%d&")
    service += endts.strftime("year2=%Y&month2=%m&day2=%d&")

    # Two examples of how to specify a list of stations
    stations = ['ABE', 'ACK', 'ACY', 'AFN', 'ALB', 'APG',
       'AQW', 'ASH', 'AVP', 'BAF', 'BDL', 'BDR', 'BED', 'BGM', 'BID',
       'BLM', 'BOS', 'BVY', 'CDW', 'CEF', 'CKZ', 'CQX', 'DOV', 'DXR',
       'DYL', 'EWB', 'EWR', 'FIT', 'FMH', 'FOK', 'FRG', 'FWN', 'GHG',
       'GON', 'HFD', 'HPN', 'HVN', 'HWV', 'HYA', 'HZL', 'IJD', 'ILG',
       'ISP', 'JFK', 'JPX', 'LDJ', 'LGA', 'LNS', 'LOM', 'LWM',
       'MGJ', 'MIV', 'MJX', 'MMK', 'MMU', 'MPO', 'MQS', 'MSV',
       'MVY', 'N03', 'NEL', 'NYC', 'OQN', 'OQU', 'ORE', 'ORH',
       'OWD', 'OXC', 'PHL', 'PNE', 'POU', 'PSF', 'PTW', 'PVC', 'PVD',
       'PYM', 'RDG', 'SCH', 'SFZ', 'SMQ', 'SNC', 'SWF', 'TAN', 'TEB',
       'TTN', 'UKT', 'UUU', 'VAY', 'W29', 'WRI', 'WST', 'WWD', 'XLL']
    # stations = get_stations_from_filelist("mystations.txt")

    # Make sure the output directory exists before the first write.
    os.makedirs("Training_Data", exist_ok=True)

    for station in tqdm(stations):
        uri = "%s&station=K%s" % (service, station)
        print("Downloading: %s" % (station,))
        data = download_data(uri)
        if len(data) < min_response_chars:
            print(f"{station} not an airport")
            continue

        outfn = "Training_Data/%s_%s_%s.txt" % (
            station,
            startts.strftime("%Y%m%d%H%M"),
            endts.strftime("%Y%m%d%H%M"),
        )
        # The with-block closes the file even if the write raises.
        with open(outfn, "w") as out:
            out.write(data)


if __name__ == "__main__":
    # Entry point: fetch one file per station with main().  The 24-hour
    # slicing variant below is left disabled.
    # download_alldata()
    main()

  0%|          | 0/93 [00:00<?, ?it/s]

Downloading: ABE


KeyboardInterrupt: 

In [2]:
import numpy as np
import networkx as nx
import json
import matplotlib.pyplot as plt
import random
import os
import pydeck as pdk
from datetime import datetime
import matplotlib.pyplot as plt
from meteostat import Point, Daily, Hourly
import pyproj
import ipywidgets
from tqdm.notebook import tqdm
import datetime

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
# (lat, lon) pair, as the name says.
JFK_LAT_LON = 40.645944, -73.784839

# Two corners of the region of interest, each stored as (lat, lon); the names
# suggest lower-left and upper-right.  An earlier pair of values,
# (38.585021, -77.555894) and (42.634826, -70.954819), used to be assigned
# here and was overwritten straight away without ever being read, so only the
# effective values are kept.
LL = 39.88494891-1, -75.33930212-1
UR = 41.888126+1, -72.73301048+3

# Element order is [LL lon, LL lat, UR lon, UR lat]; in_bounding_box() reads
# items 0/2 as the longitude limits and items 1/3 as the latitude limits.
DOWNTOWN_BOUNDING_BOX = [
    LL[1],
    LL[0],
    UR[1],
    UR[0],
]

In [3]:
def in_bounding_box(point):
    """Tell whether a (lng, lat) pair falls inside DOWNTOWN_BOUNDING_BOX.

    The box is read as [min lng, min lat, max lng, max lat], and a point
    lying exactly on an edge counts as inside.
    """
    lng, lat = point
    min_lng = DOWNTOWN_BOUNDING_BOX[0]
    max_lng = DOWNTOWN_BOUNDING_BOX[2]
    min_lat = DOWNTOWN_BOUNDING_BOX[1]
    max_lat = DOWNTOWN_BOUNDING_BOX[3]

    lng_ok = min_lng <= lng <= max_lng
    lat_ok = min_lat <= lat <= max_lat
    if not lng_ok:
        return lng_ok
    return lat_ok


In [4]:
# Names of the per-station text files directly inside Training_Data.
# endswith() avoids matching names that merely contain ".txt" (e.g.
# "x.txt.bak"), and sorting makes the load order reproducible because
# os.listdir() returns entries in arbitrary order.
files = sorted(
    name for name in os.listdir("Training_Data") if name.endswith(".txt")
)

In [7]:
# Window onto which every station's observations are aligned.
addval_start_date = datetime.datetime(year=2019, month=1, day=1)
addval_end_date = datetime.datetime(year=2019, month=3, day=1)
# One row per 5-minute step from start to end; the index and the single
# "times" column carry the same timestamps.
addval_daterange = pd.date_range(
    addval_start_date,
    addval_end_date,
    freq="5min",
).to_frame(name="times")

In [8]:
# Load each station file and snap its observations onto the shared 5-minute
# grid: every grid timestamp takes the observation nearest to it in time.
df_array = []
for fname in files:
    station_obs = pd.read_csv(
        f"Training_Data/{fname}",
        parse_dates=['valid'],
        skiprows=5,  # the first five lines of each file are not table rows
        low_memory=False,
    ).set_index("valid")
    df = pd.merge_asof(
        left=addval_daterange,
        right=station_obs,
        left_index=True,
        right_index=True,
        direction='nearest',
    )
    df_array.append(df)

# Stack all stations into one long frame.
df = pd.concat(df_array)

# Turn text codes into numbers everywhere in the frame.  NOTE(review): 'M'
# looks like the missing-value marker, so missing readings become 0 in every
# column (including visibility) -- confirm this is intended.
text_codes = ['M', "CLR", "VV ", "FEW", "SCT", "BKN", "OVC"]
numeric_codes = [0, 0, 0, 0.1, 0.3, 0.8, 1]
df = df.replace(text_codes, numeric_codes)
df["vsby"] = df["vsby"].astype(float)

# Sorted unique axis values, taken before those columns move into the index.
Stations = np.unique(df.station)
Times = np.unique(df.times)

df = df.set_index(['times', 'station'])

In [9]:
# Builds one (n_nodes, 8) feature matrix per timestamp and collects them in
# Node_Data.  Column layout of each matrix:
#   0 IFR flag, 1 sky cover, 2 sky altitude, 3 precipitation,
#   4 east wind, 5 north wind, 6 longitude, 7 latitude
Node_Data = []
Y_data = []

n_nodes = len(Stations)
# The loop variable is deliberately not named `time`: that would rebind the
# kernel-global `time` module that download_data() relies on for time.sleep.
for timestamp in tqdm(Times):

    nData = np.zeros((n_nodes, 8))

    for i, station in enumerate(Stations):

        arr = df.loc[timestamp, station]

        lon = float(arr["lon"])
        lat = float(arr["lat"])
        vsby = float(arr["vsby"])
        # Rotate the reported direction by half a turn before decomposing.
        wind_dir = float(arr["drct"]) + 180
        wind_speed = float(arr["sknt"])*30.8667 #m/min
        precip = float(arr["p01i"])

        e_wind = wind_speed*np.sin(np.deg2rad(wind_dir))
        n_wind = wind_speed*np.cos(np.deg2rad(wind_dir))

        # Low visibility alone already sets the IFR flag.
        IFR = vsby < 3

        # Default to the first sky layer ...
        skycond = float(arr['skyc1'])
        skyalt = float(arr['skyl1'])

        # ... unless one of the four layers has cover above 0.7 below an
        # altitude of 1000; the first such layer is reported instead and
        # also sets the IFR flag.
        for j in range(4):

            skycondt = float(arr[f'skyc{j+1}'])
            skyaltt = float(arr[f'skyl{j+1}'])

            if (skycondt > 0.7) and (skyaltt < 1000):
                skycond = skycondt
                skyalt = skyaltt
                IFR = True
                break

        nData[i,0] = float(IFR)
        nData[i,1] = skycond
        nData[i,2] = skyalt
        nData[i,3] = precip
        nData[i,4] = e_wind
        nData[i,5] = n_wind
        nData[i,6] = lon
        nData[i,7] = lat

    Node_Data.append(nData)
    



  0%|          | 0/16993 [00:00<?, ?it/s]

In [10]:
# Map each position in Node_Data to its matrix as nested lists so it can be
# serialized as JSON.
full_data_dict = {
    step: snapshot.tolist() for step, snapshot in enumerate(Node_Data)
}

with open("complete_graph_data_training.json", "w") as outfile:
    json.dump(full_data_dict, outfile)