<a href="https://colab.research.google.com/github/m-wessler/nbm-verification/blob/main/get_obs_synopticAPI_streamline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports

In [109]:
import itertools
from datetime import datetime, timedelta
from functools import partial
from multiprocessing import Pool, cpu_count

import numpy as np

# Globals

In [121]:
# Multiprocess settings
# The worker pool is sized at four processes per CPU reported by cpu_count().
# NOTE(review): presumably oversubscribed because the work is network-bound
# rather than CPU-bound -- confirm.
process_pool_size = 4 * cpu_count()

# Backend APIs (station metadata and QC segment endpoints)
metadata_api = "https://api.synopticdata.com/v2/stations/metadata?"
qc_api = "https://api.synopticdata.com/v2/stations/qcsegments?"

# Data Query APIs
# NOTE(review): statistics_api points at api.synopticlabs.org while every
# other endpoint here uses api.synopticdata.com -- confirm this is intended.
timeseries_api = "https://api.synopticdata.com/v2/stations/timeseries?"
statistics_api = "https://api.synopticlabs.org/v2/stations/statistics?"
precipitation_api = "https://api.synopticdata.com/v2/stations/precipitation?"

# The four lookup tables below share the same element-name keys
# ('qpf', 'maxt', 'mint').

# Element name -> API endpoint queried for that element
synoptic_apis = {
    'qpf': precipitation_api,
    'maxt': statistics_api,
    'mint': statistics_api,
}

# Element name -> synoptic variable name (None for qpf, which has no
# variable to request)
synoptic_vars = {
    'qpf': None,
    'maxt': 'air_temp',
    'mint': 'air_temp',
}

# Element name -> statistic type
stat_type = {
    'qpf': 'total',
    'maxt': 'maximum',
    'mint': 'minimum',
}

# Element name -> [observation start hour, observation end hour]
# NOTE(review): the strings look like 'HHMM'; presumably UTC -- confirm.
ob_hours = {
    'qpf': ['1200', '1200'],
    'maxt': ['1200', '0600'],
    'mint': ['0000', '1800'],
}

# Methods

In [131]:
def cwa_list(input_region):
    """Return the three-letter CWA identifiers belonging to a region.

    Parameters
    ----------
    input_region : str
        Region key, matched case-insensitively and with surrounding
        whitespace ignored: "WR", "CR", "ER", "SR", or "CONUS" for the
        four regions combined.

    Returns
    -------
    list of str or numpy.ndarray
        A list for a single region. For "CONUS", a 1-D array holding the
        regions concatenated in WR, CR, ER, SR order. Every identifier
        appears exactly once.

    Raises
    ------
    KeyError
        If ``input_region`` is not one of the recognised keys.
    """
    region_key = input_region.strip().upper()

    region_dict = {
        "WR": ["BYZ", "BOI", "LKN", "EKA", "FGZ", "GGW", "TFX", "VEF", "LOX", "MFR",
               "MSO", "PDT", "PSR", "PIH", "PQR", "REV", "STO", "SLC", "SGX", "MTR",
               "HNX", "SEW", "OTX", "TWC"],

        "CR": ["ABR", "BIS", "CYS", "LOT", "DVN", "BOU", "DMX", "DTX", "DDC", "DLH",
               "FGF", "GLD", "GJT", "GRR", "GRB", "GID", "IND", "JKL", "EAX", "ARX",
               "ILX", "LMK", "MQT", "MKX", "MPX", "LBF", "APX", "IWX", "OAX", "PAH",
               "PUB", "UNR", "RIW", "FSD", "SGF", "LSX", "TOP", "ICT"],

        "ER": ["ALY", "LWX", "BGM", "BOX", "BUF", "BTV", "CAR", "CTP", "RLX", "CHS",
               "ILN", "CLE", "CAE", "GSP", "MHX", "OKX", "PHI", "PBZ", "GYX", "RAH",
               "RNK", "AKQ", "ILM"],

        # "MRX" used to be listed twice here, which produced a duplicate
        # (cwa, date) task for every date downstream.
        "SR": ["ABQ", "AMA", "FFC", "EWX", "BMX", "BRO", "CRP", "EPZ", "FWD", "HGX",
               "HUN", "JAN", "JAX", "KEY", "MRX", "LCH", "LZK", "LUB", "MLB", "MEG",
               "MAF", "MFL", "MOB", "OHX", "LIX", "OUN", "SJT", "SHV", "TAE", "TBW",
               "TSA"]}

    if region_key == "CONUS":
        # Flatten every region into one 1-D array, in dict insertion order
        return np.hstack(list(region_dict.values()))

    try:
        return region_dict[region_key]
    except KeyError:
        valid_keys = ", ".join([*region_dict, "CONUS"])
        # Still a KeyError (as before), but now it names the valid options
        raise KeyError(
            f"Unknown region {input_region!r}; expected one of: {valid_keys}"
        ) from None

In [123]:
def fetch_obs_from_API(iterable, **req):
    """Worker entry point for a single (wfo, date) task.

    Parameters
    ----------
    iterable : sequence of length 2
        A ``(wfo, date)`` pair. Anything that does not unpack into
        exactly two items raises here.
    **req
        Fixed request keyword arguments. They are accepted but not read
        by the current body.

    Returns
    -------
    None
        The body performs no request yet and always returns None.
        NOTE(review): presumably a placeholder for the path of a written
        output file -- confirm once implemented.
    """
    # Unpack up front so a malformed task fails immediately
    office_id, obs_date = iterable

    return None

# User Input/Multiprocessing Inputs

In [124]:
# Collect user inputs
# `element` is the key used to index the per-element lookup tables
# ('qpf', 'maxt' or 'mint'); `region_selection` is passed to cwa_list().
element = 'qpf'
region_selection = 'wr'

# Date range as 'YYYYMMDD' strings; both endpoints are included downstream
start_date = '20231001'
end_date = '20231031'

# Immediately convert user input to datetime objects pinned at 00:00, so
# later cells only ever see datetimes under these two names
start_date = datetime.strptime(f'{start_date}0000', '%Y%m%d%H%M')
end_date = datetime.strptime(f'{end_date}0000', '%Y%m%d%H%M')

# Main/Multiprocessing Call

In [130]:
# Build arg dict: the fixed keyword arguments bound onto every
# fetch_obs_from_API call, all selected by the chosen element
synoptic_api_args = {
    'obs_start_hour': ob_hours[element][0],
    'obs_end_hour': ob_hours[element][1],
    'ob_stat': stat_type[element],
    'api': synoptic_apis[element],
    # qpf has no synoptic variable, so no '&vars=' fragment is built for it
    'vars_query': (None if element == 'qpf'
                   else f'&vars={synoptic_vars[element]}'),
    'days_offset': 1 if element == 'qpf' else 0}

# Get the CWA list if using regions
region_selection_iterable = cwa_list(region_selection)

# Build an iterable date list from range, one 'YYYYMMDD' string per day
# with both endpoints included. timedelta.days floors, so a reversed range
# yields an empty list, exactly as a `while iter_date <= end_date` loop would.
n_days = (end_date - start_date).days + 1
date_selection_iterable = [
    (start_date + timedelta(days=day_offset)).strftime('%Y%m%d')
    for day_offset in range(n_days)]

# Build our iterator (single cwa, single date) can add network if needed
multiprocess_iterable = list(itertools.product(
    region_selection_iterable, date_selection_iterable))

# Assign the fixed kwargs to the function
multiprocess_function = partial(fetch_obs_from_API, **synoptic_api_args)

# pool.map blocks until every task has finished, and leaving the `with`
# block terminates the pool, so no explicit pool.terminate() is needed.
with Pool(process_pool_size) as pool:
    print(f'Spooling up process pool for {len(multiprocess_iterable)} tasks '
          f'across {process_pool_size} workers')
    output_files = pool.map(multiprocess_function, multiprocess_iterable)
    print('Multiprocessing Complete')

Spooling up process pool for 744 tasks across 8 workers
Multiprocessing Complete
