In [7]:
import pickle
import json
import glob

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr

import scipy.special as spcl

import plotly.express as px
import plotly.graph_objects as go
import plotly.subplots as psb
import plotly.io as pio

import analysis_module as anlyz

# Use plotly's bare "none" template as the default for every plotly figure
# created in this notebook.
pio.templates.default = "none"

# Development aid (disabled): uncomment to load IPython's autoreload extension.
# %load_ext autoreload
# %autoreload 2

# Select the interactive "widget" matplotlib backend for this notebook.
%matplotlib widget

In [8]:
def _station_scalar(value, sta_n, **attrs):
    """Wrap one per-station value in a length-1 DataArray along ``station``."""
    return xr.DataArray(
        data=[value],
        dims=["station"],
        coords=dict(station=("station", [sta_n])),
        attrs=attrs,
    )


def _historical_level(dymx, fdpy, max_level=500):
    """Return the highest integer level exceeded more than ``fdpy`` days/year.

    Levels 0, 1, ..., ``max_level - 1`` are tested in increasing order against
    the daily maxima in ``dymx``; the result is one less than the first level
    whose exceedance count is not above the target count.

    Raises
    ------
    ValueError
        If every tested level is still exceeded more often than the target.
        Previously this case left the result unbound (or silently reused the
        value computed for the previous station).
    """
    # Number of days that corresponds to `fdpy` days per year over the
    # (non-missing) length of the record.
    target_count = dymx.count() / 365.25 * fdpy
    for lev in range(max_level):
        if (dymx >= lev).sum() <= target_count:
            # NOTE(review): if level 0 already fails the test this returns -1;
            # negative levels are never searched.
            return lev - 1
    raise ValueError(
        f"no level in 0..{max_level - 1} is exceeded {fdpy} or fewer "
        "days per year on average"
    )


def _load_percentile_frames(scenario_dir_pattern, pctl):
    """Read every JSON file matching the glob pattern into per-percentile frames.

    Returns a dict mapping each percentile key in ``pctl`` to a DataFrame
    indexed by year, with one column per threshold label, columns sorted in
    descending label order.
    """
    per_pctl = {p: dict() for p in pctl}
    for fn in glob.glob(scenario_dir_pattern):

        with open(fn, "r") as f:
            d = json.load(f)

        # Threshold label = the three characters preceding the final five
        # characters of the path.
        # NOTE(review): presumably the names end in "NNN.json" -- confirm.
        thrsh = fn[-8:-5]

        annual = d["annual_percentiles"]
        yrs = annual["years"]  # same for every percentile, so read once
        for p in pctl:
            per_pctl[p][thrsh] = pd.Series(annual["percentiles"][p], index=yrs)

    return {
        p: pd.DataFrame(per_pctl[p]).sort_index(axis=1, ascending=False)
        for p in pctl
    }


def level_evolution_dataset(fdpy):
    """Build, save and return the level-evolution dataset for one frequency.

    For every station returned by ``anlyz.station_list`` this computes

    * the level reached by daily-maximum observed sea level more than ``fdpy``
      days per year on average during 2001-2020, and
    * for each scenario and year, the highest threshold whose 50th-percentile
      count is at least ``fdpy``, together with the 10th/50th/90th percentile
      counts at that threshold.

    Parameters
    ----------
    fdpy : int
        Flooding days per year. Must be an integer because it is formatted
        with ``:02d`` in variable and file names.

    Returns
    -------
    xarray.Dataset
        Dataset with dimensions ``station``, ``scenario`` and ``year``.

    Raises
    ------
    ValueError
        If the 2001-2020 level cannot be found among the integer levels tested
        for a station.

    Notes
    -----
    Reads ``../data/tide_gauge/<id>.nc`` and the files under
    ``./ensemble_stats/<id>/<scenario>/``; writes
    ``./ucs_level_<fdpy>days.nc`` and prints progress to stdout.
    """

    # fdpy is flooding days per year

    print("")
    print("--------------------------------------------------------------------")
    print(f"{fdpy} flooding days per year")
    print("--------------------------------------------------------------------")
    print("")

    station_list = anlyz.station_list(exclude="8638901")
    station_datasets = []

    pctl = ["10", "50", "90"]
    scenario_keys = ["low", "int_low", "int", "int_high", "high"]

    for sta_n, station in station_list.iterrows():

        print(f"{station['id']}: {station['tool_name']}")

        meta_arrays = dict()

        meta_arrays["station_id"] = _station_scalar(
            station["id"],
            sta_n,
            description="unique NOAA station identification number",
        )
        meta_arrays["station_name"] = _station_scalar(
            station["tool_name"],
            sta_n,
            description="name of station location",
        )
        meta_arrays["longitude"] = _station_scalar(
            station["lon"],
            sta_n,
            description="longitude of station",
            units="degrees east",
        )
        meta_arrays["latitude"] = _station_scalar(
            station["lat"],
            sta_n,
            description="latitude of station",
            units="degrees north",
        )

        # Close the netCDF file once the observations have been pulled into
        # pandas, instead of leaking one open handle per station.
        with xr.open_dataset(f"../data/tide_gauge/{station['id']}.nc") as tg:
            sl = tg.observed.to_pandas().loc["2001":"2020"] * 100  # cm

        # Daily maximum, kept only for days with exactly 24 non-missing values.
        dymx = sl.groupby(pd.Grouper(freq="D")).apply(
            lambda x: x.max() if x.count() == 24 else None
        )

        try:
            lev_fdpy_0120 = _historical_level(dymx, fdpy)
        except ValueError as err:
            raise ValueError(f"station {station['id']}: {err}") from err

        meta_arrays[f"level_{fdpy:02d}_0120"] = _station_scalar(
            lev_fdpy_0120,
            sta_n,
            description=f"highest level exceeded at least {fdpy} times per year on average during 2001–2020",
            units="centimeters above MHHW",
        )

        meta_dataset = xr.Dataset(meta_arrays)

        sta_path = f"./ensemble_stats/{station['id']}/"

        scenario_datasets = []
        for scenario in scenario_keys:

            df = _load_percentile_frames(f"{sta_path}{scenario}/*", pctl)

            # Columns are sorted high -> low, so idxmax picks the first (i.e.
            # highest) threshold whose median count reaches fdpy.
            # NOTE(review): when no threshold qualifies for a year, idxmax
            # still returns the first column; confirm that cannot occur.
            lev_fdpy = (df["50"] >= fdpy).idxmax(axis=1)
            years = lev_fdpy.index.tolist()

            variables = dict()
            variables[f"level_{fdpy:02d}"] = {
                "values": [int(lev) for lev in lev_fdpy.tolist()],
                "description": f"highest level exceeded at least {fdpy} times on average for each year",
                "units": "centimeters above MHHW",
            }
            for p in pctl:
                variables[f"flood_days_p{p}"] = {
                    "values": [df[p].loc[y, h] for y, h in zip(years, lev_fdpy.values)],
                    "description": f"{p}th percentile of flooding days for level_{fdpy:02d}",
                    "units": "days per year",
                }

            data_arrays = dict()
            for name, spec in variables.items():
                data_arrays[name] = xr.DataArray(
                    data=[[spec["values"]]],
                    dims=["station", "scenario", "year"],
                    coords=dict(
                        station=("station", [sta_n]),
                        scenario=("scenario", [scenario]),
                        year=("year", years),
                    ),
                    attrs=dict(
                        description=spec["description"],
                        units=spec["units"],
                    ),
                )

            scenario_datasets.append(xr.Dataset(data_arrays))

        station_datasets.append(
            xr.merge([meta_dataset, xr.concat(scenario_datasets, dim="scenario")])
        )

    level_fdpy_dataset = xr.concat(station_datasets, dim="station")

    level_fdpy_dataset["scenario_names"] = xr.DataArray(
        data=["Low", "Intermediate Low", "Intermediate", "Intermediate High", "High"],
        dims=["scenario"],
        coords=dict(scenario=("scenario", scenario_keys)),
        attrs=dict(description="Names of the U.S. Interagency SLR scenarios"),
    )

    level_fdpy_dataset.to_netcdf(f"./ucs_level_{fdpy:02d}days.nc")

    return level_fdpy_dataset



# Flooding-frequency targets (days per year) to build datasets for.
flood_days_per_year = [2, 4, 12, 26]
# Each call writes its dataset to a netCDF file as a side effect; only the
# dataset for the last target stays bound to `level_fdpy_dataset` afterwards.
for fdpy in flood_days_per_year:
    level_fdpy_dataset = level_evolution_dataset(fdpy)



--------------------------------------------------------------------
02 flooding days per year
--------------------------------------------------------------------

1630000: Apra Harbor, Guam
1890000: Wake Island
1820000: Kwajalein, RMI
1619910: Sand Island (Midway)
9461380: Adak Island, AK
1770000: Pago Pago, AS
9462620: Unalaska, AK
9468756: Nome, AK
9463502: Port Moller, AK
9459450: Sand Point, AK
1611400: Nawiliwili, HI
1612340: Honolulu, HI
1612480: Mokuoloe, HI
1615680: Kahului, HI
1617433: Kawaihae, HI
1617760: Hilo, HI
9457292: Kodiak Island, AK
9455500: Seldovia, AK
9455760: Nikiski, AK
9455920: Anchorage, AK
9497645: Prudhoe Bay, AK
9454240: Valdez, AK
9454050: Cordova, AK
9453220: Yakutat, AK
9451600: Sitka, AK
9452400: Skagway, AK
9452210: Juneau, AK
9450460: Ketchikan, AK
9443090: Neah Bay, WA
9431647: Port Orford, CA
9432780: Charleston, OR
9418767: Humboldt Bay, CA
9419750: Crescent City, CA
9435380: South Beach, OR
9440910: Toke Point, WA
9439040: Astoria, OR
9416841: A

In [9]:
# Display the dataset left bound by the final iteration of the loop in the
# previous cell (the last entry of `flood_days_per_year`).
level_fdpy_dataset