In [1]:
import numpy as np
import pandas as pd
import xarray as xr
import json


In [53]:
# Load the station metadata table and pick out one station by its id.
# NOTE(review): pd.read_pickle can execute arbitrary code while unpickling;
# only load pickle files from a trusted source.
fname = "../data/stations.pickle"
stations = pd.read_pickle(fname)

# Select by boolean mask and fail loudly when the id is absent. A plain
# `for ... break` loop would silently leave `sta` bound to the last row of
# the table if no station matched.
station_id = "9439040"  # ids are compared as strings
matches = stations.loc[stations["id"] == station_id]
if matches.empty:
    raise KeyError(f"station id {station_id!r} not found in {fname}")

# Keep both names the loop used to bind: the row label and the row itself.
idx = matches.index[0]
sta = matches.iloc[0]
sta

id               9439040
name             Astoria
state                 OR
scn_name     Astoria, OR
tool_name    Astoria, OR
lat            46.207306
lon          -123.768306
Name: 107, dtype: object

In [54]:
# Read the reference levels for the selected station from its JSON file,
# which is located by station id. Later cells read the "flood" and
# "extremes" entries of this dictionary.
levels_path = "./levels/" + str(sta["id"]) + ".json"
with open(levels_path) as levels_file:
    lev = json.load(levels_file)
lev

{'datums': {'msl': -1.251,
  'mllw': -2.624,
  'mhhw': 0.0,
  'hat': 0.642,
  'lat': -3.266,
  'gt': 2.624},
 'flood': {'minor': 0.605,
  'moderate': 0.875,
  'major': 1.275,
  'nws_minor': 1.036,
  'nws_moderate': 1.798,
  'nws_major': None},
 'extremes': {'100yr': 1.194, '10yr': 1.004, '2yr': 0.801, '1yr': 0.537},
 'topten': [{'date': '1983-01-27', 'height': 1.145},
  {'date': '1933-12-17', 'height': 1.145},
  {'date': '2006-01-01', 'height': 1.142},
  {'date': '2015-12-10', 'height': 1.085},
  {'date': '1981-11-14', 'height': 1.084},
  {'date': '1967-12-02', 'height': 1.054},
  {'date': '1941-12-18', 'height': 1.054},
  {'date': '2003-12-24', 'height': 1.044},
  {'date': '1978-02-07', 'height': 1.041},
  {'date': '1998-12-02', 'height': 1.031}]}

In [55]:
# Load the observed water-level record for the selected station as a pandas
# Series indexed by time.
# The dataset is opened in a context manager so the file handle is released
# once the values have been pulled into memory, and so `tg` only ever names
# the pandas Series (never the intermediate xarray Dataset).
with xr.open_dataset("../data/tide_gauge/" + str(sta["id"]) + ".nc") as tg_ds:
    tg = tg_ds.observed.to_pandas()
tg

time
1925-01-01 01:00:00      NaN
1925-01-01 02:00:00      NaN
1925-01-01 03:00:00      NaN
1925-01-01 04:00:00      NaN
1925-01-01 05:00:00      NaN
                       ...  
2021-12-31 19:00:00    0.435
2021-12-31 20:00:00    0.070
2021-12-31 21:00:00   -0.561
2021-12-31 22:00:00   -1.218
2021-12-31 23:00:00   -1.806
Length: 847004, dtype: float32

In [56]:
# Daily extremes of the water-level series: group the record by calendar day
# once, then take the highest and lowest value in each day.
daily_groups = tg.groupby(pd.Grouper(freq="D"))
dmax = daily_groups.max()
dmin = daily_groups.min()

In [57]:
# Count, for every threshold, the number of days per meteorological year on
# which the daily maximum exceeded that threshold.
#
# A meteorological year runs from 1 May of year y-1 through 30 April of year
# y and is labelled by y, which is why the first calendar year in the record
# is skipped (yrs[1:]).
yrs = dmax.index.year.unique().values
met_yrs = [[str(y - 1) + "-05-01", str(y) + "-04-30"] for y in yrs[1:]]

fld_dys = {}

# Flood thresholds and return-level extremes share one lookup table.
# NOTE(review): a threshold whose value is None (nws_major in the displayed
# levels) shows up as 0 days in the displayed table rather than as missing;
# confirm that is the intended encoding.
thresholds = {**lev["flood"], **lev["extremes"]}

# Minimum number of non-missing daily maxima (292 of ~365 days, roughly 80%)
# required before a meteorological year is counted; otherwise it is missing.
min_valid_days = 292

# Slice each meteorological year and test its completeness once, instead of
# repeating both steps for every threshold. Series.count() already ignores
# NaN, so it equals dropna().count().
met_yr_dmax = [dmax.loc[start:end] for start, end in met_yrs]
met_yr_complete = [year_dmax.count() >= min_valid_days for year_dmax in met_yr_dmax]

fld_dys["annual"] = pd.DataFrame(
    {
        h: pd.Series(
            [
                (year_dmax > thresholds[h]).sum() if complete else None
                for year_dmax, complete in zip(met_yr_dmax, met_yr_complete)
            ],
            index=yrs[1:],
        )
        for h in thresholds
    }
)
fld_dys["annual"]


Unnamed: 0,minor,moderate,major,nws_minor,nws_moderate,nws_major,100yr,10yr,2yr,1yr
1926,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.0
1927,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,5.0
1928,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0
1929,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,7.0
1930,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...
2017,13.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,24.0
2018,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.0
2019,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.0
2020,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0


In [58]:
# Annual mean water level per meteorological year, labelled by the ending
# year. A year is only reported when at least 292 daily maxima are present;
# otherwise its entry is left missing.
amsl_values = []
for start, end in met_yrs:
    has_enough_days = dmax.loc[start:end].dropna().count() >= 292
    amsl_values.append(tg.loc[start:end].mean() if has_enough_days else None)

amsl = pd.Series(amsl_values, index=yrs[1:])
amsl

1926   -1.236079
1927   -1.229825
1928   -1.161484
1929   -1.282968
1930   -1.310332
          ...   
2017   -1.176313
2018   -1.226503
2019   -1.255042
2020   -1.279208
2021   -1.261583
Length: 96, dtype: float64

In [59]:
# First reference decade: it starts at the first tabulated year, but never
# earlier than 1970, and spans ten labelled years.
first_tabulated_year = fld_dys["annual"].index[0]
f10_yr1 = max(first_tabulated_year, 1970)
f10_yr2 = f10_yr1 + 9
[f10_yr1, f10_yr2]


[1970, 1979]

In [60]:
# Last decade: the tenth-from-last and the last year labels of the annual
# flood-day table.
last_decade_bounds = fld_dys["annual"].index[[-10, -1]]
l10_yr1, l10_yr2 = last_decade_bounds
[l10_yr1, l10_yr2]

[2012, 2021]

In [61]:
def _years_of_data(first_year, last_year):
    """Return the number of non-missing daily maxima, expressed in years,
    between May of `first_year - 1` and April of `last_year`."""
    span = dmax.loc[f"{first_year-1}-05":f"{last_year}-04"]
    return span.dropna().count() / 365.25


# Total flood days per threshold in the first and last decades.
annual_counts = fld_dys["annual"]
f10_htf = annual_counts.loc[f10_yr1:f10_yr2].sum(axis=0)
l10_htf = annual_counts.loc[l10_yr1:l10_yr2].sum(axis=0)

# Raw difference in decade totals.
htf_change = l10_htf - f10_htf

# Difference in flood days per year of available data (last minus first).
# NOTE(review): the decade totals come from the annual table, while the
# denominators count every valid day in the decade window - confirm the two
# are meant to cover the same set of days.
l10_rate = l10_htf / _years_of_data(l10_yr1, l10_yr2)
f10_rate = f10_htf / _years_of_data(f10_yr1, f10_yr2)
htf_change_nrm = l10_rate - f10_rate
htf_change_nrm


minor           1.079918
moderate        0.097268
major           0.000000
nws_minor      -0.100590
nws_moderate    0.000000
nws_major       0.000000
100yr           0.000000
10yr           -0.200879
2yr            -0.005436
1yr             3.469624
dtype: float64

In [62]:
# Mean water level over the first and last decades (May of the year before
# the first label through April of the last label), and the change between
# them rounded to two decimals.
first_decade_tg = tg.loc[f"{f10_yr1-1}-05":f"{f10_yr2}-04"]
last_decade_tg = tg.loc[f"{l10_yr1-1}-05":f"{l10_yr2}-04"]

f10_msl = first_decade_tg.mean()
l10_msl = last_decade_tg.mean()

msl_change = np.round(l10_msl - f10_msl, 2)
msl_change

0.03

In [63]:
# Reference distribution of decade-to-decade differences in flood frequency.
#
# Repeatedly draw two decade windows whose start years are more than 10 years
# apart, shift each window's daily maxima so its mean water level matches the
# first-decade mean (f10_msl), and record the absolute difference in
# exceedance days per year of data for every threshold.
n_draws = 100
# Fixed seed so that Restart & Run All reproduces the same draws.
rng = np.random.default_rng(0)

# Candidate start years: every tabulated year except the last ten.
yrs1 = fld_dys["annual"].index[:-10]
ryrs1 = yrs1[rng.integers(low=0, high=len(yrs1), size=n_draws)]


def _decade_htf_rates(start_year):
    """Return exceedance days per year of data for each threshold in the
    window from May of `start_year - 1` through April of `start_year + 9`,
    after shifting the window's daily maxima to the first-decade mean level.
    """
    window = slice(f"{start_year-1}-05", f"{start_year+9}-04")
    adj_dmax = dmax.loc[window] - tg.loc[window].mean() + f10_msl
    # Series.count() ignores NaN, so this is the number of valid days.
    years_of_data = adj_dmax.count() / 365.25
    return np.array(
        [(adj_dmax > thresholds[h]).sum() / years_of_data for h in thresholds],
        dtype=float,
    )


diff_rows = []
for y1 in ryrs1:
    # Second window must start more than 10 years away from the first.
    yrs2 = yrs1[np.abs(yrs1 - y1) > 10]
    if len(yrs2) == 0:
        raise ValueError(
            f"record too short: no start year more than 10 years from {y1}"
        )
    y2 = yrs2[rng.integers(low=0, high=len(yrs2))]

    diff_rows.append(_decade_htf_rates(y2) - _decade_htf_rates(y1))  # days/year

# Build the frame once, as floats, rather than filling an object-dtype frame
# row by row.
htf_diff_dist = pd.DataFrame(diff_rows, columns=list(thresholds)).abs()
htf_diff_dist


Unnamed: 0,minor,moderate,major,nws_minor,nws_moderate,nws_major,100yr,10yr,2yr,1yr
0,1.699767,0.199973,0.0,0.099986,0.0,0.0,0.0,0.099986,0.499932,1.599781
1,2.200301,0.200027,0.0,0.100014,0.0,0.0,0.0,0.200027,0.500068,1.500205
2,0.999863,0.099986,0.0,0.099986,0.0,0.0,0.0,0.099986,0.099986,0.0
3,0.601834,0.099849,0.0,0.000027,0.0,0.0,0.0,0.100041,0.600411,1.103107
4,0.108598,0.333729,0.0,0.091544,0.0,0.0,0.0,0.19153,0.356242,2.137957
...,...,...,...,...,...,...,...,...,...,...
95,2.69963,0.799891,0.0,0.0,0.0,0.0,0.0,0.099986,0.899877,3.099576
96,0.300041,0.600082,0.0,0.200027,0.0,0.0,0.0,0.200027,0.500068,0.600082
97,1.401643,0.500068,0.0,0.0,0.0,0.0,0.0,0.0,1.100233,2.402765
98,1.000137,0.600082,0.0,0.100014,0.0,0.0,0.0,0.100014,0.80011,0.400055


In [64]:
# For each threshold, the fraction of resampled decade pairs whose absolute
# difference exceeds the magnitude of the observed first-to-last change.
n_samples = htf_diff_dist.index.size

exceed_fractions = []
for h in thresholds:
    observed_magnitude = np.abs(htf_change_nrm.loc[h])
    n_larger = (htf_diff_dist.loc[:, h] > observed_magnitude).sum()
    exceed_fractions.append(n_larger / n_samples)

htf_diff_prob = pd.Series(exceed_fractions, index=thresholds)
htf_diff_prob

minor           0.66
moderate        0.90
major           0.00
nws_minor       0.54
nws_moderate    0.00
nws_major       0.00
100yr           0.00
10yr            0.40
2yr             0.99
1yr             0.10
dtype: float64

In [7]:
def _days_above(month_dmax, level):
    """Count the daily maxima in one month that exceed `level`; return None
    when the month has fewer than 24 non-missing days."""
    if month_dmax.dropna().count() >= 24:
        return (month_dmax > level).sum()
    return None


# Group the daily maxima by calendar month (labelled by month start), then
# count exceedances of every threshold within each month.
monthly_groups = dmax.groupby(pd.Grouper(freq="MS"))

monthly_counts = {}
for h in thresholds:
    monthly_counts[h] = monthly_groups.apply(
        lambda month: _days_above(month, thresholds[h])
    )

fld_dys["monthly"] = pd.DataFrame(monthly_counts)

fld_dys["monthly"]


Unnamed: 0_level_0,minor,moderate,major,nws_minor,nws_moderate,nws_major,100yr,10yr,2yr,1yr
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1914-01-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1914-02-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1914-03-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1914-04-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1914-05-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...
2021-08-01,0.0,0.0,0.0,8.0,0.0,0.0,0.0,0.0,2.0,13.0
2021-09-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0
2021-10-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2021-11-01,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,7.0


In [8]:
# Seasonal totals: sum the monthly exceedance counts over all years,
# grouped by calendar month number.
monthly_table = fld_dys["monthly"]
calendar_month = monthly_table.index.month
fld_dys["clim"] = monthly_table.groupby(calendar_month).sum()
fld_dys["clim"]

Unnamed: 0_level_0,minor,moderate,major,nws_minor,nws_moderate,nws_major,100yr,10yr,2yr,1yr
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,0.0,0.0,0.0,107.0,0.0,0.0,0.0,0.0,26.0,288.0
2,0.0,0.0,0.0,29.0,0.0,0.0,0.0,0.0,1.0,102.0
3,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,8.0
4,0.0,0.0,0.0,8.0,0.0,0.0,0.0,0.0,4.0,26.0
5,0.0,0.0,0.0,43.0,0.0,0.0,0.0,1.0,8.0,110.0
6,0.0,0.0,0.0,94.0,0.0,0.0,0.0,1.0,37.0,241.0
7,0.0,0.0,0.0,168.0,0.0,0.0,0.0,6.0,50.0,371.0
8,0.0,0.0,0.0,164.0,0.0,0.0,0.0,7.0,47.0,378.0
9,0.0,0.0,0.0,79.0,0.0,0.0,0.0,1.0,21.0,216.0
10,0.0,0.0,0.0,68.0,0.0,0.0,0.0,2.0,20.0,209.0


In [9]:
# Convert the flood-day tables into plain, JSON-friendly containers.
def _as_counts(values):
    """Turn each value into an int; anything that is not >= 0 (such as a
    missing value) is encoded as -1."""
    return [int(c) if c >= 0 else -1 for c in values]


annual_table = fld_dys["annual"]
monthly_table = fld_dys["monthly"]
clim_table = fld_dys["clim"]
level_names = annual_table.columns

fld_dys_json = {}

# Annual: the list of year labels plus one list of counts per threshold.
fld_dys_json["annual"] = {
    "years": annual_table.index.values.tolist(),
    "levels": {h: _as_counts(annual_table[h].values) for h in level_names},
}

# Monthly: per threshold, a mapping from year (as a string) to that year's
# monthly counts.
fld_dys_json["monthly"] = {}
for h in level_names:
    fld_dys_json["monthly"][h] = {
        str(y): _as_counts(monthly_table[h].loc[str(y)])
        for y in monthly_table.index.year.unique()
    }

# Climatology: per threshold, one total per calendar month.
fld_dys_json["clim"] = {h: _as_counts(clim_table[h]) for h in level_names}


In [11]:
# Show the daily maxima and minima side by side in one table.
daily_extremes = {"day_max": dmax, "day_min": dmin}
pd.DataFrame(daily_extremes)

Unnamed: 0_level_0,day_max,day_min
time,Unnamed: 1_level_1,Unnamed: 2_level_1
1914-01-01,-0.126,-0.491
1914-01-02,-0.187,-0.583
1914-01-03,-0.217,-0.613
1914-01-04,-0.217,-0.552
1914-01-05,-0.187,-0.583
...,...,...
2021-12-27,-0.096,-0.532
2021-12-28,-0.070,-0.535
2021-12-29,0.039,-0.581
2021-12-30,0.152,-0.596


In [None]:
# Date bounds [start, end] of the first and last ten meteorological years.
# Each entry of met_yrs is a [start, end] pair, so the last decade has to be
# addressed through the outer list (met_yrs[-10] .. met_yrs[-1]); indexing
# the inner pair with -10 would raise IndexError.
first_dec = [met_yrs[0][0], met_yrs[9][1]]
last_dec = [met_yrs[-10][0], met_yrs[-1][1]]

# Mean water level over each of the two decades.
# NOTE(review): the original cell was left unfinished (unbalanced brackets);
# this completion assumes a per-decade mean of `tg` was intended - confirm.
msl_dec = [tg.loc[start:end].mean() for start, end in (first_dec, last_dec)]

In [12]:
# Display the full list of [start, end] date-string pairs, one per
# meteorological year.
met_yrs

[['1914-05-01', '1915-04-30'],
 ['1915-05-01', '1916-04-30'],
 ['1916-05-01', '1917-04-30'],
 ['1917-05-01', '1918-04-30'],
 ['1918-05-01', '1919-04-30'],
 ['1919-05-01', '1920-04-30'],
 ['1920-05-01', '1921-04-30'],
 ['1921-05-01', '1922-04-30'],
 ['1922-05-01', '1923-04-30'],
 ['1923-05-01', '1924-04-30'],
 ['1924-05-01', '1925-04-30'],
 ['1925-05-01', '1926-04-30'],
 ['1926-05-01', '1927-04-30'],
 ['1927-05-01', '1928-04-30'],
 ['1928-05-01', '1929-04-30'],
 ['1929-05-01', '1930-04-30'],
 ['1930-05-01', '1931-04-30'],
 ['1931-05-01', '1932-04-30'],
 ['1932-05-01', '1933-04-30'],
 ['1933-05-01', '1934-04-30'],
 ['1934-05-01', '1935-04-30'],
 ['1935-05-01', '1936-04-30'],
 ['1936-05-01', '1937-04-30'],
 ['1937-05-01', '1938-04-30'],
 ['1938-05-01', '1939-04-30'],
 ['1939-05-01', '1940-04-30'],
 ['1940-05-01', '1941-04-30'],
 ['1941-05-01', '1942-04-30'],
 ['1942-05-01', '1943-04-30'],
 ['1943-05-01', '1944-04-30'],
 ['1944-05-01', '1945-04-30'],
 ['1945-05-01', '1946-04-30'],
 ['1946-