In [1]:
import os
import hydromt
from hydromt import DataCatalog
from hydromt_sfincs import SfincsModel, utils
import geopandas as gpd
import pandas as pd
import xarray as xr
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.colors import LinearSegmentedColormap
from datetime import datetime, timedelta
from mpl_toolkits.axes_grid1 import make_axes_locatable
import cartopy.crs as ccrs

In [2]:
def calculate_stats(station_id, df, tstart=None, tend=None):
    if tstart:
        df = df[df.index > tstart]
    if tend:
        df = df[df.index < tend]

    n = len(df)
    # Mean Absolute Error
    mae = sum(abs(df.Modeled - df.Observed)) / n
    # Mean Error or Bias
    bias = sum(df.Modeled - df.Observed) / n
    # Root Mean Squared Error
    rmse = sum(((df.Observed - df.Modeled) ** 2) / n) ** 0.5
    # Peak Error
    pe = df.Modeled.max() - df.Observed.max()
    # Time to Peak - Error
    tpe = df.Modeled.idxmax() - df.Observed.idxmax()
    # Nash-Sutcliffe Efficiency (NSE)
    nse = 1 - (sum((df.Modeled - df.Observed) ** 2) / sum((df.Observed - df.Observed.mean()) ** 2))
    # Correlation Coefficient (Pearson)
    try:
        r = sum(((df.Modeled - df.Modeled.mean()) * (df.Observed - df.Observed.mean()))) / (
                sum((df.Modeled - df.Modeled.mean()) ** 2) * sum((df.Observed - df.Observed.mean()) ** 2)) ** 0.5
    except:
        print('Correlation Coefficient problem')
        r = 0

    # Coefficient of Determination
    r2 = r ** 2

    # Save stats in a dataframe for output
    stats = pd.DataFrame(data={'station_id': station_id,
                               'mae': round(mae, 2),
                               'rmse': round(rmse, 2),
                               'nse': round(nse, 2),
                               'bias': round(bias, 2),
                               'r': round(r, 2),
                               'r2': round(r2, 2),
                               'pe': round(pe, 2),
                               'tpe': round(tpe.seconds, 1),
                               'mod_peak_wl': round(df.Modeled.max(), 2),
                               'obs_peak_wl': round(df.Observed.max(), 2)
                               },
                         index=[0]
                         )
    peak_dt = [df.Observed.idxmax(), df.Modeled.idxmax()]

    return stats, peak_dt



def clean_obs_coords(obs_df, source_crs, target_crs):
    # Clean up the observation data and the coordinates
    if 'geometry' in list(obs_df.coords):
        pts = gpd.GeoDataFrame(obs_df.station.values,
                               geometry=obs_df.geometry.values,
                               crs=source_crs)
        pts.to_crs(target_crs, inplace=True)
        pts.columns = ['site_no', 'geometry']
        obs_df.geometry.values = pts.geometry
    else:
        pts = gpd.GeoDataFrame(obs_df.station,
                               geometry=gpd.points_from_xy(x=obs_df.x.values,
                                                           y=obs_df.y.values,
                                                           crs=source_crs))
        pts.to_crs(target_crs, inplace=True)
        pts.columns = ['site_no', 'geometry']
        obs_df.x.values = pts.geometry.x
        obs_df.y.values = pts.geometry.y

    return pts, obs_df

In [3]:
# Load data catalog and model results
model_root = r'Z:\Data-Expansion\users\lelise\projects\Carolinas_SFINCS\Chapter1_FlorenceValidation\sfincs_models\mod_v4_flor' \
             r'\ENC_200m_sbg5m_avgN_adv1_eff75'
mod = SfincsModel(root=model_root, mode='r')
cat = mod.data_catalog
mod.read_results(fn_his='sfincs_his.nc')
print(mod.results.keys())

dict_keys(['inp', 'msk', 'qinf', 'zb', 'zs', 'Seff', 'zsmax', 'tmax', 'vmax', 'total_runtime', 'average_dt', 'point_zb', 'structure_height', 'point_zs', 'point_qinf', 'point_S'])


In [4]:
# Get the station data for querying SFINCS results
mod_zs_da = mod.results['point_zs']
mod_zs_lookup = pd.DataFrame()
mod_zs_lookup['station_id'] = mod_zs_da['station_id'].values
mod_zs_lookup['station_name'] = [x.decode('utf-8').strip() for x in mod_zs_da['station_name'].values]
mod_zs_lookup['data_source'] = [x.rsplit('_', 1)[0] for x in mod_zs_lookup['station_name']]
mod_zs_lookup['data_source_id'] = [x.split('_')[-1] for x in mod_zs_lookup['station_name']]

In [5]:
# LOAD THE OBSERVED WATER LEVEL TIMESERIES
agency ='USGS'
obs_dataset = r'Z:\Data-Expansion\users\lelise\data\storm_data\hurricanes\2018_florence\waterlevel\carolinas_usgs_waterlevel_20180815_20181015_DATA.nc'
obs_da = cat.get_geodataset(obs_dataset, geom=mod.region, variables=["waterlevel"], crs=4326)

pts, obs = clean_obs_coords(obs_df=obs_da, source_crs=4326, target_crs=mod.crs.to_epsg())

# Loop through the observation locations and extract model data
mod_zs_lookup_sub = mod_zs_lookup[mod_zs_lookup['data_source'] == agency]

In [7]:
# Create empty lists/df to save information to when looping through the observation gages
calculate_gage_stats = False
if calculate_gage_stats is True:
    station_stats = pd.DataFrame()
    invalid_obs, valid_obs = [], []
    for index, row in mod_zs_lookup_sub.iterrows():
        data_source_id = int(row['data_source_id'])
        if data_source_id in obs_da.station.values.tolist():
            obs_zs = obs.sel(station=data_source_id)
            mod_zs = mod_zs_da.sel(stations=index)

            # Add observed and modeled data into a single dataframe
            obs_df = pd.DataFrame(data=obs_zs.values, index=obs_zs.time.values, columns=['Observed'])
            mod_df = pd.DataFrame(data=mod_zs.values, index=mod_zs.time.values, columns=['Modeled'])
            merged_df = pd.concat([obs_df, mod_df], axis=1)
            merged_df.dropna(inplace=True)

            # If the dataframe is empty or there are fewer than 20 observation points,
            # append the gage ID to the list of "invalid_obs"
            if merged_df.empty or len(merged_df) < 50:
                print(f'No data for gage: {data_source_id}')
                invalid_obs.append(data_source_id)
            else:
                valid_obs.append(data_source_id)

            # Calculate the hydrograph stats at the station and add to master dataframe
            ss, _ = calculate_stats(station_id=data_source_id, df=merged_df, tstart=None, tend=None)
            ss['source'] = agency
            station_stats = pd.concat([station_stats, ss], ignore_index=True)

station_stats.set_index('station_id', drop=True, inplace=True)
pts.set_index('site_no', drop=True, inplace=True)
stats_out = pd.concat([pts, station_stats], axis=1, ignore_index=False)
stats_out = gpd.GeoDataFrame(stats_out, geometry='geometry', crs=mod.crs)
stats_out['x'] = stats_out.geometry.x
stats_out['y'] = stats_out.geometry.y

In [8]:
with pd.option_context('display.max_rows', None, 'display.max_columns', None):  # more options can be specified also
    print(stats_out)

                                 geometry   mae  rmse   nse  bias     r    r2  \
208250410  POINT (781222.448 3977329.853)   NaN   NaN   NaN   NaN   NaN   NaN   
2082585    POINT (789772.995 3983699.566)   NaN   NaN   NaN   NaN   NaN   NaN   
2082770    POINT (777240.357 4000789.904)   NaN   NaN   NaN   NaN   NaN   NaN   
2082950     POINT (780938.091 4008807.17)   NaN   NaN   NaN   NaN   NaN   NaN   
2083000    POINT (797530.128 4005715.872)   NaN   NaN   NaN   NaN   NaN   NaN   
2083500    POINT (812943.486 3977795.911)   NaN   NaN   NaN   NaN   NaN   NaN   
2084160    POINT (841656.289 3941873.867)   NaN   NaN   NaN   NaN   NaN   NaN   
2084472    POINT (857054.363 3940439.052)   NaN   NaN   NaN   NaN   NaN   NaN   
2084557     POINT (884804.715 3962445.58)   NaN   NaN   NaN   NaN   NaN   NaN   
2087500    POINT (734919.205 3947922.698)   NaN   NaN   NaN   NaN   NaN   NaN   
2087570    POINT (740377.968 3933110.854)   NaN   NaN   NaN   NaN   NaN   NaN   
208773375   POINT (726671.10

In [9]:
with pd.option_context('display.max_rows', None, 'display.max_columns', None):  # more options can be specified also
    print(mod_zs_lookup_sub)

     station_id    station_name data_source data_source_id
0           1.0    USGS_2136361        USGS        2136361
1           2.0    USGS_2136350        USGS        2136350
2           3.0    USGS_2110815        USGS        2110815
4           5.0   USGS_21108125        USGS       21108125
6           7.0    USGS_2110802        USGS        2110802
7           8.0    USGS_2136030        USGS        2136030
9          10.0    USGS_2136000        USGS        2136000
10         11.0    USGS_2135615        USGS        2135615
11         12.0   USGS_21355015        USGS       21355015
12         13.0    USGS_2135501        USGS        2135501
15         16.0    USGS_2135200        USGS        2135200
16         17.0    USGS_2110725        USGS        2110725
17         18.0    USGS_2110704        USGS        2110704
19         20.0    USGS_2110550        USGS        2110550
20         21.0    USGS_2110701        USGS        2110701
21         22.0    USGS_2135000        USGS        21350

In [14]:
for data_source_id in mod_zs_lookup_sub['data_source_id'].values.tolist():
    data_source_id = str(data_source_id)
    print(data_source_id)
    try:
        # Load the observed and modeled data for the select gage
        obs_zs = obs.sel(station=data_source_id)
        # Get the model data
        index = mod_zs_lookup_sub[mod_zs_lookup_sub['data_source_id'] == f'{data_source_id}'].index.item()
        mod_zs = mod_zs_da.sel(stations=index)
        # # Add observed and modeled data into a single dataframe
        obs_df = pd.DataFrame(data=obs_zs.values, index=obs_zs.time.values, columns=['Observed'])
        mod_df = pd.DataFrame(data=mod_zs.values, index=mod_zs.time.values, columns=['Modeled'])
        merged_df = pd.concat([obs_df, mod_df], axis=1)
        merged_df.dropna(inplace=True)
        #ss, _ = calculate_stats(station_id=data_source_id, df=merged_df, tstart=None, tend=None)
        fig, ax = plt.subplots()
        merged_df.plot(title=data_source_id, ax=ax)
        outfile = rf'Z:\Data-Expansion\users\lelise\projects\Carolinas_SFINCS\Chapter1_FlorenceValidation\sfincs_models\mod_v4_flor\ENC_200m_sbg5m_avgN_adv1_eff75\validation\hydrographs\{data_source_id}.png'
        plt.savefig(outfile)
        plt.close()
    except:
        print(f'Issue with {data_source_id}')

2136361
Issue with 2136361
2136350
Issue with 2136350
2110815
Issue with 2110815
21108125
Issue with 21108125
2110802
Issue with 2110802
2136030
Issue with 2136030
2136000
Issue with 2136000
2135615
Issue with 2135615
21355015
Issue with 21355015
2135501
Issue with 2135501
2135200
Issue with 2135200
2110725
Issue with 2110725
2110704
Issue with 2110704
2110550
Issue with 2110550
2110701
Issue with 2110701
2135000
Issue with 2135000
2134900
Issue with 2134900
2131510
Issue with 2131510
2132000
Issue with 2132000
2131010
Issue with 2131010
2131000
Issue with 2131000
2130980
Issue with 2130980
2131500
Issue with 2131500
2130810
Issue with 2130810
2130910
Issue with 2130910
2134500
Issue with 2134500
2134170
Issue with 2134170
2130561
Issue with 2130561
2129375
Issue with 2129375
2129000
Issue with 2129000
2131472
Issue with 2131472
2131455
Issue with 2131455
21313485
Issue with 21313485
2131452
Issue with 2131452
2110777
Issue with 2110777
2110500
Issue with 2110500
2110400
Issue with 211