In [35]:
import xarray as xr
import numpy as np
from stompy.plot import nbviz, plot_wkb, plot_utils
from stompy.grid import unstructured_grid
from stompy.spatial import wkb2shp
from stompy import utils, filters, memoize
import shlex
import pandas as pd
import statsmodels.formula.api as smf

import os
import six
import matplotlib.pyplot as plt
import stompy.model.delft.waq_scenario as dwaq
import stompy.model.delft.io as dio

In [37]:
#%matplotlib notebook

In [38]:
# Hydrodynamic input for the DWAQ runs. Kept as a notebook-level global:
# FluxAnalysis.discharge() passes it to transect_flow() to extract transect flows.
hydro = dwaq.HydroFiles(hyd_path="../v148_jun_28_2016_dwaq_merge/com-v148_jun_28_2016_dwaq_merge.hyd")

In [39]:
# Select the DWAQ run to analyze. run_dir is a notebook-level global that is also
# read by FluxAnalysis.discharge() (for the transect definitions and the flow cache).
# Earlier runs are left commented out for reference.
#run_dir="/home/rusty/src/data_lsb_tracer_00"
#run_dir="./data_lsb_tracer_01b" # only 5 days... output maybe in dwaq2_map
#run_dir="./data_lsb_tracer_01" 
run_dir="./data_lsb_tracer_02" # additional Palo Alto source, maybe better pond tracer values. 
# Map output for the selected run, opened from the run's dwaq_map.nc.
map_ds=xr.open_dataset(os.path.join(run_dir,'dwaq_map.nc')) # currently incomplete

In [73]:
# LP advective flux?

# Extract flow manually. Scans the hydro which takes some time.
# Cache results in the waq run directory

def transect_flow(run_dir,hydro,transect_name,force=False):
    """
    Return the discharge time series through a named transect, caching the
    result as netcdf under <run_dir>/cache/Q_<transect_name>.nc.

    run_dir: DWAQ run directory. Its waqmodel.inp supplies the transect
      definition, and the cache directory is created inside it.
    hydro: hydro object providing extract_transect_flow(). Only used when
      the flow has to be (re)computed.
    transect_name: name of the transect as defined in waqmodel.inp.
    force: if True, recompute and overwrite the cache file even if it exists.

    Returns an xarray DataArray. A freshly computed result is named
    "discharge" before being written to the cache.
    """
    cache_dir=os.path.join(run_dir,"cache")
    # exist_ok avoids the check-then-create race of exists()+makedirs().
    os.makedirs(cache_dir,exist_ok=True)

    cache_fn = os.path.join(cache_dir,f"Q_{transect_name}.nc")
    if force or not os.path.exists(cache_fn):
        # Slow path: scans the hydro, which takes some time.
        inp=dwaq.InpReader(os.path.join(run_dir,'waqmodel.inp'))
        Q=hydro.extract_transect_flow(inp.get_transect_by_name(transect_name))
        Q.name="discharge"
        Q.to_netcdf(cache_fn)
    else:
        Q=xr.load_dataarray(cache_fn)

    return Q

In [40]:
# Build the unstructured grid from the map output and derive its boundary
# polygon, which is drawn as the shoreline overlay in the map plot below.
g=unstructured_grid.UnstructuredGrid.read_ugrid(map_ds)
shore=g.boundary_polygon()
if 0: # disabled: quick-look plot of the grid edges
    fig,ax=plt.subplots(figsize=(9.5,8))
    ax.set_adjustable('datalim')
    g.plot_edges(color='k',lw=0.5,alpha=0.6)

INFO:join_features:0 open strings, 24 simple polygons
INFO:join_features:Building index
  p.join_id=i
  index=STRtree(simple_polys)
INFO:join_features:done building index
INFO:join_features:Examining largest poly left with area=3242487807.773357, 23 potential interiors


In [41]:
# Map of the pond_chl tracer at the last time step, first layer index.
fig,ax=plt.subplots(figsize=(9.5,8))

scal=map_ds.pond_chl.isel(layer=0,time=-1)

# Cells equal to -999 are treated as dry and drawn in gray underneath.
# NOTE(review): presumably -999 is the DWAQ missing-value marker -- confirm.
dry=scal==-999

g.plot_cells(ax=ax,mask=~dry,values=scal,cmap='turbo',clim=[0,2],ec='face',lw=1.2)
g.plot_cells(ax=ax,mask=dry,color="0.8",zorder=-1)
plot_wkb.plot_wkb(shore,fc='none',ec='k',lw=0.4,zorder=3)

# Fill the whole figure with the axes and hide the frame/ticks.
ax.set_position([0,0,1,1])
ax.axis('off')

# Zoom to a fixed bounding box (xmin, xmax, ymin, ymax) in grid coordinates.
ax.axis( (576015., 593479., 4140885., 4152402.))

<IPython.core.display.Javascript object>

(576015.0, 593479.0, 4140885.0, 4152402.0)

Flux Analysis
==

In [42]:
# History output of the selected run. hist_ds is a notebook-level global:
# FluxAnalysis reads hist_ds.bal and hist_ds.time directly.
hist_ds = xr.open_dataset(os.path.join(run_dir,'dwaq_hist.nc'))

In [177]:
# For instance, relate flux at alv2 (ALV moored sensor)
# to the concentrations at alv1 (upstream) and alv3 (downstream).

class FluxAnalysis:
    """
    Relate the flux of a tracer at one station to the concentration
    difference between an upstream and a downstream station.

    Settings are class attributes which can be overridden per instance
    by keyword, e.g. FluxAnalysis(scalar='san_mateo',norm='lp_gradient').

    Data come from notebook-level globals rather than arguments:
    hist_ds (history output) is read by most methods, and run_dir / hydro
    are passed to transect_flow() by discharge().
    """
    flux_station ='alv2'
    up_station   = 's_alv1'
    mid_station  ='s_alv2'
    down_station ='s_alv3'
    scalar='san_jose'
    norm=None # 'lp_gradient' normalize so lp gradient has rms of 1.0

    def __init__(self,**kw):
        utils.set_keywords(self,kw)

    @property
    @memoize.imemoize()
    def scale(self):
        """ A factor to scale flux-gradient data to unit range.
        This is automatically applied in plotting, but not directly
        to any data.

        Returns 1.0 when norm is None. For norm=='lp_gradient' returns the
        reciprocal of the RMS of the low-passed gradient (RMS floored at 1e-6).
        Raises ValueError for any other norm.
        """
        if self.norm is None: return 1.0
        if self.norm=='lp_gradient':
            rms=np.sqrt(np.mean(self.lp(self.gradient())**2))
            return 1.0/max(1e-6,rms)
        # An explicit exception rather than `assert False`: asserts are removed
        # under python -O, which would silently return None here.
        raise ValueError(f"Unrecognized norm: {self.norm!r}")

    def flux_values(self):
        "Balance output for scalar at flux_station, divided by the time step."
        # ostensibly downstream-directed, but 50/50 chance I got signs correct.
        # DWAQ values are period-integrated. convert to a rate.
        return hist_ds.bal.sel(field=self.scalar,region=self.flux_station) / self.dt_s()
    def QC_flux_values(self):
        "Instantaneous flux estimate: mid-station value times discharge."
        return self.conc_values() * self.discharge()
    def QbarCbar_flux_values(self):
        "Product of the low-passed mid-station value and the low-passed discharge."
        return self.lp(self.conc_values()) * self.lp(self.discharge())

    def lp_disp_flux_values(self):
        "Low-passed Q*C flux minus the product of low-passed Q and low-passed C."
        return self.lp(self.QC_flux_values()) - self.QbarCbar_flux_values()

    def dt_s(self):
        "Median spacing of hist_ds.time, in seconds."
        return np.median( np.diff( hist_ds.time.values ) /np.timedelta64(1,'s') )

    def lp(self,v):
        """
        Low-pass v with filters.lowpass_godin, using dt_s() for the time step.
        v may be a plain array or anything with a .values attribute.
        """
        try:
            v=v.values
        except AttributeError:
            pass
        return filters.lowpass_godin(v,mean_dt_h=self.dt_s()/3600.0)

    def spring_neap(self,v,standard=True):
        """RMS amplitude filter for spring-neap indicator

        v: plain array or anything with a .values attribute.
        standard: if True, subtract the mean and divide by the standard
          deviation before returning.

        The first and last 36 hours of samples are overwritten with the
        value found at the edge of the trimmed span.
        """
        try:
            v=v.values
        except AttributeError:
            pass
        prime=v-self.lp(v)
        result = np.sqrt( self.lp(prime**2) )
        trim_samples = int( (36.0 * 3600) / self.dt_s() )
        # With trim_samples==0, result[-0:] selects the whole array and the
        # assignment would flatten the entire series, so skip the trim then.
        if trim_samples>0:
            result[:trim_samples]  = result[ trim_samples]
            result[-trim_samples:] = result[-trim_samples]

        if standard:
            result = (result-result.mean())/np.std(result)
        return result

    def conc_values(self):
        "hist_ds.bal for scalar at mid_station."
        return hist_ds.bal.sel(field=self.scalar,region=self.mid_station)
    def conc_up_values(self):
        "hist_ds.bal for scalar at up_station."
        return hist_ds.bal.sel(field=self.scalar,region=self.up_station)
    def conc_dn_values(self):
        "hist_ds.bal for scalar at down_station."
        return hist_ds.bal.sel(field=self.scalar,region=self.down_station)
    def gradient(self):
        "Upstream minus downstream value (a difference, not divided by distance)."
        return self.conc_up_values() - self.conc_dn_values()

    def discharge(self):
        """
        Transect flow for flux_station, interpolated onto hist_ds.time.
        Uses the globals run_dir and hydro via transect_flow().
        """
        Q = transect_flow(run_dir,hydro,transect_name=self.flux_station)
        # I think that for a given time, Q from the hydro is the flow in the following
        # interval, while the history output reflects the previous interval.
        # If nothing else, this adjustment gets the dwaq flux and Q*C flux into agreement.
        Q_dt = np.median(np.diff(Q.time))
        return np.interp( hist_ds.time, Q.time+Q_dt, Q.values )

    def df(self,normalize=True):
        """
        Tabulate the low-passed series as a DataFrame with columns time, scalar,
        lp_gradient, lp_flux, disp_flux and sn_Q.

        normalize: if True, gradient and flux columns are multiplied by
          self.scale. sn_Q is never multiplied by scale.
        """
        df=pd.DataFrame()
        scale=self.scale if normalize else 1.0
        df['time']=hist_ds.time
        df['scalar']=self.scalar
        df['lp_gradient']=scale * self.lp(self.gradient())
        df['lp_flux']=scale*self.lp(self.flux_values())
        df['disp_flux']=scale*self.lp_disp_flux_values()
        df['sn_Q'] = self.spring_neap(self.discharge())
        return df

    def plot_timeseries(self):
        """
        Two stacked panels sharing the time axis: flux terms on top,
        up/down station values below. All series are multiplied by
        self.scale. Returns the figure.
        """
        fig,axs=plt.subplots(2,1,sharex=True,figsize=[8.5,5])

        ax_J,ax_C = axs

        #ax_J.plot( hist_ds.time, self.scale * self.flux_values(), label="DWAQ flux")
        ax_J.plot( hist_ds.time, self.scale * self.QC_flux_values(),alpha=0.4,lw=0.5,label="QC")
        ax_J.plot( hist_ds.time, self.scale * self.lp(self.QC_flux_values()),label="<QC>")
        ax_J.plot( hist_ds.time, self.scale * self.QbarCbar_flux_values(),label="<Q><C>")
        ax_J.plot( hist_ds.time, self.scale * self.lp_disp_flux_values(),label="<Q'C'>")

        ax_C.plot( hist_ds.time, self.scale * self.conc_up_values(),label=f'up ({self.up_station})')
        ax_C.plot( hist_ds.time, self.scale * self.conc_dn_values(),label=f'dn ({self.down_station})')
        #ax_C.plot( hist_ds.time, self.scale * self.conc_values(),label=f'mid ({self.mid_station})')

        ax_C.legend(loc='upper left')
        ax_J.legend(loc='upper left')
        ax_C.set_ylabel(f'Conc {self.scalar}')
        ax_J.set_ylabel(f'Flux {self.scalar}')
        fig.autofmt_xdate()
        fig.subplots_adjust(right=0.97,bottom=0.14,top=0.96)
        return fig

    def plot_scatter(self):
        "Scatter of low-passed flux terms against low-passed gradient. Returns the figure."
        fig,ax=plt.subplots(figsize=(5,4))
        ax.set_xlabel('Gradient')
        ax.set_ylabel('Flux')
        #self.add_inst_scatter(ax=ax)
        self.add_lp_scatter(ax=ax)
        ax.legend(loc='upper left')
        ax.axhline(0,color='k',lw=0.5)
        ax.axvline(0,color='k',lw=0.5)
        fig.subplots_adjust(left=0.16,top=0.97,right=0.97)
        return fig

    def add_inst_scatter(self,ax):
        "Add unfiltered flux vs. gradient points to ax."
        ax.plot( self.scale*self.gradient(), self.scale*self.flux_values(),
                '.', ms=2, alpha=0.4, label=f'Inst. {self.scalar}')

    def add_lp_scatter(self,ax,set_limits=True,disp=True,total=True):
        """
        Add low-passed flux vs. low-passed gradient points to ax.

        total: plot the low-passed flux_values().
        disp: plot lp_disp_flux_values().
        set_limits: if True and at least one series was plotted, set the
          axis limits to the extent of all plotted series, expanded via
          utils.expand_xxyy(...,0.1).
        """
        grad_lp=self.scale*self.lp(self.gradient())
        y_series=[] # every flux series actually drawn, for the limits below
        if total:
            flux_lp=self.scale*self.lp(self.flux_values())
            ax.plot( grad_lp, flux_lp, '.', label=f'LP {self.scalar}')
            y_series.append(flux_lp)
        if disp:
            disp_lp=self.scale*self.lp_disp_flux_values()
            ax.plot( grad_lp, disp_lp, '.', label=f"<Q'C'> {self.scalar}")
            y_series.append(disp_lp)

        if set_limits and y_series:
            # Span all plotted series. Previously the disp extents replaced the
            # total-flux extents when both were drawn, which could crop points.
            xxyy=[ grad_lp.min(), grad_lp.max(),
                   min(y.min() for y in y_series),
                   max(y.max() for y in y_series)]
            ax.axis( utils.expand_xxyy(xxyy,0.1) )
    

In [178]:
# Unnormalized flux/concentration time series for the san_jose tracer.
fa=FluxAnalysis(scalar='san_jose')
fig=fa.plot_timeseries()
#fig=fa.plot_scatter()

# Make room on the right for a second y axis on the top (flux) panel.
fig.subplots_adjust(right=0.84)
ax2=fig.axes[0].twinx()

# Flow-based spring-neap indicator
Qspring_neap = fa.spring_neap(fa.discharge())
ax2.plot(hist_ds.time, Qspring_neap,color='m')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x7f650d3cbd90>]

In [179]:
# Get a stage value for more realistic spring-neap process
# Exploratory only: list the attributes of hydro to look for a suitable accessor.
dir(hydro)

['BOUNDARY',
 'CLOSED',
 'SIGMA',
 'SINGLE',
 'VERT_UNKNOWN',
 'ZLAYER',
 '_QtodV',
 '_QtodVabs',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_areas_mmap',
 '_boundary_defs',
 '_flows_mmap',
 '_grid',
 '_n_seg',
 '_params',
 '_read_seg_attrs',
 '_seg_mmap',
 '_seg_to_exchs',
 '_t_secs',
 '_vertical',
 'add_parameters',
 'adjust_boundaries_for_conservation',
 'are_dtype',
 'are_filename',
 'areas',
 'bc_segs',
 'bnd_filename',
 'bottom_depths',
 'bottom_depths_2d',
 'boundary_defs',
 'boundary_dtype',
 'boundary_scheme',
 'check_volume_conservation_incr',
 'check_volume_conservation_nonincr',
 'clean_mpi_dfm_grid',
 'coerce_to_eleme

In [144]:
# san_mateo tracer, scaled so the low-passed gradient has unit RMS.
# fig is rebound, so only the scatter figure stays referenced afterwards.
fa=FluxAnalysis(scalar='san_mateo',norm='lp_gradient')
fig=fa.plot_timeseries()
fig=fa.plot_scatter()


  fig,axs=plt.subplots(2,1,sharex=True,figsize=[8.5,5])


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [145]:
# pond_chl tracer, scaled so the low-passed gradient has unit RMS.
# fig is rebound, so only the scatter figure stays referenced afterwards.
fa=FluxAnalysis(scalar='pond_chl',norm='lp_gradient')
fig=fa.plot_timeseries()
fig=fa.plot_scatter()


  fig,axs=plt.subplots(2,1,sharex=True,figsize=[8.5,5])


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [146]:
# palo_alto tracer, scaled so the low-passed gradient has unit RMS.
# fig is rebound, so only the scatter figure stays referenced afterwards.
fa=FluxAnalysis(scalar='palo_alto',norm='lp_gradient')
fig=fa.plot_timeseries()
fig=fa.plot_scatter()

  fig,axs=plt.subplots(2,1,sharex=True,figsize=[8.5,5])


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [156]:
class GroupFluxAnalysis:
    """
    A set of FluxAnalysis objects, one per tracer in `scalars`, each built
    with norm='lp_gradient' so their scatters can share one axes and one fit.

    Keyword arguments override class attributes (e.g. scalars=[...]).
    """
    scalars=['pond_chl','san_jose','san_mateo','sunnyvale','palo_alto']

    def __init__(self,**kw):
        utils.set_keywords(self,kw)

        self.fas=[]
        for name in self.scalars:
            self.fas.append( FluxAnalysis(scalar=name,norm='lp_gradient') )

    def df(self):
        "Per-tracer tables from FluxAnalysis.df(), stacked with pd.concat."
        return pd.concat([fa.df() for fa in self.fas])

    def plot_scatter(self,disp=True):
        """
        Scatter flux against gradient for every tracer on one axes and add
        a linear fit. disp=True plots the <Q'C'> series only; disp=False
        plots the low-passed total flux only. Returns the figure.
        """
        fig,ax = plt.subplots(figsize=(7,5))

        show_total = not disp
        for fa in self.fas:
            fa.add_lp_scatter(ax,set_limits=False,disp=disp,total=show_total)

        # Zero reference lines, vertical first and then horizontal.
        ax.axvline(0,color='k',lw=0.5)
        ax.axhline(0,color='k',lw=0.5)

        self.add_fit(ax=ax,disp=disp)

        ax.legend(loc='upper right',frameon=0)
        ax.set_xlabel('Gradient')
        ax.set_ylabel('Flux')
        return fig

    def add_fit(self,ax,disp=True):
        """
        Fit flux against lp_gradient by ordinary least squares over all
        tracers, draw the fitted line on ax and label it with r^2.
        The response is disp_flux when disp is True, otherwise lp_flux.
        """
        table=self.df()
        response = "disp_flux" if disp else "lp_flux"
        formula = response + " ~ lp_gradient"

        fit = smf.ols(formula,data=table).fit()
        # Sort by the predictor so the fitted line is drawn left to right.
        ordered=table.sort_values('lp_gradient')
        predicted=fit.predict(exog=ordered)
        lines=ax.plot(ordered['lp_gradient'],predicted,'k-',label='Fit')

        annot=f"r$^2$={fit.rsquared:.2f}"
        plot_utils.annotate_line(lines[0],annot,ax=ax,
                                 buff=dict(foreground='w',linewidth=3.5),
                                 offset_points=15)

# One figure with the total low-passed flux, then one with the <Q'C'> term.
# fig ends up bound to the second (disp=True) figure.
gfa=GroupFluxAnalysis()
fig=gfa.plot_scatter(disp=False) ;
fig=gfa.plot_scatter(disp=True) ;


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [157]:
# Display the stacked per-tracer table that the fits are computed from.
gfa.df()

Unnamed: 0,time,scalar,lp_gradient,lp_flux,disp_flux
0,2016-06-28 14:00:00,pond_chl,0.007731,0.109506,0.225186
1,2016-06-28 14:30:00,pond_chl,0.008228,0.116042,0.229270
2,2016-06-28 15:00:00,pond_chl,0.008738,0.121339,0.231695
3,2016-06-28 15:30:00,pond_chl,0.009263,0.124715,0.232113
4,2016-06-28 16:00:00,pond_chl,0.009801,0.125840,0.230444
...,...,...,...,...,...
3308,2016-09-05 12:00:00,palo_alto,-0.370844,1.683672,1.562459
3309,2016-09-05 12:30:00,palo_alto,-0.354392,1.565730,1.470616
3310,2016-09-05 13:00:00,palo_alto,-0.338070,1.457392,1.383476
3311,2016-09-05 13:30:00,palo_alto,-0.321883,1.360190,1.302551


In [159]:
# Same regression as GroupFluxAnalysis.add_fit(disp=True), repeated here so the
# full statsmodels summary can be displayed.
df=gfa.df()
result = smf.ols("disp_flux ~ lp_gradient",data=df).fit()
df_ordered=df.sort_values('lp_gradient')
y_ordered=result.predict(exog=df_ordered)
# Draws onto whatever figure `fig` currently refers to (set by an earlier cell).
ax=fig.axes[0]
ax.plot(df_ordered['lp_gradient'],y_ordered,'k-',label='Fit')

result.summary()

0,1,2,3
Dep. Variable:,disp_flux,R-squared:,0.85
Model:,OLS,Adj. R-squared:,0.85
Method:,Least Squares,F-statistic:,93940.0
Date:,"Fri, 16 Feb 2024",Prob (F-statistic):,0.0
Time:,15:42:22,Log-Likelihood:,-29788.0
No. Observations:,16565,AIC:,59580.0
Df Residuals:,16563,BIC:,59600.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.6913,0.014,51.167,0.000,0.665,0.718
lp_gradient,-4.1409,0.014,-306.504,0.000,-4.167,-4.114

0,1,2,3
Omnibus:,458.961,Durbin-Watson:,0.0
Prob(Omnibus):,0.0,Jarque-Bera (JB):,276.836
Skew:,-0.171,Prob(JB):,7.689999999999999e-61
Kurtosis:,2.467,Cond. No.,1.83


Mid-Run Output
==

Create nc files while the longer run is still going. 

In [36]:
# And the history:
# Reads waqmodel.his directly instead of the dwaq_hist.nc file.
# NOTE: this rebinds the global hist_ds that FluxAnalysis reads.
his_fn=os.path.join(run_dir,"waqmodel.his")
hist_ds=dio.his_file_xarray(his_fn)
 

In [132]:
# Balance is only defined for stations / regions. 
# Read from waqmodel-bal.his with the same reader as the history file.
bal_fn=os.path.join(run_dir,"waqmodel-bal.his")
bal_ds=dio.his_file_xarray(bal_fn)

In [7]:
# Re-transcribe output 
# Disabled by default; flip to True to rewrite the history and map netcdf files
# for the data_lsb_tracer_01 run (the map file is overwritten).
if False: 
    model=dwaq.WaqModel(base_path="./data_lsb_tracer_01", hydro=hydro)
    print("Writing history")
    model.write_binary_his_nc()
    print("Writing map")
    model.write_binary_map_nc(overwrite=True)
    
if False:
    run_dir="./data_lsb_tracer_01"
    output_fn=os.path.join(run_dir,"tmp-dwaq_map.nc")

    map_fn=os.path.join(run_dir,"waqmodel.map")
    print("Reading map file")
    map_ds=dio.read_map(map_fn,hydro) # 60s?. Adding z takes way too long.
