In [396]:
import xarray as xr
import numpy as np
from stompy.plot import nbviz, plot_wkb, plot_utils
from stompy.grid import unstructured_grid
from stompy.spatial import wkb2shp
from stompy import utils, filters, memoize
from stompy.io.local import usgs_nwis 
import shlex
import pandas as pd
import statsmodels.formula.api as smf

import os
import six
import seaborn as sns
import matplotlib.pyplot as plt
import stompy.model.delft.waq_scenario as dwaq
import stompy.model.delft.io as dio

In [37]:
#%matplotlib notebook

In [398]:
# Notebook-level cache directory (used for the USGS downloads).
cache_dir='cache'
# exist_ok avoids the check-then-create race and is a no-op when the
# directory is already there.
os.makedirs(cache_dir, exist_ok=True)

In [365]:
# Hydrodynamic inputs keyed by season name; hydro() picks one per run.
hydros = {
    "summer": dwaq.HydroFiles(
        hyd_path="../v148_jun_28_2016_dwaq_merge/com-v148_jun_28_2016_dwaq_merge.hyd"
    ),
    "winter": dwaq.HydroFiles(
        hyd_path="../v148_feb_2016_dwaq_merge/com-v148_feb_2016_dwaq_merge.hyd"
    ),
}

In [455]:
# Select the DWAQ run used for the map plots and as the default run of
# FluxAnalysis. Earlier runs are kept, commented out, for reference.
#run_dir="/home/rusty/src/data_lsb_tracer_00"
#run_dir="./data_lsb_tracer_01b" # only 5 days... output maybe in dwaq2_map
#run_dir="./data_lsb_tracer_01" 
#run_dir="./data_lsb_tracer_02" # additional Palo Alto source, maybe better pond tracer values.
run_dir="./data_lsb_tracer_03" # winter period, drop diurnal chl forcing
# Map output of the selected run.
map_ds=xr.open_dataset(os.path.join(run_dir,'dwaq_map.nc')) 

# Runs that GroupFluxAnalysis pools together by default.
multi_run_dirs=[
    "./data_lsb_tracer_02", # summer
    "./data_lsb_tracer_03" # winter
]

In [366]:
def hydro(run_dir): # had been a value before, not method
    """
    Return the entry of `hydros` that drives the given run.

    run_dir: path of a DWAQ run directory. Only the final directory name
      is compared, after normalizing the path, so "./data_lsb_tracer_03",
      "data_lsb_tracer_03" and "./data_lsb_tracer_03/" all select the
      same hydro.

    Runs listed in winter_runs use hydros['winter']; every other run
    falls back to hydros['summer'].
    """
    winter_runs = {"data_lsb_tracer_03"}
    run_name = os.path.basename(os.path.normpath(run_dir))
    hydro_name = 'winter' if run_name in winter_runs else 'summer'
    return hydros[hydro_name]

In [367]:
# Extract flow manually. Scans the hydro which takes some time.
# Cache results in the waq run directory

def transect_flow(run_dir,transect_name,force=False):
    """
    Discharge through a named transect of a run, cached on disk.

    run_dir: DWAQ run directory, holding waqmodel.inp.
    transect_name: name of the transect as defined in waqmodel.inp.
    force: when True, re-extract even if a cached file exists.

    The result is stored as <run_dir>/cache/Q_<transect_name>.nc and
    read back from there on later calls. Returns a DataArray.
    """
    cache_dir=os.path.join(run_dir,"cache")
    if not os.path.exists(cache_dir):
        os.makedirs(cache_dir)
    cache_fn=os.path.join(cache_dir, f"Q_{transect_name}.nc")

    # Fast path: reuse the cached extraction.
    if os.path.exists(cache_fn) and not force:
        return xr.load_dataarray(cache_fn)

    # Slow path: scan the hydro for the transect's flow, then cache it.
    inp=dwaq.InpReader(os.path.join(run_dir,'waqmodel.inp'))
    transect=inp.get_transect_by_name(transect_name)
    Q=hydro(run_dir).extract_transect_flow(transect)
    Q.name="discharge"
    Q.to_netcdf(cache_fn)
    return Q

In [324]:
# Grid from the map output, its boundary polygon, and a triangulation
# (with per-triangle source indices) shared by the map plots below.
g = unstructured_grid.UnstructuredGrid.read_ugrid(map_ds)
shore = g.boundary_polygon()
tri, tri_srcs = g.mpl_triangulation(return_sources=True)

# Optional look at the grid edges; disabled.
if False:
    fig, ax = plt.subplots(figsize=(9.5, 8))
    ax.set_adjustable('datalim')
    g.plot_edges(color='k', lw=0.5, alpha=0.6)

INFO:join_features:0 open strings, 24 simple polygons
INFO:join_features:Building index
  p.join_id=i
  index=STRtree(simple_polys)
INFO:join_features:done building index
INFO:join_features:Examining largest poly left with area=3242487807.773357, 23 potential interiors


In [325]:
# Map of pond_chl in layer 0 at the last output time.
fig,ax=plt.subplots(figsize=(9.5,8))

scal=map_ds.pond_chl.isel(layer=0,time=-1)

# Cells holding the -999 sentinel are blanked rather than being drawn at
# the bottom of the color scale. Same NaN masking as the in-pond plots.
dry=(scal==-999).values
scal=np.where(dry,np.nan,scal)

# One value per triangle, taken from the cell each triangle came from.
tri_scalar=scal[tri_srcs]
ax.tripcolor(tri,facecolors=tri_scalar,cmap='turbo',clim=[0,8])

plot_wkb.plot_wkb(shore,fc='none',ec='k',lw=0.4,zorder=3)

# Fill the figure with the map and fix the extent.
ax.set_position([0,0,1,1])
ax.axis('off')
ax.axis('equal')
ax.axis( (576015., 593479., 4140885., 4152402.))

<IPython.core.display.Javascript object>

(576015.0, 593479.0, 4140885.0, 4152402.0)

In [311]:
# Show the dataset summary of the map output.
map_ds

In [326]:
# In-pond distribution of concentration
zoom=(583787., 593113., 4139756., 4147609.)
cell_in_zoom=g.cell_clip_mask(zoom)

# Dry mask depends only on depth, not on the field, so compute it once.
dry=map_ds['LocalDepth'].isel(layer=0,time=-1).values<0.01

for field in ['pond_chl','pond_agec','san_jose']:
    fig,ax=plt.subplots(figsize=(9.5,8))

    scal=map_ds[field].isel(layer=0,time=-1)
    scal=np.where(dry,np.nan,scal)

    # Color scale tops out at 1.2x the 94th percentile of wet cells
    # inside the zoom window.
    cmax=1.2*np.percentile(scal[np.isfinite(scal)&cell_in_zoom],94)

    tri_scalar=scal[tri_srcs]
    coll=ax.tripcolor(tri,facecolors=tri_scalar,cmap='turbo',clim=[0,cmax])

    # Gray fill underneath, black outline on top.
    plot_wkb.plot_wkb(shore,fc='0.8',lw=0.4,zorder=-1)
    plot_wkb.plot_wkb(shore,fc='none',ec='k',lw=0.4,zorder=3)

    ax.set_position([0,0,1,1])
    ax.axis('off')
    ax.axis('equal')
    ax.axis( zoom)
    cax=fig.add_axes([0.05,0.1,0.35,0.03])
    plt.colorbar(coll,cax=cax,label=field,orientation='horizontal')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Flux Analysis
==

In [535]:
# For instance, relate flux at alv2 (ALV moored sensor)
# to the concentrations at alv1 (upstream) and alv3 (downstream).

@memoize.memoize()
def get_hist_ds(run_dir):
    """
    Open the history output (dwaq_hist.nc) of a run as an xarray Dataset.
    Wrapped in memoize.memoize(), presumably so that repeated requests
    for the same run reuse one dataset.
    """
    hist_fn = os.path.join(run_dir, 'dwaq_hist.nc')
    return xr.open_dataset(hist_fn)

class FluxAnalysis:
    """
    Flux-gradient analysis of one scalar at one transect of a DWAQ run.

    Concentrations, fluxes and depth are read from the history output of
    run_dir; discharge comes from the run's hydrodynamics or from USGS.
    Settings are class attributes. Keyword arguments to the constructor
    are handed to utils.set_keywords() to override them per instance.
    """
    flux_station ='alv2'    # region for flux_values() and the discharge transect
    up_station   = 's_alv1' # region for the "up" concentration
    mid_station  ='s_alv2'  # region for concentration and depth
    down_station ='s_alv3'  # region for the "down" concentration
    scalar='san_jose'       # name of the field being analyzed
    flux_station_usgs=11169025 # Guad River station
    norm=None # 'lp_gradient' normalize so lp gradient has rms of 1.0 
    discharge_source='DWAQ' # DWAQ, USGS
    # 'central': central difference between up and down
    # 'tidal': use tidal variability at mid_station
    gradient_source='central' 

    # Default run: the notebook-level run_dir at the time the class is defined.
    run_dir = run_dir
    
    def __init__(self,**kw):
        utils.set_keywords(self,kw)

    @property
    def hist(self):
        """History dataset of run_dir, via get_hist_ds()."""
        return get_hist_ds(self.run_dir)
    
    @property
    @memoize.imemoize()
    def scale(self):
        """ A factor to scale flux-gradient data to unit range.
        This is automatically applied in plotting, but not directly
        to any data.

        Raises ValueError for an unrecognized norm.
        """
        if self.norm is None: return 1.0
        if self.norm=='lp_gradient':
            rms=np.sqrt(np.mean(self.lp_gradient()**2))
            # Floor on rms keeps the factor finite for a flat gradient.
            return 1.0/max(1e-6,rms)
        raise ValueError(f"bad norm {self.norm}")

    def time(self):
        """Time values of the history output."""
        return self.hist.time.values
    
    def flux_values(self):
        """Flux of the scalar at flux_station, as a rate."""
        # positive upstream (due to getting the transects backwards in DWAQ). 
        # DWAQ values are period-integrated. convert to a rate.
        return self.hist.bal.sel(field=self.scalar,region=self.flux_station) / self.dt_s()
    def QC_flux_values(self):
        """Instantaneous Q*C flux: mid-station concentration times DWAQ discharge."""
        # other discharge estimates may not include tidal variability.
        # This is not an exogenous variable for fitting, so okay to use dwaq discharge.
        return self.conc_values() * self.discharge_dwaq()
    def QbarCbar_flux_values(self):
        """Product of lowpassed concentration and lowpassed discharge()."""
        return self.lp(self.conc_values()) * self.lp(self.discharge())
    
    def lp_disp_flux_values(self):
        """Lowpassed Q*C flux minus the <Q><C> flux."""
        return self.lp(self.QC_flux_values()) - self.QbarCbar_flux_values()
    
    def dt_s(self):
        """Median spacing of the history time axis, in seconds."""
        return np.median( np.diff( self.time() ) /np.timedelta64(1,'s') )
    
    def lp(self,v):
        """
        Lowpass v with filters.lowpass_godin at the history time step.
        Accepts anything with a .values attribute, or a plain array.
        """
        try:
            v=v.values
        except AttributeError:
            pass
        return filters.lowpass_godin(v,mean_dt_h=self.dt_s()/3600.0)

    def spring_neap(self,v,standard=True):
        "RMS amplitude filter for spring-neap indicator"
        try:
            v=v.values
        except AttributeError:
            pass
        prime=v-self.lp(v)
        result = np.sqrt( self.lp(prime**2) )
        # Overwrite 36 hours at each end with the nearest retained value.
        trim_samples = int( (36.0 * 3600) / self.dt_s() )
        result[:trim_samples]  = result[ trim_samples]
        result[-trim_samples:] = result[-trim_samples]
        
        if standard:
            # Standardize to zero mean and unit standard deviation.
            result = (result-result.mean())/np.std(result)
        return result

    def conc_values(self):        
        """Scalar values at mid_station."""
        return self.hist.bal.sel(field=self.scalar,region=self.mid_station)
    def conc_up_values(self):
        """Scalar values at up_station."""
        return self.hist.bal.sel(field=self.scalar,region=self.up_station)
    def conc_dn_values(self):
        """Scalar values at down_station."""
        return self.hist.bal.sel(field=self.scalar,region=self.down_station)
    def gradient(self):
        """
        Instantaneous gradient. Only defined for gradient_source
        'central'; any other setting raises ValueError instead of
        silently returning None.
        """
        if self.gradient_source=='central':
            return self.gradient_central()
        raise ValueError(f"no instantaneous gradient for gradient_source {self.gradient_source}")
    def gradient_central(self):
        """Up-station minus down-station concentration."""
        return self.conc_up_values() - self.conc_dn_values()
    def lp_gradient(self):
        """
        Lowpassed gradient according to gradient_source.
        Raises ValueError for an unrecognized gradient_source.
        """
        if self.gradient_source=='central':
            return self.lp(self.gradient_central())
        elif self.gradient_source=='tidal':
            return self.lp_gradient_tidal()
        raise ValueError(f"bad gradient_source {self.gradient_source}")
        
    def lp_gradient_tidal(self):
        """
        Gradient proxy from tidal variability at mid_station: the
        lowpassed product of depth and concentration anomalies divided
        by the lowpassed squared depth anomaly.
        """
        H =self.depth()
        C =self.conc_values().values
        Hp = -(H - self.lp(H)) # flip to match sign of C_up-C_down gradient.
        Cp = C - self.lp(C)
        # Effectively a least-squares fit for the slope over time.
        return self.lp(Hp*Cp) / self.lp(Hp*Hp)

    def discharge(self):
        """Discharge from the configured discharge_source, on the history time axis."""
        if self.discharge_source=='DWAQ':
            return self.discharge_dwaq()
        elif self.discharge_source=='USGS':
            return self.discharge_usgs()
        else:
            raise Exception(f"bad discharge_source {self.discharge_source}")

    def discharge_dwaq(self):
        """Transect flow from the hydro, interpolated onto the history time axis."""
        Q = transect_flow(self.run_dir,transect_name=self.flux_station)
        # I think that for a given time, Q from the hydro is the flow in the following
        # interval, while the history output reflects the previous interval.
        # If nothing else, this adjustment gets the dwaq flux and Q*C flux into agreement.
        Q_dt = np.median(np.diff(Q.time))
        # This had worked before without to_dnum(), not sure how.
        return np.interp( utils.to_dnum(self.time()), utils.to_dnum(Q.time+Q_dt), Q.values )
    
    def discharge_usgs_ds(self):
        """
        USGS dataset for flux_station_usgs over the history period, with
        an added 'Q' variable in m3 s-1, sign-flipped to positive upstream.
        Downloads are cached in the notebook-level cache_dir.
        """
        t=self.time()
        usgs_ds = usgs_nwis.nwis_dataset(self.flux_station_usgs,t[0],t[-1],[60],cache_dir=cache_dir)
        # Other terms already define positive as up-stream, opposite USGS convention. Flip
        # data here.
        # 0.3048**3 converts cubic feet to cubic meters. The minus sign
        # applies to the whole product.
        usgs_ds['Q'] = -0.3048**3 * usgs_ds['stream_flow_mean_daily']
        usgs_ds['Q'].attrs['units'] = 'm3 s-1'
        return usgs_ds
        
    @memoize.imemoize()
    def discharge_usgs(self):
        """USGS discharge interpolated onto the history time axis."""
        ds=self.discharge_usgs_ds()
        return np.interp( utils.to_dnum(self.time()), utils.to_dnum(ds.time), ds.Q.values )

    def depth(self):
        """Water column depth at mid_station, recovered from LocalDepth."""
        # Generally have LocalDepth in the history output, but
        # it's the *sum* of depths of the lower bound of segments.
        # Station output is set to be a single watercolumn.
        Nlayers=int(hydro(self.run_dir).hyd_toks['number-hydrodynamic-layers'])

        # localdepth=  dz * sum_{k=1..N} k = dz * (N+1)N/2
        # H = dz*N = localdepth * 2 / (N+1) 
        localdepth=self.hist.bal.sel(field='LocalDepth',region=self.mid_station).values
        return 2*localdepth/(Nlayers+1)
    
    def df(self,normalize=True):
        """
        Assemble the time series used for regression into a DataFrame.

        normalize: when True, gradient, concentration and flux columns
          are multiplied by self.scale. The spring-neap indicators and
          the discharge columns are never scaled.
        """
        df=pd.DataFrame()
        scale=self.scale if normalize else 1.0
        df['time']=self.time()
        df['scalar']=self.scalar
        df['lp_gradient']=scale * self.lp_gradient()
        df['lp_C']=scale*self.lp(self.conc_values())
        df['lp_flux']=scale*self.lp(self.flux_values())
        df['disp_flux']=scale*self.lp_disp_flux_values()
        df['sn_Q'] = self.spring_neap(self.discharge_dwaq())
        df['sn_H'] = self.spring_neap(self.depth())
        df['Q'] = self.discharge()
        df['Qbar'] = self.lp(self.discharge())
        return df
        
    def plot_timeseries(self,spring_neap=True,lp_only=True):
        """
        Two-panel figure: flux terms on top, up/down concentrations below.

        spring_neap: overlay the Q- and H-based spring-neap indicators
          on a twin axis of the flux panel.
        lp_only: when False, also draw the instantaneous Q*C flux.
        Returns the figure.
        """
        fig,axs=plt.subplots(2,1,sharex=True,figsize=[8.5,5])

        ax_J,ax_C = axs

        # Running y-range of the plotted flux series. Starting from +/-inf
        # and using NaN-aware extrema keeps large unnormalized fluxes in view.
        ymin=np.inf
        ymax=-np.inf

        def plot_flux(y,**kw):
            nonlocal ymin,ymax
            ax_J.plot( self.time(), y, **kw)
            ymin=min(ymin,float(np.nanmin(y)))
            ymax=max(ymax,float(np.nanmax(y)))
        
        if not lp_only:
            #ax_J.plot( self.hist.time, self.scale * self.flux_values(), label="DWAQ flux")
            plot_flux(self.scale * self.QC_flux_values(),alpha=0.4,lw=0.5,label="QC")
            
        plot_flux(self.scale * self.lp(self.QC_flux_values()),label="<QC>")
        plot_flux(self.scale * self.QbarCbar_flux_values(),label="<Q><C>")
        plot_flux(self.scale * self.lp_disp_flux_values(),label="<Q'C'>")

        ax_C.plot( self.time(), self.scale * self.conc_up_values(),label=f'up ({self.up_station})')
        ax_C.plot( self.time(), self.scale * self.conc_dn_values(),label=f'dn ({self.down_station})')
        #ax_C.plot( self.hist.time, self.scale * self.conc_values(),label=f'mid ({self.mid_station})')

        ax_C.legend(loc='upper left')        
        ax_C.set_ylabel(f'Conc {self.scalar}')
        ax_J.set_ylabel(f'Flux {self.scalar}')
        
        if np.isfinite(ymin) and np.isfinite(ymax):
            ax_J.set_ylim(ymin,ymax)
        
        fig.autofmt_xdate()

        if spring_neap:
            leg_x=1.07
            fig.subplots_adjust(right=0.75)
            ax2=ax_J.twinx()

            ax_J.legend(loc='upper left',bbox_to_anchor=[leg_x,1],frameon=0)
            
            # Flow-based spring-neap indicator. Taken from this instance,
            # not from whatever notebook-level `fa` happens to exist.
            Qspring_neap = self.spring_neap(self.discharge_dwaq())
            Hspring_neap = self.spring_neap(self.depth())
            ax2.plot(self.time(), Qspring_neap,color='m',ls='--',label='Spring-neap ~ Q')
            ax2.plot(self.time(), Hspring_neap,color='y',ls='--',label='Spring-neap ~ H')
            ax2.legend(bbox_to_anchor=[leg_x,0],loc='lower left',frameon=False)
        else:     
            ax_J.legend(loc='upper left')        
            fig.subplots_adjust(right=0.97,bottom=0.14,top=0.96)
        return fig
    
    def plot_scatter(self):
        """Scatter of lowpassed flux against lowpassed gradient. Returns the figure."""
        fig,ax=plt.subplots(figsize=(5,4))
        ax.set_xlabel('Gradient')
        ax.set_ylabel('Flux')
        #self.add_inst_scatter(ax=ax)
        self.add_lp_scatter(ax=ax)
        ax.legend(loc='upper left')
        ax.axhline(0,color='k',lw=0.5)
        ax.axvline(0,color='k',lw=0.5)
        fig.subplots_adjust(left=0.16,top=0.97,right=0.97)
        return fig
    
    def add_inst_scatter(self,ax):
        """Add instantaneous flux vs gradient points to ax."""
        ax.plot( self.scale*self.gradient(), self.scale*self.flux_values(),
                '.', ms=2, alpha=0.4, label=f'Inst. {self.scalar}')
        
    def add_lp_scatter(self,ax,set_limits=True,disp=True,total=True):
        """
        Add lowpassed flux vs gradient points to ax.

        total: plot the lowpassed total flux.
        disp: plot the lowpassed <Q'C'> flux.
        set_limits: set the axes to the extent of everything plotted
          here, expanded by 10%.
        """
        grad_lp=self.scale*self.lp_gradient()
        plotted=[]
        if total:
            flux_lp=self.scale*self.lp(self.flux_values())
            ax.plot( grad_lp, flux_lp, '.', label=f'LP {self.scalar}')
            plotted.append(flux_lp)
        if disp:
            disp_lp=self.scale*self.lp_disp_flux_values()
            ax.plot( grad_lp, disp_lp, '.', label=f"<Q'C'> {self.scalar}")        
            plotted.append(disp_lp)

        if set_limits and plotted:
            # Limits span every series drawn, not just the last one.
            xxyy=[ grad_lp.min(), grad_lp.max(),
                   min(y.min() for y in plotted),
                   max(y.max() for y in plotted)]
            ax.axis( utils.expand_xxyy(xxyy,0.1) )
            
    def fig_discharge(self):
        """Compare DWAQ and USGS discharge, tidal and lowpassed. Returns (fig, ax)."""
        fig,ax=plt.subplots()
        ax.plot(self.time(), self.discharge_dwaq(), label='DWAQ tidal',lw=0.5, alpha=0.6)
        ax.plot(self.time(), self.lp(self.discharge_dwaq()), label='DWAQ lowpass')
        ax.plot(self.time(), self.discharge_usgs(), label='-USGS')
        ax.plot(self.time(), self.lp(self.discharge_usgs()), label='-USGS LP')
        fig.autofmt_xdate()
        ax.legend(loc='upper right')
        return fig,ax
    

Proxies for Gradient
--

The goal is to use concentration data at a single point to estimate a 
gradient based on the tidal fluctuations in concentration.

It's not enough to just look at the range of variation, since we need
to know the sign of the gradient (from phase data), and account for 
differences in tidal amplitude (though the spring-neap indicator adds that 
back in to some degree).

Given a scatter plot of Conc vs Depth, I want a windowed estimate of the 
slope $\frac{dC}{dH}$.

That's the same as a windowed least squares fit for $C' \approx \beta H'$
where primes indicate means have been removed.

This can be distilled down to the ratio of the windowed covariance of $C'$ and $H'$
to the windowed variance of $H'$: $\langle C'H' \rangle / \langle H'H' \rangle$.

Signs: the central difference gradient is calculated from conc@up-conc@down 
(and length is effectively 1.0 with no units). The slope we're fitting is 
essentially conc(when H is high) - conc(when H is low). The water when H is
high is representative of down-slough conditions, and vice versa. So flip
the sign of H' to get a gradient consistent with the up-down gradient.



In [503]:
# Concentration against depth at the mid station, colored by time.
fa = FluxAnalysis(scalar='san_jose')
fig, ax = plt.subplots()
ax.scatter(fa.depth(), fa.conc_values(), s=10, c=fa.time())
ax.set_xlabel('Depth')
ax.set_ylabel('Conc') ;

<IPython.core.display.Javascript object>

In [521]:
# Compare the central-difference gradient with the windowed OLS proxy (top)
# against the mid-station concentration (bottom).
fa = FluxAnalysis(scalar='san_jose')

fig, axs = plt.subplots(2, 1, sharex=True, figsize=(9, 6.5))

# Top panel: the three gradient estimates.
axs[0].plot(fa.time(), fa.gradient_central(), label='central', lw=0.5, alpha=0.7)
axs[0].plot(fa.time(), fa.lp(fa.gradient_central()), label='lp(central)')
axs[0].plot(fa.time(), fa.lp_gradient_tidal(), label='Windowed OLS')

# Bottom panel: the concentration the proxy is derived from.
axs[1].plot(fa.time(), fa.conc_values(), label='conc mid')

for ax in axs:
    ax.legend(loc='upper left')
fig.autofmt_xdate()
fig.tight_layout()

<IPython.core.display.Javascript object>

In [488]:
# Per-scalar time series and scatter figures; disabled.
if False:
    # san_jose: unnormalized, time series only.
    fa=FluxAnalysis(scalar='san_jose')
    fig=fa.plot_timeseries()
    #fig=fa.plot_scatter()

    # Remaining scalars: normalized, time series plus scatter.
    for scalar in ['san_mateo','pond_chl','palo_alto']:
        fa=FluxAnalysis(scalar=scalar,norm='lp_gradient')
        fig=fa.plot_timeseries()
        fig=fa.plot_scatter()

In [546]:
class GroupFluxAnalysis:
    """
    Pool FluxAnalysis results over several scalars and runs and fit a
    single regression to the combined data.

    One FluxAnalysis (with norm='lp_gradient') is built for every
    combination of scalars and run_dirs. Settings are class attributes.
    Keyword arguments to the constructor are handed to
    utils.set_keywords() to override them per instance.
    """
    scalars=['pond_chl','san_jose','san_mateo','sunnyvale','palo_alto']
    run_dirs=multi_run_dirs

    # Station settings forwarded to every FluxAnalysis.
    flux_station ='alv2'
    up_station   = 's_alv1'
    mid_station  ='s_alv2'
    down_station ='s_alv3'
        
    discharge_source='DWAQ' # DWAQ, USGS
    gradient_source='central' # central, tidal
    
    def __init__(self,**kw):
        utils.set_keywords(self,kw)
        
        fa_kws=dict(norm='lp_gradient',flux_station=self.flux_station, up_station=self.up_station,
                    mid_station=self.mid_station,down_station=self.down_station,
                    discharge_source=self.discharge_source,gradient_source=self.gradient_source)
        self.fas=[]
        
        for scalar in self.scalars:
            for run_dir in self.run_dirs:
                # If runs don't have the same set of scalars, will have to try/except
                # this
                fa=FluxAnalysis(scalar=scalar,run_dir=run_dir,**fa_kws)
                self.fas.append(fa)

    def df(self):
        """
        Concatenate the per-analysis DataFrames into one.
        Rows get a fresh 0..N-1 index so that labels are unique across
        scalars and runs.
        """
        dfs=[]
        for fa in self.fas:
            df=fa.df()
            df['scalar']=fa.scalar
            dfs.append(df)
        return pd.concat(dfs,ignore_index=True)
            
    def plot_scatter(self,disp=True):
        """
        Scatter of lowpassed flux against gradient for every analysis,
        with a linear fit. disp selects dispersive (True) or total
        (False) flux. Returns the figure.
        """
        fig,ax = plt.subplots(figsize=(7,5))

        for fa in self.fas:
            fa.add_lp_scatter(ax,set_limits=False,disp=disp,total=not disp)

        ax.axvline(0,color='k',lw=0.5)
        ax.axhline(0,color='k',lw=0.5)
        
        self.add_fit(ax=ax,disp=disp)

        ax.legend(loc='upper right',frameon=0)
        ax.set_xlabel('Gradient')
        ax.set_ylabel('Flux')
        return fig
    
    def plot_fit(self,formula="disp_flux ~ lp_gradient + sn_Q:lp_gradient",
                 variable=None, hue='scalar',ax=None,annot=True):
        """
        Fit formula by OLS to the pooled data and plot observed against
        predicted values.

        formula: statsmodels/patsy formula over the columns of df().
        variable: column to plot on x, with 'pred_<variable>' on y.
          Defaults to the dependent variable of the formula. If either
          column is missing, a message is printed and nothing is plotted.
        hue: column used to color the points.
        ax: axes to draw into; a new figure is created when None.
        annot: True to annotate with the formula, r^2 and coefficients.
          A dict is merged into the text settings.

        The fitted result is kept as self.result. Returns the figure.
        """
        # additionally including sn_Q by itself makes almost no improvement. 
        if ax is None:
            fig,ax = plt.subplots(figsize=(7,5))
        else:
            fig=ax.figure

        df=self.df()
        
        df=df.sample(frac=1) # otherwise the last scalar is purely on top of the others
        
        result = smf.ols(formula,data=df).fit()
        self.result=result
        
        endog = result.model.endog_names # just a single string like disp_flux
        #exog = result.model.exog_names
        pred_endog = 'pred_' + endog
        df[pred_endog]=result.predict(exog=df)
        
        if pred_endog=='pred_disp_flux':
            # NOTE(review): lp_flux looks like it already contains the
            # dispersive part, so adding the predicted dispersive flux to
            # it may double count. Confirm whether this should be
            # pred_disp_flux + (lp_flux - disp_flux).
            df['pred_lp_flux'] = df[pred_endog] + df.lp_flux
        
        kw=dict(x=endog,y=pred_endog,hue=hue,lw=0,s=6,ax=ax, alpha=0.7)
        if variable is not None:
            kw['x'] = variable
            kw['y'] = 'pred_' + variable
            if kw['x'] not in df.columns or kw['y'] not in df.columns: 
                print(f"Requested plot variable {variable} does not exist")
                # Bail out only when the requested columns are missing.
                return fig
        sns.scatterplot(data=df,**kw)

        ax.axhline(0,color='k',lw=0.5,alpha=0.5,zorder=3)
        ax.axvline(0,color='k',lw=0.5,alpha=0.5,zorder=3)
        # 1:1 line for reference.
        ax.axline((0, 0), slope=1.0, color="k", lw=0.5, alpha=0.5, zorder=3)
        
        if variable=='disp_flux':
            ax.set_xlabel('Pseudo-dispersive flux')
            ax.set_ylabel('Predicted dispersive flux')
        elif variable=='lp_flux':
            ax.set_xlabel('Net flux')
            ax.set_ylabel('Predicted net flux')
        else:
            pass # sns labels
        
        if annot:
            txt_settings=dict(va='bottom',transform=ax.transAxes, family='monospace')
            if isinstance(annot,dict):
                txt_settings.update(annot)
            lines=[formula, 
                   f"r$^2$={result.rsquared:.2f}"]
            for k,v in zip(result.params.index,result.params.values):
                lines.append( f" {k:19}  {v: .3f}")
            ax.text(0.34,0.02,"\n".join(lines),**txt_settings)
        return fig
    
    def add_fit(self,ax,disp=True):
        """
        Fit flux ~ lp_gradient by OLS and draw the fitted line on ax,
        annotated with r^2. disp selects disp_flux (True) or lp_flux
        (False) as the dependent variable.
        """
        df=self.df()
        if disp:
            formula="disp_flux ~ lp_gradient"
        else:
            formula="lp_flux ~ lp_gradient"
            
        result = smf.ols(formula,data=df).fit()
        df_ordered=df.sort_values('lp_gradient')
        y_ordered=result.predict(exog=df_ordered)
        ls=ax.plot(df_ordered['lp_gradient'],y_ordered,'k-',label='Fit')
        
        annot=f"r$^2$={result.rsquared:.2f}"
        plot_utils.annotate_line(ls[0],annot,ax=ax,buff=dict(foreground='w',linewidth=3.5),
                                offset_points=15)
    def fig_fit_triptych(self):
        """
        Three plot_fit panels side by side: colored by scalar, colored
        by Qbar with annotation, and net flux (lp_flux) colored by Qbar.
        Returns (fig, axs).
        """
        fig,axs=plt.subplots(1,3,figsize=(9.5,3.8))
        fig.subplots_adjust(left=0.08, right=0.98,top=0.98,bottom=0.15,wspace=0.2)
        self.plot_fit(ax=axs[0],annot=False)
        self.plot_fit(ax=axs[1],hue='Qbar',annot=dict(fontsize=7))
        self.plot_fit(ax=axs[2],hue='Qbar',variable='lp_flux', annot=False)
        axs[1].get_legend().remove()
        return fig,axs

    
# Pooled fit using the tidal gradient proxy and the default (DWAQ) discharge.
# just winter run => R2 0.86.
# winter and summer => R2 0.90
gfa=GroupFluxAnalysis(gradient_source='tidal')

# Individual panels, superseded by the triptych:
#fig=gfa.plot_fit() ;
#fig=gfa.plot_fit(hue='Qbar') ;
#fig=gfa.plot_fit(hue='Qbar',variable='lp_flux') ;

gfa.fig_fit_triptych() ;


<IPython.core.display.Javascript object>

In [550]:
# Net-flux model: tidal gradient proxy plus USGS discharge.
# Note that this rebinds the notebook-level gfa.
gfa=GroupFluxAnalysis(gradient_source='tidal',discharge_source='USGS')
fig=gfa.plot_fit(formula="lp_flux ~ lp_gradient + sn_H:lp_gradient + lp_C:Qbar",hue='Qbar') ;


<IPython.core.display.Javascript object>

INFO:usgs_nwis:Cached   2016-06-01 00:00:00 -- 2016-07-01 00:00:00
INFO:usgs_nwis:Cached   2016-07-01 00:00:00 -- 2016-08-01 00:00:00
INFO:usgs_nwis:Cached   2016-08-01 00:00:00 -- 2016-09-01 00:00:00
INFO:usgs_nwis:Cached   2016-09-01 00:00:00 -- 2016-10-01 00:00:00
INFO:usgs_nwis:Cached   2016-02-01 00:00:00 -- 2016-03-01 00:00:00
INFO:usgs_nwis:Cached   2016-03-01 00:00:00 -- 2016-04-01 00:00:00
INFO:usgs_nwis:Cached   2016-06-01 00:00:00 -- 2016-07-01 00:00:00
INFO:usgs_nwis:Cached   2016-07-01 00:00:00 -- 2016-08-01 00:00:00
INFO:usgs_nwis:Cached   2016-08-01 00:00:00 -- 2016-09-01 00:00:00
INFO:usgs_nwis:Cached   2016-09-01 00:00:00 -- 2016-10-01 00:00:00
INFO:usgs_nwis:Cached   2016-02-01 00:00:00 -- 2016-03-01 00:00:00
INFO:usgs_nwis:Cached   2016-03-01 00:00:00 -- 2016-04-01 00:00:00
INFO:usgs_nwis:Cached   2016-06-01 00:00:00 -- 2016-07-01 00:00:00
INFO:usgs_nwis:Cached   2016-07-01 00:00:00 -- 2016-08-01 00:00:00
INFO:usgs_nwis:Cached   2016-08-01 00:00:00 -- 2016-09-01 00:0

In [544]:
# Dependent and explanatory variable names of the most recent plot_fit() on gfa.
#dir(gfa.result.model)
gfa.result.model.endog_names, gfa.result.model.exog_names

('lp_flux', ['Intercept', 'lp_gradient', 'sn_H:lp_gradient', 'lp_C:Qbar'])

In [485]:
# Default (central) gradient with USGS discharge.
gfa2=GroupFluxAnalysis(discharge_source='USGS')
gfa2.fig_fit_triptych() ;

<IPython.core.display.Javascript object>

INFO:usgs_nwis:Cached   2016-06-01 00:00:00 -- 2016-07-01 00:00:00
INFO:usgs_nwis:Cached   2016-07-01 00:00:00 -- 2016-08-01 00:00:00
INFO:usgs_nwis:Cached   2016-08-01 00:00:00 -- 2016-09-01 00:00:00
INFO:usgs_nwis:Cached   2016-09-01 00:00:00 -- 2016-10-01 00:00:00
INFO:usgs_nwis:Cached   2016-02-01 00:00:00 -- 2016-03-01 00:00:00
INFO:usgs_nwis:Cached   2016-03-01 00:00:00 -- 2016-04-01 00:00:00
INFO:usgs_nwis:Cached   2016-06-01 00:00:00 -- 2016-07-01 00:00:00
INFO:usgs_nwis:Cached   2016-07-01 00:00:00 -- 2016-08-01 00:00:00
INFO:usgs_nwis:Cached   2016-08-01 00:00:00 -- 2016-09-01 00:00:00
INFO:usgs_nwis:Cached   2016-09-01 00:00:00 -- 2016-10-01 00:00:00
INFO:usgs_nwis:Cached   2016-02-01 00:00:00 -- 2016-03-01 00:00:00
INFO:usgs_nwis:Cached   2016-03-01 00:00:00 -- 2016-04-01 00:00:00
INFO:usgs_nwis:Cached   2016-06-01 00:00:00 -- 2016-07-01 00:00:00
INFO:usgs_nwis:Cached   2016-07-01 00:00:00 -- 2016-08-01 00:00:00
INFO:usgs_nwis:Cached   2016-08-01 00:00:00 -- 2016-09-01 00:0

In [525]:
# Central-difference gradient with the default (DWAQ) discharge.
gfa3=GroupFluxAnalysis(gradient_source='central')
gfa3.fig_fit_triptych() ;

<IPython.core.display.Javascript object>

In [524]:
# Tidal gradient proxy with USGS discharge.
gfa4=GroupFluxAnalysis(discharge_source='USGS',gradient_source='tidal')
gfa4.fig_fit_triptych() ;

<IPython.core.display.Javascript object>

INFO:usgs_nwis:Cached   2016-06-01 00:00:00 -- 2016-07-01 00:00:00
INFO:usgs_nwis:Cached   2016-07-01 00:00:00 -- 2016-08-01 00:00:00
INFO:usgs_nwis:Cached   2016-08-01 00:00:00 -- 2016-09-01 00:00:00
INFO:usgs_nwis:Cached   2016-09-01 00:00:00 -- 2016-10-01 00:00:00
INFO:usgs_nwis:Cached   2016-02-01 00:00:00 -- 2016-03-01 00:00:00
INFO:usgs_nwis:Cached   2016-03-01 00:00:00 -- 2016-04-01 00:00:00
INFO:usgs_nwis:Cached   2016-06-01 00:00:00 -- 2016-07-01 00:00:00
INFO:usgs_nwis:Cached   2016-07-01 00:00:00 -- 2016-08-01 00:00:00
INFO:usgs_nwis:Cached   2016-08-01 00:00:00 -- 2016-09-01 00:00:00
INFO:usgs_nwis:Cached   2016-09-01 00:00:00 -- 2016-10-01 00:00:00
INFO:usgs_nwis:Cached   2016-02-01 00:00:00 -- 2016-03-01 00:00:00
INFO:usgs_nwis:Cached   2016-03-01 00:00:00 -- 2016-04-01 00:00:00
INFO:usgs_nwis:Cached   2016-06-01 00:00:00 -- 2016-07-01 00:00:00
INFO:usgs_nwis:Cached   2016-07-01 00:00:00 -- 2016-08-01 00:00:00
INFO:usgs_nwis:Cached   2016-08-01 00:00:00 -- 2016-09-01 00:0

In [486]:
# Same analysis with the stations shifted: flux at alv1, with s_notch
# and s_alv2 as the up and down stations.
gfa_pond=GroupFluxAnalysis(up_station='s_notch',flux_station='alv1',mid_station='s_alv1',
                           down_station='s_alv2')

fig=gfa_pond.plot_fit();

gfa_pond.fig_fit_triptych() ;

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [487]:
# Shifted stations again, now with USGS discharge. Rebinds gfa_pond.
gfa_pond=GroupFluxAnalysis(up_station='s_notch',flux_station='alv1',mid_station='s_alv1',
                           down_station='s_alv2', discharge_source='USGS')
gfa_pond.fig_fit_triptych() ;

<IPython.core.display.Javascript object>

INFO:usgs_nwis:Cached   2016-06-01 00:00:00 -- 2016-07-01 00:00:00
INFO:usgs_nwis:Cached   2016-07-01 00:00:00 -- 2016-08-01 00:00:00
INFO:usgs_nwis:Cached   2016-08-01 00:00:00 -- 2016-09-01 00:00:00
INFO:usgs_nwis:Cached   2016-09-01 00:00:00 -- 2016-10-01 00:00:00
INFO:usgs_nwis:Cached   2016-02-01 00:00:00 -- 2016-03-01 00:00:00
INFO:usgs_nwis:Cached   2016-03-01 00:00:00 -- 2016-04-01 00:00:00
INFO:usgs_nwis:Cached   2016-06-01 00:00:00 -- 2016-07-01 00:00:00
INFO:usgs_nwis:Cached   2016-07-01 00:00:00 -- 2016-08-01 00:00:00
INFO:usgs_nwis:Cached   2016-08-01 00:00:00 -- 2016-09-01 00:00:00
INFO:usgs_nwis:Cached   2016-09-01 00:00:00 -- 2016-10-01 00:00:00
INFO:usgs_nwis:Cached   2016-02-01 00:00:00 -- 2016-03-01 00:00:00
INFO:usgs_nwis:Cached   2016-03-01 00:00:00 -- 2016-04-01 00:00:00
INFO:usgs_nwis:Cached   2016-06-01 00:00:00 -- 2016-07-01 00:00:00
INFO:usgs_nwis:Cached   2016-07-01 00:00:00 -- 2016-08-01 00:00:00
INFO:usgs_nwis:Cached   2016-08-01 00:00:00 -- 2016-09-01 00:0

In [285]:
# HERE 
# Time series for pond_agec at the shifted stations (flux at alv1).
fa=FluxAnalysis(scalar='pond_agec',up_station='s_notch',flux_station='alv1',mid_station='s_alv1',
                           down_station='s_alv2')
fa.plot_timeseries() ;

<IPython.core.display.Javascript object>

In [244]:
# Regression summary of the most recent plot_fit() on gfa.
gfa.result.summary()


0,1,2,3
Dep. Variable:,disp_flux,R-squared:,0.974
Model:,OLS,Adj. R-squared:,0.974
Method:,Least Squares,F-statistic:,314800.0
Date:,"Fri, 16 Feb 2024",Prob (F-statistic):,0.0
Time:,17:36:47,Log-Likelihood:,-15160.0
No. Observations:,16565,AIC:,30330.0
Df Residuals:,16562,BIC:,30350.0
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.5374,0.006,95.738,0.000,0.526,0.548
lp_gradient,-4.6945,0.006,-793.189,0.000,-4.706,-4.683
sn_Q:lp_gradient,-1.3591,0.005,-283.365,0.000,-1.368,-1.350

0,1,2,3
Omnibus:,3122.782,Durbin-Watson:,1.989
Prob(Omnibus):,0.0,Jarque-Bera (JB):,5342.408
Skew:,1.246,Prob(JB):,0.0
Kurtosis:,4.238,Cond. No.,2.04


In [225]:
# Candidate models for the dispersive flux, with the R2 each one gave.
#formula="disp_flux ~ lp_gradient" # 0.85
#formula="disp_flux ~ lp_gradient + sn_Q" # 0.90
#formula="disp_flux ~ lp_gradient * sn_Q"  # 0.976!

formula="disp_flux ~ lp_gradient + lp_gradient : sn_Q" # 0.974 ! 

print(formula)
df=gfa.df()
result = smf.ols(formula,data=df).fit()
df_ordered=df.sort_values('lp_gradient')
y_ordered=result.predict(exog=df_ordered)
# NOTE(review): this draws into `fig` left over from an earlier cell, so
# the target depends on which cell ran last.
ax=fig.axes[0]
ax.plot(df_ordered['lp_gradient'],y_ordered,'k-',label='Fit')

result.summary()

disp_flux ~ lp_gradient + lp_gradient : sn_Q


0,1,2,3
Dep. Variable:,disp_flux,R-squared:,0.974
Model:,OLS,Adj. R-squared:,0.974
Method:,Least Squares,F-statistic:,314800.0
Date:,"Fri, 16 Feb 2024",Prob (F-statistic):,0.0
Time:,17:00:26,Log-Likelihood:,-15160.0
No. Observations:,16565,AIC:,30330.0
Df Residuals:,16562,BIC:,30350.0
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.5374,0.006,95.738,0.000,0.526,0.548
lp_gradient,-4.6945,0.006,-793.189,0.000,-4.706,-4.683
lp_gradient:sn_Q,-1.3591,0.005,-283.365,0.000,-1.368,-1.350

0,1,2,3
Omnibus:,3122.782,Durbin-Watson:,0.0
Prob(Omnibus):,0.0,Jarque-Bera (JB):,5342.408
Skew:,1.246,Prob(JB):,0.0
Kurtosis:,4.238,Cond. No.,2.04


Observed Discharge
--

How does overall skill change when using an observable quantity for the
mean flow?

In [421]:
# Discharge comparison for the default run: full record first.
fa=FluxAnalysis(scalar='san_jose')
fig,ax=fa.fig_discharge()

# Second copy of the figure, zoomed to a fixed window.
# x limits are presumably matplotlib date numbers; confirm before reuse.
fig,ax=fa.fig_discharge()
ax.axis( (16860.86, 16873.182, -58.1482, 41.98) )

<IPython.core.display.Javascript object>

INFO:usgs_nwis:Cached   2016-02-01 00:00:00 -- 2016-03-01 00:00:00
INFO:usgs_nwis:Cached   2016-03-01 00:00:00 -- 2016-04-01 00:00:00


<IPython.core.display.Javascript object>

(16860.86, 16873.182, -58.1482, 41.98)

Mid-Run Output
==

Create nc files while the longer run is still going. 

In [36]:
# And the history:
# Read the binary history file of the current run_dir directly.
his_fn=os.path.join(run_dir,"waqmodel.his")
hist_ds=dio.his_file_xarray(his_fn)
 

In [132]:
# Balance is only defined for stations / regions. 
# Read the balance history file of the current run_dir with the same reader.
bal_fn=os.path.join(run_dir,"waqmodel-bal.his")
bal_ds=dio.his_file_xarray(bal_fn)

In [7]:
# Re-transcribe output 
# Both branches are disabled; enable by hand when needed.
if False: 
    # hydro is a lookup function, so pass the hydro object it returns
    # for this run rather than the function itself.
    model=dwaq.WaqModel(base_path="./data_lsb_tracer_01",
                        hydro=hydro("./data_lsb_tracer_01"))
    print("Writing history")
    model.write_binary_his_nc()
    print("Writing map")
    model.write_binary_map_nc(overwrite=True)
    
if False:
    # Note: this rebinds the notebook-level run_dir and map_ds.
    run_dir="./data_lsb_tracer_01"
    output_fn=os.path.join(run_dir,"tmp-dwaq_map.nc")

    map_fn=os.path.join(run_dir,"waqmodel.map")
    print("Reading map file")
    map_ds=dio.read_map(map_fn,hydro(run_dir)) # 60s?. Adding z takes way too long.
