# Synthesize 

Take the parsed, cleaned data, and synthesize a long-term daily record for the constituents
currently going into the model.

Specifically, this means:
 - 2000 - 2016 (the record generated below currently runs from 2000-01-01 through 2015-12-31)
 - NOx, NH3, OrthoP
 - False Delta sources, plus the 40+ discharges in the Bay.

In [20]:
from __future__ import print_function
import six
import pandas as pd

import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import datetime
import utils
import glob
import os
%matplotlib notebook 

In [14]:
# Directory that the per-site CSVs and the HDR table are read from
# (relative to the notebook's working directory).
compile_dir = "../outputs/intermediate"

In [96]:
# Build the empty output dataset: a daily time axis, the list of sites, and
# one NaN-filled (time, site) array per analyte plus a matching integer flag
# array.

# Period covered by the synthesized record (both end points inclusive).
date_start=datetime.datetime(2000,1,1)
date_end  =datetime.datetime(2015,12,31)

# NOTE(review): utils.to_dnum presumably returns a day number, so that a step
# of 1 in np.arange below is one day -- confirm against utils.
dn_start=utils.to_dnum(date_start)
dn_end  =utils.to_dnum(date_end)

# arange excludes its stop value, hence the +1 to keep date_end itself.
dns=np.arange(dn_start,dn_end+1)
fmt='%Y-%m-%d'
print("Generating daily data from %s to %s for %d time points"%(date_start.strftime(fmt),
                                                                date_end.strftime(fmt),
                                                                len(dns)))
ds=xr.Dataset()
ds['time']=utils.to_dt64(dns)
analytes=['flow',
          'NOx_conc','NH3_conc','OrthoP_conc','Si_conc',
          'NOx_load','NH3_load','OrthoP_load','Si_load']

# These match the names of the CSV files
site_names=['tesoro','american','sasm','novato','sunnyvale',
            'petaluma','rodeo','fs','valero','phillips66',
            'vallejo','ebmud','san_mateo','sfo','palo_alto','sausalito',
            'south_bayside','ddsd','burlingame','pinole','st_helena',
            'yountville','benicia','millbrae','sonoma_valley','napa',
            'cccsd','ebda','calistoga','central_marin','lg','west_county_richmond',
            'chevron','sf_southeast','shell','mt_view','marin5','san_jose',
            'south_sf','ch','treasure_island','false_sj','false_sac' ]
ds['site']=( 'site', site_names)

# initialize full output array: every analyte starts out as all-NaN, so
# entries that are never filled in remain recognizable as missing.
for analyte in analytes:
    ds[analyte]=( ['time','site'],
                  np.full( (len(ds.time),len(ds.site)), np.nan ) )

# set units for clarity upfront
ds.flow.attrs['units']='m3 s-1'

ds.NOx_conc.attrs['units']='mg/l N'
ds.NH3_conc.attrs['units']='mg/l N'
ds.OrthoP_conc.attrs['units']='mg/l P'
ds.Si_conc.attrs['units']='mg/l Si'
ds.NOx_load.attrs['units']='kg/day N'
ds.NH3_load.attrs['units']='kg/day N'
ds.OrthoP_load.attrs['units']='kg/day P'
ds.Si_load.attrs['units']='kg/day Si'

# setup flag entries: one int16 array of zeros per data variable, named
# '<var>_flag', with the data variable's 'flags' attribute pointing at it.
# The variable names are snapshotted with list() first, because the loop body
# adds new data variables to ds; iterating the live view while growing the
# dataset would depend on xarray internals and could end up flagging the flag
# arrays themselves.
for v in list(ds.data_vars):
    ds[v+'_flag']=( ds[v].dims, np.zeros(ds[v].shape,'i2'))
    ds[v].attrs['flags']=v+'_flag'

Generating daily data from 2000-01-01 to 2015-12-31 for 5844 time points


In [88]:
# Read the long-format HDR table from the intermediate outputs directory
# and preview its first rows as the cell output.
hdr_fn = os.path.join(compile_dir, 'hdr_parsed_long.csv')
hdr = pd.read_csv(hdr_fn)
hdr.head()

Unnamed: 0,analyte,year,month,site,value
0,flow_mgd,2012,7,San Pablo Bay,15.989105
1,flow_mgd,2012,8,San Pablo Bay,15.852014
2,flow_mgd,2012,9,San Pablo Bay,15.340764
3,flow_mgd,2012,10,San Pablo Bay,21.317395
4,flow_mgd,2012,11,San Pablo Bay,48.39628133333334


In [98]:
# Flag value written alongside every value that this cell copies from a
# per-site CSV into ds.
FLAG_LOADING_STUDY=1

# For each site, read '<site>.csv' from compile_dir and copy its NOx
# concentrations onto the daily time axis of ds.
for site_idx,site in enumerate(ds.site.values.tolist()):
    # site is a plain str here; site_idx is its position along the 'site' dim.

    csv=pd.read_csv(os.path.join(compile_dir,site+'.csv'),
                    parse_dates=['Date'])
    csv_dnums=utils.to_dnum(csv.Date)
    # Position of each CSV row on the daily axis. Rows dated outside the
    # output period also get an index here (0 or len(dns)); those are
    # excluded by date_valid below before the indices are used.
    csv_date_i = np.searchsorted(dns,csv_dnums)

    # FLOW: TODO -- flow is not populated yet. (This was a bare `HERE`
    # placeholder, which raised NameError and stopped the cell.)

    # NOx:
    no3=csv['NO3 mg/L N'].copy()
    data_valid=~no3.isnull().values # require at least an no3 value.

    # ignore missing nitrite numbers: treat them as 0 so NOx falls back to NO3
    no2=csv['NO2 mg/L N'].fillna(0.0)

    # dns[-1] is date_end itself, so the upper bound has to be inclusive,
    # otherwise samples taken on the last day are dropped.
    date_valid=(csv_dnums>=dns[0]) & (csv_dnums<=dns[-1])
    valid=data_valid & date_valid
    nox=(no3 + no2)[valid]

    # Assign through dict-based indexing on the dataset's own variable.
    # Chained `.isel(...)[...] = ...` only reaches ds when the intermediate
    # happens to be a view of the underlying array.
    sel=dict(time=csv_date_i[valid],site=site_idx)
    ds['NOx_conc'][sel]=nox.values
    ds['NOx_conc_flag'][sel]=FLAG_LOADING_STUDY