# Experiment Logs

Log and number each experiment using dataframes

Eventually, this logging could be done "online", i.e. automatically each time an experiment is run.

Pulling data takes time, especially for the entire set of USGS gages. So organize folders as:
* exps1
    * unfiltered
    * exp1-1
        * gage+SWOT
        * gage+SWOT+cal
        * fit+stats
    * exp1-2
    * ...
* exps2
...

where each "expsn" directory shares a common set of SWOT data, and you can run the calibration for each nested experiment expsn-m without repulling.


In [3]:
import pandas as pd
from pathlib import Path

Things to log
* expid: experiment id, equal to folder name
* reachdomain: which set of reaches the experiment runs over
* swotsource: source of the SWOT data

In [4]:
def ExpDataFrame(expsid,expid,reachdomain,swotsource,slopedata,slopeminimum=None,slopeconst=0,areaoption='fd',
                constrainhw=False,flowlaw='MWAPN',darkfracmax=None,lossfun='linear'):
    
    '''
    Build a one-row DataFrame that logs the configuration of a single experiment.

    The arguments are recorded as given; no validation of the option values is done here.

        expsid: experiment set id
        expid: experiment id
        reachdomain: name of the set of reaches over which to run - 'connecticut+willamette calval gages' or 'usgs' or...
        swotsource: source of swot data: hydrochron, or ADT-pvd (pseudo-version D data from algorithm team)
        slopedata: data element to use for slope - either slope or slope2
        slopeminimum: applied as a normal threshold filter: throws out data where slope is less than the minimum
        slopeconst: slope consistentizer: when slope is less than a minimum value, apply some consistency algorithm. several options
            0 : Colin's suggestion to set all values of slope equal to SWORD if any value in the timeseries is less than 0 (or some positive number very close to 0)
           -1 : Don't apply this at all (in which case a slopeminimum value needs to be set)
            S : where S is a positive number. if slope is less than S, then set slope to S. slopeminimum shouldn't be set
        areaoption : how to compute cross-sectional area
            'fd' : finite difference - the old MetroMan approach
            'fh' : fluvial hypsometry - the approach in Confluence
        constrainhw: switch to use the constrained height-width estimates
        flowlaw: name of flow law to use, in FLaPE-Byrd
            MWAPN: Manning's equation, wide-river approximation, area formulation, power law on Manning's n vs hydraulic depth
            AHGD: at-a-station hydraulic geometry for river depth - i.e. a rating curve
            other names (e.g. AHGW, used elsewhere in this notebook) are logged as given
        darkfracmax : value to log, or None to use the default value of 0.4. An explicit 0 is kept as 0
        lossfun : the loss function used in scipy's least_squares objective function: linear (default) soft_l1, or others see docs

    Returns a pandas DataFrame with exactly one row. Column names match the argument names,
    except that slopeconst is stored as 'slopeconsistencycheck' and areaoption as 'areaopt'.
    '''
    
    # Only a missing value (None) selects the default. A plain truthiness test would
    # also overwrite a deliberate darkfracmax=0 with 0.4 and mis-log the experiment.
    if darkfracmax is None:
        darkfracmax=0.4
    
    # Each value is wrapped in a one-element list so the frame has a single row
    df=pd.DataFrame(data={
        'expsid':[expsid],
        'expid':[expid],
        'reachdomain':[reachdomain],
        'swotsource':[swotsource],
        'slopedata':[slopedata],  
        'slopeminimum':[slopeminimum], 
        'slopeconsistencycheck':[slopeconst],
        'areaopt':[areaoption],
        'constrainhw':[constrainhw],
        'flowlaw':[flowlaw],
        'darkfracmax':[darkfracmax],
        'lossfun':[lossfun]
        }
    )
    
    return df

In [21]:
# Folder that receives the experiment log (experimentlog.csv) written at the end of this notebook
ExpData=Path('ExperimentData/')

In [39]:
# Accumulates one single-row DataFrame per experiment; concatenated into the full log further down
dfs=[]

In [40]:
# experiment 1: run over the original 9 calval gages using Hydrochron data
# Each entry is (expid, slopedata, options that differ from the ExpDataFrame defaults).
dfs.extend(
    ExpDataFrame('exps1', expid, 'connecticut+willamette calval gages', 'hydrochron', slopedata, **overrides)
    for expid, slopedata, overrides in [
        ('exp1-1', 'slope', {}),
        ('exp1-2', 'slope2', {}),
        ('exp1-3', 'slope', {'slopeminimum': 1.7e-5, 'slopeconst': -1}),
        ('exp1-4', 'slope', {'slopeminimum': 3.4e-5, 'slopeconst': -1}),
        ('exp1-5', 'slope', {'slopeconst': 1.7e-5}),
        ('exp1-6', 'slope', {'areaoption': 'fh'}),
        ('exp1-7', 'slope', {'areaoption': 'fh', 'constrainhw': True}),
        ('exp1-8', 'slope', {'flowlaw': 'AHGD'}),
        ('exp1-9', 'slope', {'darkfracmax': 0.2}),
        ('exp1-10', 'slope', {'lossfun': 'soft_l1'}),
    ]
)


In [41]:
# experiment 2: run over all USGS gages using Hydrochron data
# Each entry is (expid, options that differ from the ExpDataFrame defaults); all use 'slope'.
dfs.extend(
    ExpDataFrame('exps2', expid, 'usgs gages', 'hydrochron', 'slope', **overrides)
    for expid, overrides in [
        ('exp2-1', {}),
        ('exp2-3', {'slopeminimum': 1.7e-5, 'slopeconst': -1}),
        ('exp2-4', {'slopeminimum': 3.4e-5, 'slopeconst': -1}),
        ('exp2-5', {'slopeconst': 1.7e-5}),
        ('exp2-6', {'areaoption': 'fh'}),
        ('exp2-8', {'flowlaw': 'AHGD'}),
        ('exp2-9', {'darkfracmax': 0.2}),
        ('exp2-10', {'flowlaw': 'AHGW'}),
        ('exp2-11', {'flowlaw': 'AHGD', 'darkfracmax': 0.2}),
        ('exp2-12', {'lossfun': 'soft_l1'}),
    ]
)



In [42]:
# experiment 3: run over 9 calval gages using pseudo-version D data from ADT
dfs.append(
    ExpDataFrame(
        expsid='exps3',
        expid='exp3-1',
        reachdomain='connecticut+willamette calval gages',
        swotsource='ADT-pvd',
        slopedata='slope',
    )
)

In [43]:
# Stack the per-experiment rows into one table; ignore_index renumbers the rows 0..N-1
df=pd.concat(dfs,ignore_index=True)
df

Unnamed: 0,expsid,expid,reachdomain,swotsource,slopedata,slopeminimum,slopeconsistencycheck,areaopt,constrainhw,flowlaw,darkfracmax,lossfun
0,exps1,exp1-1,connecticut+willamette calval gages,hydrochron,slope,,0.0,fd,False,MWAPN,0.4,linear
1,exps1,exp1-2,connecticut+willamette calval gages,hydrochron,slope2,,0.0,fd,False,MWAPN,0.4,linear
2,exps1,exp1-3,connecticut+willamette calval gages,hydrochron,slope,1.7e-05,-1.0,fd,False,MWAPN,0.4,linear
3,exps1,exp1-4,connecticut+willamette calval gages,hydrochron,slope,3.4e-05,-1.0,fd,False,MWAPN,0.4,linear
4,exps1,exp1-5,connecticut+willamette calval gages,hydrochron,slope,,1.7e-05,fd,False,MWAPN,0.4,linear
5,exps1,exp1-6,connecticut+willamette calval gages,hydrochron,slope,,0.0,fh,False,MWAPN,0.4,linear
6,exps1,exp1-7,connecticut+willamette calval gages,hydrochron,slope,,0.0,fh,True,MWAPN,0.4,linear
7,exps1,exp1-8,connecticut+willamette calval gages,hydrochron,slope,,0.0,fd,False,AHGD,0.4,linear
8,exps1,exp1-9,connecticut+willamette calval gages,hydrochron,slope,,0.0,fd,False,MWAPN,0.2,linear
9,exps1,exp1-10,connecticut+willamette calval gages,hydrochron,slope,,0.0,fd,False,MWAPN,0.4,soft_l1


In [44]:
# check for duplicate expid
# (an expid doubles as the experiment's folder name, so every one must be unique)
exps=list(df['expid'])
nexps=len(exps)
print('there are ',nexps,' experiments')
nu=len(set(exps))
if nu != nexps:
    print('Oops! There are ',nu,' unique experiment directories and this number does not equal number of experiments')
    # Name the offending ids so they can be fixed without scanning the table by eye
    dupids=sorted(set(df.loc[df['expid'].duplicated(keep=False),'expid']))
    print('duplicated expid values: ',dupids)


there are  21  experiments


In [45]:
# Make sure the output folder exists first: to_csv does not create missing directories
ExpData.mkdir(parents=True,exist_ok=True)
# The row index is written as the first (unnamed) CSV column
df.to_csv(ExpData.joinpath('experimentlog.csv'))