### Make a CSV file of the SW observations with variances
#### Adjust initial values and weights based on the COMMENT field (further weight adjustments are done in the pyemu notebook)
### Only needs to be run if new SW observations are added.

In [1]:
import os
import pandas as pd
import numpy as np

# Output directory for the combined observation CSV, and the sub-directory
# beneath it that holds the baseflow-separation result tables read below.
outpath = r'E:\UMID_Data\0188_offline\modeldev\OBS'
obspath = r'E:\UMID_Data\0188_offline\modeldev\OBS\Baseflow_Separation_Results'

# Input tables: annual and monthly observation values.
file1 = 'ANNUAL_RO_BF_OBS_TABLE.csv'
# This is ALL the years, not just the subset used in PEST
file2 = 'MONTHLY_RO_BF_OBS_TABLE.csv'


In [2]:
# Load the annual (file1) and monthly (file2) observation tables from obspath.
anndat, mondat = (
    pd.read_csv(os.path.join(obspath, fname)) for fname in (file1, file2)
)

In [3]:
# Preview the first five rows of the annual table.
anndat.head(n=5)

Unnamed: 0.1,Unnamed: 0,IDX2,Comment,OBSVAL,OBS_group,obs_stdev
0,428,bf_10_7030392_yr2000,"Used original val, no upstream site",7.572857,BF_annual,0.241917
1,429,bf_10_7030392_yr2001,"Used original val, no upstream site",9.458571,BF_annual,1.369944
2,430,bf_10_7030392_yr2002,"Used original val, no upstream site",14.927143,BF_annual,1.438596
3,431,bf_10_7030392_yr2003,"Used original val, no upstream site",13.767143,BF_annual,1.017869
4,432,bf_10_7030392_yr2004,"Used original val, no upstream site",14.84,BF_annual,1.140935


In [4]:
## Variance is the square of the standard deviation:

In [7]:
# Stack the annual and monthly observation tables into a single frame.
# pd.concat is used instead of DataFrame.append, which was deprecated in
# pandas 1.4 and removed in pandas 2.0; the result is the same.
# reset_index() (without drop=True) renumbers the rows 0..N-1 and keeps each
# row's previous label in a new 'index' column.
alldat = pd.concat([anndat, mondat]).reset_index()
alldat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 27734 entries, 0 to 27733
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   index       27734 non-null  int64  
 1   Unnamed: 0  27734 non-null  int64  
 2   IDX2        27734 non-null  object 
 3   Comment     27734 non-null  object 
 4   OBSVAL      27734 non-null  float64
 5   OBS_group   27734 non-null  object 
 6   obs_stdev   27734 non-null  float64
dtypes: float64(2), int64(2), object(3)
memory usage: 1.5+ MB


In [14]:
# First get rid of -9999 values AND losing reaches.
# Only strictly positive OBSVAL rows are kept, so rows where OBSVAL is
# exactly 0 or NaN are dropped as well.
# .copy() makes the filtered frame an independent object, so the column
# assignments that follow do not raise pandas' SettingWithCopyWarning.
alldat = alldat.loc[alldat.OBSVAL > 0].copy()

# Then adjust st dev if it is zero: use half of the observed value instead,
# so that the variance derived from it is not zero.
zero_stdev = alldat['obs_stdev'] == 0
alldat.loc[zero_stdev, 'obs_stdev'] = alldat.loc[zero_stdev, 'OBSVAL'] / 2

In [15]:
# Variance is the square of the standard deviation.
alldat['Variance'] = alldat['obs_stdev'].pow(2)

In [16]:
# Preview the first five rows, now including the Variance column.
alldat.head(n=5)

Unnamed: 0.1,index,Unnamed: 0,IDX2,Comment,OBSVAL,OBS_group,obs_stdev,Variance
0,0,428,bf_10_7030392_yr2000,"Used original val, no upstream site",7.572857,BF_annual,0.241917,0.058524
1,1,429,bf_10_7030392_yr2001,"Used original val, no upstream site",9.458571,BF_annual,1.369944,1.876748
2,2,430,bf_10_7030392_yr2002,"Used original val, no upstream site",14.927143,BF_annual,1.438596,2.069557
3,3,431,bf_10_7030392_yr2003,"Used original val, no upstream site",13.767143,BF_annual,1.017869,1.036057
4,4,432,bf_10_7030392_yr2004,"Used original val, no upstream site",14.84,BF_annual,1.140935,1.301733


In [17]:
# Summary statistics (count, mean, std, min, quartiles, max) of every numeric
# column, computed separately for each observation group.
group_stats = alldat.groupby('OBS_group').describe()

# Flatten to long form: one row per (column name, statistic, group).
# After reset_index() the column name is in 'level_0', the statistic in
# 'level_1', the group in 'OBS_group' and the value in column 0.
tmp = group_stats.unstack(1).reset_index()

# Pull out the rows describing the observed values and the variances.
is_obsval = tmp['level_0'] == 'OBSVAL'
is_variance = tmp['level_0'] == 'Variance'
obsdat = tmp.loc[is_obsval]
vardat = tmp.loc[is_variance]

# Display the variance statistics as a table: one row per group, one column
# per statistic.
vardat.pivot(index='OBS_group', columns='level_1', values=0)

level_1,25%,50%,75%,count,max,mean,min,std
OBS_group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
BF_annual,0.671261,1.621564,3.772131,990.0,317.774214,3.682917,0.01234762,11.618441
BF_monthly,0.000344,0.004362,0.037596,12584.0,87.594529,0.090512,5.617296e-35,0.912041
RO_annual,0.669649,1.621069,3.77064,980.0,317.792757,3.691487,0.0123619,11.675181
RO_monthly,0.000324,0.004062,0.036135,12270.0,87.623262,0.089898,5.617296e-35,0.922844


In [18]:
# Columns written to the output file: observation name, comment, value,
# group and variance (obs_stdev and the helper index columns are left out).
out_columns = ['IDX2', 'Comment', 'OBSVAL', 'OBS_group', 'Variance']
alldat_out = alldat.loc[:, out_columns]

In [20]:
# Write the observations with their variances to outpath.
# index=True is the to_csv default: the row index is written as an unnamed
# first column, so the file will show an 'Unnamed: 0' column when read back
# without index_col=0.
outfile = 'OBSERVATIONS_DATA_BF_RO_wVARIANCES.csv'
out_csv = os.path.join(outpath, outfile)
alldat_out.to_csv(out_csv, index=True)