In [1]:
import datetime
import glob

import pandas as pd
# one-day time offset
dt = datetime.timedelta(days=1)

In [2]:
# glob returns matches in arbitrary, platform-dependent order; sort so the
# regions are always processed (and logged) in the same sequence.
files = sorted(glob.glob('./data/hru_*_stage_4_precip.pcl'))

In [3]:
files

['./data/hru_07_stage_4_precip.pcl',
 './data/hru_08_stage_4_precip.pcl',
 './data/hru_09_stage_4_precip.pcl',
 './data/hru_10L_stage_4_precip.pcl',
 './data/hru_10U_stage_4_precip.pcl',
 './data/hru_11_stage_4_precip.pcl',
 './data/hru_12_stage_4_precip.pcl']

In [4]:
fl = files[0]

In [10]:
def interp_convert(fl):
    """Fill missing days in a pickled daily precipitation table and save it in inches.

    The frame read from ``fl`` is expected to carry a daily ``DatetimeIndex``,
    six leading time-stamp columns (``year``, ``month``, ``day``, ``hour``,
    ``minute``, ``second``) and one column per HRU after those.  Every calendar
    day absent between the earliest and latest index entry is inserted, with
    each HRU value linearly interpolated from the surrounding existing days
    (for a single missing day this is the mean of the day before and the day
    after; runs of several missing days are handled as well).  Values are then
    converted from mm to inches unless the region parsed from the file name is
    ``'12'``, and the result is pickled to
    ``./data/hru_<region>_stage_4_precip_interp_inches.pcl``.

    Parameters
    ----------
    fl : str
        Path of the input pickle, named like ``hru_<region>_stage_4_precip.pcl``.

    Returns
    -------
    None
        The result is written to disk; progress is reported with ``print``.
    """
    import os  # local import: only needed to isolate the file name below

    n_stamp_cols = 6  # leading year..second columns; HRU data starts after them
    stamp_fields = ('year', 'month', 'day', 'hour', 'minute', 'second')
    mm_to_inches = 0.0393701

    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    dat = pd.read_pickle(fl)

    # complete daily calendar spanning the record, and the days it lacks
    full_index = pd.date_range(dat.index.min(), dat.index.max(), freq='D',
                               name=dat.index.name)
    missing = full_index.difference(dat.index)

    if len(missing) == 0:
        print('Dates match, no interpolation needed')
    else:
        print('Dates missing, interpolation needed')
        for stamp in missing:
            print(stamp)

        hru_cols = dat.columns[n_stamp_cols:]

        # Interpolate all HRU columns at once on the full calendar and keep
        # only the rows for the missing days.  Duplicated dates are collapsed
        # to their first occurrence for this step so reindexing cannot fail.
        known = dat.loc[~dat.index.duplicated(keep='first'), hru_cols]
        gap_values = known.reindex(full_index).interpolate(method='linear').loc[missing]

        # time-stamp columns for the inserted rows, derived from the dates
        gap_stamps = pd.DataFrame(
            {field: list(getattr(missing, field)) for field in stamp_fields},
            index=missing)

        new_rows = pd.concat([gap_stamps, gap_values], axis=1)
        dat = pd.concat([dat, new_rows])

    dat = dat.sort_index()

    # convert the values to inches
    # use the base name so underscores in directory names cannot shift the field
    region = os.path.basename(fl).split('_')[1]
    if region != '12': # 12 is already in inches...
        print('Converting mm to inches')
        hru_cols = dat.columns[n_stamp_cols:]
        dat[hru_cols] = dat[hru_cols] * mm_to_inches

    dat.to_pickle('./data/hru_%s_stage_4_precip_interp_inches.pcl'%region)
    print('Region: %s complete'%region)

In [11]:
# Gap-fill and unit-convert every path currently held in `files`;
# interp_convert writes one output pickle per region under ./data.
for fl in files:
    interp_convert(fl)

Dates missing, interpolation needed
2008-11-02 00:00:00
2012-10-01 00:00:00
Converting mm to inches
Region: 07 complete
Dates missing, interpolation needed
2008-11-02 00:00:00
2012-10-01 00:00:00
Converting mm to inches
Region: 08 complete
Dates missing, interpolation needed
2008-11-02 00:00:00
2012-10-01 00:00:00
Converting mm to inches
Region: 09 complete
Dates missing, interpolation needed
2008-11-02 00:00:00
2012-10-01 00:00:00
Converting mm to inches
Region: 10L complete
Dates missing, interpolation needed
2008-11-02 00:00:00
2012-10-01 00:00:00
Converting mm to inches
Region: 10U complete
Dates missing, interpolation needed
2008-11-02 00:00:00
2012-10-01 00:00:00
Converting mm to inches
Region: 11 complete
Dates missing, interpolation needed
2008-11-02 00:00:00
2012-10-01 00:00:00
Region: 12 complete


In [12]:
# export the data frames as new forcing files:
# (sorted, because glob returns matches in arbitrary order)
files = sorted(glob.glob('./data/hru_*_stage_4_precip_interp_inches.pcl'))

In [22]:
def export_cbh(fl):
    """Dump the pickled frame at ``fl`` as a space-delimited ``.cbh`` text file.

    The region tag is taken from the second underscore-separated field of
    ``fl``.  The frame is written to
    ``./data/hru_<region>_stage_4_precip_interp_inches.cbh`` without the index
    and without a header row; floating-point values use four decimals.
    """
    region_tag = fl.split('_')[1]
    print('Processing Region: %s'%region_tag)

    target_path = './data/hru_%s_stage_4_precip_interp_inches.cbh'%region_tag
    csv_options = dict(sep=' ', index=False, header=False, float_format='%.4f')

    frame = pd.read_pickle(fl)
    frame.to_csv(target_path, **csv_options)

In [23]:
# Write a .cbh text file for every path currently held in `files`.
for fl in files:
    export_cbh(fl)

Processing Region: 07
Processing Region: 08
Processing Region: 09
Processing Region: 10L
Processing Region: 10U
Processing Region: 11
Processing Region: 12


In [12]:
dat.head()

Unnamed: 0_level_0,year,month,day,hour,minute,second,hru_1,hru_2,hru_3,hru_4,...,hru_8196,hru_8197,hru_8198,hru_8199,hru_8200,hru_8201,hru_8202,hru_8203,hru_8204,hru_8205
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2004-12-20,2004,12,20,0,0,0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2004-12-21,2004,12,21,0,0,0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2004-12-22,2004,12,22,0,0,0,0.0,0.030779,0.0,0.160426,...,8.34631,8.708111,9.731642,9.566482,2.483852,2.367854,6.98103,5.275792,2.472068,2.887732
2004-12-23,2004,12,23,0,0,0,0.0,4.291635,3.337007,4.584321,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2004-12-24,2004,12,24,0,0,0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [11]:
dat.tail()

Unnamed: 0_level_0,year,month,day,hour,minute,second,hru_1,hru_2,hru_3,hru_4,...,hru_8196,hru_8197,hru_8198,hru_8199,hru_8200,hru_8201,hru_8202,hru_8203,hru_8204,hru_8205
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-01-03,2015,1,3,0,0,0,10.524005,15.240544,14.692121,15.025848,...,20.794635,20.938213,20.242687,19.595702,13.147538,13.659655,12.358628,12.419685,17.806062,15.797369
2015-01-04,2015,1,4,0,0,0,2.845967,0.47794,0.596432,0.372863,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2015-01-05,2015,1,5,0,0,0,0.67732,0.118776,0.14007,0.121935,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2015-01-06,2015,1,6,0,0,0,3.652099,2.371195,2.35072,2.391178,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2015-01-07,2015,1,7,0,0,0,0.75,0.0,0.005035,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [22]:
before

Unnamed: 0_level_0,year,month,day,hour,minute,second,hru_1,hru_2,hru_3,hru_4,...,hru_8196,hru_8197,hru_8198,hru_8199,hru_8200,hru_8201,hru_8202,hru_8203,hru_8204,hru_8205
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2012-09-30,2012,9,30,0,0,0,0.0,0.0,0.0,0.0,...,26.831316,25.40707,11.75609,11.214021,2.766403,2.800153,4.946539,3.362403,3.806132,4.025229


In [21]:
dat.loc[]

'hru_1'

In [8]:
missing

['2012-10-01 00:00:00', '2008-11-02 00:00:00']

In [79]:
# NOTE(review): n and m are not defined in any visible cell -- presumably the
# lengths of `dates` and `dsDates`; confirm before re-running this cell.
if n == m:
    print('Lengths match, no interpolation needed')
elif n != m:
    print('Lengths do not match, interpolation needed')
    

    # Abandoned attempt at collecting the missing dates, left commented out.
    # NOTE: `str(date) in dsDates == False` is a chained comparison, i.e.
    # `(str(date) in dsDates) and (dsDates == False)`; a list never equals
    # False, so the condition can never be true and nothing would be appended.
    #missing = []    
    #for date in dates: # iterate through dates in the data set
    #    if str(date) in dsDates == False:
    #        missing.append(str(date))

Lengths do not match, interpolation needed


In [84]:
missing = list(set(dates) - set(dsDates))

In [87]:
d = missing[0]

In [90]:
# `d` looks like 'YYYY-MM-DD HH:MM:SS': take the token before the last space
# and break it into its dash-separated pieces.
date_fields = d.split(' ')[-2].split('-')
year = int(date_fields[0])
month = int(date_fields[1])
day = int(date_fields[2])
datetime.date(year, month, day)

datetime.date(2012, 10, 1)

In [91]:
missing[0]

'2012-10-01 00:00:00'

In [85]:
# datetime.date() takes integer (year, month, day) arguments, so passing the
# 'YYYY-MM-DD HH:MM:SS' string directly raises the TypeError shown below.
datetime.date(missing[0])

TypeError: an integer is required (got type str)

In [74]:
len(dates)

3671

In [75]:
dsDates

['2004-12-20 00:00:00',
 '2004-12-21 00:00:00',
 '2004-12-22 00:00:00',
 '2004-12-23 00:00:00',
 '2004-12-24 00:00:00',
 '2004-12-25 00:00:00',
 '2004-12-26 00:00:00',
 '2004-12-27 00:00:00',
 '2004-12-28 00:00:00',
 '2004-12-29 00:00:00',
 '2004-12-30 00:00:00',
 '2004-12-31 00:00:00',
 '2005-01-01 00:00:00',
 '2005-01-02 00:00:00',
 '2005-01-03 00:00:00',
 '2005-01-04 00:00:00',
 '2005-01-05 00:00:00',
 '2005-01-06 00:00:00',
 '2005-01-07 00:00:00',
 '2005-01-08 00:00:00',
 '2005-01-09 00:00:00',
 '2005-01-10 00:00:00',
 '2005-01-11 00:00:00',
 '2005-01-12 00:00:00',
 '2005-01-13 00:00:00',
 '2005-01-14 00:00:00',
 '2005-01-15 00:00:00',
 '2005-01-16 00:00:00',
 '2005-01-17 00:00:00',
 '2005-01-18 00:00:00',
 '2005-01-19 00:00:00',
 '2005-01-20 00:00:00',
 '2005-01-21 00:00:00',
 '2005-01-22 00:00:00',
 '2005-01-23 00:00:00',
 '2005-01-24 00:00:00',
 '2005-01-25 00:00:00',
 '2005-01-26 00:00:00',
 '2005-01-27 00:00:00',
 '2005-01-28 00:00:00',
 '2005-01-29 00:00:00',
 '2005-01-30 00:

In [76]:
dates

[Timestamp('2004-12-20 00:00:00', freq='D'),
 Timestamp('2004-12-21 00:00:00', freq='D'),
 Timestamp('2004-12-22 00:00:00', freq='D'),
 Timestamp('2004-12-23 00:00:00', freq='D'),
 Timestamp('2004-12-24 00:00:00', freq='D'),
 Timestamp('2004-12-25 00:00:00', freq='D'),
 Timestamp('2004-12-26 00:00:00', freq='D'),
 Timestamp('2004-12-27 00:00:00', freq='D'),
 Timestamp('2004-12-28 00:00:00', freq='D'),
 Timestamp('2004-12-29 00:00:00', freq='D'),
 Timestamp('2004-12-30 00:00:00', freq='D'),
 Timestamp('2004-12-31 00:00:00', freq='D'),
 Timestamp('2005-01-01 00:00:00', freq='D'),
 Timestamp('2005-01-02 00:00:00', freq='D'),
 Timestamp('2005-01-03 00:00:00', freq='D'),
 Timestamp('2005-01-04 00:00:00', freq='D'),
 Timestamp('2005-01-05 00:00:00', freq='D'),
 Timestamp('2005-01-06 00:00:00', freq='D'),
 Timestamp('2005-01-07 00:00:00', freq='D'),
 Timestamp('2005-01-08 00:00:00', freq='D'),
 Timestamp('2005-01-09 00:00:00', freq='D'),
 Timestamp('2005-01-10 00:00:00', freq='D'),
 Timestamp

In [60]:
dates=dsDates

In [55]:
str(date) in dsDates

True

In [50]:
str(dates[0])

'2004-12-20 00:00:00'

In [44]:
str(date)

'2015-01-07 00:00:00'

In [4]:
dat.head()

Unnamed: 0_level_0,year,month,day,hour,minute,second,hru_1,hru_2,hru_3,hru_4,...,hru_8196,hru_8197,hru_8198,hru_8199,hru_8200,hru_8201,hru_8202,hru_8203,hru_8204,hru_8205
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2004-12-20,2004,12,20,0,0,0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2004-12-21,2004,12,21,0,0,0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2004-12-22,2004,12,22,0,0,0,0.0,0.030779,0.0,0.160426,...,8.34631,8.708111,9.731642,9.566482,2.483852,2.367854,6.98103,5.275792,2.472068,2.887732
2004-12-23,2004,12,23,0,0,0,0.0,4.291635,3.337007,4.584321,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2004-12-24,2004,12,24,0,0,0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [5]:
dat.tail()

Unnamed: 0_level_0,year,month,day,hour,minute,second,hru_1,hru_2,hru_3,hru_4,...,hru_8196,hru_8197,hru_8198,hru_8199,hru_8200,hru_8201,hru_8202,hru_8203,hru_8204,hru_8205
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-01-03,2015,1,3,0,0,0,10.524005,15.240544,14.692121,15.025848,...,20.794635,20.938213,20.242687,19.595702,13.147538,13.659655,12.358628,12.419685,17.806062,15.797369
2015-01-04,2015,1,4,0,0,0,2.845967,0.47794,0.596432,0.372863,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2015-01-05,2015,1,5,0,0,0,0.67732,0.118776,0.14007,0.121935,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2015-01-06,2015,1,6,0,0,0,3.652099,2.371195,2.35072,2.391178,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2015-01-07,2015,1,7,0,0,0,0.75,0.0,0.005035,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
