## Script to calculate the long-term and shorter-term runoff ratio from the VIC model

In [1]:
import numpy as np
import pandas as pd
from IPython import parallel as pr

In [2]:
c = pr.Client() # start the parallel environment

In [3]:
c.ids # check the number of engines available

[0, 1, 2, 3, 4, 5, 6, 7]

In [12]:
view = c.load_balanced_view([0,1,2,3]) # access the first 4 engines

In [13]:
view.targets # check what you are mapping to

[0, 1, 2, 3]

In [14]:
%%px # load the working environment
import pandas as pd
import numpy as np
# time stamps that forceproc assigns as the row index of every forcing table
dates = pd.read_pickle('/Volumes/data/Theo/projects/Budyko_vic/timecode.pcl')
# column names for the tab-separated flux files and the space-separated forcing files
fluxes_columns = ['y','m','d','ET','R','BF','sm1','sm2','sm3','SWE','Cs','Qs','Ql','Qg','NR','PEText','PETtrc','PETsrc']
forcing_columns = ['P','Tmax','Tmin','W']
# bring in the data frame of file paths
files = pd.read_pickle('/Volumes/data/Theo/projects/Budyko_vic/forcing_fluxes_filenames_lat_lon_index.df')

# build a data frame with one column per array stored in the cell/basin overlap archive
cells = pd.DataFrame()
# np.load keeps the .npz file open until it is closed explicitly; the context
# manager releases the handle as soon as every array has been copied out
with np.load('/Volumes/data/Theo/projects/Budyko_vic/data/VIC_cells_overlap.npz') as data:
    for key in data.files:
        cells[key] = data[key]

# temporal bounds of the Newman et al. [2015] data set
strt = '1980-1-1'
nd = '2011-12-23'
    
#def fluxproc(fl):
#    flux = pd.read_table(fl, sep='\t', names = fluxes_columns) # read fluxes
#    flux.index = pd.DatetimeIndex(dates)
#    
#    flux['discharge'] = flux.BF+flux.R # compute the total discharge [mm]
#    
#    return flux.loc[strt:nd,'discharge'].sum()

def forceproc(fr):
    """Return the summed 'P' column of one forcing file between strt and nd.

    fr is the path of a space-delimited forcing file. Its columns are named
    from the module-level ``forcing_columns`` and its rows are indexed by the
    module-level ``dates`` before the ``strt``..``nd`` window is selected.
    """
    frame = pd.read_csv(fr, sep=' ', names=forcing_columns)
    frame.index = pd.DatetimeIndex(dates)

    precip = frame['P']
    return precip.loc[strt:nd].sum()

In [15]:
# temporal bounds of the Newman et al. [2015] data set; forceproc slices
# the forcing table with these labels before summing
strt = '1980-1-1'
nd = '2011-12-23'
    
#def fluxproc(fl):
#    flux = pd.read_table(fl, sep='\t', names = fluxes_columns) # read fluxes
#    flux.index = pd.DatetimeIndex(dates)
#    
#    flux['discharge'] = flux.BF+flux.R # compute the total discharge [mm]
#    
#    return flux.loc[strt:nd,'discharge'].sum()

def forceproc(fr):
    """Return the summed 'P' column of one forcing file between strt and nd.

    fr is the path of a space-delimited forcing file. Its columns are named
    from the module-level ``forcing_columns`` and its rows are indexed by the
    module-level ``dates`` before the ``strt``..``nd`` window is selected.
    """
    frame = pd.read_csv(fr, sep=' ', names=forcing_columns)
    frame.index = pd.DatetimeIndex(dates)

    precip = frame['P']
    return precip.loc[strt:nd].sum()

In [16]:
# time stamps that forceproc assigns as the row index of every forcing table
dates = pd.read_pickle('/Volumes/data/Theo/projects/Budyko_vic/timecode.pcl')
# column names for the flux files (only referenced by the commented-out fluxproc)
fluxes_columns = ['y','m','d','ET','R','BF','sm1','sm2','sm3','SWE','Cs','Qs','Ql','Qg','NR','PEText','PETtrc','PETsrc']
# column names applied to the forcing files when forceproc reads them
forcing_columns = ['P','Tmax','Tmin','W']

In [17]:
files = pd.read_pickle('./forcing_fluxes_filenames_lat_lon_index.df') # bring in the data frame of file paths

In [18]:
cells = pd.DataFrame()

# load the vic cells and basins; np.load keeps the .npz file open until it is
# closed, so the context manager releases the handle once the arrays are copied
with np.load('./data/VIC_cells_overlap.npz') as data:
    # copy every array stored in the archive into its own column
    for key in data.files:
        cells[key] = data[key]

In [19]:
cells.head()

Unnamed: 0,basins,cells,percents
0,1013500,220561,0.03
1,1013500,221439,0.04
2,1013500,221442,0.07
3,1013500,221443,0.67
4,1013500,221444,0.74


In [20]:
basins = np.unique(cells.basins) # create a list of unique basins

In [21]:
len(basins)

677

In [22]:
# define a long term P function
def ltp(basin):
    """Compute the weighted long-term precipitation total for one basin.

    For every row of the module-level ``cells`` table whose ``basins`` value
    equals ``basin``, the matching forcing file path is looked up in the
    module-level ``files`` table (``indexer`` column), ``forceproc`` is applied
    to that path, and the results are combined with the weight
    ``(1 / number_of_cells) * percents``.

    Parameters
    ----------
    basin : value compared against ``cells.basins``

    Returns
    -------
    tuple
        ``(basin, P)`` where ``P`` is the sum of the weighted per-cell values.

    Raises
    ------
    ZeroDivisionError
        If no row of ``cells`` belongs to ``basin``.
    """
    in_basin = cells.basins == basin  # evaluate the row mask once, reuse it twice
    viccells = cells.loc[in_basin, 'cells']  # pull the cell index numbers
    vicpercs = cells.loc[in_basin, 'percents']  # pull the cell percents

    # collect the forcing file path(s) registered for each cell; .tolist() is
    # used because Series.as_matrix() no longer exists in current pandas
    forcefiles = []
    for cell in viccells:
        forcefiles.extend(files.loc[files.indexer == cell, 'forcing'].tolist())

    # create a data frame for each basin
    temp = pd.DataFrame({'cellid': viccells, 'cellperc': vicpercs, 'force': forcefiles})

    temp['P'] = temp.force.map(forceproc)

    # assign column the weights that would be used for normal averaging
    temp['aveweight'] = 1. / len(temp)

    # compute the final weight for each cell based on the number of cells and the
    # percent of the cell that falls within the basin
    # NOTE(review): these weights sum to mean(cellperc), not to 1, so the result
    # is sum(P * perc) / n rather than sum(P * perc) / sum(perc) -- confirm that
    # the un-normalised form is what is intended
    temp['weight'] = temp.aveweight * temp.cellperc

    temp['P2'] = temp.P * temp.weight  # multiply P by the final weight

    P = temp.P2.sum()  # compute the sum of the weighted precipitation

    return basin, P

In [23]:
res = view.map(ltp,basins) # map the function to all the basins over the engines

In [41]:
res.ready()

True

In [40]:
res.progress

677

In [43]:
res.elapsed/60.

7.1706211500000006

In [44]:
basins,LTP = zip(*res.result)

In [46]:
np.savez_compressed('/Volumes/data/Theo/projects/Budyko_vic/data/ground_truth_ltp.npz',LTP=LTP,basin=basins)