## Script to calculate the long-term and shorter-term runoff ratio from the VIC model output

In [2]:
import numpy as np
import pandas as pd
from IPython import parallel as pr

In [3]:
c = pr.Client() # start the parallel environment

In [4]:
c.ids # check the number of engines available

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]

In [5]:
view = c.load_balanced_view() # load-balanced view; no `targets` argument is given, so it is not limited to 4 engines (c.ids lists 12)

In [82]:
%%px

# load the working environment on the engines (this cell runs under the %%px magic)

import pandas as pd
import numpy as np
# time stamps later used as the row index of every flux / forcing table
dates = pd.read_pickle('/Volumes/data/Theo/projects/Budyko_vic/timecode.pcl')
# column names assigned to the tab-separated flux files
fluxes_columns = ['y','m','d','ET','R','BF','sm1','sm2','sm3','SWE','Cs','Qs','Ql','Qg','NR','PEText','PETtrc','PETsrc']
# column names assigned to the space-separated forcing files
forcing_columns = ['P','Tmax','Tmin','W']
# bring in the data frame of file paths
files = pd.read_pickle('/Volumes/data/Theo/projects/Budyko_vic/forcing_fluxes_filenames_lat_lon_index.df') 

cells = pd.DataFrame()
data = np.load('/Volumes/data/Theo/projects/Budyko_vic/data/VIC_cells_overlap.npz') # load the VIC cell / basin overlap archive

# copy each array stored in the archive into a column of `cells`
for key in data.keys():
    cells[key] = data[key]

# temporal bounds of the Newman et al. [2015] data set
strt = '1980-1-1'
nd = '2011-12-23'    
    
# define a function to read in the flux file and return the sum of the ET
def fluxprocET(fl):
    """Sum the 'ET' column of one flux file over the analysis window.

    fl is the path of a tab-separated flux file; its columns are named
    from the global `fluxes_columns` and its rows are indexed by the
    global `dates`.  Only rows between the global `strt` and `nd` labels
    contribute to the returned sum.
    """
    table = pd.read_table(fl, sep='\t', names=fluxes_columns)
    table.index = pd.DatetimeIndex(dates)

    # restrict to the analysis window before summing
    et_window = table.loc[strt:nd, 'ET']
    return et_window.sum()

# define a function to read in the flux file and return the sum of the PET
def fluxprocPET(fl):
    """Sum the 'PEText' column of one flux file over the analysis window.

    fl is the path of a tab-separated flux file; its columns are named
    from the global `fluxes_columns` and its rows are indexed by the
    global `dates`.  Only rows between the global `strt` and `nd` labels
    contribute to the returned sum.
    """
    table = pd.read_table(fl, sep='\t', names=fluxes_columns)
    table.index = pd.DatetimeIndex(dates)

    # restrict to the analysis window before summing
    pet_window = table.loc[strt:nd, 'PEText']
    return pet_window.sum()

def forceproc(fr):
    """Sum the 'P' column of one forcing file over the analysis window.

    fr is the path of a space-separated forcing file; its columns are
    named from the global `forcing_columns` and its rows are indexed by
    the global `dates`.  Only rows between `strt` and `nd` are summed.
    """
    forcing = pd.read_table(fr, sep=' ', names=forcing_columns)
    forcing.index = pd.DatetimeIndex(dates)

    p_window = forcing.loc[strt:nd, 'P']
    return p_window.sum()

In [6]:
# start / end labels of the analysis window for the locally defined helpers
# (same values as assigned in the %%px cell)
strt = '1980-1-1'
nd = '2011-12-23'    
    
# define a function to read in the flux file and return the sum of the discharge
def fluxproc(x):
    """Sum total discharge (BF + R) of one flux file over the analysis window.

    x is any object exposing a `flux` attribute holding the path of a
    tab-separated flux file.  Columns are named from the global
    `fluxes_columns`, rows are indexed by the global `dates`, and only
    rows between the global `strt` and `nd` labels are summed.
    """
    table = pd.read_table(x.flux, sep='\t', names=fluxes_columns)
    table.index = pd.DatetimeIndex(dates)

    # total discharge [mm] = baseflow + runoff
    total_flow = table['BF'] + table['R']
    return total_flow.loc[strt:nd].sum()

def forceproc(fr):
    """Return the summed 'P' column of a forcing file between `strt` and `nd`.

    fr is the path of a space-separated forcing file.  Column names come
    from the global `forcing_columns`; the row index is built from the
    global `dates`.
    """
    frame = pd.read_table(fr, sep=' ', names=forcing_columns)
    frame.index = pd.DatetimeIndex(dates)

    precip = frame.loc[strt:nd, 'P']
    total = precip.sum()
    return total

In [73]:
# local (non-engine) copies of the time stamps and column names used when
# parsing the flux / forcing files
dates = pd.read_pickle('/Volumes/data/Theo/projects/Budyko_vic/timecode.pcl')
fluxes_columns = ['y','m','d','ET','R','BF','sm1','sm2','sm3','SWE','Cs','Qs','Ql','Qg','NR','PEText','PETtrc','PETsrc']
forcing_columns = ['P','Tmax','Tmin','W']

In [74]:
files = pd.read_pickle('./forcing_fluxes_filenames_lat_lon_index.df') # bring in the data frame of file paths

In [185]:
cells = pd.DataFrame()
data = np.load('./data/VIC_cells_overlap.npz') # load the vic cells and basins

# copy each array stored in the archive into a column of `cells`

for key in data.keys():
    cells[key] = data[key]

In [186]:
cells.head()

Unnamed: 0,basins,cells,percents
0,1013500,220561,0.03
1,1013500,221439,0.04
2,1013500,221442,0.07
3,1013500,221443,0.67
4,1013500,221444,0.74


In [187]:
basins = np.unique(cells.basins) # create a list of unique basins

In [188]:
len(basins)

677

In [189]:
# define the long term ET / PET function
def ltETPET(basin):
    """Return the weighted long-term PET and ET totals for one basin.

    basin : value matched against the `basins` column of the global
            `cells` table.

    Returns the tuple (basin, PET, ET).  PET and ET are sums over the
    basin's cells of the per-cell totals (from `fluxprocPET` and
    `fluxprocET`) multiplied by cellperc / number_of_cells.

    Relies on the globals `cells`, `files`, `fluxprocET` and
    `fluxprocPET`.  Raises ValueError when the number of flux files found
    in `files` differs from the number of cells listed for the basin.
    """
    in_basin = cells.basins == basin  # row mask, computed once
    viccells = cells.loc[in_basin, 'cells']  # pull the cell index numbers
    vicpercs = cells.loc[in_basin, 'percents']  # pull the cell percents

    # collect the flux file path(s) registered for every cell of the basin
    fluxfiles = []
    for cell in viccells:
        # .tolist() replaces Series.as_matrix(), which newer pandas no longer has
        fluxfiles.extend(files.loc[files.indexer == cell, 'flux'].tolist())

    # the list is paired with the cells by position, so the lengths must agree
    if len(fluxfiles) != len(viccells):
        raise ValueError('basin %s: found %d flux files for %d cells'
                         % (basin, len(fluxfiles), len(viccells)))

    # create a data frame for the basin, one row per cell
    temp = pd.DataFrame({'cellid': viccells, 'cellperc': vicpercs, 'flux': fluxfiles})

    temp['ET'] = temp.flux.map(fluxprocET)  # per-cell ET total
    temp['PET'] = temp.flux.map(fluxprocPET)  # per-cell PET total

    # assign the weights that would be used for normal averaging
    temp['aveweight'] = 1./len(temp)

    # compute the final weight for each cell based on the number of cells and
    # the percent of the cell that falls within the basin
    # NOTE(review): these weights sum to mean(cellperc), not 1, so the result
    # is not a normalised weighted mean -- confirm this is intended.
    temp['weight'] = temp.aveweight*temp.cellperc

    temp['PET2'] = temp.PET*temp.weight  # multiply PET by the final weight
    temp['ET2'] = temp.ET*temp.weight  # multiply ET by the final weight

    PET = temp.PET2.sum()  # compute the sum of the weighted PET
    ET = temp.ET2.sum()  # compute the sum of the weighted ET

    return basin, PET, ET

In [190]:
res = view.map(ltETPET,basins) # map the function to all the basins over the engines

In [253]:
res.ready()

True

In [252]:
res.progress

677

In [246]:
res.elapsed/60.

11.302956733333334

In [254]:
# split the per-basin (basin, PET, ET) tuples returned by ltETPET into three
# parallel tuples; note that this rebinds `basins`
basins,LTPET,LTET = zip(*res.result)

In [255]:
LTPET

(16310.240864646468,
 14599.232412903226,
 19079.853981294968,
 14516.755699999998,
 15418.860430952382,
 11937.579543478261,
 9634.3050076923064,
 12048.786946666669,
 11414.453023076923,
 13208.413799999998,
 11138.378966666667,
 5322.4243999999999,
 8844.1114333333317,
 8554.1605625000011,
 12210.392846153847,
 11700.506978571429,
 11235.866605882355,
 6146.8797999999997,
 8193.9769125000021,
 19090.750104285707,
 6592.1680999999999,
 7628.9788249999992,
 11686.022707142858,
 10517.342887499999,
 12021.3208125,
 6255.5627833333328,
 3512.8069999999993,
 9868.0293124999989,
 15250.851200000001,
 9709.8149000000012,
 5174.9897833333334,
 7978.4649333333318,
 10722.855228571429,
 14886.505266666663,
 9740.3252600000014,
 8607.4292571428559,
 15692.706935000002,
 1930.8622666666663,
 9244.2834307692301,
 15782.165453333335,
 11257.862666666666,
 10521.779261538462,
 10497.908536363637,
 1468.1486666666665,
 3667.5373,
 10825.763644444445,
 4233.0046666666667,
 12527.065041666667,
 15159

In [256]:
# write the long-term ET / PET values and their basin ids to a compressed .npz archive
np.savez_compressed('/Volumes/data/Theo/projects/Budyko_vic/data/ground_truth_ltETPET.npz',LTET=LTET,LTPET=LTPET,basin=basins)