In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
import glob as glob
from netCDF4 import Dataset
import rasterio as rs

def get_year(index):
    '''Return the ``year`` attribute of a date-like object.'''
    year = index.year
    return year

def get_month(index):
    '''Return the ``month`` attribute of a date-like object.'''
    month = index.month
    return month

def get_day(index):
    '''Return the ``day`` attribute of a date-like object.'''
    day = index.day
    return day

In [2]:
## generate the time aspect of the data
# the Livneh data start at 1915 and end 2015

# one entry per day of the record
dates = pd.date_range(start = '1915-01-01', end = '2015-12-31', freq = 'D')

# one entry per month of the record, stamped at each month end.
# The end bound must be the last day of December: a bare '2015-12' parses as
# 2015-12-01, which falls before that month's end and drops December 2015.
# An explicit MonthEnd offset is used instead of the 'M' alias, which newer
# pandas releases renamed.
months = pd.date_range(start = '1915-01-01', end = '2015-12-31', freq = pd.offsets.MonthEnd())

In [4]:
# bring in the contribution file:
# NOTE: unpickling can execute arbitrary code; only load this file from a trusted source.
dat = pd.read_pickle('./data/livneh_huc_02_cell_contrib.pcl')
dat = dat.sort_values('hru_id_reg', ascending=True) # sort by regional hru ID

# I think the way to do this is to look through each netCDF file and extract each HRU before moving on to the next.

# prepare the output data frame: one row per day, six time columns, then one column per HRU
time_index = pd.DatetimeIndex(dates, name='datetime')
time_cols = pd.DataFrame(index=time_index)
time_cols['year'] = time_index.map(get_year)
time_cols['month'] = time_index.map(get_month)
time_cols['day'] = time_index.map(get_day)
time_cols['hour'] = 0
time_cols['minute'] = 0
time_cols['second'] = 0

# create space for each HRU in a single step. The sentinel is a float so the
# columns can later receive floating point results without a dtype change, and
# building the block at once avoids inserting columns one by one.
hru_names = ['hru_%s'%hru for hru in dat.hru_id_reg.unique()]
hru_cols = pd.DataFrame(-999., index=time_index, columns=hru_names)

# the HRU columns must start at position 6: later cells address them by position
out = pd.concat([time_cols, hru_cols], axis=1)

# one independent copy per output variable
P = out.copy()
Tmin = out.copy()
Tmax = out.copy()

with rs.open('./data/livneh_idx.tiff') as ds:
    idxRast = ds.read(1) # first band of the index raster

n,m = idxRast.shape
idxRast = idxRast.reshape(n*m) # flatten to 1-D so cell IDs can be searched as a vector

In [5]:
# grab all the livneh files: one row per path matched by the glob pattern
processing = pd.DataFrame().assign(
    file=glob.glob('/home/tbarnhart/projects/NHM_precipitation/data/livneh2016/*.nc')
)

In [None]:
def process_day(df):
    '''Unfinished placeholder: the definition was left without a body.

    Inputs:
    df - unused

    Raises:
    NotImplementedError - always; keeps the cell syntactically valid on a full re-run.
    '''
    raise NotImplementedError('process_day is an unfinished stub')

In [10]:
def compute_vals(df,idx=None,TempMin=None,TempMax=None,Precip=None):
    '''Compute weighted precipitation and temperature values for one HRU.

    Inputs:
    df - row-like object with `cells` (cell IDs feeding the HRU) and `percents`
         (one contribution value per cell)
    idx - flattened index raster of cell IDs; falls back to the module-level
          `idxRast` when omitted or empty
    TempMin - flattened daily minimum temperature, aligned with idx (deg C)
    TempMax - flattened daily maximum temperature, aligned with idx (deg C)
    Precip - flattened daily precipitation, aligned with idx (mm)

    Returns:
    [Pout, Tminout, Tmaxout] - precipitation in inches and min/max temperature
    in deg F. NaNs in the inputs propagate; an HRU with no cells yields NaNs.
    '''
    if idx is None or len(idx) == 0:
        idx = idxRast # fall back to the notebook-level index raster
    idx = np.asarray(idx)

    percents = np.asarray(df.percents, dtype=np.float64)
    cells = np.asarray(df.cells)

    if len(percents) == 0: # nothing contributes to this HRU
        return [np.nan, np.nan, np.nan]

    # convert cells into index values
    # https://stackoverflow.com/questions/9566592/find-multiple-values-within-a-numpy-array
    # NOTE(review): the argsort is repeated on every call although idx does not
    # change between calls; precomputing it in the caller would save work.
    orig_indices = np.argsort(idx) # positions that sort the raster
    keys = orig_indices[np.searchsorted(idx, cells, sorter=orig_indices)] # preserve cell order

    # subset out the cells of interest
    Precip = np.asarray(Precip)[keys]
    TempMax = np.asarray(TempMax)[keys]
    TempMin = np.asarray(TempMin)[keys]

    # compute the weights following:
    # https://github.com/theobarnhart/WSC_WRF/blob/master/extract_watershed_data_HW.ipynb
    # at commit: b06e99fdf404536af2c07766a53c3759763c9845
    # NOTE(review): these weights (percents / n) do not sum to 1 in general, so
    # the result is not a normalized mean - confirm this matches the intent.
    weights = (1./len(percents)) * percents

    Pout = np.sum(Precip*weights) # precip in mm, propagate NaNs
    Tminout = np.sum(TempMin*weights) # Tmin in C, propagate NaNs
    Tmaxout = np.sum(TempMax*weights) # Tmax in C, propagate NaNs

    # convert units
    Pout *= 0.0393701 # mm >> inches
    Tminout = (Tminout * (9./5.)) + 32. # deg C >> deg F
    Tmaxout = (Tmaxout * (9./5.)) + 32. # deg C >> deg F

    return [Pout,Tminout,Tmaxout]

In [11]:
def _load_livneh_var(nc, name, noData):
    '''Read one variable from an open netCDF dataset as float64 with missing values set to NaN.'''
    # masked entries (if the reader returns a masked array) become NaN
    raw = np.ma.asarray(nc.variables[name][:], dtype=np.float64)
    values = np.array(np.ma.filled(raw, np.nan), dtype=np.float64)
    # tolerant comparison so a sentinel stored in single precision still
    # matches after the cast to float64
    values[np.isclose(values, noData)] = np.nan
    return values


def process_ncdf(df,contrib=None,idx=None):
    '''Process a livneh netCDF file based on the contribution file.

    Inputs:
    df - row-like object whose `file` attribute is the filename of the livneh
         product; the second-to-last dot-separated token of the name is read
         as YYYYMM
    contrib - dataframe indicating which cells contribute to each HRU; falls
              back to the module-level `dat` when omitted or empty
    idx - index raster for identifying each cell; falls back to the
          module-level `idxRast` when omitted or empty

    Returns:
    None. Results are written into the module-level P, Tmin and Tmax frames,
    in the columns from position 6 onward of the row matching each date.
    '''
    noData = 1e+20

    if contrib is None or len(contrib) == 0:
        contrib = dat
    if idx is None or len(idx) == 0:
        idx = idxRast

    # load the data; the context manager closes the file once it is read
    with Dataset(df.file) as liv:
        TempMin = _load_livneh_var(liv, 'Tmin', noData)
        TempMax = _load_livneh_var(liv, 'Tmax', noData)
        Precip = _load_livneh_var(liv, 'Prec', noData)

    # parse the file name into the time period to cover:
    stamp = df.file.split('.')[-2]
    year, month = stamp[0:4], stamp[4:]
    first_day = pd.Timestamp(year=int(year), month=int(month), day=1)
    month_days = pd.date_range(start=first_day, periods=first_day.days_in_month, freq='D')

    for dayIDX, date in enumerate(month_days): # integer day counter alongside each date
        # pull only the slice of the data corresponding to the day being processed
        TempMinIn = TempMin[dayIDX,:,:].ravel()
        TempMaxIn = TempMax[dayIDX,:,:].ravel()
        PrecipIn = Precip[dayIDX,:,:].ravel()

        # one [P, Tmin, Tmax] triple per contribution row, in row order
        res = [compute_vals(row, idx=idx, TempMin=TempMinIn, TempMax=TempMaxIn, Precip=PrecipIn)
               for _, row in contrib.iterrows()]

        Pout,Tminout,Tmaxout = zip(*res)

        # insert results into their data frames
        for frame, values in ((P, Pout), (Tmin, Tminout), (Tmax, Tmaxout)):
            row_pos = frame.index.get_indexer([date])[0]
            if row_pos < 0: # date is not part of the output frame; nothing to fill
                continue
            # the first six columns hold the time fields; HRU columns follow
            frame.iloc[row_pos, 6:] = np.asarray(values, dtype=np.float64)

In [12]:
# run process_ncdf once per row of `processing` (axis=1 hands each row to the function as `df`),
# forwarding the contribution table and the flattened index raster as keyword arguments
processing.apply(process_ncdf,axis=1,contrib = dat,idx = idxRast)

KeyboardInterrupt: 

In [33]:
# preview the first rows of the contribution table (up to 21)
dat.head(21)

Unnamed: 0,POI_ID,hru_id_nat,hru_id_reg,region,reg,cells,percents
0,10054896,2463,1,2,2,"[328359.0, 329287.0, 329288.0]","[0.03, 0.14, 0.03]"
1,10054896,2464,2,2,2,"[327432.0, 328359.0, 328360.0, 328361.0, 32928...","[0.05, 0.42, 0.87, 0.03, 0.04, 0.62, 0.18, 0.01]"
2,10054884,2465,3,2,2,"[329284.0, 329285.0, 329286.0, 329287.0, 33021...","[0.04, 0.23, 0.37, 0.33, 0.03, 0.65, 0.63, 0.06]"
3,10054884,2466,4,2,2,"[329287.0, 330214.0, 330215.0]","[0.11, 0.12, 0.34]"
4,8526515,2467,5,2,2,"[338512.0, 338513.0, 339440.0, 339441.0]","[0.09, 0.02, 0.07, 0.05]"
5,10055148,2468,6,2,2,"[329287.0, 329288.0, 330215.0, 330216.0, 33114...","[0.03, 0.02, 0.32, 0.36, 0.13, 0.26]"
6,10055148,2469,7,2,2,"[329288.0, 329289.0, 330216.0, 330217.0, 33021...","[0.33, 0.22, 0.58, 0.83, 0.07, 0.45, 0.04]"
7,8526515,2470,8,2,2,"[338511.0, 338512.0, 339440.0]","[0.06, 0.24, 0.33]"
8,8525377,2471,9,2,2,"[338507.0, 339435.0, 339436.0, 339437.0, 34036...","[0.13, 0.17, 0.38, 0.31, 0.03]"
9,8525453,2472,10,2,2,"[338508.0, 338509.0, 338510.0, 339437.0, 33943...","[0.01, 0.58, 0.31, 0.17, 0.68, 0.24]"


In [34]:
# select the contribution row(s) whose national HRU id is 3100
dat.loc[dat.hru_id_nat == 3100]

Unnamed: 0,POI_ID,hru_id_nat,hru_id_reg,region,reg,cells,percents
637,8466793,3100,638,2,2,[353396.0],[1.0]


In [35]:
# scratch list; no other visible cell fills it - NOTE(review): looks like leftover debugging
tmp = []

In [36]:
# display the scratch list
tmp

[]

In [37]:
# display the length of the scratch list
len(tmp)

0

In [40]:
# reload the contribution table from disk, replacing `dat`.
# NOTE(review): unlike the first load, this is not followed by a sort on hru_id_reg;
# confirm the file order matches the HRU column order before reusing `dat` for processing.
dat = pd.read_pickle('./data/livneh_huc_02_cell_contrib.pcl')

In [41]:
# preview the first rows of the reloaded contribution table (up to 21)
dat.head(21)

Unnamed: 0,POI_ID,hru_id_nat,hru_id_reg,region,reg,cells,percents
0,10054896,2463,1,2,2,"[328359.0, 329287.0, 329288.0]","[0.03, 0.14, 0.03]"
1,10054896,2464,2,2,2,"[327432.0, 328359.0, 328360.0, 328361.0, 32928...","[0.05, 0.42, 0.87, 0.03, 0.04, 0.62, 0.18, 0.01]"
2,10054884,2465,3,2,2,"[329284.0, 329285.0, 329286.0, 329287.0, 33021...","[0.04, 0.23, 0.37, 0.33, 0.03, 0.65, 0.63, 0.06]"
3,10054884,2466,4,2,2,"[329287.0, 330214.0, 330215.0]","[0.11, 0.12, 0.34]"
4,8526515,2467,5,2,2,"[338512.0, 338513.0, 339440.0, 339441.0]","[0.09, 0.02, 0.07, 0.05]"
5,10055148,2468,6,2,2,"[329287.0, 329288.0, 330215.0, 330216.0, 33114...","[0.03, 0.02, 0.32, 0.36, 0.13, 0.26]"
6,10055148,2469,7,2,2,"[329288.0, 329289.0, 330216.0, 330217.0, 33021...","[0.33, 0.22, 0.58, 0.83, 0.07, 0.45, 0.04]"
7,8526515,2470,8,2,2,"[338511.0, 338512.0, 339440.0]","[0.06, 0.24, 0.33]"
8,8525377,2471,9,2,2,"[338507.0, 339435.0, 339436.0, 339437.0, 34036...","[0.13, 0.17, 0.38, 0.31, 0.03]"
9,8525453,2472,10,2,2,"[338508.0, 338509.0, 338510.0, 339437.0, 33943...","[0.01, 0.58, 0.31, 0.17, 0.68, 0.24]"
