In [49]:
import pandas as pd
import geopandas as gpd
import numpy as np
import glob as glob
from netCDF4 import Dataset
import rasterio as rs

def get_year(index):
    '''Return the `year` attribute of a date-like object (e.g. a Timestamp).'''
    year = index.year
    return year

def get_month(index):
    '''Return the `month` attribute of a date-like object (e.g. a Timestamp).'''
    month = index.month
    return month

def get_day(index):
    '''Return the `day` attribute of a date-like object (e.g. a Timestamp).'''
    day = index.day
    return day

In [6]:
## generate the time aspect of the data
# the Livneh data start in 1915 and end in 2015

# one stamp per day of record
dates = pd.date_range(start = '1915-01-01', end = '2015-12-31', freq = 'D')

# One month-end stamp per month of record. The end bound must be the last day
# of December 2015: a bare '2015-12' parses as 2015-12-01, which lies before
# that month's end stamp and silently drops December 2015 from the range.
# An explicit MonthEnd offset is used instead of the 'M' alias, which newer
# pandas releases deprecate.
months = pd.date_range(start = '1915-01-01', end = '2015-12-31', freq = pd.offsets.MonthEnd())

In [26]:
# bring in the contribution file:
# NOTE(review): unpickling can execute arbitrary code -- only load trusted files.
dat = pd.read_pickle('./data/livneh_huc_02_cell_contrib.pcl')
dat = dat.sort_values('hru_id_reg', ascending=True) # sort by regional hru ID

# Plan: work through the netCDF files one at a time and extract every HRU from
# a file before moving on to the next file.

# prepare the output data frame: one row per day of record, indexed by date
out_index = pd.DatetimeIndex(dates, name='datetime')
time_cols = pd.DataFrame({'year': dates.year,
                          'month': dates.month,
                          'day': dates.day,
                          'hour': 0,
                          'minute': 0,
                          'second': 0},
                         index=out_index)

# Create space for each HRU in one step. The placeholder is a float (-999.0)
# because the columns are later filled with float results; integer columns
# would need a dtype upcast on assignment, which newer pandas deprecates.
# Building all columns at once also avoids fragmenting the frame with one
# insert per HRU. dict.fromkeys drops duplicate names while keeping order,
# matching the effect of assigning the same column twice.
hru_cols = list(dict.fromkeys('hru_%s'%hru for hru in dat.hru_id_reg))
hru_space = pd.DataFrame(-999.0, index=out_index, columns=hru_cols)

out = pd.concat([time_cols, hru_space], axis=1)

# independent copies of the template, one per variable
P = out.copy()
Tmin = out.copy()
Tmax = out.copy()

In [None]:
# Grab all the Livneh files. glob returns paths in arbitrary, filesystem-dependent
# order, so they are sorted to make the processing order reproducible.
# NOTE(review): absolute, user-specific path -- consider a configurable data directory.
livneh_files = sorted(glob.glob('/home/tbarnhart/projects/NHM_precipitation/data/livneh2016/*.nc'))
processing = pd.DataFrame({'file': livneh_files})

In [None]:
def _read_flat_grid(ds, name, noData):
    '''Read variable `name` from an open netCDF dataset as a float64 array of
    shape (time, cells), with no-data cells replaced by NaN.'''
    raw = np.ma.asarray(ds.variables[name][:]).astype(np.float64)
    vals = np.ma.filled(raw, np.nan) # cells masked by the netCDF reader become NaN
    if vals.ndim != 3:
        raise ValueError('expected %s to be 3-D (time, rows, cols), got shape %s'%(name, vals.shape))
    # NOTE(review): assumes the leading axis is time and that the row-major
    # flattening of the remaining two axes matches the cell numbers used in the
    # contribution file -- confirm against the index raster.
    vals = vals.reshape(vals.shape[0], -1)
    # Compare with a tolerance rather than ==: if the file stores float32, the
    # fill value no longer equals 1e+20 exactly once cast to float64.
    vals[np.isclose(vals, noData, rtol=1e-6, atol=0.0)] = np.nan
    return vals


def _ensure_float(frame, columns):
    '''Cast `columns` of `frame` to float64 (in place on `frame`) unless they already are.'''
    if (frame.dtypes[columns] != np.float64).any():
        frame[columns] = frame[columns].astype(np.float64)


def process_ncdf(fl,contrib=[],idx=[]):
    '''Process a livneh netCDF file based on the contribution file.

    The file is expected to hold one month of daily data; the year and month
    are parsed from the second-to-last dot-separated part of the filename
    (YYYYMM). For every day in that month the HRU values are computed with
    compute_vals and written into the matching row of the module-level
    P, Tmin and Tmax data frames. Nothing is returned.

    Inputs:
    fl - filename to the livneh product
    contrib - dataframe indicating which cells contribute to each HRU; when
              omitted (None or an empty list) the module-level `dat` is used
    idx - index raster for identifying each cell (passed through to compute_vals)

    Raises:
    ValueError - if a variable is not 3-D or the number of time steps does not
                 match the number of days in the month named by the file
    '''
    noData = 1e+20

    # The list defaults are never mutated; an empty/missing contrib falls back
    # to the module-level contribution table.
    if contrib is None or (isinstance(contrib, list) and len(contrib) == 0):
        contrib = dat

    # parse the file name into the time period to cover:
    stamp = fl.split('.')[-2]
    year = int(stamp[0:4])
    month = int(stamp[4:])
    first = pd.Timestamp(year=year, month=month, day=1)
    days = pd.date_range(start=first, periods=first.days_in_month, freq='D')

    # load the data, always releasing the file handle:
    liv = Dataset(fl)
    try:
        TempMin = _read_flat_grid(liv, 'Tmin', noData)
        TempMax = _read_flat_grid(liv, 'Tmax', noData)
        Precip = _read_flat_grid(liv, 'Prec', noData)
    finally:
        liv.close()

    steps = (TempMin.shape[0], TempMax.shape[0], Precip.shape[0])
    if any(n != len(days) for n in steps):
        raise ValueError('%s: time steps %s do not match the %s days of %04d-%02d'
                         %(fl, steps, len(days), year, month))

    # Address the output columns by HRU name rather than by position so the
    # results land in the right place even if contrib is a subset or reordered.
    hru_cols = ['hru_%s'%hru for hru in contrib.hru_id_reg]
    targets = (P, Tmin, Tmax) # same order as the tuple returned by compute_vals
    for frame in targets:
        _ensure_float(frame, hru_cols) # results are floats; avoid upcasting on assignment

    for step, date in enumerate(days): # iterate through each date
        rows = [compute_vals(row, idx=idx,
                             TempMin=TempMin[step],
                             TempMax=TempMax[step],
                             Precip=Precip[step])
                for _, row in contrib.iterrows()]
        vals = np.array(rows, dtype=np.float64).reshape(-1, 3) # one row per HRU: P, Tmin, Tmax

        # insert results into their data frames (dates outside the frame are skipped)
        for col, frame in enumerate(targets):
            if date in frame.index:
                frame.loc[date, hru_cols] = vals[:, col]

In [50]:
def compute_vals(df,idx=[],TempMin = [], TempMax = [], Precip = []):
    '''Compute the weighted precipitation and temperatures for one HRU.

    Inputs:
    df - one row of the contribution table; `df.cells` lists the grid cell
         numbers feeding the HRU and `df.percents` the matching weight of each
         cell (presumably the share of the cell inside the HRU -- TODO confirm)
    idx - index raster for identifying each cell (kept for interface
          compatibility; not used in the computation)
    TempMin, TempMax, Precip - 1-D arrays of values indexed by cell number

    Returns:
    (Pout, Tminout, Tmaxout) - weighted mean precipitation converted from mm
    to inches, and weighted mean Tmin and Tmax in their input units (C).
    A NaN in any contributing cell propagates to the result; an HRU with no
    contributing weight yields NaN for all three values.
    '''
    percents = np.asarray(df.percents, dtype=np.float64)
    # cell numbers are stored as floats in the contribution table; numpy
    # only accepts integer arrays as indices
    cells = np.asarray(df.cells).astype(np.intp)

    total = percents.sum()
    if percents.size == 0 or not total > 0:
        # nothing (or nothing valid) contributes to this HRU
        return np.nan, np.nan, np.nan

    # Weights follow the approach in:
    # https://github.com/theobarnhart/WSC_WRF/blob/master/extract_watershed_data_HW.ipynb
    # at commit: b06e99fdf404536af2c07766a53c3759763c9845
    # They are normalised to sum to 1 so the result is a true weighted mean
    # (a constant field comes back unchanged); scaling percents by 1/n alone
    # does not guarantee that.
    weights = percents / total

    # subset out the cells of interest
    precip_cells = np.asarray(Precip, dtype=np.float64)[cells]
    tmax_cells = np.asarray(TempMax, dtype=np.float64)[cells]
    tmin_cells = np.asarray(TempMin, dtype=np.float64)[cells]

    Pout = np.sum(precip_cells*weights) # precip in mm, propagate NaNs
    Tminout = np.sum(tmin_cells*weights) # Tmin in C, propagate NaNs
    Tmaxout = np.sum(tmax_cells*weights) # Tmax in C, propagate NaNs

    # convert units
    Pout *= 0.0393701 # mm >> inches

    return Pout,Tminout,Tmaxout

Unnamed: 0,POI_ID,hru_id_nat,hru_id_reg,region,reg,cells,percents
0,10054896,2463,1,02,02,"[328359.0, 329287.0, 329288.0]","[0.03, 0.14, 0.03]"
1,10054896,2464,2,02,02,"[327432.0, 328359.0, 328360.0, 328361.0, 32928...","[0.05, 0.42, 0.87, 0.03, 0.04, 0.62, 0.18, 0.01]"
2,10054884,2465,3,02,02,"[329284.0, 329285.0, 329286.0, 329287.0, 33021...","[0.04, 0.23, 0.37, 0.33, 0.03, 0.65, 0.63, 0.06]"
3,10054884,2466,4,02,02,"[329287.0, 330214.0, 330215.0]","[0.11, 0.12, 0.34]"
4,8526515,2467,5,02,02,"[338512.0, 338513.0, 339440.0, 339441.0]","[0.09, 0.02, 0.07, 0.05]"
5,10055148,2468,6,02,02,"[329287.0, 329288.0, 330215.0, 330216.0, 33114...","[0.03, 0.02, 0.32, 0.36, 0.13, 0.26]"
6,10055148,2469,7,02,02,"[329288.0, 329289.0, 330216.0, 330217.0, 33021...","[0.33, 0.22, 0.58, 0.83, 0.07, 0.45, 0.04]"
7,8526515,2470,8,02,02,"[338511.0, 338512.0, 339440.0]","[0.06, 0.24, 0.33]"
8,8525377,2471,9,02,02,"[338507.0, 339435.0, 339436.0, 339437.0, 34036...","[0.13, 0.17, 0.38, 0.31, 0.03]"
9,8525453,2472,10,02,02,"[338508.0, 338509.0, 338510.0, 339437.0, 33943...","[0.01, 0.58, 0.31, 0.17, 0.68, 0.24]"
