In [90]:
import pandas as pd
import geopandas as gpd
import numpy as np
import glob as glob
from netCDF4 import Dataset
import rasterio as rs
import progressbar as pb
import sys

# Region identifier interpolated into the input and output file names used in this notebook.
# It is hard-coded here; the commented-out line is the alternative that reads it from the
# first command-line argument.
#reg = sys.argv[1]
reg = '04'
def get_year(index):
    '''Return the ``year`` attribute of a date-like object.'''
    year = index.year
    return year

def get_month(index):
    '''Return the ``month`` attribute of a date-like object.'''
    month = index.month
    return month

def get_day(index):
    '''Return the ``day`` attribute of a date-like object.'''
    day = index.day
    return day

def get_keys(df,idxraster=[]):
    '''
    Locate each of a row's cell ids inside an index raster.

    Parameters
    ----------
    df : object exposing a ``cells`` attribute
        ``cells`` is a sized container of cell ids (one table row when this
        is used through ``DataFrame.apply(..., axis=1)``).
    idxraster : 2-D array
        Raster whose values are compared against each cell id.

    Returns
    -------
    (x, y) : tuple of lists
        One entry per cell id; each entry is the array of first-axis
        (``x``) or second-axis (``y``) positions returned by ``np.where``.
        A cell id that is absent from the raster yields empty arrays.
    '''
    cells = df.cells

    # A single-entry container is read through its first element; longer
    # (or empty) containers are simply iterated.
    targets = cells if len(cells) != 1 else [cells[0]]

    hits = [np.where(idxraster == cell) for cell in targets]
    x = [rows for rows, _ in hits]
    y = [cols for _, cols in hits]

    return x,y

In [91]:
## build the daily time axis shared by all output tables
# the Livneh data start at 1915 and end 2015

dates = pd.date_range('1915-01-01', '2015-12-31', freq='D')
#months = pd.date_range(start = '1915-01', end = '2015-12', freq = 'M')

In [92]:
with rs.open('./data/livneh_idx.tiff') as ds:
    idxRast = np.flipud(ds.read(1)) # flip this to deal with the change from tiff to array

# NOTE(review): unpickling can execute arbitrary code; only load files produced by this project.
dat = pd.read_pickle('./data/livneh_huc_%s_cell_contrib.pcl'%reg)

# get the index values of each cell in the full index raster
res = dat.apply(get_keys,axis=1,idxraster=idxRast)
x,y = zip(*res)
dat['x'] = x
dat['y'] = y

# Pool every raster position touched by any row. Each row holds a list of
# index arrays (one per cell), so flatten them explicitly instead of relying
# on np.unique to cope with a list of arrays.
xs = np.unique(np.concatenate([np.ravel(hit) for hits in dat.x for hit in hits]))
ys = np.unique(np.concatenate([np.ravel(hit) for hits in dat.y for hit in hits]))

# Extents of the local window. minX / minY are the first row / column used.
# maxX / maxY are *exclusive* slice stops (largest index + 1): a slice
# `a:b` leaves out `b`, so stopping at the largest index itself would drop
# the last row and column that contain cells. Every `minX:maxX` /
# `minY:maxY` slice in this notebook relies on these being slice stops.
minX = xs.min()
minY = ys.min()
maxX = xs.max() + 1
maxY = ys.max() + 1

# inclusive [min row, max row, min col, max col] of the cells in use
extents = [minX,xs.max(),minY,ys.max()]

idxLocal = idxRast[minX:maxX,minY:maxY] # subset the index raster

# recompute the indices relative to the local window so they can address
# data arrays cut with the same slices
res = dat.apply(get_keys,axis=1,idxraster = idxLocal)

x,y = zip(*res)

dat['x_local'] = x
dat['y_local'] = y

In [87]:
def test_idx(df):
    if (len(df.x_local) == 0) | (len(df.y_local) == 0):
        return 1.
    else: 
        return 0

In [93]:
# Run test_idx on every row and store its flag (1 = flagged, 0 = not flagged) in a new 'test' column.
dat['test'] = dat.apply(test_idx,axis=1)

In [94]:
# Total of the 'test' flags, i.e. how many rows test_idx flagged.
dat.test.sum()

0

In [97]:
# Display rows 980-989 to compare the full-raster (x, y) and local (x_local, y_local) index columns.
dat[980:990]

Unnamed: 0,POI_ID,hru_id_nat,hru_id_reg,region,reg,cells,percents,x,y,x_local,y_local,test
980,15448626,18169,981,4,4,[450888.0],[1.0],[[485]],[[807]],[[73]],[[299]],0
981,15450138,18170,982,4,4,[450889.0],[1.0],[[485]],[[808]],[[73]],[[300]],0
982,15448578,18171,983,4,4,"[450889.0, 450890.0]","[0.7777777777777778, 0.2222222222222222]","[[485], [485]]","[[808], [809]]","[[73], [73]]","[[300], [301]]",0
983,15448574,18172,984,4,4,"[448108.0, 448109.0, 448110.0, 449036.0, 44903...","[0.0033707865168539327, 0.08314606741573034, 0...","[[482], [482], [482], [483], [483], [483], [48...","[[811], [812], [813], [811], [812], [813], [81...","[[70], [70], [70], [71], [71], [71], [71], [72...","[[303], [304], [305], [303], [304], [305], [30...",0
984,15448640,18173,985,4,4,"[448111.0, 448112.0, 448113.0, 449038.0, 44903...","[0.006802721088435374, 0.07482993197278912, 0....","[[482], [482], [482], [483], [483], [483], [48...","[[814], [815], [816], [813], [814], [815], [81...","[[70], [70], [70], [71], [71], [71], [72], [72...","[[306], [307], [308], [305], [306], [307], [30...",0
985,15448640,18174,986,4,4,"[448112.0, 448113.0, 449040.0, 449041.0, 44996...","[0.003171247357293869, 0.039112050739957716, 0...","[[482], [482], [483], [483], [484], [484], [48...","[[815], [816], [815], [816], [814], [815], [81...","[[70], [70], [71], [71], [72], [72], [72], [72...","[[307], [308], [307], [308], [306], [307], [30...",0
986,15456836,18175,987,4,4,"[449958.0, 449959.0, 450886.0, 450887.0]","[0.26063829787234044, 0.06914893617021277, 0.2...","[[484], [484], [485], [485]]","[[805], [806], [805], [806]]","[[72], [72], [73], [73]]","[[297], [298], [297], [298]]",0
987,15450454,18176,988,4,4,"[445327.0, 445328.0, 446255.0, 446256.0, 44718...","[0.024793388429752067, 0.045454545454545456, 0...","[[479], [479], [480], [480], [481], [481], [48...","[[814], [815], [814], [815], [814], [815], [81...","[[67], [67], [68], [68], [69], [69], [70], [70...","[[306], [307], [306], [307], [306], [307], [30...",0
988,15458306,18177,989,4,4,"[442539.0, 442540.0, 442541.0, 443467.0, 44346...","[0.041666666666666664, 0.4097222222222222, 0.1...","[[476], [476], [476], [477], [477], [477]]","[[810], [811], [812], [810], [811], [812]]","[[64], [64], [64], [65], [65], [65]]","[[302], [303], [304], [302], [303], [304]]",0
989,15458360,18178,990,4,4,"[441611.0, 441612.0, 442538.0, 442539.0, 44254...","[0.01694915254237288, 0.01694915254237288, 0.3...","[[475], [475], [476], [476], [476]]","[[810], [811], [809], [810], [811]]","[[63], [63], [64], [64], [64]]","[[302], [303], [301], [302], [303]]",0


In [96]:
# Number of rows in the region table.
len(dat)

5936

In [71]:
def get_fractional_date(fl):
    '''
    Convert a file name carrying a ``YYYYMM`` stamp into a fractional year.

    The stamp is taken from the second-to-last dot-separated field of ``fl``
    (e.g. ``name.191501.nc``). The month is shifted back by half a month so
    the value falls mid-month: ``year + (month - 0.5) / 12``.
    '''
    stamp = fl.split('.')[-2]
    year, month = float(stamp[0:4]), float(stamp[4:6])

    return year + ((month-0.5)/12.)

In [76]:
# create and sort a data frame to ensure that files are read in the correct order.
livneh = pd.DataFrame()
livneh['files'] = glob.glob('/home/tbarnhart/projects/NHM_precipitation/data/livneh2016/*.nc')
livneh['date'] = livneh.files.map(get_fractional_date)

# Keep the sorted, re-numbered frame. Sorting alone leaves the original
# labels in place, so a label lookup such as `files[0]` would return the
# first file found by glob rather than the earliest one, and a positional
# slice such as `files[1:]` would skip the earliest file.
livneh = livneh.sort_values('date',ascending=True).reset_index(drop=True)

if livneh.empty:
    raise FileNotFoundError('no Livneh netCDF files matched the glob pattern')

noData = 1e+20

def _read_subset(ds, name):
    '''
    Read variable `name` from an open dataset for the regional window
    (`minX:maxX`, `minY:maxY`) as float64 with missing values set to NaN.
    '''
    raw = ds.variables[name][:,minX:maxX,minY:maxY]
    # netCDF4 may hand back a masked array; fill masked entries with NaN.
    vals = np.ma.asarray(raw).astype(np.float64).filled(np.nan)
    # Also catch the raw sentinel. A tolerance is used because a sentinel
    # stored in single precision does not compare equal to the float64 1e20.
    vals[np.isclose(vals, noData, rtol=1e-6, atol=0.0)] = np.nan
    return vals

# Collect the per-file pieces and concatenate once at the end; growing the
# arrays inside the loop re-copies everything read so far on each iteration.
stacks = {'Tmin': [], 'Tmax': [], 'Prec': []}
for fl in livneh.files:
    with Dataset(fl) as liv: # close each file as soon as it has been read
        for name, parts in stacks.items():
            parts.append(_read_subset(liv, name))

Tmin = np.concatenate(stacks['Tmin'],axis=0)
Tmax = np.concatenate(stacks['Tmax'],axis=0)
Prec = np.concatenate(stacks['Prec'],axis=0)
del stacks # drop the per-file pieces now that the full stacks exist

In [72]:
# prepare the output data frame: one row per day, six time columns, then
# one column per HRU pre-filled with -999
stamp_index = pd.DatetimeIndex(dates, name='datetime')

time_cols = pd.DataFrame(
    {
        'year': stamp_index.year.astype('int64'),
        'month': stamp_index.month.astype('int64'),
        'day': stamp_index.day.astype('int64'),
        'hour': 0,
        'minute': 0,
        'second': 0,
    },
    index=stamp_index,
)

# Build all HRU columns in one shot. Inserting thousands of columns one at a
# time fragments the frame and becomes extremely slow.
# dict.fromkeys keeps the first occurrence of each name, matching what
# repeated column assignment would have produced.
hru_names = list(dict.fromkeys('hru_%s'%hru for hru in dat.hru_id_reg))
hru_cols = pd.DataFrame(-999, index=stamp_index, columns=hru_names)

out = pd.concat([time_cols, hru_cols], axis=1)
del hru_cols # the placeholder block is now part of `out`

# independent copies, one per output variable
Pout = out.copy()
Tminout = out.copy()
Tmaxout = out.copy()

KeyboardInterrupt: 

In [33]:
def _hru_weighted_series(stack, rows, cols, weights):
    '''
    Weighted mean over an HRU's cells for every time step.

    Parameters
    ----------
    stack : array indexed as (time, row, col)
    rows, cols : sequences of index arrays, one per cell
    weights : sequence of per-cell weights, same length as the cells

    Returns
    -------
    1-D array with one value per time step. Cells that are NaN at a time
    step are left out and the remaining weights are renormalised; a time
    step with no valid cell is NaN (not 0).
    '''
    n_time = stack.shape[0]
    if len(rows) == 0:
        return np.full(n_time, np.nan)

    rows = np.concatenate([np.ravel(r) for r in rows])
    cols = np.concatenate([np.ravel(c) for c in cols])
    weights = np.asarray(weights, dtype=np.float64).ravel()
    if not (rows.size == cols.size == weights.size):
        raise ValueError('cell indices and weights differ in length: '
                         '%d rows, %d cols, %d weights'
                         % (rows.size, cols.size, weights.size))

    vals = stack[:, rows, cols] # shape (time, cell)
    valid = ~np.isnan(vals)

    # Reduce across the cell axis so each time step keeps its own value.
    weighted_sum = np.nansum(vals * weights, axis=1)
    weight_sum = (valid * weights).sum(axis=1)

    series = np.full(n_time, np.nan)
    np.divide(weighted_sum, weight_sum, out=series, where=weight_sum > 0)
    return series

for hru,x,y,percents in zip(dat.hru_id_reg,dat.x_local,dat.y_local,dat.percents):
    PrecTmp = _hru_weighted_series(Prec,x,y,percents)
    TminTmp = _hru_weighted_series(Tmin,x,y,percents)
    TmaxTmp = _hru_weighted_series(Tmax,x,y,percents)

    #convert units
    Pout['hru_%s'%hru] = PrecTmp * 0.0393701 # mm >> inches
    Tminout['hru_%s'%hru] = (TminTmp * (9./5.)) + 32. # deg C >> Deg F
    Tmaxout['hru_%s'%hru] = (TmaxTmp * (9./5.)) + 32. # deg C >> Deg F

In [None]:
# write each output table to its own pickle, tagged with the region id
outputs = [
    (Pout, '/home/tbarnhart/projects/NHM_precipitation/data/livneh_Prec_reg_%s.pcl'),
    (Tminout, '/home/tbarnhart/projects/NHM_precipitation/data/livneh_Tmin_reg_%s.pcl'),
    (Tmaxout, '/home/tbarnhart/projects/NHM_precipitation/data/livneh_Tmax_reg_%s.pcl'),
]
for frame, pattern in outputs:
    frame.to_pickle(pattern%reg)