## Script to clip the index raster to each NHM HRU

Theodore Barnhart | tbarnhart@usgs.gov

In [13]:
import geopandas as gpd
import subprocess
import os
import pandas as pd
import numpy as np

In [2]:
def make_outpath(df):
    outpath = './data/nhrus/AEA_tiffs/livneh_HUC_%s_nhruID_%s.tiff'%(df.region,df.nhruID)
    return outpath

In [3]:
def clip_raster(df):
    
    cutline = df.cutline
    feature = df.nhruID
    inpath = df.inpath
    outpath = df.outpath
    
    cmd = "gdalwarp -cutline %s -cwhere hru_id_nat=%s -crop_to_cutline -overwrite %s %s"%(cutline,
        feature,inpath,outpath)
    
    subprocess.call(cmd,shell=True)
    
    return None

In [41]:
def runClip(fl,inpath='tmp'):
    reg = fl.split('_')[-2] # extract the region
    
    tmp = gpd.read_file(fl)
    
    dat = pd.DataFrame({'nhruID':tmp.hru_id_nat.unique()}) #dataframe of the unique NHM HRU identifiers
    del tmp # close tmp file
    
    dat['cutline'] = fl # insert the shapefile as the cutline
    dat['region'] = reg
    dat['inpath'] = inpath # specified
    dat['outpath'] = dat.apply(make_outpath,axis=1)
    
    dat.apply(clip_raster,axis=1) # run the clip code
    print('HUC%s Done!'%reg)

In [5]:
def is_file(df):
    hru = df.nhruID
    reg = df.region
    return os.path.isfile('./data/nhrus/AEA_tiffs/livneh_HUC_%s_nhruID_%s.tiff'%(reg,hru))

In [6]:
def parse_out(out):
    out = str(out.stdout)
    x = int(out.split()[-1].split('L')[0])
    y = int(out.split()[-3].split('P')[0])
    return x,y

In [7]:
def clip_raster_output(df):
    
    cutline = df.cutline
    feature = df.nhruID
    inpath = df.inpath
    outpath = df.outpath
    
    cmd = "gdalwarp -cutline %s -cwhere hru_id_nat=%s -crop_to_cutline -overwrite %s %s"%(cutline,
        feature,inpath,outpath)
    
    out = subprocess.run(cmd,shell=True,stdout=subprocess.PIPE)
    
    return out

In [8]:
def check(fl,inpath='tmp'):
    '''Figure out which HRUS are missing'''
    reg = fl.split('_')[-2] # extract the region
    tmp = gpd.read_file(fl)
    dat = pd.DataFrame({'nhruID':tmp.hru_id_nat.unique()}) #dataframe of the unique NHM HRU identifiers
    del tmp # close tmp file
    
    dat['cutline'] = fl # insert the shapefile as the cutline
    dat['region'] = reg
    dat['inpath'] = inpath # specified
    dat['outpath'] = dat.apply(make_outpath,axis=1)
    dat['region'] = reg # insert the region
    dat['isfile'] = dat.apply(is_file,axis=1)
    
    dat2 = dat.loc[dat.isfile==False].copy() # select only the missing files
    
    if len(dat2) > 0:
        dat2['out'] = dat2.apply(clip_raster_output,axis=1) # run the clip routine again, but save the output
        res = dat2.out.map(parse_out) # parse the output into the raster size trying to be created
        x,y = zip(*res) # unpack the results and put into the dataframe 
        dat2['cols'] = x
        dat2['rows'] = y
        dat2['cells'] = dat2.rows*dat2.cols
    
        if dat2.cells.sum()>0:
            print('non-zero raster!')
    
        return dat2
    
    elif len(dat) - dat.isfile.sum() == 0:
        print('%s Complete!'%reg)

In [29]:
#newRegions = ['01','02','03','04','05','06','13','14','15','16','17','18','20','21']
newRegions = ['12','13','14','15','16','17','18']

In [30]:
# make new files
files = []
for reg in newRegions:
    files.append('./data/nhrus/clean_AEA/nhru_%s_clean.shp'%reg)

In [31]:
files

['./data/nhrus/clean_AEA/nhru_12_clean.shp',
 './data/nhrus/clean_AEA/nhru_13_clean.shp',
 './data/nhrus/clean_AEA/nhru_14_clean.shp',
 './data/nhrus/clean_AEA/nhru_15_clean.shp',
 './data/nhrus/clean_AEA/nhru_16_clean.shp',
 './data/nhrus/clean_AEA/nhru_17_clean.shp',
 './data/nhrus/clean_AEA/nhru_18_clean.shp']

In [32]:
files = pd.DataFrame({'file':files})

In [33]:
files.head()

Unnamed: 0,file
0,./data/nhrus/clean_AEA/nhru_12_clean.shp
1,./data/nhrus/clean_AEA/nhru_13_clean.shp
2,./data/nhrus/clean_AEA/nhru_14_clean.shp
3,./data/nhrus/clean_AEA/nhru_15_clean.shp
4,./data/nhrus/clean_AEA/nhru_16_clean.shp


In [None]:
files.file.apply(runClip,inpath='./data/livneh_idx_sm.tiff')

In [36]:
tmp = gpd.read_file(files.file[0])

In [38]:
tmp.head()

Unnamed: 0,OBJECTID,POI_ID,hru_id_reg,hru_segmen,hru_segm_1,hru_segm_2,hru_x,hru_y,hru_lat,hru_area,region,Shape_Leng,Shape_Area,geometry
0,1,1576420,1,1038,1038,,62821.854643,649887.98863,28.919997,2089.614517,12,24984.695728,8456370.0,POLYGON ((453526.4797736619 -1772745.158867018...
1,2,1576420,2,1038,1038,,64297.004513,649641.869018,28.917682,1295.495853,12,24497.925574,5242686.0,POLYGON ((460726.9014111806 -1767496.801150818...
2,3,1576362,3,1037,1037,,70087.490888,654763.999861,28.963574,849.96358,12,15252.318977,3439681.0,POLYGON ((464186.3697744372 -1764735.323414906...
3,4,1576348,4,0,0,,63959.798297,648495.038764,28.907342,2917.601485,12,60645.018581,11807110.0,"POLYGON ((467299.732520635 -1761601.499437819,..."
4,5,1576436,5,1039,1039,,71313.778846,656002.10832,28.974674,894.72327,12,14914.280679,3620817.0,POLYGON ((467437.8521657833 -1761654.857633956...


In [19]:
missing = files.file.apply(check,inpath='./data/hrap_grid_AEA_idx_sm.tiff')

01 Complete!
02 Complete!
03 Complete!
05 Complete!
06 Complete!
13 Complete!
14 Complete!
15 Complete!
16 Complete!
17 Complete!
18 Complete!
20 Complete!
21 Complete!


In [25]:
missing

0                                                  None
1                                                  None
2                                                  None
3           nhruID                                  ...
4                                                  None
5                                                  None
6                                                  None
7                                                  None
8                                                  None
9                                                  None
10                                                 None
11                                                 None
12                                                 None
13                                                 None
Name: file, dtype: object

In [24]:
missing[3]

Unnamed: 0,nhruID,cutline,region,inpath,outpath,isfile,out,cols,rows,cells
116,17305,./data/nhrus/clean_AEA/nhru_04_clean.shp,4,./data/hrap_grid_AEA_idx_sm.tiff,./data/nhrus/AEA_tiffs/HUC_04_nhruID_17305.tiff,False,CompletedProcess(args='gdalwarp -cutline ./dat...,0,1,0
3746,20935,./data/nhrus/clean_AEA/nhru_04_clean.shp,4,./data/hrap_grid_AEA_idx_sm.tiff,./data/nhrus/AEA_tiffs/HUC_04_nhruID_20935.tiff,False,CompletedProcess(args='gdalwarp -cutline ./dat...,0,0,0
3835,21024,./data/nhrus/clean_AEA/nhru_04_clean.shp,4,./data/hrap_grid_AEA_idx_sm.tiff,./data/nhrus/AEA_tiffs/HUC_04_nhruID_21024.tiff,False,CompletedProcess(args='gdalwarp -cutline ./dat...,1,0,0
4538,21727,./data/nhrus/clean_AEA/nhru_04_clean.shp,4,./data/hrap_grid_AEA_idx_sm.tiff,./data/nhrus/AEA_tiffs/HUC_04_nhruID_21727.tiff,False,CompletedProcess(args='gdalwarp -cutline ./dat...,0,0,0
5067,22256,./data/nhrus/clean_AEA/nhru_04_clean.shp,4,./data/hrap_grid_AEA_idx_sm.tiff,./data/nhrus/AEA_tiffs/HUC_04_nhruID_22256.tiff,False,CompletedProcess(args='gdalwarp -cutline ./dat...,1,0,0


## Code the few remaining cells by hand

In [26]:
reg04_NHRU = [17305,20935,21024,21727,22256]
reg04_cells = [[296823],[354989],[356077],[306739],[260807]]
reg04_percents = [[1],[1],[1],[1],[1]]

In [27]:
reg08 = pd.DataFrame()
reg08['nhruID'] = reg04_NHRU
reg08['reg_hruID'] = [117,3747,3836,4539,5068]
reg08['cells'] = reg04_cells
reg08['percents'] = reg04_percents
reg08['reg'] = '04'

In [28]:
reg08.to_pickle('./data/reg04_unclipped.pcl')

In [15]:
missing[1]

Unnamed: 0,nhruID,cutline,region,inpath,outpath,isfile,out,cols,rows,cells
2,40817,./data/nhrus/clean_AEA/nhru_08_clean.shp,8,./data/hrap_grid_AEA_idx_sm.tiff,./data/nhrus/AEA_tiffs/HUC_08_nhruID_40817.tiff,False,CompletedProcess(args='gdalwarp -cutline ./dat...,1,0,0
95,40910,./data/nhrus/clean_AEA/nhru_08_clean.shp,8,./data/hrap_grid_AEA_idx_sm.tiff,./data/nhrus/AEA_tiffs/HUC_08_nhruID_40910.tiff,False,CompletedProcess(args='gdalwarp -cutline ./dat...,0,1,0
111,40926,./data/nhrus/clean_AEA/nhru_08_clean.shp,8,./data/hrap_grid_AEA_idx_sm.tiff,./data/nhrus/AEA_tiffs/HUC_08_nhruID_40926.tiff,False,CompletedProcess(args='gdalwarp -cutline ./dat...,0,0,0


In [23]:
reg08_NHRU = [40817,40910,40926,40826,40827,40891,40908,41064]
reg08_cells = [[788822],[780963],[772030],[778776],[778776],[776509],[780963],[760827]]
reg08_percents = [[1],[1],[1],[1],[1],[1],[1],[1]]

In [25]:
reg08 = pd.DataFrame()
reg08['nhruID'] = reg08_NHRU
reg08['reg_hruID'] = [3,96,112,12,13,77,94,250]
reg08['cells'] = reg08_cells
reg08['percents'] = reg08_percents
reg08['reg'] = '08'

In [26]:
reg08.to_pickle('./data/reg08_unclipped.pcl')

In [27]:
missing[4]

Unnamed: 0,nhruID,cutline,region,inpath,outpath,isfile,out,cols,rows,cells
8081,63665,./data/nhrus/clean_AEA/nhru_10U_clean.shp,10U,./data/hrap_grid_AEA_idx_sm.tiff,./data/nhrus/AEA_tiffs/HUC_10U_nhruID_63665.tiff,False,CompletedProcess(args='gdalwarp -cutline ./dat...,0,1,0
8097,63681,./data/nhrus/clean_AEA/nhru_10U_clean.shp,10U,./data/hrap_grid_AEA_idx_sm.tiff,./data/nhrus/AEA_tiffs/HUC_10U_nhruID_63681.tiff,False,CompletedProcess(args='gdalwarp -cutline ./dat...,0,1,0


In [28]:
reg10U_NHRU = [63665,63681]
reg10U_cells = [[354526],[354513]]
reg10U_percents = [[1],[1]]

In [29]:
reg10U = pd.DataFrame()
reg10U['nhruID'] = reg10U_NHRU
reg10U['reg_hruID'] = [8082,8098]
reg10U['cells'] = reg10U_cells
reg10U['percents'] = reg10U_percents
reg10U['reg'] = '10U'

In [30]:
reg10U.to_pickle('./data/reg10U_unclipped.pcl')