## Script to clip the index raster to each NHM HRU

Theodore Barnhart | tbarnhart@usgs.gov

In [1]:
import geopandas as gpd
import subprocess
import os

In [2]:
def make_outpath(df):
    outpath = './data/nhrus/AEA_tiffs/HUC_%s_nhruID_%s.tiff'%(df.region,df.nhruID)
    return outpath

In [3]:
def clip_raster(df):
    
    cutline = df.cutline
    feature = df.nhruID
    inpath = df.inpath
    outpath = df.outpath
    
    cmd = "gdalwarp -cutline %s -cwhere hru_id_nat=%s -crop_to_cutline -overwrite %s %s"%(cutline,
        feature,inpath,outpath)
    
    subprocess.call(cmd,shell=True)
    
    return None

In [4]:
def clip_raster_12(df):
    
    cutline = df.cutline
    feature = df.nhruID
    inpath = df.inpath
    outpath = df.outpath
    
    cmd = "gdalwarp -cutline %s -cwhere hru_id=%s -crop_to_cutline -overwrite %s %s"%(cutline,
        feature,inpath,outpath)
    
    subprocess.call(cmd,shell=True)
    
    return None

In [5]:
def runClip(fl,inpath='tmp'):
    tmp = gpd.read_file(fl)
    dat = pd.DataFrame({'nhruID':tmp.hru_id_nat.unique()}) #dataframe of the unique NHM HRU identifiers
    del tmp # close tmp file
    
    reg = fl.split('_')[-2] # extract the region
    
    dat['cutline'] = fl # insert the shapefile as the cutline
    dat['region'] = reg
    dat['inpath'] = inpath # specified
    dat['outpath'] = dat.apply(make_outpath,axis=1)
    
    dat.apply(clip_raster,axis=1) # run the clip code
    print('HUC%s Done!'%reg)
    # test that the number of files created is the same as the number of features
    #files = glob.glob('./data/nhrus/AEA_tiffs/HUC_%s_*.tiff'%(reg))
    #if (len(files)-len(dat)) != 0:
    #    return 'Error!'

In [6]:
def runClip_12(fl,inpath='tmp'):
    tmp = gpd.read_file(fl)
    dat = pd.DataFrame({'nhruID':tmp.hru_id.unique()}) #dataframe of the unique NHM HRU identifiers
    del tmp # close tmp file
    
    reg = fl.split('_')[-2] # extract the region
    
    dat['cutline'] = fl # insert the shapefile as the cutline
    dat['region'] = reg
    dat['inpath'] = inpath # specified
    dat['outpath'] = dat.apply(make_outpath,axis=1)
    
    dat.apply(clip_raster_12,axis=1) # run the clip code
    print('HUC%s Done!'%reg)
    # test that the number of files created is the same as the number of features
    #files = glob.glob('./data/nhrus/AEA_tiffs/HUC_%s_*.tiff'%(reg))
    #if (len(files)-len(dat)) != 0:
    #    return 'Error!'

In [7]:
def is_file(df):
    hru = df.nhruID
    reg = df.region
    return os.path.isfile('./data/nhrus/AEA_tiffs/HUC_%s_nhruID_%s.tiff'%(reg,hru))

In [8]:
def parse_out(out):
    out = str(out.stdout)
    x = int(out.split()[-1].split('L')[0])
    y = int(out.split()[-3].split('P')[0])
    return x,y

In [9]:
def clip_raster_output(df):
    
    cutline = df.cutline
    feature = df.nhruID
    inpath = df.inpath
    outpath = df.outpath
    
    cmd = "gdalwarp -cutline %s -cwhere hru_id_nat=%s -crop_to_cutline -overwrite %s %s"%(cutline,
        feature,inpath,outpath)
    
    out = subprocess.run(cmd,shell=True,stdout=subprocess.PIPE)
    
    return out

In [10]:
def clip_raster_output_12(df):
    
    cutline = df.cutline
    feature = df.nhruID
    inpath = df.inpath
    outpath = df.outpath
    
    cmd = "gdalwarp -cutline %s -cwhere hru_id=%s -crop_to_cutline -overwrite %s %s"%(cutline,
        feature,inpath,outpath)
    
    out = subprocess.run(cmd,shell=True,stdout=subprocess.PIPE)
    
    return out

In [11]:
def check(fl,inpath='tmp'):
    '''Figure out which HRUS are missing'''
    reg = fl.split('_')[-2] # extract the region
    tmp = gpd.read_file(fl)
    dat = pd.DataFrame({'nhruID':tmp.hru_id_nat.unique()}) #dataframe of the unique NHM HRU identifiers
    del tmp # close tmp file
    
    dat['cutline'] = fl # insert the shapefile as the cutline
    dat['region'] = reg
    dat['inpath'] = inpath # specified
    dat['outpath'] = dat.apply(make_outpath,axis=1)
    dat['region'] = reg # insert the region
    dat['isfile'] = dat.apply(is_file,axis=1)
    
    dat2 = dat.loc[dat.isfile==False].copy() # select only the missing files
    
    if len(dat2) > 0:
        dat2['out'] = dat2.apply(clip_raster_output,axis=1) # run the clip routine again, but save the output
        res = dat2.out.map(parse_out) # parse the output into the raster size trying to be created
        x,y = zip(*res) # unpack the results and put into the dataframe 
        dat2['cols'] = x
        dat2['rows'] = y
        dat2['cells'] = dat2.rows*dat2.cols
    
        if dat2.cells.sum()>0:
            print('non-zero raster!')
    
        return dat2
    
    elif len(dat) - dat.isfile.sum() == 0:
        print('%s Complete!'%reg)

In [12]:
def check_12(fl,inpath='tmp'):
    '''Figure out which HRUS are missing'''
    reg = fl.split('_')[-2] # extract the region
    tmp = gpd.read_file(fl)
    dat = pd.DataFrame({'nhruID':tmp.hru_id.unique()}) #dataframe of the unique NHM HRU identifiers
    del tmp # close tmp file
    
    dat['cutline'] = fl # insert the shapefile as the cutline
    dat['region'] = reg
    dat['inpath'] = inpath # specified
    dat['outpath'] = dat.apply(make_outpath,axis=1)
    dat['region'] = reg # insert the region
    dat['isfile'] = dat.apply(is_file,axis=1)
    
    dat2 = dat.loc[dat.isfile==False].copy() # select only the missing files
    
    if len(dat2) > 0:
        dat2['out'] = dat2.apply(clip_raster_output_12,axis=1) # run the clip routine again, but save the output
        res = dat2.out.map(parse_out) # parse the output into the raster size trying to be created
        x,y = zip(*res) # unpack the results and put into the dataframe 
        dat2['cols'] = x
        dat2['rows'] = y
        dat2['cells'] = dat2.rows*dat2.cols
    
        if dat2.cells.sum()>0:
            print('non-zero raster!')
    
        return dat2
    
    elif len(dat) - dat.isfile.sum() == 0:
        print('%s Complete!'%reg)

In [13]:
newRegions = ['01','02','03','04','05','06','13','14','15','16','17','18','20','21']

In [14]:
# make new files
files = []
for reg in newRegions:
    files.append('./data/nhrus/clean_AEA/nhru_%s_clean.shp'%reg)

In [15]:
files

['./data/nhrus/clean_AEA/nhru_01_clean.shp',
 './data/nhrus/clean_AEA/nhru_02_clean.shp',
 './data/nhrus/clean_AEA/nhru_03_clean.shp',
 './data/nhrus/clean_AEA/nhru_04_clean.shp',
 './data/nhrus/clean_AEA/nhru_05_clean.shp',
 './data/nhrus/clean_AEA/nhru_06_clean.shp',
 './data/nhrus/clean_AEA/nhru_13_clean.shp',
 './data/nhrus/clean_AEA/nhru_14_clean.shp',
 './data/nhrus/clean_AEA/nhru_15_clean.shp',
 './data/nhrus/clean_AEA/nhru_16_clean.shp',
 './data/nhrus/clean_AEA/nhru_17_clean.shp',
 './data/nhrus/clean_AEA/nhru_18_clean.shp',
 './data/nhrus/clean_AEA/nhru_20_clean.shp',
 './data/nhrus/clean_AEA/nhru_21_clean.shp']

In [17]:
files = pd.DataFrame({'file':files})

In [None]:
files.file.apply(runClip,inpath='./data/hrap_grid_AEA_idx_sm.tiff')

In [19]:
missing = files.file.apply(check,inpath='./data/hrap_grid_AEA_idx_sm.tiff')

## Code the few remaining cells by hand

In [15]:
missing[1]

Unnamed: 0,nhruID,cutline,region,inpath,outpath,isfile,out,cols,rows,cells
2,40817,./data/nhrus/clean_AEA/nhru_08_clean.shp,8,./data/hrap_grid_AEA_idx_sm.tiff,./data/nhrus/AEA_tiffs/HUC_08_nhruID_40817.tiff,False,CompletedProcess(args='gdalwarp -cutline ./dat...,1,0,0
95,40910,./data/nhrus/clean_AEA/nhru_08_clean.shp,8,./data/hrap_grid_AEA_idx_sm.tiff,./data/nhrus/AEA_tiffs/HUC_08_nhruID_40910.tiff,False,CompletedProcess(args='gdalwarp -cutline ./dat...,0,1,0
111,40926,./data/nhrus/clean_AEA/nhru_08_clean.shp,8,./data/hrap_grid_AEA_idx_sm.tiff,./data/nhrus/AEA_tiffs/HUC_08_nhruID_40926.tiff,False,CompletedProcess(args='gdalwarp -cutline ./dat...,0,0,0


In [23]:
reg08_NHRU = [40817,40910,40926,40826,40827,40891,40908,41064]
reg08_cells = [[788822],[780963],[772030],[778776],[778776],[776509],[780963],[760827]]
reg08_percents = [[1],[1],[1],[1],[1],[1],[1],[1]]

In [25]:
reg08 = pd.DataFrame()
reg08['nhruID'] = reg08_NHRU
reg08['reg_hruID'] = [3,96,112,12,13,77,94,250]
reg08['cells'] = reg08_cells
reg08['percents'] = reg08_percents
reg08['reg'] = '08'

In [26]:
reg08.to_pickle('./data/reg08_unclipped.pcl')

In [27]:
missing[4]

Unnamed: 0,nhruID,cutline,region,inpath,outpath,isfile,out,cols,rows,cells
8081,63665,./data/nhrus/clean_AEA/nhru_10U_clean.shp,10U,./data/hrap_grid_AEA_idx_sm.tiff,./data/nhrus/AEA_tiffs/HUC_10U_nhruID_63665.tiff,False,CompletedProcess(args='gdalwarp -cutline ./dat...,0,1,0
8097,63681,./data/nhrus/clean_AEA/nhru_10U_clean.shp,10U,./data/hrap_grid_AEA_idx_sm.tiff,./data/nhrus/AEA_tiffs/HUC_10U_nhruID_63681.tiff,False,CompletedProcess(args='gdalwarp -cutline ./dat...,0,1,0


In [28]:
reg10U_NHRU = [63665,63681]
reg10U_cells = [[354526],[354513]]
reg10U_percents = [[1],[1]]

In [29]:
reg10U = pd.DataFrame()
reg10U['nhruID'] = reg10U_NHRU
reg10U['reg_hruID'] = [8082,8098]
reg10U['cells'] = reg10U_cells
reg10U['percents'] = reg10U_percents
reg10U['reg'] = '10U'

In [30]:
reg10U.to_pickle('./data/reg10U_unclipped.pcl')