## Script to clip the index raster to each NHM HRU

Theodore Barnhart | tbarnhart@usgs.gov

In [1]:
import glob
import os
import re
import subprocess

import geopandas as gpd
import pandas as pd

In [2]:
def make_outpath(df):
    '''Build the output GeoTIFF path for one HRU record.

    df is any record exposing ``region`` and ``nhruID`` attributes (for
    example one row of the job table). Returns the path as a string.
    '''
    template = './data/nhrus/AEA_tiffs/HUC_%s_nhruID_%s.tiff'
    return template % (df.region, df.nhruID)

In [3]:
def clip_raster(df):
    '''Clip the input raster to a single HRU polygon by running gdalwarp.

    Parameters
    ----------
    df : record with ``cutline``, ``nhruID``, ``inpath`` and ``outpath``
        attributes (for example one row of the job table).
        ``cutline`` is the shapefile holding the polygons, ``nhruID`` selects
        the polygon whose ``hru_id_nat`` matches, ``inpath`` is the raster to
        clip and ``outpath`` is where the clipped raster is written.

    Returns
    -------
    None
        The exit status of gdalwarp is not inspected.

    Raises
    ------
    FileNotFoundError
        If the ``gdalwarp`` executable cannot be found.
    '''
    # Pass the arguments as a list (no shell) so that paths containing
    # spaces or shell metacharacters reach gdalwarp unchanged.
    cmd = [
        'gdalwarp',
        '-cutline', str(df.cutline),
        '-cwhere', 'hru_id_nat=%s' % (df.nhruID,),
        '-crop_to_cutline',
        '-overwrite',
        str(df.inpath),
        str(df.outpath),
    ]

    subprocess.call(cmd)

    return None

In [4]:
def runClip(fl,inpath='tmp'):
    '''Clip the raster at ``inpath`` once for every HRU found in shapefile ``fl``.

    fl     : path to the shapefile; it is also used as the gdalwarp cutline.
             The region is the second-to-last underscore-separated piece of
             this path.
    inpath : path of the raster to clip.

    Prints 'Done!' when every HRU has been attempted and returns None.
    '''
    shapes = gpd.read_file(fl)
    hru_ids = shapes.hru_id_nat.unique()  # unique NHM HRU identifiers
    del shapes  # release the shapefile contents

    region = fl.split('_')[-2]

    # one job per HRU: which polygon to cut, from which raster
    jobs = pd.DataFrame({'nhruID': hru_ids}).assign(
        cutline=fl,
        region=region,
        inpath=inpath,
    )
    jobs['outpath'] = jobs.apply(make_outpath, axis=1)

    jobs.apply(clip_raster, axis=1)  # run the clip for every job
    print('Done!')
    # NOTE: nothing here verifies that one output file exists per HRU.

In [5]:
def is_file(df):
    '''Report whether the clipped raster for one HRU record exists on disk.

    df is any record exposing ``nhruID`` and ``region`` attributes.
    Returns True when the expected GeoTIFF is a regular file, else False.
    '''
    hru_id, region = df.nhruID, df.region
    candidate = './data/nhrus/AEA_tiffs/HUC_%s_nhruID_%s.tiff' % (region, hru_id)
    return os.path.isfile(candidate)

In [6]:
def parse_out(out):
    '''Extract the raster size gdalwarp reported it was creating.

    gdalwarp announces the output size on stdout as
    ``Creating output file that is <N>P x <M>L.`` (pixels by lines).

    Parameters
    ----------
    out : object with a ``stdout`` attribute (bytes or str), such as the
        value returned by ``subprocess.run(..., stdout=subprocess.PIPE)``.

    Returns
    -------
    (x, y) : tuple of int
        ``x`` is the number of lines (the value before ``L``) and ``y`` is
        the number of pixels per line (the value before ``P``).

    Raises
    ------
    ValueError
        If no size announcement is present in the captured output.
    '''
    raw = out.stdout
    if isinstance(raw, (bytes, bytearray)):
        text = raw.decode(errors='replace')
    else:
        text = str(raw)

    # Search the whole text rather than relying on token positions, so that
    # progress messages printed after the size line do not break parsing.
    match = re.search(r'(\d+)P x (\d+)L', text)
    if match is None:
        raise ValueError('no raster size found in gdalwarp output: %r' % (text,))

    pixels, lines = int(match.group(1)), int(match.group(2))
    x, y = lines, pixels
    return x,y

In [7]:
def clip_raster_output(df):
    '''Clip the input raster to a single HRU polygon and capture gdalwarp's stdout.

    Parameters
    ----------
    df : record with ``cutline``, ``nhruID``, ``inpath`` and ``outpath``
        attributes (for example one row of the job table).
        ``cutline`` is the shapefile holding the polygons, ``nhruID`` selects
        the polygon whose ``hru_id_nat`` matches, ``inpath`` is the raster to
        clip and ``outpath`` is where the clipped raster is written.

    Returns
    -------
    subprocess.CompletedProcess
        Result of the gdalwarp call; ``stdout`` holds the captured bytes.
        The exit status is not checked here.

    Raises
    ------
    FileNotFoundError
        If the ``gdalwarp`` executable cannot be found.
    '''
    # Pass the arguments as a list (no shell) so that paths containing
    # spaces or shell metacharacters reach gdalwarp unchanged.
    cmd = [
        'gdalwarp',
        '-cutline', str(df.cutline),
        '-cwhere', 'hru_id_nat=%s' % (df.nhruID,),
        '-crop_to_cutline',
        '-overwrite',
        str(df.inpath),
        str(df.outpath),
    ]

    out = subprocess.run(cmd,stdout=subprocess.PIPE)

    return out

In [8]:
def check(fl,inpath='tmp'):
    '''Figure out which HRUs are missing and re-attempt their clips.

    For every HRU in the shapefile whose output raster does not exist, the
    clip is run again with its stdout captured, and the raster size that
    gdalwarp reported it was creating is recorded.

    Parameters
    ----------
    fl : str
        Path to the shapefile; it is also used as the gdalwarp cutline. The
        region is the second-to-last underscore-separated piece of this path.
    inpath : str
        Path of the raster to clip.

    Returns
    -------
    pandas.DataFrame
        One row per HRU whose output file was missing, with the columns
        nhruID, cutline, region, inpath, outpath, isfile, out, cols, rows
        and cells. The frame is empty when no output file was missing.
    '''
    reg = fl.split('_')[-2] # extract the region
    tmp = gpd.read_file(fl)
    dat = pd.DataFrame({'nhruID':tmp.hru_id_nat.unique()}) #dataframe of the unique NHM HRU identifiers
    del tmp # close tmp file

    dat['cutline'] = fl # insert the shapefile as the cutline
    dat['region'] = reg # insert the region
    dat['inpath'] = inpath # specified

    # Rows are iterated explicitly instead of using DataFrame.apply(axis=1):
    # apply on a frame without rows yields an empty DataFrame, which cannot
    # be stored as a single column.
    dat['outpath'] = pd.Series([make_outpath(row) for _, row in dat.iterrows()],
                               index=dat.index, dtype=object)
    dat['isfile'] = pd.Series([is_file(row) for _, row in dat.iterrows()],
                              index=dat.index, dtype=bool)

    dat = dat.loc[~dat['isfile']].copy() # select only the missing files

    # run the clip routine again, but save the output
    outs = [clip_raster_output(row) for _, row in dat.iterrows()]
    # parse the output into the raster size trying to be created
    sizes = [parse_out(o) for o in outs]

    dat['out'] = pd.Series(outs, index=dat.index, dtype=object)
    # parse_out returns (lines, pixels): lines are rows, pixels are columns
    dat['cols'] = pd.Series([pixels for _, pixels in sizes], index=dat.index, dtype='int64')
    dat['rows'] = pd.Series([lines for lines, _ in sizes], index=dat.index, dtype='int64')
    dat['cells'] = dat['rows']*dat['cols']

    if dat['cells'].sum()>0:
        print('non-zero raster!')

    return dat

In [9]:
# Collect the path of every shapefile in the clean_AEA directory.
files = glob.glob('./data/nhrus/clean_AEA/*.shp')

In [10]:
# Put the shapefile paths in a one-column table so a function can be applied to each path.
files = pd.DataFrame({'file':files})

In [None]:
# Clip the raster for every shapefile; inpath is forwarded to runClip as a keyword argument.
files.file.apply(runClip,inpath='./data/hrap_grid_AEA_sm_95_25.tiff')

Done!
Done!
Done!


In [105]:
# Run check on every shapefile; each element of `missing` is the table check returned for one shapefile.
missing = files.file.apply(check,inpath='./data/hrap_grid_AEA_sm_95_25.tiff')

In [115]:
# Print, per shapefile, the summed cell count of the rasters attempted for its missing HRUs.
for mis in missing:
    print(mis.cells.sum())

0
0
0
0
0
0


In [116]:
# Number of shapefiles in the table.
len(files)

6