## Clean NHRU shapefiles so that they can be used to crop an index raster

In [1]:
import glob
import subprocess

import geopandas as gpd

In [2]:
def check_val(geom):
    """Report whether ``geom`` is valid, as given by its ``is_valid`` attribute."""
    validity = geom.is_valid
    return validity

In [3]:
def fix_geometry(df):
    """Return a valid geometry for a single feature.

    Despite the parameter name, ``df`` is one row (any object exposing a
    ``geometry`` attribute), as passed by ``dat.apply(fix_geometry, axis=1)``
    in ``clean_file``.

    Returns the row's geometry unchanged when its ``is_valid`` flag is
    truthy; otherwise returns ``geometry.buffer(0)``, the zero-distance
    buffer used here to repair invalid geometries.
    """
    geom = df.geometry  # look the geometry up once
    if geom.is_valid:
        return geom
    # Single fall-through branch: every call returns a geometry, never an
    # implicit None.
    return geom.buffer(0)

In [10]:
def clean_file(fl):
    '''Clean an NHM HRU shapefile and save the repaired copy.

    The file is read with geopandas, each feature's geometry is passed
    through ``fix_geometry``, and the results are re-checked with
    ``check_val``. If every feature is valid afterwards, the repaired layer
    is written to ``./data/nhrus/clean_AEA/nhru_<code>_clean.shp``, where
    ``<code>`` is the second-to-last ``_``-separated token of ``fl``.

    Parameters
    ----------
    fl : str
        Path to the input shapefile; its name must contain at least two
        underscores (e.g. it matches ``nhru_*_AEA.shp``).

    Returns
    -------
    None when the cleaned file was written, or the string ``'error'`` when
    at least one geometry is still invalid (nothing is written then).
    '''
    dat = gpd.read_file(fl)  # load the shapefile

    # Repair geometries row by row, then flag which results are valid.
    dat['new_geom'] = dat.apply(fix_geometry, axis=1)
    dat['valid'] = dat.new_geom.map(check_val)

    # Test the flags directly. Comparing the number of valid rows with the
    # number of unique hru_id_nat values gives the wrong answer whenever ids
    # repeat, and can pass by coincidence when invalid rows offset distinct ids.
    if not dat['valid'].all():
        return 'error'

    num = fl.split('_')[-2]  # get the region code
    outfl = './data/nhrus/clean_AEA/nhru_%s_clean.shp' % (num)

    dat = dat.set_geometry('new_geom')  # make the repaired column the active geometry
    # remove extra columns so only the repaired geometry is written
    del dat['valid']
    del dat['geometry']
    dat.to_file(outfl)  # save the file

In [5]:
def reproject_shp(fl):
    '''Reproject an NHM shapefile from WGS84 (EPSG:4326) to EPSG:2163 with ogr2ogr.

    The output is written to ``./data/nhrus/AEA/nhru_<code>_AEA.shp``, where
    ``<code>`` is the text between the last ``_`` in ``fl`` and the next
    ``.``. An existing output is replaced (``-overwrite``).

    NOTE: EPSG:2163 is the US National Atlas Equal Area CRS, a Lambert
    azimuthal equal-area projection rather than Albers. The ``AEA`` in the
    output path is kept because later steps look for that name.

    Parameters
    ----------
    fl : str
        Path to the source shapefile.

    Returns
    -------
    None. The exit status of ogr2ogr is not inspected.

    Raises
    ------
    OSError
        If the ``ogr2ogr`` executable cannot be launched.
    '''
    num = fl.split('_')[-1].split('.')[0]
    outfl = './data/nhrus/AEA/nhru_%s_AEA.shp' % (num)

    # Pass an argument list instead of a shell string so that paths with
    # spaces or shell metacharacters reach ogr2ogr unchanged.
    cmd = [
        'ogr2ogr',
        '-s_srs', 'EPSG:4326',
        '-t_srs', 'EPSG:2163',
        '-overwrite',
        outfl,
        fl,
    ]
    subprocess.call(cmd)
    return None

In [6]:
# Collect the source shapefiles, one per nhru_* directory.
files = glob.glob('./data/nhrus/nhru_*/nhru_*.shp')

In [7]:
# Reproject every source file; reproject_shp always returns None, so the
# displayed list only shows how many files were processed.
[reproject_shp(fl) for fl in files]

[None, None, None, None, None, None, None]

In [8]:
# Rebind `files` to the reprojected outputs written by the previous cell.
files = glob.glob('./data/nhrus/AEA/nhru_*_AEA.shp')

In [11]:
# Clean each reprojected file; clean_file returns 'error' for a file that
# still has invalid geometries and None otherwise.
# NOTE(review): the recorded outputs show 7 files reprojected but only 6
# cleaned -- confirm whether one reprojection produced no output.
[clean_file(fl) for fl in files]

[None, None, None, None, None, None]