## Clean NHRU Shapefiles to that they can be used to crop an index raster

In [1]:
import geopandas as gpd
import subprocess

In [2]:
def check_val(geom):
    return geom.is_valid # return true if geometry is valid in shapely

In [3]:
def fix_geometry(df):
    if df.geometry.is_valid: # if the geometry is valid in shapely, do nothing
        return df.geometry
    
    elif df.geometry.is_valid==False: # if the geometry is invalid, fix it with a buffer of 0
        return df.geometry.buffer(0)

In [19]:
def clean_file(fl):
    '''Clean a nhm hru shapefile and save the output'''
    print(fl)
    dat = gpd.read_file(fl) # load the shapefile
    dat['new_geom'] = dat.apply(fix_geometry,axis=1)
    dat['valid'] = dat.new_geom.map(check_val)
    valid_features = dat.valid.sum()
    num_features = len(dat.hru_id_nat.unique())
    
    if (num_features-valid_features) == 0: # if there are no invalid features
        num = fl.split('_')[-2] # get the region code
        outfl = './data/nhrus/clean_AEA/nhru_%s_clean.shp'%(num)
        dat = dat.set_geometry('new_geom') # assign new geometry to the shapefile
        # remove extra columns
        del dat['valid']
        del dat['geometry']
        dat.to_file(outfl) # save the file
    else: 
        return 'error'

In [24]:
def clean_file_12(fl):
    '''Clean a nhm hru shapefile and save the output'''
    print(fl)
    dat = gpd.read_file(fl) # load the shapefile
    dat['new_geom'] = dat.apply(fix_geometry,axis=1)
    dat['valid'] = dat.new_geom.map(check_val)
    valid_features = dat.valid.sum()
    num_features = len(dat.hru_id.unique())
    
    if (num_features-valid_features) == 0: # if there are no invalid features
        num = fl.split('_')[-2] # get the region code
        outfl = './data/nhrus/clean_AEA/nhru_%s_clean.shp'%(num)
        dat = dat.set_geometry('new_geom') # assign new geometry to the shapefile
        # remove extra columns
        del dat['valid']
        del dat['geometry']
        dat.to_file(outfl) # save the file
    else: 
        return 'error'

In [12]:
def reproject_shp(fl):
    '''reproject the nhm shapefile from WGS84 to EPSG:2163 (Albers Equal Area, National Map)'''
    
    num = fl.split('_')[-1].split('.')[0]
    outfl = './data/nhrus/AEA/nhru_%s_AEA.shp'%(num)
    
    cmd = "ogr2ogr -t_srs 'EPSG:2163' -overwrite %s %s"%(outfl,fl)
    subprocess.call(cmd,shell=True)
    return None


# -s_srs 'EPSG:4326'

In [13]:
files = glob.glob('./data/nhrus/nhru_*/nhru_*.shp')

In [14]:
files

['./data/nhrus/nhru_07/nhru_07.shp',
 './data/nhrus/nhru_08/nhru_08.shp',
 './data/nhrus/nhru_09/nhru_09.shp',
 './data/nhrus/nhru_10L/nhru_10L.shp',
 './data/nhrus/nhru_10U/nhru_10U.shp',
 './data/nhrus/nhru_11/nhru_11.shp',
 './data/nhrus/nhru_12/nhru_12.shp']

In [15]:
[reproject_shp(fl) for fl in files]

[None, None, None, None, None, None, None]

In [20]:
files = glob.glob('./data/nhrus/AEA/nhru_*_AEA.shp')

In [21]:
files

['./data/nhrus/AEA/nhru_07_AEA.shp',
 './data/nhrus/AEA/nhru_08_AEA.shp',
 './data/nhrus/AEA/nhru_09_AEA.shp',
 './data/nhrus/AEA/nhru_10L_AEA.shp',
 './data/nhrus/AEA/nhru_10U_AEA.shp',
 './data/nhrus/AEA/nhru_11_AEA.shp',
 './data/nhrus/AEA/nhru_12_AEA.shp']

In [25]:
clean_file_12(files[-1])

./data/nhrus/AEA/nhru_12_AEA.shp


## This seems to be an issue with region 8 now....

In [22]:
[clean_file(fl) for fl in files]

./data/nhrus/AEA/nhru_07_AEA.shp
./data/nhrus/AEA/nhru_08_AEA.shp


KeyError: "['POI_ID' 'hru_id_nat' 'hru_id_reg' 'region' 'geometry'] not in index"