## Clean NHRU shapefiles so that they can be used to crop an index raster

In [1]:
import glob
import subprocess

import geopandas as gpd
import pandas as pd

In [2]:
# Region codes (zero-padded strings) that the reprojection and cleaning
# steps further down are run for.
newRegions = [
    '01', '02', '03', '04', '05', '06',
    '13', '14', '15', '16', '17', '18',
    '20', '21',
]

In [10]:
def check_length(fl):
    '''Count the features in a shapefile and print them next to its region code.

    The region code is taken from the file name: the text after the last
    underscore and before the first dot (e.g. ``nhru_10L.shp`` -> ``10L``).

    Parameters
    ----------
    fl : str
        Path to the shapefile, using ``/`` as the separator.

    Returns
    -------
    tuple
        ``(reg, number)``: the region code string and the number of rows
        that geopandas read from the file.
    '''
    filename = fl.split('/')[-1]
    suffix = filename.split('_')[-1]  # e.g. '01.shp'
    reg = suffix.split('.')[0]
    number = len(gpd.read_file(fl))
    print('region %s: %s'%(reg,number))
    return reg,number

In [11]:
# Collect the region code and HRU count of every regional shapefile on disk.
regs = []
hrus = []
# glob.glob returns matches in arbitrary (filesystem) order; sorting keeps
# regs/hrus, and anything built from them, reproducible between runs.
for fl in sorted(glob.glob('./data/nhrus/nhru_*/nhru_*.shp')):
    reg, number = check_length(fl)
    regs.append(reg)
    hrus.append(number)

region 01: 2462
region 02: 4827
region 03: 9899
region 04: 5936
region 05: 7182
region 06: 2303
region 07: 8205
region 08: 4449
region 09: 1717
region 10L: 8603
region 10U: 10299
region 11: 7373
region 12: 7815
region 13: 1958
region 14: 3879
region 15: 3441
region 16: 2664
region 17: 11102
region 18: 5837
region 20: 322
region 21: 489


In [16]:
# HRU counts as a one-column table ('number') indexed by region code;
# the index keeps the name 'reg'.
df = pd.DataFrame({'reg': regs, 'number': hrus}).set_index('reg')

In [17]:
# Show the regions ordered from fewest to most HRUs
df.sort_values(by='number')

Unnamed: 0_level_0,number
reg,Unnamed: 1_level_1
20,322
21,489
09,1717
13,1958
06,2303
01,2462
16,2664
15,3441
14,3879
08,4449


In [3]:
def check_val(geom):
    '''Return the ``is_valid`` flag of a geometry (True when shapely considers it valid).'''
    validity = geom.is_valid
    return validity

In [4]:
def fix_geometry(df):
    '''Return a usable geometry for one row of a GeoDataFrame.

    Meant to be passed to ``DataFrame.apply(..., axis=1)``: despite its name,
    ``df`` is a single row, and only its ``geometry`` attribute is read.

    Parameters
    ----------
    df : row-like
        Object exposing a ``geometry`` attribute with ``is_valid`` and
        ``buffer``.

    Returns
    -------
    geometry
        The row's own geometry when it is already valid, otherwise the result
        of buffering it by 0. The buffered result is not re-validated here.
    '''
    geom = df.geometry
    if geom.is_valid:  # already valid in shapely, keep it untouched
        return geom
    # invalid geometry: fix it with a buffer of 0
    return geom.buffer(0)

In [5]:
def clean_file(fl, id_col='hru_id_nat'):
    '''Clean a nhm hru shapefile and save the output.

    Every geometry is passed through ``fix_geometry`` and re-checked with
    ``check_val``. The cleaned file is written only when the number of valid
    geometries equals the number of unique values in ``id_col``.

    Parameters
    ----------
    fl : str
        Path to the input shapefile. The region code is taken from the
        second-to-last ``_``-separated piece of the path
        (``.../nhru_01_AEA.shp`` -> ``01``).
    id_col : str, optional
        Name of the column holding the HRU identifier. Defaults to
        ``'hru_id_nat'``; pass another name for files that use a different
        id column.

    Returns
    -------
    None or str
        ``None`` after the file has been written to
        ``./data/nhrus/clean_AEA/nhru_<region>_clean.shp``; the string
        ``'error'`` (and nothing is written) when the counts disagree.

    NOTE(review): the check compares valid rows against *unique* ids, so a
    file with duplicated ids is also reported as ``'error'``.
    '''
    print(fl)
    dat = gpd.read_file(fl) # load the shapefile
    dat['new_geom'] = dat.apply(fix_geometry, axis=1)
    dat['valid'] = dat.new_geom.map(check_val)
    valid_features = dat.valid.sum()
    num_features = len(dat[id_col].unique())

    if num_features != valid_features: # some features are still invalid
        return 'error'

    num = fl.split('_')[-2] # get the region code
    outfl = './data/nhrus/clean_AEA/nhru_%s_clean.shp'%(num)
    dat = dat.set_geometry('new_geom') # assign new geometry to the shapefile
    # remove extra columns so only the repaired geometry is written
    del dat['valid']
    del dat['geometry']
    dat.to_file(outfl) # save the file

In [6]:
def clean_file_12(fl):
    '''Clean a nhm hru shapefile whose id column is ``hru_id`` and save the output.

    Geometries are passed through ``fix_geometry`` and re-checked with
    ``check_val``. The file is written to
    ``./data/nhrus/clean_AEA/nhru_<region>_clean.shp`` only when the number
    of valid geometries equals the number of unique ``hru_id`` values;
    otherwise the string ``'error'`` is returned and nothing is saved.
    ``<region>`` is the second-to-last ``_``-separated piece of ``fl``.
    '''
    print(fl)
    shapes = gpd.read_file(fl) # load the shapefile
    shapes['new_geom'] = shapes.apply(fix_geometry,axis=1)
    shapes['valid'] = shapes.new_geom.map(check_val)
    n_valid = shapes.valid.sum()
    n_ids = len(shapes.hru_id.unique())

    if (n_ids-n_valid) != 0: # at least one feature is still invalid
        return 'error'

    region = fl.split('_')[-2] # region code from the file name
    target = './data/nhrus/clean_AEA/nhru_%s_clean.shp'%(region)
    shapes = shapes.set_geometry('new_geom') # make the repaired column the active geometry
    # drop the helper flag and the original geometry before writing
    for extra in ('valid', 'geometry'):
        del shapes[extra]
    shapes.to_file(target) # save the file

In [7]:
def reproject_shp(fl):
    '''Reproject a nhm shapefile to EPSG:2163 (US National Atlas Equal Area) with ogr2ogr.

    No ``-s_srs`` is passed, so ogr2ogr takes the source CRS from the input
    file itself. The result is written to
    ``./data/nhrus/AEA/nhru_<region>_AEA.shp``, replacing an existing file
    (``-overwrite``). ``<region>`` is the text after the last underscore and
    before the following dot in ``fl``.

    Parameters
    ----------
    fl : str
        Path to the input shapefile, e.g. ``./data/nhrus/nhru_01/nhru_01.shp``.

    Returns
    -------
    None
        Always. Failures are reported with ``print`` rather than raised.
    '''
    num = fl.split('_')[-1].split('.')[0]
    outfl = './data/nhrus/AEA/nhru_%s_AEA.shp'%(num)

    # Pass the arguments as a list (no shell) so paths containing spaces or
    # shell metacharacters reach ogr2ogr unchanged.
    cmd = ['ogr2ogr', '-t_srs', 'EPSG:2163', '-overwrite', outfl, fl]
    try:
        status = subprocess.call(cmd)
    except FileNotFoundError:
        print('ogr2ogr was not found on PATH; %s was not reprojected'%(fl))
        return None
    if status != 0:
        # A failed reprojection would otherwise go unnoticed here.
        print('ogr2ogr exited with status %s for %s'%(status,fl))
    return None


# -s_srs 'EPSG:4326'

In [8]:
# Input shapefile path for every region code in newRegions
files = ['./data/nhrus/nhru_%s/nhru_%s.shp'%(reg,reg) for reg in newRegions]

In [9]:
files  # display the input paths built from newRegions

['./data/nhrus/nhru_01/nhru_01.shp',
 './data/nhrus/nhru_02/nhru_02.shp',
 './data/nhrus/nhru_03/nhru_03.shp',
 './data/nhrus/nhru_04/nhru_04.shp',
 './data/nhrus/nhru_05/nhru_05.shp',
 './data/nhrus/nhru_06/nhru_06.shp',
 './data/nhrus/nhru_13/nhru_13.shp',
 './data/nhrus/nhru_14/nhru_14.shp',
 './data/nhrus/nhru_15/nhru_15.shp',
 './data/nhrus/nhru_16/nhru_16.shp',
 './data/nhrus/nhru_17/nhru_17.shp',
 './data/nhrus/nhru_18/nhru_18.shp',
 './data/nhrus/nhru_20/nhru_20.shp',
 './data/nhrus/nhru_21/nhru_21.shp']

In [10]:
# Reproject every input shapefile; the displayed list holds one None per file
list(map(reproject_shp, files))

[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None]

In [12]:
# Point `files` at the reprojected (AEA) shapefile of every region in newRegions
files = ['./data/nhrus/AEA/nhru_%s_AEA.shp'%reg for reg in newRegions]

In [13]:
files  # display the reprojected (AEA) paths

['./data/nhrus/AEA/nhru_01_AEA.shp',
 './data/nhrus/AEA/nhru_02_AEA.shp',
 './data/nhrus/AEA/nhru_03_AEA.shp',
 './data/nhrus/AEA/nhru_04_AEA.shp',
 './data/nhrus/AEA/nhru_05_AEA.shp',
 './data/nhrus/AEA/nhru_06_AEA.shp',
 './data/nhrus/AEA/nhru_13_AEA.shp',
 './data/nhrus/AEA/nhru_14_AEA.shp',
 './data/nhrus/AEA/nhru_15_AEA.shp',
 './data/nhrus/AEA/nhru_16_AEA.shp',
 './data/nhrus/AEA/nhru_17_AEA.shp',
 './data/nhrus/AEA/nhru_18_AEA.shp',
 './data/nhrus/AEA/nhru_20_AEA.shp',
 './data/nhrus/AEA/nhru_21_AEA.shp']

In [25]:
# Run the hru_id-based cleaner on the last path in `files`.
# NOTE(review): the recorded output of this cell names nhru_12_AEA.shp, but
# with the current newRegions the last entry of `files` is nhru_21_AEA.shp;
# this cell looks stale, so confirm the intended region before re-running.
clean_file_12(files[-1])

./data/nhrus/AEA/nhru_12_AEA.shp


## This seems to be an issue with region 8 now....

In [14]:
# Clean every reprojected file; each entry of the displayed list is None on
# success or 'error' when a file still has invalid features
list(map(clean_file, files))

./data/nhrus/AEA/nhru_01_AEA.shp
./data/nhrus/AEA/nhru_02_AEA.shp
./data/nhrus/AEA/nhru_03_AEA.shp
./data/nhrus/AEA/nhru_04_AEA.shp
./data/nhrus/AEA/nhru_05_AEA.shp
./data/nhrus/AEA/nhru_06_AEA.shp
./data/nhrus/AEA/nhru_13_AEA.shp
./data/nhrus/AEA/nhru_14_AEA.shp
./data/nhrus/AEA/nhru_15_AEA.shp
./data/nhrus/AEA/nhru_16_AEA.shp
./data/nhrus/AEA/nhru_17_AEA.shp
./data/nhrus/AEA/nhru_18_AEA.shp
./data/nhrus/AEA/nhru_20_AEA.shp
./data/nhrus/AEA/nhru_21_AEA.shp


[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None]