In [1]:
import os

import geopandas
import numpy as np
import pandas as pd

# Load the data

In [2]:
# Source layers, referenced through zip:// paths so the archives are passed to
# geopandas as-is.
watersheds_src = 'zip://data/watersheds.zip'
storm_points_src = 'zip://data/storm_points.zip'

watersheds = geopandas.read_file(watersheds_src)
gdf = geopandas.read_file(storm_points_src)

In [3]:
# Preview the first three watershed records (attribute columns plus geometry).
watersheds.head(3)

Unnamed: 0,Watershed,TMDL_Water,Phase,Area_Ac,TIA_Ac,TIA_Pct,GlobalID,geometry
0,Roanoke River above Masons Creek,Roanoke River 2,,40530.632918,0.0,0.0,a6b2fd42-5f46-4486-9101-77d3554abd41,"POLYGON ((11024671.733 3652589.891, 11024699.5..."
1,Back Creek,Back Creek,,37561.660519,0.0,0.0,abbb57b8-7d38-4ed4-b70d-d41ed83ea617,"POLYGON ((11086384.494 3616511.612, 11086405.9..."
2,Barnhardt Creek,"Mud Lick Creek, Murray Run, and Ore Branch",,2630.613126,0.0,0.0,4e65165c-536f-4482-b9a1-ba88363f735a,"POLYGON ((11041922.519 3625650.977, 11041777.2..."


In [4]:
# Preview the first ten storm points before any clean-up; the cells below fill in
# rows whose UNIQUEID is 0 and rows whose WATERSHED is missing.
gdf.head(10)

Unnamed: 0,UNIQUEID,WATERSHED,geometry
0,1001,Back Creek,POINT (11088195.428 3611249.643)
1,1002,Back Creek,POINT (11025054.510 3597777.981)
2,1003,Carvin Creek,POINT (11058512.062 3680230.599)
3,1004,Roanoke River above Masons Creek,POINT (11012522.583 3639462.329)
4,1005,Roanoke River above Masons Creek,POINT (11018717.113 3637251.922)
5,0,,POINT (11064369.069 3643759.987)
6,0,,POINT (11031997.087 3641956.674)
7,0,,POINT (11039318.002 3643422.669)
8,0,,POINT (11054564.431 3675578.631)
9,0,,POINT (11051263.146 3586817.029)


# Add IDs

In [5]:
# Rows whose UNIQUEID is 0 are the ones that receive a new identifier.
idx = gdf['UNIQUEID'].eq(0)
n = idx.sum()

# Number the new identifiers consecutively, starting right after the largest
# identifier already present in the column.
max_id = gdf['UNIQUEID'].max()
first_new_id = max_id + 1
new_ids = np.arange(first_new_id, first_new_id + n)

print(new_ids)

[1006 1007 1008 1009 1010]


In [6]:
# Write the generated identifiers into the rows selected by idx. This modifies gdf
# in place.
# NOTE: depends on idx and new_ids from the previous cell; idx is reassigned to a
# different mask further down, so this cell must be run in notebook order.
gdf.loc[idx,'UNIQUEID'] = new_ids
gdf.head(10)

Unnamed: 0,UNIQUEID,WATERSHED,geometry
0,1001,Back Creek,POINT (11088195.428 3611249.643)
1,1002,Back Creek,POINT (11025054.510 3597777.981)
2,1003,Carvin Creek,POINT (11058512.062 3680230.599)
3,1004,Roanoke River above Masons Creek,POINT (11012522.583 3639462.329)
4,1005,Roanoke River above Masons Creek,POINT (11018717.113 3637251.922)
5,1006,,POINT (11064369.069 3643759.987)
6,1007,,POINT (11031997.087 3641956.674)
7,1008,,POINT (11039318.002 3643422.669)
8,1009,,POINT (11054564.431 3675578.631)
9,1010,,POINT (11051263.146 3586817.029)


# Add Watersheds

In [7]:
# Boolean mask of the points that have no watershed name, followed by how many
# such points there are.
idx = gdf['WATERSHED'].isna()
idx.sum()

5

In [8]:
# Spatially join every storm point to the watershed polygon it falls in.
# how='left' keeps one or more rows for every point in gdf and carries gdf's
# index labels over to the result.
# NOTE(review): sjoin assumes both layers share a CRS — confirm gdf.crs and
# watersheds.crs agree.
joined_df = geopandas.sjoin(gdf,watersheds,how='left')

# A point that intersects more than one polygon (overlapping or shared
# boundaries) comes back once per match, repeating its index label. The cells
# below align on gdf's index, which requires unique labels, so keep only the
# first match per point. Which match is "first" is arbitrary; with
# non-overlapping polygons this line is a no-op.
joined_df = joined_df[~joined_df.index.duplicated(keep='first')]
joined_df.head()

Unnamed: 0,UNIQUEID,WATERSHED,geometry,index_right,Watershed,TMDL_Water,Phase,Area_Ac,TIA_Ac,TIA_Pct,GlobalID
0,1001,Back Creek,POINT (11088195.428 3611249.643),1,Back Creek,Back Creek,,37561.660519,0.0,0.0,abbb57b8-7d38-4ed4-b70d-d41ed83ea617
1,1002,Back Creek,POINT (11025054.510 3597777.981),1,Back Creek,Back Creek,,37561.660519,0.0,0.0,abbb57b8-7d38-4ed4-b70d-d41ed83ea617
2,1003,Carvin Creek,POINT (11058512.062 3680230.599),3,Carvin Creek,Carvin Creek,III,14830.576649,0.0,0.0,01525076-e9f5-45be-a686-5a5f664a3835
3,1004,Roanoke River above Masons Creek,POINT (11012522.583 3639462.329),0,Roanoke River above Masons Creek,Roanoke River 2,,40530.632918,0.0,0.0,a6b2fd42-5f46-4486-9101-77d3554abd41
4,1005,Roanoke River above Masons Creek,POINT (11018717.113 3637251.922),0,Roanoke River above Masons Creek,Roanoke River 2,,40530.632918,0.0,0.0,a6b2fd42-5f46-4486-9101-77d3554abd41


In [9]:
# From the joined table, take the polygon-derived watershed name for just the
# points flagged by idx (those that had no name of their own).
joined_names = joined_df['Watershed']
missing_watersheds = joined_names.loc[idx]
print(missing_watersheds)

5    Carvin Creek
6     Mason Creek
7    Peters Creek
8    Carvin Creek
9      Back Creek
Name: Watershed, dtype: object


In [10]:
# Fill in the watershed name for the rows flagged by idx. This modifies gdf in
# place. missing_watersheds is a Series, so pandas matches its values to gdf rows
# by index label rather than by position.
# NOTE: depends on idx and missing_watersheds from the cells above; run in order.
gdf.loc[idx,'WATERSHED'] = missing_watersheds
gdf.head(10)

Unnamed: 0,UNIQUEID,WATERSHED,geometry
0,1001,Back Creek,POINT (11088195.428 3611249.643)
1,1002,Back Creek,POINT (11025054.510 3597777.981)
2,1003,Carvin Creek,POINT (11058512.062 3680230.599)
3,1004,Roanoke River above Masons Creek,POINT (11012522.583 3639462.329)
4,1005,Roanoke River above Masons Creek,POINT (11018717.113 3637251.922)
5,1006,Carvin Creek,POINT (11064369.069 3643759.987)
6,1007,Mason Creek,POINT (11031997.087 3641956.674)
7,1008,Peters Creek,POINT (11039318.002 3643422.669)
8,1009,Carvin Creek,POINT (11054564.431 3675578.631)
9,1010,Back Creek,POINT (11051263.146 3586817.029)


# Write out the data

In [11]:
# In practice we might simply overwrite the input data; for teaching purposes the
# result is written to a separate file instead.

# to_file() does not create missing parent directories, so make sure 'out/'
# exists first. Otherwise a fresh checkout fails here on Restart & Run All.
os.makedirs('out', exist_ok=True)
gdf.to_file('out/storm_points_updated.shp')