For reference, see Esri's <a href="https://developers.arcgis.com/python/guide/introduction-to-the-spatially-enabled-dataframe/">Introduction to the Spatially Enabled DataFrame</a>.

In [1]:
import arcpy
from arcgis.gis import GIS

import pandas as pd
import numpy as np

In [2]:
# Create the GIS connection object. No URL or credentials are passed here;
# `gis` is used further down to build the map widget (gis.map).

gis = GIS()

In [3]:
# Define your working directory: "./" is a relative path, i.e. the directory
# the notebook is running from. arcpy.ListFeatureClasses() reads this setting.

arcpy.env.workspace = "./"

In [4]:
# List the feature classes (layers) that arcpy finds in the working directory
# and print their names, so we know what is available to load.

fclist = arcpy.ListFeatureClasses()
print(fclist)

['storm_points.shp', 'watersheds.shp']


In [6]:
# Read each shapefile into a spatially enabled DataFrame: `sdf` holds the storm
# points and `watersheds` holds the watershed polygons.
sdf = pd.DataFrame.spatial.from_featureclass("storm_points.shp")
watersheds = pd.DataFrame.spatial.from_featureclass("watersheds.shp")

In [7]:
# Preview the first three watershed polygons and their attributes.
watersheds.head(n=3)

Unnamed: 0,FID,Watershed,TMDL_Water,Phase,Area_Ac,TIA_Ac,TIA_Pct,GlobalID,SHAPE
0,0,Roanoke River above Masons Creek,Roanoke River 2,,40530.632918,0.0,0.0,a6b2fd42-5f46-4486-9101-77d3554abd41,"{""rings"": [[[11024671.733272359, 3652589.89073..."
1,1,Back Creek,Back Creek,,37561.660519,0.0,0.0,abbb57b8-7d38-4ed4-b70d-d41ed83ea617,"{""rings"": [[[11086384.49403295, 3616511.612173..."
2,2,Barnhardt Creek,"Mud Lick Creek, Murray Run, and Ore Branch",,2630.613126,0.0,0.0,4e65165c-536f-4482-b9a1-ba88363f735a,"{""rings"": [[[11041922.518980697, 3625650.97709..."


In [8]:
# Preview the first ten storm points; note the rows with UNIQUEID == 0 and a
# blank WATERSHED, which the following sections fill in.
sdf.head(n=10)

Unnamed: 0,FID,UNIQUEID,WATERSHED,SHAPE
0,0,1001,Back Creek,"{""x"": 11088195.428318404, ""y"": 3611249.6433582..."
1,1,1002,Back Creek,"{""x"": 11025054.510231826, ""y"": 3597777.9812612..."
2,2,1003,Carvin Creek,"{""x"": 11058512.062181782, ""y"": 3680230.5988518..."
3,3,1004,Roanoke River above Masons Creek,"{""x"": 11012522.582823968, ""y"": 3639462.3290011..."
4,4,1005,Roanoke River above Masons Creek,"{""x"": 11018717.112518717, ""y"": 3637251.9218605..."
5,5,0,,"{""x"": 11064369.069298763, ""y"": 3643759.9865084..."
6,6,0,,"{""x"": 11031997.086837519, ""y"": 3641956.6735182..."
7,7,0,,"{""x"": 11039318.002136372, ""y"": 3643422.6693437..."
8,8,0,,"{""x"": 11054564.430870323, ""y"": 3675578.6309410..."
9,9,0,,"{""x"": 11051263.145793304, ""y"": 3586817.0288054..."


# Visualize Data

In [9]:
# Create a map widget for Roanoke, VA (the second argument, 10, is presumably
# the zoom level -- confirm against the arcgis documentation).
m = gis.map("Roanoke, VA", 10)
# Draw both layers on the same widget, coloured by their watershed-name column
# (renderer_type='u' presumably selects a unique-value renderer -- confirm).
# NOTE(review): `pallette` is spelled with a double "l"; verify that this is the
# keyword the installed arcgis version actually accepts.
watersheds.spatial.plot(kind='map',map_widget=m,alpha=.5,renderer_type='u',col='Watershed',pallette='jet')
sdf.spatial.plot(kind='map',map_widget=m,renderer_type='u',col='WATERSHED',pallette='jet')
# A bare `m` as the last expression displays the widget as the cell output.
m

MapView(layout=Layout(height='400px', width='100%'))

# Add IDs

In [10]:
# Rows whose UNIQUEID is still 0 have not been given an ID yet.
idx = sdf['UNIQUEID'].eq(0)
n = idx.sum()

# Continue numbering immediately after the largest ID already in use,
# generating exactly one new ID per flagged row.
max_id = sdf['UNIQUEID'].max()
new_ids = np.arange(max_id + 1, max_id + 1 + n)

print(new_ids)

[1006 1007 1008 1009 1010]


In [11]:
# Write the freshly generated IDs into the rows flagged by `idx`, then preview
# the frame to confirm that no UNIQUEID of 0 remains in the first ten rows.
sdf.loc[idx, 'UNIQUEID'] = new_ids
sdf.head(n=10)

Unnamed: 0,FID,UNIQUEID,WATERSHED,SHAPE
0,0,1001,Back Creek,"{""x"": 11088195.428318404, ""y"": 3611249.6433582..."
1,1,1002,Back Creek,"{""x"": 11025054.510231826, ""y"": 3597777.9812612..."
2,2,1003,Carvin Creek,"{""x"": 11058512.062181782, ""y"": 3680230.5988518..."
3,3,1004,Roanoke River above Masons Creek,"{""x"": 11012522.582823968, ""y"": 3639462.3290011..."
4,4,1005,Roanoke River above Masons Creek,"{""x"": 11018717.112518717, ""y"": 3637251.9218605..."
5,5,1006,,"{""x"": 11064369.069298763, ""y"": 3643759.9865084..."
6,6,1007,,"{""x"": 11031997.086837519, ""y"": 3641956.6735182..."
7,7,1008,,"{""x"": 11039318.002136372, ""y"": 3643422.6693437..."
8,8,1009,,"{""x"": 11054564.430870323, ""y"": 3675578.6309410..."
9,9,1010,,"{""x"": 11051263.145793304, ""y"": 3586817.0288054..."


# Add Watersheds

In [12]:
# Spatially join the storm points to the watershed polygons. Only the name and
# geometry columns of `watersheds` are passed in, so the result gains just the
# 'Watershed' column (plus an 'index_right' bookkeeping column).
# NOTE(review): no `how`/`op` arguments are given, so the library defaults
# apply -- confirm that points lying outside every watershed are not dropped.
joined_df = sdf.spatial.join(watersheds.loc[:,['Watershed','SHAPE']])
joined_df.head()

# Note carefully that the join does not preserve row order: in the output, the
# point with FID 9 now sits in the third row.
# The indices (leftmost column) no longer line up with FID as they originally did.
# Arguably a join should not reorder rows, so do not rely on row position afterwards.

Unnamed: 0,FID,UNIQUEID,WATERSHED,SHAPE,index_right,Watershed
0,0,1001,Back Creek,"{""x"": 11088195.428318404, ""y"": 3611249.6433582...",1,Back Creek
1,1,1002,Back Creek,"{""x"": 11025054.510231826, ""y"": 3597777.9812612...",1,Back Creek
2,9,1010,,"{""x"": 11051263.145793304, ""y"": 3586817.0288054...",1,Back Creek
3,2,1003,Carvin Creek,"{""x"": 11058512.062181782, ""y"": 3680230.5988518...",3,Carvin Creek
4,5,1006,,"{""x"": 11064369.069298763, ""y"": 3643759.9865084...",3,Carvin Creek


In [13]:
# How missing values show up depends on your dataset. In this case the blank
# WATERSHED entries come through as a single space (NOT an empty string!), so
# rather than matching ' ' exactly we treat any blank value -- empty string,
# whitespace only, or null -- as missing.
idx = joined_df['WATERSHED'].fillna('').str.strip() == ''
# Fill the blanks with the watershed name obtained from the spatial join.
joined_df.loc[idx,'WATERSHED'] = joined_df.loc[idx,'Watershed']
joined_df.head()

Unnamed: 0,FID,UNIQUEID,WATERSHED,SHAPE,index_right,Watershed
0,0,1001,Back Creek,"{""x"": 11088195.428318404, ""y"": 3611249.6433582...",1,Back Creek
1,1,1002,Back Creek,"{""x"": 11025054.510231826, ""y"": 3597777.9812612...",1,Back Creek
2,9,1010,Back Creek,"{""x"": 11051263.145793304, ""y"": 3586817.0288054...",1,Back Creek
3,2,1003,Carvin Creek,"{""x"": 11058512.062181782, ""y"": 3680230.5988518...",3,Carvin Creek
4,5,1006,Carvin Creek,"{""x"": 11064369.069298763, ""y"": 3643759.9865084...",3,Carvin Creek


In [14]:
# Remove the two helper columns that the spatial join added; WATERSHED now
# carries the information we need.
joined_df = joined_df.drop(columns=['index_right', 'Watershed'])

In [15]:
# Put the rows back into ID order (the join shuffled them) and preview them.
joined_df = joined_df.sort_values('UNIQUEID')
joined_df.head(n=10)

Unnamed: 0,FID,UNIQUEID,WATERSHED,SHAPE
0,0,1001,Back Creek,"{""x"": 11088195.428318404, ""y"": 3611249.6433582..."
1,1,1002,Back Creek,"{""x"": 11025054.510231826, ""y"": 3597777.9812612..."
3,2,1003,Carvin Creek,"{""x"": 11058512.062181782, ""y"": 3680230.5988518..."
6,3,1004,Roanoke River above Masons Creek,"{""x"": 11012522.582823968, ""y"": 3639462.3290011..."
7,4,1005,Roanoke River above Masons Creek,"{""x"": 11018717.112518717, ""y"": 3637251.9218605..."
4,5,1006,Carvin Creek,"{""x"": 11064369.069298763, ""y"": 3643759.9865084..."
8,6,1007,Mason Creek,"{""x"": 11031997.086837519, ""y"": 3641956.6735182..."
9,7,1008,Peters Creek,"{""x"": 11039318.002136372, ""y"": 3643422.6693437..."
5,8,1009,Carvin Creek,"{""x"": 11054564.430870323, ""y"": 3675578.6309410..."
2,9,1010,Back Creek,"{""x"": 11051263.145793304, ""y"": 3586817.0288054..."


# Write out the data

In [16]:
# In practice we might simply overwrite the input layer, but for teaching
# purposes the result is written to a separate shapefile so that the original
# stays untouched. The call returns the path of the layer it wrote.

joined_df.spatial.to_featureclass('storm_points_updated.shp')

'D:\\data\\Classes\\Current\\GEOG 4314-5314 - Analysis in GIS\\Content\\04 - Geopandas and Spatially Enabled Data Frames\\old\\Roanoke Stormwater Example - ArcGIS Solution\\storm_points_updated.shp'

# Could we see that process all at once, with no frills?

In [None]:
import pandas as pd
import numpy as np

import arcpy
# Importing the accessors from arcgis is what makes `.spatial` available on
# pandas DataFrames. Without this line the cell only works when an earlier
# cell has already imported arcgis into the running kernel.
from arcgis.features import GeoAccessor, GeoSeriesAccessor

# Define workspace
arcpy.env.workspace = "./"

# Load layers
sdf = pd.DataFrame.spatial.from_featureclass("storm_points.shp")
watersheds = pd.DataFrame.spatial.from_featureclass("watersheds.shp")

# Add new IDs: rows with UNIQUEID == 0 are unassigned; number them
# consecutively, starting right after the largest ID already in use
max_id = np.max(sdf['UNIQUEID'])
idx = sdf['UNIQUEID'] == 0
n = np.sum(idx)
new_ids = np.arange(max_id+1,max_id+n+1)
sdf.loc[idx,'UNIQUEID'] = new_ids

# Add missing watersheds: join the points to the watershed polygons, copy the
# joined name into every blank WATERSHED value (empty, whitespace-only or null;
# in this dataset blanks arrive as a single space), then drop the helper columns
# NOTE(review): the join uses the library's default mode -- confirm that points
# lying outside every watershed are not silently dropped.
joined_df = sdf.spatial.join(watersheds.loc[:,['Watershed','SHAPE']])
idx = joined_df['WATERSHED'].fillna('').str.strip() == ''
joined_df.loc[idx,'WATERSHED'] = joined_df.loc[idx,'Watershed']
joined_df = joined_df.drop(labels=['index_right','Watershed'],axis=1)

# Optionally, sort (the join does not preserve the original row order)
joined_df = joined_df.sort_values(by='UNIQUEID')

# Write out the data to a new layer, leaving the input shapefile untouched
joined_df.spatial.to_featureclass('storm_points_updated.shp')