In [1]:
import importlib
from pathlib import Path
import sys

from arcgis.features import GeoAccessor, GeoSeriesAccessor
import pandas as pd

In [2]:
project_parent = Path('./').absolute().parent

# import the project package from the project package path
# ideally will be imported using 'from arcgis import da'
sys.path.append(str(project_parent/'src'))
import dm

# load the "autoreload" extension so as you change code in src, it gets reloaded
%load_ext autoreload
%autoreload 2

  class GeoAccessorIO(GeoAccessor):


# Introspectively Examine and Get Geographies

In [3]:
# discover what countries are available, and get dataframe of countries
# (the recorded output has geographic_level, country and year columns)
cntry_df = dm.util.get_countries()

# bare last expression so the notebook renders the dataframe
cntry_df

Unnamed: 0,geographic_level,country,year
0,USA_ESRI_2019,USA,2019


In [4]:
# specify a country using the identifier from the country field
# NOTE(review): source='local' presumably selects locally installed data rather than a web service — confirm against dm.Country
usa = dm.Country('USA', source='local')

# sanity check that the constructor handed back a Country instance
isinstance(usa, dm.Country)

True

In [5]:
# get the geographic resolutions available for the country as a dataframe from smallest to largest
# (in the recorded output row 0 is block_groups and row 9 is states; presumably this row index is what level(n) refers to — confirm)
geos = usa.geographies

# bare last expression so the notebook renders the dataframe
geos

Unnamed: 0,geo_name,geo_alias,col_id,col_name,feature_class_path
0,block_groups,Block Groups,ID,NAME,D:\arcgis\ba_data\Data\Demographic Data\USA_ES...
1,census_tracts,Census Tracts,ID,NAME,D:\arcgis\ba_data\Data\Demographic Data\USA_ES...
2,cities_and_towns_places,Cities and Towns (Places),ID,NAME,D:\arcgis\ba_data\Data\Demographic Data\USA_ES...
3,zip_codes,ZIP Codes,ID,NAME,D:\arcgis\ba_data\Data\Demographic Data\USA_ES...
4,county_subdivisions,County Subdivisions,ID,NAME,D:\arcgis\ba_data\Data\Demographic Data\USA_ES...
5,counties,Counties,ID,NAME,D:\arcgis\ba_data\Data\Demographic Data\USA_ES...
6,cbsas,CBSAs,ID,NAME,D:\arcgis\ba_data\Data\Demographic Data\USA_ES...
7,congressional_districts,Congressional Districts,ID,NAME,D:\arcgis\ba_data\Data\Demographic Data\USA_ES...
8,dmas,DMAs,ID,NAME,D:\arcgis\ba_data\Data\Demographic Data\USA_ES...
9,states,States,ID,NAME,D:\arcgis\ba_data\Data\Demographic Data\USA_ES...


In [6]:
# look up a CBSA by name; in the recorded output 'seattle' returns the single row
# for the Seattle-Tacoma-Bellevue, WA metropolitan area (ID 42660) with its SHAPE geometry
cbsa_df = usa.cbsas.get('seattle')

# bare last expression so the notebook renders the dataframe
cbsa_df

Unnamed: 0,ID,NAME,SHAPE
0,42660,"Seattle-Tacoma-Bellevue, WA Metropolitan Stati...","{""rings"": [[[-13651055.7226, 5968866.240900002..."


In [7]:
# checking the location of the geometry to make sure it looks correct on a simple map
webmap = cbsa_df.spatial.plot()
# draw on the gray vector basemap
webmap.basemap = 'gray-vector'
# bare last expression so the notebook renders the map widget
webmap

MapView(layout=Layout(height='400px', width='100%'))

In [8]:
# get the geographies falling within an area by the geographic name
# NOTE(review): the recorded output reports 217,209 rows, which looks like every block group
# in the country rather than only those inside the Seattle CBSA — confirm the CBSA filter is applied
bg_df = usa.cbsas.get('seattle').block_groups.get()

# summarize the columns, dtypes and row count
bg_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 217209 entries, 0 to 217208
Data columns (total 3 columns):
 #   Column  Non-Null Count   Dtype   
---  ------  --------------   -----   
 0   ID      217209 non-null  object  
 1   NAME    217209 non-null  object  
 2   SHAPE   217209 non-null  geometry
dtypes: geometry(1), object(2)
memory usage: 5.0+ MB


In [None]:
# get the geographies falling within an area by the index - makes it easier to get the lowest possible geographic resolution
# NOTE(review): level(0) presumably maps to row 0 of usa.geographies (block groups) — confirm
lvl_df = usa.cbsas.get('seattle').level(0).get()

# summarize the columns, dtypes and row count
lvl_df.info()

In [None]:
# checking what the results look like as a table (head() shows the first five rows by default)
lvl_df.head()

In [None]:
# checking what the results look like as a simple map
webmap02 = lvl_df.spatial.plot()
# draw on the gray vector basemap, same as the earlier CBSA map
webmap02.basemap = 'gray-vector'
# bare last expression so the notebook renders the map widget
webmap02

In [None]:
# also, many times the area of interest is not a standard geography - a district or possibly a sales territory
# in this case, we can use the within method and pass in either a Spatially Enabled DataFrame, Geometry list, or single geometry to get the smaller geographies for analysis
# here the area of interest is cbsa_df, the Seattle CBSA dataframe retrieved earlier
within_df = usa.level(0).within(cbsa_df)

# summarize the columns, dtypes and row count
within_df.info()

# Enrich

In [None]:
# pull the enrichment variables exposed by the country object — presumably a dataframe given the _df suffix; confirm
enrich_vars_df = usa.enrich_variables

In [None]:
# NOTE(review): placeholder values — swap in real variable names (presumably taken from enrich_vars_df) before running the enrich cells
enrich_vars = ['list', 'of', 'enrich', 'vars']

In [None]:
# get the geographies falling within an area by the geographic ids - dramatically speeds up enrichment because do not have to perform apportionment
# NOTE(review): enrich is assumed to be exposed through the .spatial dataframe accessor, matching the chained form below — confirm against dm
bg_enrich_df = bg_df.spatial.enrich(enrich_vars, id_col='ID')

# ...or the same thing as a single chain from the country object (usa is the only Country defined in this notebook)
bg_enrich_df = usa.cbsas.get('seattle').block_groups.get().spatial.enrich(enrich_vars, id_col='ID')

# preview the first rows of the enriched result
bg_enrich_df.head()

In [None]:
# get the geographies falling within an area by the index and just use the geography polygon - makes it easier to get the lowest possible geographic resolution
# step by step: look up the CBSA, drop to the level at index 0, then enrich
cbsa_df = usa.cbsas.get('seattle')
bg_lvl = cbsa_df.level(0)
bg_enrich_df = bg_lvl.enrich(enrich_vars)

# ...or the same steps as a single chain
bg_enrich_df = usa.cbsas.get('seattle').level(0).enrich(enrich_vars)

# preview the first rows of the enriched result
bg_enrich_df.head()

# Get Locations

In [None]:
# get the store locations from the business listings
loc_brand_df = usa.business.search('Ace Hardware')

# ...and since returning a spatially enabled dataframe, can use spatial.to_featureclass to save directly with function chaining
# NOTE(review): gdb_int is not defined in the cells above — presumably a pathlib.Path to an interim file geodatabase; define it in the setup cell before running
usa.business.search('Ace Hardware').spatial.to_featureclass(gdb_int/'loc_brand')

# preview the first rows of the brand locations
loc_brand_df.head()

In [None]:
# get all the competitors for the area of interest

# ...by NAICS or SIC code...
loc_comp_df = usa.business.get_competitors(
    code=44413005,  # include ability to specify shortened codes since NAICS codes can be shorter to be more general
    code_type='NAICS',
    brand_exclude='Ace Hardware'
)

# ...or simply by looking up using the existing location brand layer as a template
# (this rebinds loc_comp_df, so only the result of this second lookup is kept)
loc_comp_df = usa.business.get_competitors(brand_locations=loc_brand_df)

# ...and since returning a spatially enabled dataframe, can use spatial.to_featureclass to save directly with function chaining
# NOTE(review): gdb_int is not defined in the cells above — presumably a pathlib.Path to an interim file geodatabase; define it in the setup cell before running
usa.business.get_competitors(brand_locations=loc_brand_df).spatial.to_featureclass(gdb_int/'loc_comp')

# Calculate Proximity Metrics

In [None]:
# calculate the origin to nth destinations table for brand locations
# NOTE(review): orgin_geo_df is not defined in the cells above — assign the origin geographies before running this cell
prox_df_brand = usa.proximity.get_neareset_nth_locations(
    origin_features=orgin_geo_df,
    origin_id_column='ID',
    origin_centroid_weighting_features='path-to-block-points',  # features used to calculate a population weighted centroid location for routing
    origin_centroid_weighting_column='POP',                     # used to weight each population feature for centroid calculation
    destination_locations=loc_brand_df,
    destination_id_column='STORE_ID',
    destination_brand_or_concept_column='STORE_CONCEPT',        # think Nike Outlet versus Nike Brand Store
)

In [None]:
# calculate the origin to nth destinations table for competitor locations
# (kept in its own variable so the brand table from the previous cell is not overwritten)
# NOTE(review): orgin_geo_df is not defined in the cells above — assign the origin geographies before running this cell
prox_df_comp = usa.proximity.get_neareset_nth_locations(
    origin_features=orgin_geo_df,
    origin_id_column='ID',
    origin_centroid_weighting_features='path-to-block-points',  # features used to calculate a population weighted centroid location for routing
    origin_centroid_weighting_column='POP',                     # used to weight each population feature for centroid calculation
    destination_locations=loc_comp_df,
    destination_id_column='LOCNUM',
    destination_brand_or_concept_column='CONAME',               # think Nike Outlet versus Nike Brand Store
)

# ...and can even chain to create output using dataframe to_... functions
# to_csv returns None when given a path, so the result of the chain is deliberately not assigned
usa.proximity.get_neareset_nth_locations(orgin_geo_df, 'ID', 'path-to-block-points', 'POP', loc_comp_df, 'LOCNUM', 'CONAME').to_csv('prox_df_comp.csv')