In [1]:
from pathlib import Path

from arcgis.features import GeoAccessor
from dm import Country
from dm.country import DemographicModeling

# load the "autoreload" extension so that, as the src code changes, the changes are picked up automatically in the notebook
%load_ext autoreload
%autoreload 2

In [2]:
# Project layout, resolved relative to the directory the notebook is run from:
# the project root is the parent of the current working directory.
project_parent = Path('./').absolute().parent
dir_data = project_parent.joinpath('data')
dir_int = dir_data.joinpath('interim')
gdb_int = dir_int.joinpath('interim.gdb')

# Business-listing columns dropped from every business query result in this notebook.
biz_drop_cols = [
    'OBJECTID',
    'CONAME',
    'SALESVOL',
    'HDBRCH',
    'ULTNUM',
    'PUBPRV',
    'EMPNUM',
    'FRNCOD',
    'ISCODE',
    'SQFTCODE',
    'LOC_NAME',
    'STATUS',
    'SCORE',
    'SOURCE',
    'REC_TYPE',
]

In [3]:
# Create the country object that the geography (cbsas) and business lookups
# in the following cells are called on; the trailing expression shows its repr.
usa = Country('USA')

usa

<dm.Country - USA (local)>

In [4]:
%%time
# Look up the CBSA matching 'seattle'; the resulting frame is passed to the
# business queries below as the area of interest (AOI).
aoi_df = usa.cbsas.get('seattle')

aoi_df

Wall time: 5.16 s


Unnamed: 0,ID,NAME,SHAPE
0,42660,"Seattle-Tacoma-Bellevue, WA Metropolitan Stati...","{""rings"": [[[-122.62951999978839, 47.163890001..."


In [5]:
%%time
# Query businesses by name within the AOI, then strip the columns listed in
# biz_drop_cols so only the location and brand attributes remain.
biz_df = (
    usa.business
    .get_by_name('ace hardware', aoi_df)
    .drop(columns=biz_drop_cols)
)

# Point the spatial accessor at the SHAPE column.
# NOTE(review): presumably needed because drop() returns a new frame - confirm.
biz_df.spatial.set_geometry('SHAPE')

biz_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 33 entries, 0 to 32
Data columns (total 13 columns):
 #   Column               Non-Null Count  Dtype   
---  ------               --------------  -----   
 0   LOCNUM               33 non-null     object  
 1   STREET               33 non-null     object  
 2   CITY                 33 non-null     object  
 3   STATE                33 non-null     object  
 4   STATE_NAME           33 non-null     object  
 5   ZIP                  33 non-null     object  
 6   ZIP4                 33 non-null     object  
 7   NAICS                33 non-null     object  
 8   SIC                  33 non-null     object  
 9   SHAPE                33 non-null     geometry
 10  id                   33 non-null     object  
 11  brand_name           33 non-null     object  
 12  brand_name_category  33 non-null     object  
dtypes: geometry(1), object(12)
memory usage: 3.5+ KB
Wall time: 3.36 s


In [6]:
%%time
# Retrieve the competitors of the businesses in biz_df within the same AOI,
# dropping the same unneeded columns as for biz_df.
# NOTE(review): local_threshold is defined by dm; it presumably controls when a
# brand is tagged 'local_brand' in brand_name_category - confirm.
comp_df = (
    usa.business
    .get_competition(biz_df, aoi_df, local_threshold=3)
    .drop(columns=biz_drop_cols)
)

# Point the spatial accessor at the SHAPE column.
comp_df.spatial.set_geometry('SHAPE')

comp_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 76 entries, 0 to 75
Data columns (total 13 columns):
 #   Column               Non-Null Count  Dtype   
---  ------               --------------  -----   
 0   LOCNUM               76 non-null     object  
 1   STREET               76 non-null     object  
 2   CITY                 76 non-null     object  
 3   STATE                76 non-null     object  
 4   STATE_NAME           76 non-null     object  
 5   ZIP                  76 non-null     object  
 6   ZIP4                 76 non-null     object  
 7   NAICS                76 non-null     object  
 8   SIC                  76 non-null     object  
 9   SHAPE                76 non-null     geometry
 10  id                   76 non-null     object  
 11  brand_name           76 non-null     object  
 12  brand_name_category  76 non-null     object  
dtypes: geometry(1), object(12)
memory usage: 7.8+ KB
Wall time: 1.6 s


In [29]:
%%time
# Long format: with single_row_per_origin=False the result has one row per
# origin/destination pair, ranked by near_destination_rank (see the cell output).
near_df = biz_df.dm.get_nearest(
    comp_df,
    source=usa,
    destination_count=6,
    near_prefix='near',
    destination_columns_to_keep=['NAICS', 'brand_name', 'brand_name_category'],
    single_row_per_origin=False,
)
near_df.head()

Wall time: 15.9 s


Unnamed: 0,near_origin_id,near_destination_rank,near_destination_id,near_proximity_kilometers,near_proximity_minutes,near_proximity_side_street_left,near_proximity_side_street_right,SHAPE,near_NAICS,near_brand_name,...,CITY,STATE,STATE_NAME,ZIP,ZIP4,NAICS,SIC,id,brand_name,brand_name_category
0,216082099,1,804462166,5.245106,6.903922,1,0,"{""hasM"": true, ""paths"": [[[-122.43443849999994...",44413005,BROOKDALE LUMBER TRUE VALUE,...,SPANAWAY,WA,Washington,98387,8395,44413005,525104,216082099,SOUTH END ACE HARDWARE,SOUTH END ACE HARDWARE
1,216082099,2,401706143,13.364986,15.537304,1,0,"{""hasM"": true, ""paths"": [[[-122.43443849999994...",44413005,PUGET SOUND SAW,...,SPANAWAY,WA,Washington,98387,8395,44413005,525104,216082099,SOUTH END ACE HARDWARE,SOUTH END ACE HARDWARE
2,216082099,3,444266050,13.603711,15.794491,1,0,"{""hasM"": true, ""paths"": [[[-122.43443849999994...",44413005,MCLENDON HARDWARE INC,...,SPANAWAY,WA,Washington,98387,8395,44413005,525104,216082099,SOUTH END ACE HARDWARE,SOUTH END ACE HARDWARE
3,216082099,4,715857966,14.398246,16.242462,0,1,"{""hasM"": true, ""paths"": [[[-122.43443849999994...",44413005,WILCO FARM STORE,...,SPANAWAY,WA,Washington,98387,8395,44413005,525104,216082099,SOUTH END ACE HARDWARE,SOUTH END ACE HARDWARE
4,216082099,5,740405810,14.537067,17.416629,0,1,"{""hasM"": true, ""paths"": [[[-122.43443849999994...",44413005,CASCADE WHOLESALE HARDWARE,...,SPANAWAY,WA,Washington,98387,8395,44413005,525104,216082099,SOUTH END ACE HARDWARE,SOUTH END ACE HARDWARE


In [30]:
# Draw the nearest-competitor results with the spatial accessor, switch the
# returned map widget to the gray vector basemap, then display it.
wb_mp = near_df.spatial.plot()
wb_mp.basemap = 'gray-vector'
wb_mp

MapView(layout=Layout(height='400px', width='100%'))

In [28]:
# Wide format: without single_row_per_origin=False the result has one row per
# origin, with each destination's near_* columns suffixed _01 ... _06
# (see the cell output).
near_df_1 = biz_df.dm.get_nearest(
    comp_df,
    source=usa,
    destination_count=6,
    near_prefix='near',
    destination_columns_to_keep=['NAICS', 'brand_name', 'brand_name_category'],
)

# Keep only the origin id and the near_* columns.
near_df_1 = near_df_1[
    ['id'] + [col for col in near_df_1.columns if col.startswith('near_')]
].copy()

near_df_1.head()

Unnamed: 0,id,near_destination_id_01,near_proximity_kilometers_01,near_proximity_minutes_01,near_proximity_side_street_left_01,near_proximity_side_street_right_01,near_NAICS_01,near_brand_name_01,near_brand_name_category_01,near_destination_id_02,...,near_brand_name_05,near_brand_name_category_05,near_destination_id_06,near_proximity_kilometers_06,near_proximity_minutes_06,near_proximity_side_street_left_06,near_proximity_side_street_right_06,near_NAICS_06,near_brand_name_06,near_brand_name_category_06
0,216082099,804462166,5.245106,6.903922,1,0,44413005,BROOKDALE LUMBER TRUE VALUE,local_brand,401706143,...,CASCADE WHOLESALE HARDWARE,local_brand,175096239,15.161768,18.717571,0,1,44413005,LAKEWOOD HARDWARE & PAINT,local_brand
1,371889957,401706143,7.383761,9.345567,0,1,44413005,PUGET SOUND SAW,local_brand,444266050,...,CENTRICSIT,local_brand,2890986,21.095097,26.426051,1,0,44413005,MC LENDON HARDWARE,local_brand
2,460556608,175096239,3.242144,5.378772,0,1,44413005,LAKEWOOD HARDWARE & PAINT,local_brand,740405810,...,MCLENDON HARDWARE INC,MCLENDON HARDWARE INC,715857966,16.018472,18.036884,0,1,44413005,WILCO FARM STORE,local_brand
3,405129289,425066759,9.754976,9.552777,0,1,44413005,MCLENDON HARDWARE INC,MCLENDON HARDWARE INC,716987694,...,SGS HARDWARE,local_brand,421117196,18.254821,16.836483,0,1,44413005,SGS HARDWARE,local_brand
4,404324160,175041706,4.741889,7.552362,0,1,44413005,LINCOLN HARDWARE,local_brand,425066759,...,SGS HARDWARE,local_brand,700565109,6.042069,10.175066,0,1,44413005,S G S HARDWARE,local_brand


In [31]:
# Show the first record as a Series so each near_* column is listed on its own line.
near_df_1.iloc[0]

id                                                       216082099
near_destination_id_01                                   804462166
near_proximity_kilometers_01                               5.24511
near_proximity_minutes_01                                  6.90392
near_proximity_side_street_left_01                               1
near_proximity_side_street_right_01                              0
near_NAICS_01                                             44413005
near_brand_name_01                     BROOKDALE LUMBER TRUE VALUE
near_brand_name_category_01                            local_brand
near_destination_id_02                                   401706143
near_proximity_kilometers_02                                13.365
near_proximity_minutes_02                                  15.5373
near_proximity_side_street_left_02                               1
near_proximity_side_street_right_02                              0
near_NAICS_02                                             4441