In [1]:
from pathlib import Path

import arcpy
from arcgis.features import GeoAccessor
from dm import Country
from dm import proximity as prx
from dm.country import DemographicModeling
import pandas as pd
import numpy as np

# load the "autoreload" extension so that edits to imported source modules are picked up without restarting the kernel
%load_ext autoreload
%autoreload 2

In [2]:
# Project layout, resolved from the current working directory: the project
# root is the parent of the directory the notebook runs in.
project_parent = Path('./').absolute().parent
dir_data = project_parent.joinpath('data')
dir_int = dir_data.joinpath('interim')
gdb_int = dir_int.joinpath('interim.gdb')

# Columns dropped from the business listing frames returned later in the notebook.
biz_drop_cols = [
    'OBJECTID', 'CONAME', 'SALESVOL', 'HDBRCH', 'ULTNUM',
    'PUBPRV', 'EMPNUM', 'FRNCOD', 'ISCODE', 'SQFTCODE',
    'LOC_NAME', 'STATUS', 'SCORE', 'SOURCE', 'REC_TYPE',
]

In [3]:
# Create the country object; the cells below use it as the entry point for
# the CBSA lookup (usa.cbsas) and the business queries (usa.business).
usa = Country('USA')

# Display the object's repr as a quick sanity check.
usa

<dm.Country - USA (local)>

In [4]:
%%time
# Look up the CBSA matching 'seattle'. The resulting frame is the area of
# interest (AOI) passed to the geography and business queries below.
aoi_df = usa.cbsas.get('seattle')

aoi_df

Wall time: 4.69 s


Unnamed: 0,ID,NAME,SHAPE
0,42660,"Seattle-Tacoma-Bellevue, WA Metropolitan Stati...","{""rings"": [[[-122.62951999978839, 47.163890001..."


In [5]:
%%time
# Fetch the level-0 geographies for the AOI.
# NOTE(review): the `bg_` prefix suggests level 0 means block groups -- confirm against dm.
bg_df = aoi_df.dm.level(0).get()

# Summarize columns, dtypes and row count rather than dumping the frame.
bg_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2478 entries, 0 to 2477
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype   
---  ------  --------------  -----   
 0   ID      2478 non-null   object  
 1   NAME    2478 non-null   object  
 2   SHAPE   2478 non-null   geometry
dtypes: geometry(1), object(2)
memory usage: 58.2+ KB
Wall time: 13.2 s


In [6]:
%%time
biz_df = usa.business.get_by_name('ace hardware', aoi_df).drop(columns=biz_drop_cols)
biz_df.spatial.set_geometry('SHAPE')

biz_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 33 entries, 0 to 32
Data columns (total 13 columns):
 #   Column               Non-Null Count  Dtype   
---  ------               --------------  -----   
 0   LOCNUM               33 non-null     object  
 1   STREET               33 non-null     object  
 2   CITY                 33 non-null     object  
 3   STATE                33 non-null     object  
 4   STATE_NAME           33 non-null     object  
 5   ZIP                  33 non-null     object  
 6   ZIP4                 33 non-null     object  
 7   NAICS                33 non-null     object  
 8   SIC                  33 non-null     object  
 9   SHAPE                33 non-null     geometry
 10  id                   33 non-null     object  
 11  brand_name           33 non-null     object  
 12  brand_name_category  33 non-null     object  
dtypes: geometry(1), object(12)
memory usage: 3.5+ KB
Wall time: 1.65 s


In [7]:
%%time
bg_near_biz_df = bg_df.dm.get_nearest(biz_df, origin_id_column='ID', near_prefix='brand')

bg_near_biz_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2478 entries, 0 to 2477
Data columns (total 23 columns):
 #   Column                                Non-Null Count  Dtype   
---  ------                                --------------  -----   
 0   ID                                    2478 non-null   object  
 1   NAME                                  2478 non-null   object  
 2   SHAPE                                 2478 non-null   geometry
 3   brand_destination_id_01               2476 non-null   object  
 4   brand_proximity_kilometers_01         2476 non-null   float64 
 5   brand_proximity_minutes_01            2476 non-null   float64 
 6   brand_proximity_side_street_left_01   2476 non-null   float64 
 7   brand_proximity_side_street_right_01  2476 non-null   float64 
 8   brand_destination_id_02               2476 non-null   object  
 9   brand_proximity_kilometers_02         2476 non-null   float64 
 10  brand_proximity_minutes_02            2476 non-null   float64 
 11  bran

In [8]:
%%time
comp_df = usa.business.get_competition(biz_df, aoi_df, local_threshold=3).drop(columns=biz_drop_cols)
comp_df.spatial.set_geometry('SHAPE')

comp_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 76 entries, 0 to 75
Data columns (total 13 columns):
 #   Column               Non-Null Count  Dtype   
---  ------               --------------  -----   
 0   LOCNUM               76 non-null     object  
 1   STREET               76 non-null     object  
 2   CITY                 76 non-null     object  
 3   STATE                76 non-null     object  
 4   STATE_NAME           76 non-null     object  
 5   ZIP                  76 non-null     object  
 6   ZIP4                 76 non-null     object  
 7   NAICS                76 non-null     object  
 8   SIC                  76 non-null     object  
 9   SHAPE                76 non-null     geometry
 10  id                   76 non-null     object  
 11  brand_name           76 non-null     object  
 12  brand_name_category  76 non-null     object  
dtypes: geometry(1), object(12)
memory usage: 7.8+ KB
Wall time: 1.75 s


In [None]:
%%time
bg_near_biz_comp_df = bg_near_biz_df.dm.get_nearest(comp_df, origin_id_column='ID', near_prefix='comp', 
                                                    destination_columns_to_keep=['brand_name', 'brand_name_category'])

bg_near_biz_comp_df.info()

In [None]:
# Inspect the first row as a Series to see every column of the combined result at once.
bg_near_biz_comp_df.iloc[0]