In [1]:
import os
from pathlib import Path
import time

from arcgis import GIS, GeoAccessor
from dotenv import find_dotenv, load_dotenv
from modeling import Country, ModelingAccessor

# Load environment variables (the portal URL and credentials read below via os.getenv) from the nearest .env file.
load_dotenv(find_dotenv())

# Load the "autoreload" extension so that edits to imported source modules are picked up
# by this notebook without re-importing them by hand.
%load_ext autoreload
%autoreload 2

In [2]:
# The project parent directory sits two levels above the current working directory.
project_parent = Path.cwd().parent.parent

# Data directories, and the geodatabase path inside each of them.
dir_data = project_parent / 'data'
dir_raw, dir_int = dir_data / 'raw', dir_data / 'interim'
gdb_raw, gdb_int = dir_raw / 'raw.gdb', dir_int / 'interim.gdb'

# Business listing columns removed from the competition results further down.
biz_drop_cols = [
    'OBJECTID', 'CONAME', 'SALESVOL', 'HDBRCH', 'ULTNUM',
    'PUBPRV', 'EMPNUM', 'FRNCOD', 'ISCODE', 'SQFTCODE',
    'LOC_NAME', 'STATUS', 'SCORE', 'SOURCE', 'REC_TYPE',
]

In [3]:
# Alternate environments, left commented out for quick switching:
# gis = GIS(os.getenv('ESRI_PORTAL_URL'), username=os.getenv('ESRI_PORTAL_USERNAME'), password=os.getenv('ESRI_PORTAL_PASSWORD'))
# gis = GIS(os.getenv('BASRV_QA_URL'), username=os.getenv('BASRV_QA_USERNAME'), password=os.getenv('BASRV_QA_PASSWORD'))

# Sign in using the BA_QA_* values read from the environment.
gis = GIS(
    os.getenv('BA_QA_URL'),
    username=os.getenv('BA_QA_USERNAME'),
    password=os.getenv('BA_QA_PASSWORD'),
)

# Country object for the USA; the bare name below renders its repr as the cell output.
usa = Country('USA')
usa

<modeling.Country - USA (GIS at https://baqa.mapsqa.arcgis.com logged in as jmccune_baqa)>

In [4]:
# Keep only the descriptive columns of the enrichment variable table.
evars = usa.enrich_variables[['name', 'alias', 'description', 'data_collection']]

evars

Unnamed: 0,name,alias,description,data_collection
0,AGE0_CY,2020 Population Age <1,2020 Total Population Age <1 (Esri),1yearincrements
1,AGE1_CY,2020 Population Age 1,2020 Total Population Age 1 (Esri),1yearincrements
2,AGE2_CY,2020 Population Age 2,2020 Total Population Age 2 (Esri),1yearincrements
3,AGE3_CY,2020 Population Age 3,2020 Total Population Age 3 (Esri),1yearincrements
4,AGE4_CY,2020 Population Age 4,2020 Total Population Age 4 (Esri),1yearincrements
...,...,...,...,...
37,MOEMEDYRMV,2019 Median Year Householder Moved In MOE (ACS...,2019 Median Year Householder Moved into Unit M...,yearmovedin
38,RELMEDYRMV,2019 Median Year Householder Moved In REL (ACS...,2019 Median Year Householder Moved into Unit R...,yearmovedin
39,ACSOWNER,2019 Owner Households (ACS 5-Yr),2019 Owner Households (ACS 5-Yr),yearmovedin
40,MOEOWNER,2019 Owner Households MOE (ACS 5-Yr),2019 Owner Households MOE (ACS 5-Yr),yearmovedin


In [5]:
%%time
# Look up the CBSA matching 'seattle'; this frame is the area of interest used by the cells below.
aoi_df = usa.cbsas.get('seattle')

aoi_df

CPU times: user 109 ms, sys: 8.69 ms, total: 118 ms
Wall time: 841 ms


Unnamed: 0,ID,NAME,SHAPE
0,42660,"Seattle-Tacoma-Bellevue, WA Metropolitan Stati...","{""rings"": [[[-122.83937000030072, 47.256580000..."


In [6]:
%%time
# Retrieve the level-0 geographies for the area of interest.
# NOTE(review): level 0 is presumably block groups, judging by the `bg_` prefix — confirm.
bg_df = aoi_df.mdl.level(0).get()

bg_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2480 entries, 0 to 2479
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype   
---  ------  --------------  -----   
 0   ID      2480 non-null   object  
 1   NAME    2480 non-null   object  
 2   SHAPE   2480 non-null   geometry
dtypes: geometry(1), object(2)
memory usage: 58.2+ KB
CPU times: user 1.27 s, sys: 252 ms, total: 1.52 s
Wall time: 7.95 s


In [7]:
%%time
# Business listings for the area of interest, looked up by the name 'ace hardware'.
biz_df = aoi_df.mdl.business.get_by_name('ace hardware')

biz_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 35 entries, 0 to 34
Data columns (total 15 columns):
 #   Column               Non-Null Count  Dtype   
---  ------               --------------  -----   
 0   LOCNUM               35 non-null     object  
 1   CONAME               35 non-null     object  
 2   NAICS                35 non-null     object  
 3   SIC                  35 non-null     object  
 4   SOURCE               35 non-null     object  
 5   PUBPRV               35 non-null     object  
 6   FRNCOD               35 non-null     object  
 7   ISCODE               35 non-null     object  
 8   CITY                 35 non-null     object  
 9   ZIP                  35 non-null     object  
 10  STATE                35 non-null     object  
 11  SHAPE                35 non-null     geometry
 12  location_id          35 non-null     object  
 13  brand_name           35 non-null     object  
 14  brand_name_category  35 non-null     object  
dtypes: geometry(1), object(14

In [8]:
# Re-inspect the geography frame's schema (ID, NAME, SHAPE) before it is used as the proximity origins.
bg_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2480 entries, 0 to 2479
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype   
---  ------  --------------  -----   
 0   ID      2480 non-null   object  
 1   NAME    2480 non-null   object  
 2   SHAPE   2480 non-null   geometry
dtypes: geometry(1), object(2)
memory usage: 58.2+ KB


In [11]:
%%time
bg_near_biz_df = bg_df[:250].mdl.proximity.get_nearest(biz_df, origin_id_column='ID', near_prefix='brand')

print(bg_near_biz_df.info())
bg_near_biz_df.head()

  @register_dataframe_accessor('mdl')


<class 'pandas.core.frame.DataFrame'>
Int64Index: 8120 entries, 0 to 249
Data columns (total 23 columns):
 #   Column                                Non-Null Count  Dtype   
---  ------                                --------------  -----   
 0   ID                                    8120 non-null   object  
 1   NAME                                  8120 non-null   object  
 2   brand_destination_id_01               7972 non-null   object  
 3   brand_proximity_traveltime_01         7972 non-null   float64 
 4   brand_proximity_kilometers_01         7972 non-null   float64 
 5   brand_proximity_side_street_left_01   7972 non-null   float64 
 6   brand_proximity_side_street_right_01  7972 non-null   float64 
 7   brand_destination_id_02               7972 non-null   object  
 8   brand_proximity_traveltime_02         7972 non-null   float64 
 9   brand_proximity_kilometers_02         7972 non-null   float64 
 10  brand_proximity_side_street_left_02   7972 non-null   float64 
 11  brand

Unnamed: 0,ID,NAME,brand_destination_id_01,brand_proximity_traveltime_01,brand_proximity_kilometers_01,brand_proximity_side_street_left_01,brand_proximity_side_street_right_01,brand_destination_id_02,brand_proximity_traveltime_02,brand_proximity_kilometers_02,...,brand_proximity_traveltime_03,brand_proximity_kilometers_03,brand_proximity_side_street_left_03,brand_proximity_side_street_right_03,brand_destination_id_04,brand_proximity_traveltime_04,brand_proximity_kilometers_04,brand_proximity_side_street_left_04,brand_proximity_side_street_right_04,SHAPE
0,530330012001,530330012.001,664387990,5.781369,3.065281,1.0,0.0,251373601,15.653281,12.925205,...,16.671709,10.717732,1.0,0.0,724556934,18.890684,16.359468,0.0,1.0,"{""rings"": [[[-122.31243049957185, 47.704870499..."
1,530330012002,530330012.002,664387990,6.877815,3.744187,1.0,0.0,251373601,15.745409,12.905662,...,15.846284,9.692429,1.0,0.0,724556934,18.982812,16.339925,0.0,1.0,"{""rings"": [[[-122.3178410004258, 47.7076489995..."
1,530330012002,530330012.002,664387990,6.877815,3.744187,1.0,0.0,251373601,15.745409,12.905662,...,15.846284,9.692429,1.0,0.0,724556934,18.982812,16.339925,0.0,1.0,"{""rings"": [[[-122.3178410004258, 47.7076489995..."
1,530330012002,530330012.002,664387990,6.877815,3.744187,1.0,0.0,251373601,15.745409,12.905662,...,15.846284,9.692429,1.0,0.0,724556934,18.982812,16.339925,0.0,1.0,"{""rings"": [[[-122.3178410004258, 47.7076489995..."
1,530330012002,530330012.002,664387990,6.877815,3.744187,1.0,0.0,251373601,15.745409,12.905662,...,15.846284,9.692429,1.0,0.0,724556934,18.982812,16.339925,0.0,1.0,"{""rings"": [[[-122.3178410004258, 47.7076489995..."


In [12]:
# Display the whole result; the notebook rendering elides the middle rows and columns.
bg_near_biz_df

Unnamed: 0,ID,NAME,brand_destination_id_01,brand_proximity_traveltime_01,brand_proximity_kilometers_01,brand_proximity_side_street_left_01,brand_proximity_side_street_right_01,brand_destination_id_02,brand_proximity_traveltime_02,brand_proximity_kilometers_02,...,brand_proximity_traveltime_03,brand_proximity_kilometers_03,brand_proximity_side_street_left_03,brand_proximity_side_street_right_03,brand_destination_id_04,brand_proximity_traveltime_04,brand_proximity_kilometers_04,brand_proximity_side_street_left_04,brand_proximity_side_street_right_04,SHAPE
0,530330012001,530330012.001,664387990,5.781369,3.065281,1.0,0.0,251373601,15.653281,12.925205,...,16.671709,10.717732,1.0,0.0,724556934,18.890684,16.359468,0.0,1.0,"{""rings"": [[[-122.31243049957185, 47.704870499..."
1,530330012002,530330012.002,664387990,6.877815,3.744187,1.0,0.0,251373601,15.745409,12.905662,...,15.846284,9.692429,1.0,0.0,724556934,18.982812,16.339925,0.0,1.0,"{""rings"": [[[-122.3178410004258, 47.7076489995..."
1,530330012002,530330012.002,664387990,6.877815,3.744187,1.0,0.0,251373601,15.745409,12.905662,...,15.846284,9.692429,1.0,0.0,724556934,18.982812,16.339925,0.0,1.0,"{""rings"": [[[-122.3178410004258, 47.7076489995..."
1,530330012002,530330012.002,664387990,6.877815,3.744187,1.0,0.0,251373601,15.745409,12.905662,...,15.846284,9.692429,1.0,0.0,724556934,18.982812,16.339925,0.0,1.0,"{""rings"": [[[-122.3178410004258, 47.7076489995..."
1,530330012002,530330012.002,664387990,6.877815,3.744187,1.0,0.0,251373601,15.745409,12.905662,...,15.846284,9.692429,1.0,0.0,724556934,18.982812,16.339925,0.0,1.0,"{""rings"": [[[-122.3178410004258, 47.7076489995..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
245,530530612005,530530612.005,,,,,,,,,...,,,,,,,,,,"{""rings"": [[[-122.48218100027339, 47.250023999..."
246,530530612006,530530612.006,,,,,,,,,...,,,,,,,,,,"{""rings"": [[[-122.47135900053718, 47.255440000..."
247,530530624001,530530624.001,,,,,,,,,...,,,,,,,,,,"{""rings"": [[[-122.4261209989607, 47.2231159999..."
248,530530624002,530530624.002,,,,,,,,,...,,,,,,,,,,"{""rings"": [[[-122.42624499970756, 47.211758000..."


In [None]:
%%time
comp_df = usa.business.get_competition(biz_df, aoi_df, local_threshold=3).drop(columns=biz_drop_cols)
comp_df.spatial.set_geometry('SHAPE')

print(comp_df.info())
comp_df.head()

In [None]:
%%time
bg_near_comp_df = bg_df.mdl.proximity.get_nearest(comp_df, origin_id_column='ID', near_prefix='comp', destination_count=6
                                                  destination_columns_to_keep=['brand_name', 'brand_name_category'])

print(bg_near_comp_df.info())
bg_near_comp_df.head()

In [None]:
%%time
biz_near_comp_df = biz_df.mdl.proximity.get_nearest(comp_df, origin_id_column='ID', near_prefix='biz_comp', destination_count=6
                                                    destination_columns_to_keep=['brand_name', 'brand_name_category'])

print(biz_near_comp_df.info())
biz_near_comp_df.head()