In [1]:
import os
from pathlib import Path
import time

from arcgis import GIS, GeoAccessor
from dotenv import find_dotenv, load_dotenv
from modeling import Country, ModelingAccessor

# Load environment variables (such as the portal credentials read later with os.getenv)
# from the nearest .env file located by find_dotenv().
load_dotenv(find_dotenv())

# Load the "autoreload" extension so that edits to imported source modules are picked up
# by the running kernel without having to restart it.
%load_ext autoreload
%autoreload 2

In [2]:
# Project layout, resolved relative to the notebook's working directory
# (the project root is two levels above it).
project_parent = Path.cwd().parent.parent

# Data directories.
dir_data = project_parent.joinpath('data')
dir_int = dir_data.joinpath('interim')
dir_raw = dir_data.joinpath('raw')

# File geodatabases living inside the data directories.
gdb_int = dir_int.joinpath('interim.gdb')
gdb_raw = dir_raw.joinpath('raw.gdb')

# Column names from the business listings kept here as a ready-made drop list.
biz_drop_cols = [
    'OBJECTID',
    'CONAME',
    'SALESVOL',
    'HDBRCH',
    'ULTNUM',
    'PUBPRV',
    'EMPNUM',
    'FRNCOD',
    'ISCODE',
    'SQFTCODE',
    'LOC_NAME',
    'STATUS',
    'SCORE',
    'SOURCE',
    'REC_TYPE',
]

In [35]:
# Connect to the portal whose credentials are stored in the environment (loaded from .env).
# Each portal is described by three variables: <PREFIX>_URL, <PREFIX>_USERNAME and
# <PREFIX>_PASSWORD. Switch portals by changing the prefix; the prefixes used so far are
# 'BA_QA', 'ESRI_PORTAL' and 'BASRV_QA'.
portal_env_prefix = 'BA_QA'
portal_env_names = [portal_env_prefix + '_' + suffix for suffix in ('URL', 'USERNAME', 'PASSWORD')]

# Fail fast with a readable message. os.getenv returns None for unset variables, and passing
# None through to GIS() would not report which setting is missing (per the arcgis docs a
# missing url/username falls back to an anonymous ArcGIS Online connection).
missing_env_names = [env_name for env_name in portal_env_names if not os.getenv(env_name)]
if missing_env_names:
    raise EnvironmentError(
        'Missing environment variable(s): ' + ', '.join(missing_env_names)
    )

portal_url, portal_username, portal_password = [os.getenv(env_name) for env_name in portal_env_names]
gis = GIS(portal_url, username=portal_username, password=portal_password)

# Country is not handed `gis` explicitly; judging by the repr recorded for this cell it uses
# the connection created just above - presumably the active GIS (TODO confirm).
usa = Country('USA')

usa

<modeling.Country - USA (GIS at https://baqa.mapsqa.arcgis.com logged in as jmccune_baqa)>

In [36]:
%%time
# Look up the 'seattle' entry in the country's CBSA collection; the result is the area of
# interest (AOI) that the following cells use as their starting point.
# The recorded output is a single-row frame with ID, NAME and SHAPE columns.
aoi_df = usa.cbsas.get('seattle')

aoi_df

CPU times: user 51.3 ms, sys: 7.56 ms, total: 58.8 ms
Wall time: 1.04 s


Unnamed: 0,ID,NAME,SHAPE
0,42660,"Seattle-Tacoma-Bellevue, WA Metropolitan Stati...","{""rings"": [[[-122.83937000030072, 47.256580000..."


In [37]:
# Full catalogue of enrichment variables exposed by the country object.
evars = usa.enrich_variables

# DataFrame.info() writes its summary to stdout and returns None, so it is called directly;
# wrapping it in print() only adds a stray "None" line to the output.
evars.info()

# Keep the variables whose name ends in 'CY' and whose data collection name contains 'key'
# (case-insensitive). In the recorded output these are the 2020 KeyUSFacts variables.
# Bracket indexing is used so the column lookup cannot be shadowed by a DataFrame attribute.
# The catalogue index is not unique (18399 entries labelled "0 to 41"), hence the reset.
key_vars = evars[
    evars['name'].str.endswith('CY')
    & evars['data_collection'].str.lower().str.contains('key')
].reset_index(drop=True)

key_vars.info()
key_vars

<class 'pandas.core.frame.DataFrame'>
Int64Index: 18399 entries, 0 to 41
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   name               18399 non-null  object
 1   alias              18399 non-null  object
 2   data_collection    18399 non-null  object
 3   enrich_name        18399 non-null  object
 4   enrich_field_name  18399 non-null  object
 5   description        18306 non-null  object
 6   vintage            18297 non-null  object
 7   units              18399 non-null  object
dtypes: object(8)
memory usage: 1.3+ MB
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   name               20 non-null     object
 1   alias              20 non-null     object
 2   data_collection    20 non-null     object
 3   enrich_name        20 non-null     obj

Unnamed: 0,name,alias,data_collection,enrich_name,enrich_field_name,description,vintage,units
0,TOTPOP_CY,2020 Total Population,KeyUSFacts,KeyUSFacts.TOTPOP_CY,KeyUSFacts_TOTPOP_CY,2020 Total Population (Esri),2020,count
1,GQPOP_CY,2020 Group Quarters Population,KeyUSFacts,KeyUSFacts.GQPOP_CY,KeyUSFacts_GQPOP_CY,2020 Group Quarters Population (Esri),2020,count
2,DIVINDX_CY,2020 Diversity Index,KeyUSFacts,KeyUSFacts.DIVINDX_CY,KeyUSFacts_DIVINDX_CY,2020 Diversity Index (Esri),2020,count
3,TOTHH_CY,2020 Total Households,KeyUSFacts,KeyUSFacts.TOTHH_CY,KeyUSFacts_TOTHH_CY,2020 Total Households (Esri),2020,count
4,AVGHHSZ_CY,2020 Average Household Size,KeyUSFacts,KeyUSFacts.AVGHHSZ_CY,KeyUSFacts_AVGHHSZ_CY,2020 Average Household Size (Esri),2020,count
5,MEDHINC_CY,2020 Median Household Income,KeyUSFacts,KeyUSFacts.MEDHINC_CY,KeyUSFacts_MEDHINC_CY,2020 Median Household Income (Esri),2020,currency
6,AVGHINC_CY,2020 Average Household Income,KeyUSFacts,KeyUSFacts.AVGHINC_CY,KeyUSFacts_AVGHINC_CY,2020 Average Household Income (Esri),2020,currency
7,PCI_CY,2020 Per Capita Income,KeyUSFacts,KeyUSFacts.PCI_CY,KeyUSFacts_PCI_CY,2020 Per Capita Income (Esri),2020,currency
8,TOTHU_CY,2020 Total Housing Units,KeyUSFacts,KeyUSFacts.TOTHU_CY,KeyUSFacts_TOTHU_CY,2020 Total Housing Units (Esri),2020,count
9,OWNER_CY,2020 Owner Occupied HUs,KeyUSFacts,KeyUSFacts.OWNER_CY,KeyUSFacts_OWNER_CY,2020 Owner Occupied Housing Units (Esri),2020,count


In [38]:
%%time
bg_df = aoi_df.mdl.level(0).get().mdl.enrich(key_vars)

print(bg_df.info())
bg_df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2480 entries, 0 to 2479
Data columns (total 23 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   ID          2480 non-null   object  
 1   NAME        2480 non-null   object  
 2   TOTPOP_CY   1440 non-null   float64 
 3   GQPOP_CY    1440 non-null   float64 
 4   DIVINDX_CY  1440 non-null   float64 
 5   TOTHH_CY    1440 non-null   float64 
 6   AVGHHSZ_CY  1440 non-null   float64 
 7   MEDHINC_CY  1440 non-null   float64 
 8   AVGHINC_CY  1440 non-null   float64 
 9   PCI_CY      1440 non-null   float64 
 10  TOTHU_CY    1440 non-null   float64 
 11  OWNER_CY    1440 non-null   float64 
 12  RENTER_CY   1440 non-null   float64 
 13  VACANT_CY   1440 non-null   float64 
 14  MEDVAL_CY   1440 non-null   float64 
 15  AVGVAL_CY   1440 non-null   float64 
 16  POPGRW10CY  1440 non-null   float64 
 17  HHGRW10CY   1440 non-null   float64 
 18  FAMGRW10CY  1440 non-null   float64 
 19  DPOP_C

Unnamed: 0,ID,NAME,TOTPOP_CY,GQPOP_CY,DIVINDX_CY,TOTHH_CY,AVGHHSZ_CY,MEDHINC_CY,AVGHINC_CY,PCI_CY,...,VACANT_CY,MEDVAL_CY,AVGVAL_CY,POPGRW10CY,HHGRW10CY,FAMGRW10CY,DPOP_CY,DPOPWRK_CY,DPOPRES_CY,SHAPE
0,530330012001,530330012.001,1342.0,0.0,62.5,571.0,2.35,95936.0,129349.0,55036.0,...,50.0,643634.0,647973.0,0.91,0.73,0.94,1112.0,457.0,655.0,"{""rings"": [[[-122.31243049957185, 47.704870499..."
1,530330012002,530330012.002,1392.0,110.0,53.1,571.0,2.25,107276.0,149116.0,61313.0,...,23.0,686765.0,684551.0,0.82,0.77,0.99,1160.0,567.0,593.0,"{""rings"": [[[-122.3178410004258, 47.7076489995..."
2,530330032001,530330032.001,1298.0,0.0,45.6,515.0,2.52,132945.0,187017.0,74202.0,...,23.0,851415.0,924422.0,0.7,0.45,0.75,1104.0,447.0,657.0,"{""rings"": [[[-122.38754700019909, 47.675934999..."
3,530330032002,530330032.002,1115.0,2.0,36.7,474.0,2.35,108057.0,173682.0,73838.0,...,34.0,900240.0,993990.0,0.1,-0.14,0.21,982.0,400.0,582.0,"{""rings"": [[[-122.38758899913357, 47.671643999..."
4,530330032003,530330032.003,2365.0,801.0,60.9,775.0,2.02,19815.0,43614.0,15269.0,...,27.0,496667.0,813983.0,1.72,1.17,1.64,1898.0,551.0,1347.0,"{""rings"": [[[-122.38758899913357, 47.669455999..."


In [45]:
%%time
biz_df = aoi_df.mdl.business.get_by_name('ace hardware')

print(biz_df.info())
biz_df.head()

  @register_dataframe_accessor('mdl')


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 35 entries, 0 to 34
Data columns (total 15 columns):
 #   Column               Non-Null Count  Dtype   
---  ------               --------------  -----   
 0   LOCNUM               35 non-null     object  
 1   CONAME               35 non-null     object  
 2   NAICS                35 non-null     object  
 3   SIC                  35 non-null     object  
 4   SOURCE               35 non-null     object  
 5   PUBPRV               35 non-null     object  
 6   FRNCOD               35 non-null     object  
 7   ISCODE               35 non-null     object  
 8   CITY                 35 non-null     object  
 9   ZIP                  35 non-null     object  
 10  STATE                35 non-null     object  
 11  SHAPE                35 non-null     geometry
 12  location_id          35 non-null     object  
 13  brand_name           35 non-null     object  
 14  brand_name_category  35 non-null     object  
dtypes: geometry(1), object(14

Unnamed: 0,LOCNUM,CONAME,NAICS,SIC,SOURCE,PUBPRV,FRNCOD,ISCODE,CITY,ZIP,STATE,SHAPE,location_id,brand_name,brand_name_category
0,174841932,ACE HARDWARE,44413005,525104,Data Axle,,,,LAKE STEVENS,98258,WA,"{""x"": -122.1068519999136, ""y"": 47.998917000209...",174841932,ACE HARDWARE,ACE HARDWARE
1,403513308,ACE HARDWARE,44413005,525104,Data Axle,,,,EVERETT,98203,WA,"{""x"": -122.21200349958885, ""y"": 47.95310699996...",403513308,ACE HARDWARE,ACE HARDWARE
2,668942261,ACE HARDWARE,44413005,525104,Data Axle,,,,MUKILTEO,98275,WA,"{""x"": -122.28130350037961, ""y"": 47.88311400023...",668942261,ACE HARDWARE,ACE HARDWARE
3,698680865,ACE HARDWARE,44413005,525104,Data Axle,,,,EVERETT,98208,WA,"{""x"": -122.20738199982871, ""y"": 47.89885499975...",698680865,ACE HARDWARE,ACE HARDWARE
4,700121193,ACE HARDWARE,44413005,525104,Data Axle,,,,NORMANDY PARK,98148,WA,"{""x"": -122.33713949958971, ""y"": 47.42394300019...",700121193,ACE HARDWARE,ACE HARDWARE


In [40]:
%%time
bg_near_biz_df = bg_df.mdl.proximity.get_nearest(biz_df, origin_id_column='ID', near_prefix='brand')

print(bg_near_biz_df.info())
bg_near_biz_df.head()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2480 entries, 0 to 2479
Data columns (total 43 columns):
 #   Column                                Non-Null Count  Dtype   
---  ------                                --------------  -----   
 0   ID                                    2480 non-null   object  
 1   NAME                                  2480 non-null   object  
 2   TOTPOP_CY                             1440 non-null   float64 
 3   GQPOP_CY                              1440 non-null   float64 
 4   DIVINDX_CY                            1440 non-null   float64 
 5   TOTHH_CY                              1440 non-null   float64 
 6   AVGHHSZ_CY                            1440 non-null   float64 
 7   MEDHINC_CY                            1440 non-null   float64 
 8   AVGHINC_CY                            1440 non-null   float64 
 9   PCI_CY                                1440 non-null   float64 
 10  TOTHU_CY                              1440 non-null   float64 
 11  OWNE

Unnamed: 0,ID,NAME,TOTPOP_CY,GQPOP_CY,DIVINDX_CY,TOTHH_CY,AVGHHSZ_CY,MEDHINC_CY,AVGHINC_CY,PCI_CY,...,brand_proximity_traveltime_03,brand_proximity_kilometers_03,brand_proximity_side_street_left_03,brand_proximity_side_street_right_03,brand_destination_id_04,brand_proximity_traveltime_04,brand_proximity_kilometers_04,brand_proximity_side_street_left_04,brand_proximity_side_street_right_04,SHAPE
0,530330012001,530330012.001,1342.0,0.0,62.5,571.0,2.35,95936.0,129349.0,55036.0,...,16.671709,10.717732,1,0,724556934,18.890684,16.359468,0,1,"{""rings"": [[[-122.31243049957185, 47.704870499..."
1,530330012002,530330012.002,1392.0,110.0,53.1,571.0,2.25,107276.0,149116.0,61313.0,...,15.846284,9.692429,1,0,724556934,18.982812,16.339925,0,1,"{""rings"": [[[-122.3178410004258, 47.7076489995..."
2,530330032001,530330032.001,1298.0,0.0,45.6,515.0,2.52,132945.0,187017.0,74202.0,...,17.446454,10.002495,1,0,664387990,18.60211,10.906123,1,0,"{""rings"": [[[-122.38754700019909, 47.675934999..."
3,530330032002,530330032.002,1115.0,2.0,36.7,474.0,2.35,108057.0,173682.0,73838.0,...,16.816368,9.56298,1,0,664387990,19.616108,11.403301,1,0,"{""rings"": [[[-122.38758899913357, 47.671643999..."
4,530330032003,530330032.003,2365.0,801.0,60.9,775.0,2.02,19815.0,43614.0,15269.0,...,16.030117,8.990496,1,0,664387990,20.603537,12.164034,1,0,"{""rings"": [[[-122.38758899913357, 47.669455999..."


In [41]:
%%time
comp_df = aoi_df.mdl.business.get_competition(biz_df, local_threshold=3)
comp_df.spatial.set_geometry('SHAPE')

print(comp_df.info())
comp_df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 72 entries, 0 to 71
Data columns (total 15 columns):
 #   Column               Non-Null Count  Dtype   
---  ------               --------------  -----   
 0   LOCNUM               72 non-null     object  
 1   CONAME               72 non-null     object  
 2   NAICS                72 non-null     object  
 3   SIC                  72 non-null     object  
 4   SOURCE               72 non-null     object  
 5   PUBPRV               72 non-null     object  
 6   FRNCOD               72 non-null     object  
 7   ISCODE               72 non-null     object  
 8   CITY                 72 non-null     object  
 9   ZIP                  72 non-null     object  
 10  STATE                72 non-null     object  
 11  SHAPE                72 non-null     geometry
 12  location_id          72 non-null     object  
 13  brand_name           72 non-null     object  
 14  brand_name_category  72 non-null     object  
dtypes: geometry(1), object(14

Unnamed: 0,LOCNUM,CONAME,NAICS,SIC,SOURCE,PUBPRV,FRNCOD,ISCODE,CITY,ZIP,STATE,SHAPE,location_id,brand_name,brand_name_category
0,2890986,MC LENDON HARDWARE,44413005,525104,Data Axle,,,,SUMNER,98390,WA,"{""x"": -122.24236500031, ""y"": 47.20460400019538...",2890986,MC LENDON HARDWARE,local_brand
1,6128854,MCLENDON HARDWARE INC,44413005,525104,Data Axle,,,,RENTON,98057,WA,"{""x"": -122.21407799973574, ""y"": 47.47792500017...",6128854,MCLENDON HARDWARE INC,local_brand
2,174262691,GATEWAY TRUE VALUE HARDWARE,44413005,525104,Data Axle,,,,ENUMCLAW,98022,WA,"{""x"": -121.98761549958994, ""y"": 47.20199399976...",174262691,GATEWAY TRUE VALUE HARDWARE,local_brand
3,174471722,TWEEDY & POPP HARDWARE,44413005,525104,Data Axle,,,,SEATTLE,98103,WA,"{""x"": -122.33571339982164, ""y"": 47.66129593026...",174471722,TWEEDY & POPP HARDWARE,local_brand
4,174585992,MAPLE LEAF HARDWARE,44413005,525104,Data Axle,,,,SEATTLE,98115,WA,"{""x"": -122.31726749977874, ""y"": 47.69433000003...",174585992,MAPLE LEAF HARDWARE,local_brand


In [16]:
%%time
bg_near_comp_df = bg_df.mdl.proximity.get_nearest(comp_df, origin_id_column='ID', near_prefix='comp', destination_count=6,
                                                  destination_columns_to_keep=['brand_name', 'brand_name_category'])

print(bg_near_comp_df.info())
bg_near_comp_df.head()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2480 entries, 0 to 2479
Data columns (total 45 columns):
 #   Column                               Non-Null Count  Dtype   
---  ------                               --------------  -----   
 0   ID                                   2480 non-null   object  
 1   NAME                                 2480 non-null   object  
 2   comp_destination_id_01               2480 non-null   object  
 3   comp_proximity_traveltime_01         2480 non-null   float64 
 4   comp_proximity_kilometers_01         2480 non-null   float64 
 5   comp_proximity_side_street_left_01   2480 non-null   int64   
 6   comp_proximity_side_street_right_01  2480 non-null   int64   
 7   comp_brand_name_01                   2480 non-null   object  
 8   comp_brand_name_category_01          2480 non-null   object  
 9   comp_destination_id_02               2480 non-null   object  
 10  comp_proximity_traveltime_02         2480 non-null   float64 
 11  comp_proximity_ki

Unnamed: 0,ID,NAME,comp_destination_id_01,comp_proximity_traveltime_01,comp_proximity_kilometers_01,comp_proximity_side_street_left_01,comp_proximity_side_street_right_01,comp_brand_name_01,comp_brand_name_category_01,comp_destination_id_02,...,comp_brand_name_05,comp_brand_name_category_05,comp_destination_id_06,comp_proximity_traveltime_06,comp_proximity_kilometers_06,comp_proximity_side_street_left_06,comp_proximity_side_street_right_06,comp_brand_name_06,comp_brand_name_category_06,SHAPE
0,530330012001,530330012.001,174585992,4.003592,1.821041,0,1,MAPLE LEAF HARDWARE,local_brand,743807120,...,CITY PEOPLE SANDPOINT TRUE VL,local_brand,174471722,12.082842,8.011265,1,0,TWEEDY & POPP HARDWARE,local_brand,"{""rings"": [[[-122.31243049957185, 47.704870499..."
1,530330012002,530330012.002,174585992,3.132663,1.328069,0,1,MAPLE LEAF HARDWARE,local_brand,743807120,...,TWEEDY & POPP HARDWARE,local_brand,301450011,11.571473,5.997022,1,0,STONEWAY HARDWARE & SUPPLY,local_brand,"{""rings"": [[[-122.3178410004258, 47.7076489995..."
2,530330032001,530330032.001,403372939,5.116623,2.618996,0,1,STONEWAY HARDWARE,local_brand,743807120,...,HOME BUILDERS CTR,local_brand,174610261,9.182731,5.809547,1,0,BUILDERS HARDWARE & SUPPLY CO,local_brand,"{""rings"": [[[-122.38754700019909, 47.675934999..."
3,530330032002,530330032.002,403372939,4.553495,2.095767,0,1,STONEWAY HARDWARE,local_brand,743807120,...,GREENWOOD TRUE VALUE HARDWARE,local_brand,174610261,8.619602,5.286317,1,0,BUILDERS HARDWARE & SUPPLY CO,local_brand,"{""rings"": [[[-122.38758899913357, 47.671643999..."
4,530330032003,530330032.003,403372939,3.8187,1.865811,0,1,STONEWAY HARDWARE,local_brand,174612515,...,BUILDERS HARDWARE & SUPPLY CO,local_brand,452112691,8.379126,4.286343,1,0,GREENWOOD TRUE VALUE HARDWARE,local_brand,"{""rings"": [[[-122.38758899913357, 47.669455999..."


In [60]:
%%time
biz_near_comp_df = biz_df.mdl.proximity.get_nearest(comp_df, origin_id_column='LOCNUM', near_prefix='biz_comp', 
                                                    destination_id_column='LOCNUM', destination_count=6,
                                                    destination_columns_to_keep=['brand_name', 'brand_name_category'])

print(biz_near_comp_df.info())
biz_near_comp_df.head()

  @register_dataframe_accessor('mdl')


<class 'pandas.core.frame.DataFrame'>
Int64Index: 35 entries, 0 to 34
Data columns (total 57 columns):
 #   Column                                   Non-Null Count  Dtype   
---  ------                                   --------------  -----   
 0   LOCNUM                                   35 non-null     object  
 1   CONAME                                   35 non-null     object  
 2   NAICS                                    35 non-null     object  
 3   SIC                                      35 non-null     object  
 4   SOURCE                                   35 non-null     object  
 5   PUBPRV                                   35 non-null     object  
 6   FRNCOD                                   35 non-null     object  
 7   ISCODE                                   35 non-null     object  
 8   CITY                                     35 non-null     object  
 9   ZIP                                      35 non-null     object  
 10  STATE                                   

Unnamed: 0,LOCNUM,CONAME,NAICS,SIC,SOURCE,PUBPRV,FRNCOD,ISCODE,CITY,ZIP,...,biz_comp_brand_name_05,biz_comp_brand_name_category_05,biz_comp_destination_id_06,biz_comp_proximity_traveltime_06,biz_comp_proximity_kilometers_06,biz_comp_proximity_side_street_left_06,biz_comp_proximity_side_street_right_06,biz_comp_brand_name_06,biz_comp_brand_name_category_06,SHAPE
0,174841932,ACE HARDWARE,44413005,525104,Data Axle,,,,LAKE STEVENS,98258,...,MANOR HARDWARE & CONSTRUCTION,local_brand,502050909,12.507132,12.695267,1,0,MCDANIELS DO IT CTR,local_brand,"{""x"": -122.1068519999136, ""y"": 47.998917000209..."
1,403513308,ACE HARDWARE,44413005,525104,Data Axle,,,,EVERETT,98203,...,AIRCRAFT HARDWARE,local_brand,426702925,14.948232,14.696837,1,0,SENTRY DOOR & HARDWARE LLC,local_brand,"{""x"": -122.21200349958885, ""y"": 47.95310699996..."
2,668942261,ACE HARDWARE,44413005,525104,Data Axle,,,,MUKILTEO,98275,...,CHOWN HARDWARE,local_brand,405053703,16.550462,16.310112,1,0,MANOR HARDWARE & CONSTRUCTION,local_brand,"{""x"": -122.28130350037961, ""y"": 47.88311400023..."
3,698680865,ACE HARDWARE,44413005,525104,Data Axle,,,,EVERETT,98208,...,KING DOOR & HARDWARE,local_brand,415082473,13.627706,12.553305,0,1,CLEARVIEW TRUE VALUE HARDWARE,local_brand,"{""x"": -122.20738199982871, ""y"": 47.89885499975..."
4,700121193,ACE HARDWARE,44413005,525104,Data Axle,,,,NORMANDY PARK,98148,...,MCLENDON HARDWARE INC,local_brand,715313723,19.790454,19.105209,1,0,EARTHWISE ARCHITECTURAL SLVG,local_brand,"{""x"": -122.33713949958971, ""y"": 47.42394300019..."
