In [1]:
import os
from pathlib import Path

from arcgis.gis import GIS
from dotenv import find_dotenv, load_dotenv  # for loading variables from a .env file to keep them out of version control
from modeling import Country  # used as the starting point for working with a country's geographies
import pandas as pd

load_dotenv(find_dotenv())  # recursively crawls up the directory tree until a .env file is found

# load the "autoreload" extension so that, as source code is changed, the changes are picked up in the notebook
%load_ext autoreload
%autoreload 2

In [2]:
# Resolve the data directories relative to the notebook's working directory;
# the project root is taken to be two levels above the current directory.
project_parent = Path('.').absolute().parent.parent
dir_data = project_parent/'data'
dir_int = dir_data/'interim'
dir_raw = dir_data/'raw'

# make sure data directories are present - helps for when project has just been cloned
# exist_ok=True keeps the cell safe to re-run and avoids a separate check-then-create step
for ddir in [dir_int, dir_raw]:
    ddir.mkdir(parents=True, exist_ok=True)

# although not necessary, dropping this list of columns later makes it easier to view business listing results
biz_drop_cols = [
    'OBJECTID', 'CONAME', 'SALESVOL', 'HDBRCH', 'ULTNUM', 'PUBPRV', 'EMPNUM', 'FRNCOD',
    'ISCODE', 'SQFTCODE', 'LOC_NAME', 'STATUS', 'SCORE', 'SOURCE', 'REC_TYPE',
]

# Setup

Country, Area-of-Interest, and Enrichment Variables

In [3]:
# Build the Web GIS connection explicitly; it is handed to Country below as the source.
# NOTE - the url and credentials come from environment variables (values from .env),
# thus keeping any credentials out of version control
gis = GIS(
    os.getenv('BA_QA_URL'),
    username=os.getenv('BA_QA_USERNAME'),
    password=os.getenv('BA_QA_PASSWORD'),
)

# Country instance explicitly using the GIS connection created above
usa = Country('USA', source=gis)

# ...although the same can also be done using local resources, ArcGIS Pro with Business Analyst and locally installed data.
# usa = Country('USA', source='local')

# display the Country repr as the cell output
usa

<modeling.Country - USA (GIS at https://baqa.mapsqa.arcgis.com logged in as jmccune_baqa)>

In [5]:
%%time

# Fetch a study area (an area of interest) to work with - this constrains results to a reasonable domain.
# NOTE - eventually, as these services evolve, this will also have the ability to span international
# borders, so it is essential to the workflow
bg_df = (
    usa.cbsas
    .get('seattle')
    .mdl
    .level(0)
    .get()
)

# show the retrieved dataframe as the cell output
bg_df

CPU times: user 1.34 s, sys: 282 ms, total: 1.62 s
Wall time: 9.41 s


Unnamed: 0,ID,NAME,SHAPE
0,530330012001,530330012.001,"{""rings"": [[[-122.31243049957185, 47.704870499..."
1,530330012002,530330012.002,"{""rings"": [[[-122.3178410004258, 47.7076489995..."
2,530330032001,530330032.001,"{""rings"": [[[-122.38754700019909, 47.675934999..."
3,530330032002,530330032.002,"{""rings"": [[[-122.38758899913357, 47.671643999..."
4,530330032003,530330032.003,"{""rings"": [[[-122.38758899913357, 47.669455999..."
...,...,...,...
2475,530610529043,530610529.043,"{""rings"": [[[-122.14943799870154, 48.064500999..."
2476,530610529044,530610529.044,"{""rings"": [[[-122.14359799983554, 48.059699000..."
2477,530619400011,530619400.011,"{""rings"": [[[-122.22630799998365, 48.097295999..."
2478,530619400012,530619400.012,"{""rings"": [[[-122.19474699952485, 48.075618000..."


In [35]:
# Stand-in values for the names read by the geography-level lookup cell that follows.
# NOTE(review): these look like prototyped method parameters - confirm
enrich_geography_level = 'block_groups'  # compared against geography_levels.geo_name (then .id) in the lookup cell
data = bg_df  # dataframe whose id column supplies the ids to enrich
enrich_id_column = 'ID'  # name of the column in `data` holding those ids

In [36]:
# bind a notebook-level `self` to the Country instance so the next cell can read
# self.geography_levels - NOTE(review): looks like scaffolding for prototyping a method body; confirm
self = usa

In [42]:
# Resolve the requested geography level to its id and collect the ids to be enriched.
# Reads: self.geography_levels, enrich_geography_level, data, enrich_id_column
# Sets:  geo_lvl_df, geo_lvl, id_lst

# get the geography level row from the geography levels dataframe by the geo_name
geo_lvl_df = self.geography_levels[self.geography_levels.geo_name == enrich_geography_level]

# if, for some reason, the actual geography level id was passed in, check for this as well
if len(geo_lvl_df.index) == 0:
    geo_lvl_df = self.geography_levels[self.geography_levels.id == enrich_geography_level]

# pitch a fit if no geography level was found, but try to offer helpful suggestions
# (the message names both accepted columns, since the lookup above tries geo_name and then id)
assert len(geo_lvl_df.index) > 0, (
    f'The specified geography level, {enrich_geography_level}, does not appear to be one of the '
    f'available geography levels. This must be a value from either the "geo_name" or the "id" column '
    f'in the dataframe available from the "geography_levels" property of the Country object.'
)

# get the actual geography level id from the first matching row
geo_lvl = geo_lvl_df.iloc[0].id

# create the list of ids to be used for enrichment
id_lst = list(data[enrich_id_column])

# display the resolved level id and the number of ids collected
geo_lvl, len(id_lst)

('US.BlockGroups', 2480)

In [41]:
# replace the geography level name with the id resolved by the lookup cell; with this value the
# lookup's geo_name match finds nothing and its fallback match on the id column is used instead
enrich_geography_level = geo_lvl

In [43]:
# alias the Country instance under the name `cntry`, which the following cell uses
cntry = usa

`@register_dataframe_accessor('mdl')`


In [45]:
# look up 'seattle' in cities_and_towns_places and retrieve the result of .mdl.level(0).get()
# NOTE(review): this rebinds bg_df, replacing the frame built from usa.cbsas in the earlier cell
bg_df = cntry.cities_and_towns_places.get('seattle').mdl.level(0).get()

# DataFrame.info() writes its summary to stdout and returns None, so call it directly;
# wrapping it in print() only adds a stray "None" line to the output
bg_df.info()
bg_df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 478 entries, 0 to 477
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype   
---  ------  --------------  -----   
 0   ID      478 non-null    object  
 1   NAME    478 non-null    object  
 2   SHAPE   478 non-null    geometry
dtypes: geometry(1), object(2)
memory usage: 11.3+ KB
None


Unnamed: 0,ID,NAME,SHAPE
0,530330009001,530330009.001,"{""rings"": [[[-122.28001399933198, 47.719146999..."
1,530330009002,530330009.002,"{""rings"": [[[-122.27643999897352, 47.712159999..."
2,530330010001,530330010.001,"{""rings"": [[[-122.2937919998337, 47.7119679995..."
3,530330010002,530330010.002,"{""rings"": [[[-122.2908140002252, 47.7067909999..."
4,530330011001,530330011.001,"{""rings"": [[[-122.30163299958835, 47.706698999..."


In [None]:
# call enrich() through the `mdl` accessor on the dataframe from the previous cell, passing no arguments
# NOTE(review): earlier cells prototype enrich_geography_level / enrich_id_column inputs - confirm
# whether enrich() is expected to receive them
bg_df.mdl.enrich()