In [None]:
import importlib
import os
from pathlib import Path
import sys

from arcgis.features import GeoAccessor, GeoSeriesAccessor
from arcgis.gis import GIS
import arcpy
from dotenv import load_dotenv, find_dotenv
import pandas as pd

In [None]:
# paths to common data locations - NOTE: to convert any path to a raw string, simply use str(path_instance)
project_parent = Path('./').absolute().parent

data_dir = project_parent/'data'

data_raw = data_dir/'raw'
data_ext = data_dir/'external'
data_int = data_dir/'interim'
data_out = data_dir/'processed'

gdb_raw = data_raw/'raw.gdb'
gdb_int = data_int/'interim.gdb'
gdb_out = data_out/'processed.gdb'

# import the project package from the project package path
# ideally will be imported using 'from arcgis import da'
sys.path.append(str(project_parent/'src'))
import dm

# load the "autoreload" extension so that code can change, & always reload modules so that as you change code in src, it gets loaded
%load_ext autoreload
%autoreload 2

# Introspectively Examine and Get Geographies

In [None]:
# get available resources, and show which is currently active
# (bare expression on the last line, so the notebook displays the value)
dm.env.data.sources

In [None]:
# set the data source to use; here the string 'LOCAL' is assigned
dm.env.data.source = 'LOCAL'
# alternative sketched by the author (not executed): assign a GIS object instance instead
# dm.env.data.source = <GIS object instance>

In [None]:
# discover what countries are available, and get country objects back
cntry_df = dm.countries

# bare expression on the last line, so the notebook displays the result
cntry_df

In [None]:
# specify a country - two alternative ways are shown; both bind `usa`, so the
# second assignment is the one that remains in effect after the cell runs

# ...by the country order...
# NOTE(review): `countries` was never defined in this notebook, so the lookup now goes
# through dm.countries; assumes it supports positional indexing - confirm
usa = dm.countries[0]

# ...or by the three letter identifier
# NOTE(review): `ba` was never imported in this notebook; assumes Country is exposed by
# the dm package - confirm
usa = dm.Country('USA')

In [None]:
# get the geographic resolutions available for the country as a dataframe from smallest to largest
geos = usa.geographies

# display the value just retrieved; the previous `geos_df` was never defined and raised a NameError
geos

In [None]:
# select the lowest geographic resolution available for the most granular analysis
# two alternative lookups are shown; both bind geo_0_df, so the second assignment is the one that remains

# ...by index...
geo_0_df = usa.get_geography(0)

# ...by name
geo_0_df = usa.get_geography('block_group')

In [None]:
# select the area of interest (aoi) using a similar method with a little method chaining:
# get the 'cbsa' geography, select on 'Seattle', then take the first item of the result
aoi = usa.get_geography('cbsa').select('Seattle')[0]

In [None]:
# get the block groups in the area of interest
# fixes: removed the stray '.' before the call (SyntaxError), and replaced the undefined
# `geo_df` with geo_0_df, the block group frame retrieved in the earlier cell
# NOTE(review): confirm `within` is available on the spatial accessor and accepts the aoi object
origin_geo_df = geo_0_df.spatial.within(aoi)

# Get Locations

In [None]:
# get the store locations from the business listings
loc_brand_df = usa.business.search('Ace Hardware')

# ...and since this returns a spatially enabled dataframe, spatial.to_featureclass can be used to save directly with method chaining
# NOTE(review): this repeats the search rather than reusing loc_brand_df, and passes a Path rather than a str - confirm to_featureclass accepts Path objects
usa.business.search('Ace Hardware').spatial.to_featureclass(gdb_int/'loc_brand')

# preview the first rows of the brand locations
loc_brand_df.head()

In [None]:
# get all the competitors for the area of interest
# two alternative lookups are shown; both bind loc_comp_df, so the second assignment is the one that remains

# ...by NAICS or SIC code...
loc_comp_df = usa.business.get_competitors(
    code=44413005,  # include ability to specify shortened codes since NAICS codes can be shorter to be more general
    code_type='NAICS',
    brand_exclude='Ace Hardware'
)

# ...or simply by looking up using the existing location brand layer as a template
loc_comp_df = usa.business.get_competitors(brand_locations=loc_brand_df)

# ...and since this returns a spatially enabled dataframe, spatial.to_featureclass can be used to save directly with method chaining
usa.business.get_competitors(brand_locations=loc_brand_df).spatial.to_featureclass(gdb_int/'loc_comp')

# Calculate Proximity Metrics

In [None]:
# calculate the origin to nth destinations table for brand locations
# fixes: added the missing comma after destination_id_column (SyntaxError) and corrected
# the undefined `orgin_geo_df` to origin_geo_df
prox_df_brand = dm.proximity.get_neareset_nth_locations(
    origin_features=origin_geo_df,
    origin_id_column='ID',
    origin_centroid_weighting_features='path-to-block-points',  # features used to calculate a population weighted centroid location for routing
    origin_centroid_weighting_column='POP',                     # used to weight each population feature for centroid calculation
    destination_locations=loc_brand_df,
    destination_id_column='STORE_ID',
    destination_brand_or_concept_column='STORE_CONCEPT',        # think Nike Outlet versus Nike Brand Store
)

In [None]:
# calculate the origin to nth destinations table for competitor locations
# fixes: added the missing comma after destination_id_column (SyntaxError), corrected the
# undefined `orgin_geo_df` to origin_geo_df, and stored the result under its own name so the
# competitor table no longer overwrites the brand table in prox_df_brand
prox_df_comp = dm.proximity.get_neareset_nth_locations(
    origin_features=origin_geo_df,
    origin_id_column='ID',
    origin_centroid_weighting_features='path-to-block-points',  # features used to calculate a population weighted centroid location for routing
    origin_centroid_weighting_column='POP',                     # used to weight each population feature for centroid calculation
    destination_locations=loc_comp_df,
    destination_id_column='LOCNUM',
    destination_brand_or_concept_column='CONAME',               # think Nike Outlet versus Nike Brand Store
)

# ...and can even chain to create output using dataframe to_... functions
# the result is deliberately not assigned: to_csv returns None when it is given a path,
# so assigning it would replace the proximity table with None
# the file is named after the competitor table, since that is the data being written
dm.proximity.get_neareset_nth_locations(origin_geo_df, 'ID', 'path-to-block-points', 'POP', loc_comp_df, 'LOCNUM', 'CONAME').to_csv('prox_df_comp.csv')

# Enrich

In [None]:
# discover what enrichment data is available
enrich_df = dm.enrich.variables

# preview the first rows only rather than displaying the whole table
enrich_df.head()

In [None]:
# enrich origins with large number of variables
# alternative calls are shown; each binds origin_enrich_df, so the last assignment is the one that remains
# fixes: corrected the undefined `orgin_geo_df` to origin_geo_df throughout, and the undefined
# `dir_int` to data_int (the interim data directory defined in the setup cell)

# ...by just getting all of them...
# NOTE(review): keyword corrected from the misspelled `all_varaibles`; confirm it matches
# the enrich_data signature
origin_enrich_df = dm.enrich.enrich_data(origin_geo_df, all_variables=True)

# ...or by specifying category/categories...
origin_enrich_df = dm.enrich.enrich_data(origin_geo_df, variable_categories='key_variables')
origin_enrich_df = dm.enrich.enrich_data(origin_geo_df, variable_categories=['key_variables', 'agebyrace'])

# ...or even chain the output using dataframe to_parquet or to_csv method
dm.enrich.enrich_data(origin_geo_df, variable_categories='key_variables').to_parquet(data_int/'origin_enrich.parquet')