# Modeling Modlule

The modeling module provides a single Python API for data scientists to take advantage of the capabilities of ArcGIS as part of demographic geographic data science workflows.

This first cell is largely a bunch of bubblegum and duct tape tying all the things together so I can use this notebook for prototyping and testing.

In [1]:
import importlib
import os
from pathlib import Path
import re
import sys

from dotenv import load_dotenv, find_dotenv
import pandas as pd

# load the "autoreload" extension so that code can change, & always reload modules so that as you change code in src, it gets loaded
%load_ext autoreload
%autoreload 2

# load environment variables from .env
load_dotenv(find_dotenv())

dir_src = Path.cwd().parent.parent/'src'

sys.path.insert(0, str(dir_src))

In [2]:
from arcgis.gis import GIS
from modeling import Country

In [3]:
gis_agol = GIS(os.getenv('ESRI_GIS_URL'), username=os.getenv('ESRI_GIS_USERNAME'), password=os.getenv('ESRI_GIS_PASSWORD'))

gis_agol

In [4]:
gis_ent= GIS(os.getenv('ESRI_PORTAL_URL'), username=os.getenv('ESRI_PORTAL_USERNAME'), password=os.getenv('ESRI_PORTAL_PASSWORD'))

gis_ent

In [5]:
local_df = Country('USA', 'local').cbsas.get('seattle').mdl.block_groups.get()

local_df.head()

Unnamed: 0,ID,NAME,SHAPE
0,530530701003,530530701.003,"{""rings"": [[[-122.1652430000312, 47.0830489997..."
1,530530714071,530530714.071,"{""rings"": [[[-122.33136200011126, 47.064063999..."
2,530530714072,530530714.072,"{""rings"": [[[-122.35775599975194, 47.065219999..."
3,530530714073,530530714.073,"{""rings"": [[[-122.36863199991484, 47.053068000..."
4,530530714112,530530714.112,"{""rings"": [[[-122.4110792494156, 47.0717044998..."


In [6]:
agol_df = Country('USA', gis_agol).cbsas.get('seattle').mdl.block_groups.get()

agol_df.head()

Unnamed: 0,ID,NAME,SHAPE
0,530330012001,530330012.001,"{""rings"": [[[-122.31243049957185, 47.704870499..."
1,530330012002,530330012.002,"{""rings"": [[[-122.3178410004258, 47.7076489995..."
2,530330032001,530330032.001,"{""rings"": [[[-122.38754700019909, 47.675934999..."
3,530330032002,530330032.002,"{""rings"": [[[-122.38758899913357, 47.671643999..."
4,530330032003,530330032.003,"{""rings"": [[[-122.38758899913357, 47.669455999..."


In [7]:
ent_df = Country('USA', gis_ent).cbsas.get('seattle').mdl.block_groups.get()

ent_df.head()

Unnamed: 0,ID,NAME,SHAPE
0,530330012001,530330012.001,"{""rings"": [[[-122.31243049957185, 47.704870499..."
1,530330012002,530330012.002,"{""rings"": [[[-122.3178410004258, 47.7076489995..."
2,530330032001,530330032.001,"{""rings"": [[[-122.38754700019909, 47.675934999..."
3,530330032002,530330032.002,"{""rings"": [[[-122.38758899913357, 47.671643999..."
4,530330032003,530330032.003,"{""rings"": [[[-122.38758899913357, 47.669455999..."


In [94]:
cnty_df = Country('USA', gis_ent).cbsas.get('seattle').mdl.counties.get()

cnty_df

Unnamed: 0,ID,NAME,SHAPE
0,53053,Pierce County,"{""rings"": [[[-122.41766497932484, 47.320186386..."
1,53033,King County,"{""rings"": [[[-121.36751200012111, 47.780137999..."
2,53061,Snohomish County,"{""rings"": [[[-121.68612849977673, 48.298988999..."


In [95]:
self = cnty_df.mdl._data.attrs['_cntry']

In [96]:
ev = self.enrich_variables
enrich_variables = ev[
    (ev.data_collection.str.lower().str.contains('key'))  # get the key variables
    & (ev.name.str.endswith('CY'))                     # just current year (2019) variables
].reset_index(drop=True)

In [97]:
evars = enrich_variables['enrich_field_name']

evars

0      KeyUSFacts_TOTPOP_CY
1       KeyUSFacts_GQPOP_CY
2     KeyUSFacts_DIVINDX_CY
3       KeyUSFacts_TOTHH_CY
4     KeyUSFacts_AVGHHSZ_CY
5     KeyUSFacts_MEDHINC_CY
6     KeyUSFacts_AVGHINC_CY
7         KeyUSFacts_PCI_CY
8       KeyUSFacts_TOTHU_CY
9       KeyUSFacts_OWNER_CY
10     KeyUSFacts_RENTER_CY
11     KeyUSFacts_VACANT_CY
12     KeyUSFacts_MEDVAL_CY
13     KeyUSFacts_AVGVAL_CY
14    KeyUSFacts_POPGRW10CY
15     KeyUSFacts_HHGRW10CY
16    KeyUSFacts_FAMGRW10CY
17       KeyUSFacts_DPOP_CY
18    KeyUSFacts_DPOPWRK_CY
19    KeyUSFacts_DPOPRES_CY
Name: enrich_field_name, dtype: object

In [99]:
out_df = cnty_df.mdl.enrich(enrich_variables)

out_df

Unnamed: 0,ID,NAME,TOTPOP_CY,GQPOP_CY,DIVINDX_CY,TOTHH_CY,AVGHHSZ_CY,MEDHINC_CY,AVGHINC_CY,PCI_CY,...,VACANT_CY,MEDVAL_CY,AVGVAL_CY,POPGRW10CY,HHGRW10CY,FAMGRW10CY,DPOP_CY,DPOPWRK_CY,DPOPRES_CY,SHAPE
0,53053,Pierce County,917565,20565,60.3,342092,2.62,77326,99077,37149,...,23485,343546,417408,1.41,1.29,1.24,884081,376219,507862,"{""rings"": [[[-122.41766497932484, 47.320186386..."
1,53033,King County,2271785,41334,65.4,924539,2.41,100598,135093,55065,...,61820,614306,716039,1.6,1.56,1.49,2381091,1239788,1141303,"{""rings"": [[[-121.36751200012111, 47.780137999..."
2,53061,Snohomish County,834034,9847,56.9,311214,2.65,89662,111835,41812,...,15524,438450,509596,1.54,1.46,1.41,779826,340646,439180,"{""rings"": [[[-121.68612849977673, 48.298988999..."


In [116]:
self.enrich_variables[[n in evars.str.lower().values for n in self.enrich_variables['enrich_field_name'].str.lower()]]

Unnamed: 0,name,alias,data_collection,enrich_name,enrich_field_name,description,vintage,units
2,TOTPOP_CY,2020 Total Population,KeyUSFacts,KeyUSFacts.TOTPOP_CY,KeyUSFacts_TOTPOP_CY,2020 Total Population (Esri),2020,count
4,GQPOP_CY,2020 Group Quarters Population,KeyUSFacts,KeyUSFacts.GQPOP_CY,KeyUSFacts_GQPOP_CY,2020 Group Quarters Population (Esri),2020,count
5,DIVINDX_CY,2020 Diversity Index,KeyUSFacts,KeyUSFacts.DIVINDX_CY,KeyUSFacts_DIVINDX_CY,2020 Diversity Index (Esri),2020,count
8,TOTHH_CY,2020 Total Households,KeyUSFacts,KeyUSFacts.TOTHH_CY,KeyUSFacts_TOTHH_CY,2020 Total Households (Esri),2020,count
10,AVGHHSZ_CY,2020 Average Household Size,KeyUSFacts,KeyUSFacts.AVGHHSZ_CY,KeyUSFacts_AVGHHSZ_CY,2020 Average Household Size (Esri),2020,count
11,MEDHINC_CY,2020 Median Household Income,KeyUSFacts,KeyUSFacts.MEDHINC_CY,KeyUSFacts_MEDHINC_CY,2020 Median Household Income (Esri),2020,currency
13,AVGHINC_CY,2020 Average Household Income,KeyUSFacts,KeyUSFacts.AVGHINC_CY,KeyUSFacts_AVGHINC_CY,2020 Average Household Income (Esri),2020,currency
15,PCI_CY,2020 Per Capita Income,KeyUSFacts,KeyUSFacts.PCI_CY,KeyUSFacts_PCI_CY,2020 Per Capita Income (Esri),2020,currency
19,TOTHU_CY,2020 Total Housing Units,KeyUSFacts,KeyUSFacts.TOTHU_CY,KeyUSFacts_TOTHU_CY,2020 Total Housing Units (Esri),2020,count
21,OWNER_CY,2020 Owner Occupied HUs,KeyUSFacts,KeyUSFacts.OWNER_CY,KeyUSFacts_OWNER_CY,2020 Owner Occupied Housing Units (Esri),2020,count


In [108]:
self.enrich_variables[[evars.str.contains(n) for n in self.enrich_variables['enrich_field_name']]]

ValueError: PandasArray must be 1-dimensional.