In [13]:
from pathlib import Path

import arcpy
from arcgis.features import GeoAccessor
from dm import Country
from dm.country import DemographicModeling
import pandas as pd
import numpy as np

# load the "autoreload" extension so as src code is changed, the changes are picked up in the dataframe
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
project_parent = Path('./').absolute().parent
dir_data = project_parent/'data'
dir_int = dir_data/'interim'
gdb_int = dir_int/'interim.gdb'

bp_fc = r'D:\arcgis\ba_data\us_2020\Data\Demographic Data\block_data.gdb\USA_ESRI_2020_blocks'
weighting_column = 'HH_C'

In [4]:
usa = Country('USA')

In [5]:
bg_df = usa.cbsas.get('seattle').dm.level(0).get()
bg_df.head()

Unnamed: 0,ID,NAME,SHAPE
0,530530701003,530530701.003,"{""rings"": [[[-122.1652430000312, 47.0830489997..."
1,530530714071,530530714.071,"{""rings"": [[[-122.33136200011126, 47.064063999..."
2,530530714072,530530714.072,"{""rings"": [[[-122.35775599975194, 47.065219999..."
3,530530714073,530530714.073,"{""rings"": [[[-122.36863199991484, 47.053068000..."
4,530530714112,530530714.112,"{""rings"": [[[-122.4110792494156, 47.0717044998..."


In [6]:
bp_df = pd.DataFrame.spatial.from_featureclass(bp_fc, fields=['FIPS', weighting_column])
bp_df.head()

Unnamed: 0,FIPS,HH_C,SHAPE
0,150039812001004,0.0,"{""x"": -18511120.3869, ""y"": 2737388.585900001, ..."
1,150039812001005,0.0,"{""x"": -18502465.812, ""y"": 2727557.4893999994, ..."
2,150039812001006,0.0,"{""x"": -18506030.8083, ""y"": 2737773.204599999, ..."
3,150039812001007,0.0,"{""x"": -18505933.4457, ""y"": 2737758.3016999997,..."
4,150039812001008,0.0,"{""x"": -18511006.362999998, ""y"": 2737422.3838, ..."


In [7]:
bp_df['BG_ID'] = bp_df.FIPS.str[:12]
bp_df.head()

Unnamed: 0,FIPS,HH_C,SHAPE,BG_ID
0,150039812001004,0.0,"{""x"": -18511120.3869, ""y"": 2737388.585900001, ...",150039812001
1,150039812001005,0.0,"{""x"": -18502465.812, ""y"": 2727557.4893999994, ...",150039812001
2,150039812001006,0.0,"{""x"": -18506030.8083, ""y"": 2737773.204599999, ...",150039812001
3,150039812001007,0.0,"{""x"": -18505933.4457, ""y"": 2737758.3016999997,...",150039812001
4,150039812001008,0.0,"{""x"": -18511006.362999998, ""y"": 2737422.3838, ...",150039812001


In [8]:
from dm.spatial import get_weighted_centroid

In [11]:
cnt_df = get_weighted_centroid(input_dataframe, grouping_column, weighting_column)
cnt_df.head()

Unnamed: 0_level_0,SHAPE
BG_ID,Unnamed: 1_level_1
10010201001,"{'x': -9627656.058243131, 'y': 3824486.9645652..."
10010201002,"{'x': -9627677.107471926, 'y': 3826797.6503860..."
10010202001,"{'x': -9626318.856402345, 'y': 3826212.3414651..."
10010202002,"{'x': -9625864.890484827, 'y': 3824687.0136218..."
10010203001,"{'x': -9624720.009880764, 'y': 3826061.9353332..."


In [14]:
bp_df = bp_df.dm.project()
jn_df = bg_df.spatial.join(bp_df, 'left')
jn_df.head()

Unnamed: 0,ID,NAME,SHAPE,index_right,FIPS,HH_C,BG_ID
0,530530701003,530530701.003,"{""rings"": [[[-122.1652430000312, 47.0830489997...",1578421.0,530530701003023,9.0,530530701003
1,530530701003,530530701.003,"{""rings"": [[[-122.1652430000312, 47.0830489997...",1578418.0,530530701003005,3.0,530530701003
2,530530701003,530530701.003,"{""rings"": [[[-122.1652430000312, 47.0830489997...",1578419.0,530530701003018,89.0,530530701003
3,530530701003,530530701.003,"{""rings"": [[[-122.1652430000312, 47.0830489997...",1578420.0,530530701003020,68.0,530530701003
4,530530701003,530530701.003,"{""rings"": [[[-122.1652430000312, 47.0830489997...",1577653.0,530530701003019,3.0,530530701003


In [15]:
jn_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 42416 entries, 0 to 42415
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype   
---  ------       --------------  -----   
 0   ID           42416 non-null  object  
 1   NAME         42416 non-null  object  
 2   SHAPE        42416 non-null  geometry
 3   index_right  42415 non-null  float64 
 4   FIPS         42415 non-null  object  
 5   HH_C         42415 non-null  float64 
 6   BG_ID        42415 non-null  object  
dtypes: float64(2), geometry(1), object(4)
memory usage: 2.3+ MB


In [16]:
jn_cnt_df = get_weighted_centroid(jn_df, 'ID', 'HH_C')
jn_cnt_df.head()

Unnamed: 0_level_0,SHAPE
ID,Unnamed: 1_level_1
530330001001,"{'x': -122.28464314981973, 'y': 47.72683440985..."
530330001002,"{'x': -122.29246667561983, 'y': 47.72931613111..."
530330001003,"{'x': -122.28945573967897, 'y': 47.72280944047..."
530330001004,"{'x': -122.29445632576045, 'y': 47.73193354498..."
530330001005,"{'x': -122.29443736645283, 'y': 47.72292840680..."
