In [1]:
from arcgis.gis import GIS
import arcgis.geoenrichment as geoenrichment

In [2]:
# Account name passed as `username` when the GIS connection is created below.
user_id = 'jmccune_geoai'
# Id of the content item whose first layer is loaded as the customer layer later on.
customer_item_id = '47d2cb05d9c1494797293b62ba167211'

In [3]:
# Connect with only a username; no password is stored in the notebook
# (the saved output below shows an interactive password prompt).
gis = GIS(username=user_id)
gis

Enter password: ········


In [4]:
# Look up the geoenrichment Country object for the 'US' code; the last line displays it.
usa = geoenrichment.Country.get('US')
usa

<Country name:United States>

In [5]:
# Table of the data collections available for the country; its row count is
# reported below as the number of available data variables.
factors_df = usa.data_collections
factor_total = len(factors_df)
print('There are {:,} data variables available from Esri.'.format(factor_total))

There are 14,630 data variables available from Esri.


In [6]:
# Keep only the rows whose index label is the 'disposableincome' collection.
# The copy makes the subset independent of factors_df.
in_disposable_income = factors_df.index == 'disposableincome'
disposableincome_factors_df = factors_df.loc[in_disposable_income].copy()
print('For just disposable income there are {:,} variables!'.format(len(disposableincome_factors_df)))

For just disposable income there are 104 variables!


In [7]:
# Narrow the disposable-income variables to those whose alias mentions 'Median'.
alias_has_median = disposableincome_factors_df['alias'].str.contains('Median')
mediandi_factors_df = disposableincome_factors_df[alias_has_median]
mediandi_factors_df

Unnamed: 0_level_0,analysisVariable,alias,fieldCategory,vintage
dataCollectionID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
disposableincome,disposableincome.MEDDI_CY,2018 Median Disposable Income,2018 Disposable Income (Esri),2018
disposableincome,disposableincome.MEDDIA15CY,2018 Median Disposable Inc: HHr 15-24,2018 Disposable Income by Age (Esri),2018
disposableincome,disposableincome.MEDDIA25CY,2018 Median Disposable Inc: HHr 25-34,2018 Disposable Income by Age (Esri),2018
disposableincome,disposableincome.MEDDIA35CY,2018 Median Disposable Inc: HHr 35-44,2018 Disposable Income by Age (Esri),2018
disposableincome,disposableincome.MEDDIA45CY,2018 Median Disposable Inc: HHr 45-54,2018 Disposable Income by Age (Esri),2018
disposableincome,disposableincome.MEDDIA55CY,2018 Median Disposable Inc: HHr 55-64,2018 Disposable Income by Age (Esri),2018
disposableincome,disposableincome.MEDDIA65CY,2018 Median Disposable Inc: HHr 65-74,2018 Disposable Income by Age (Esri),2018
disposableincome,disposableincome.MEDDIA75CY,2018 Median Disposable Inc: HHr 75+,2018 Disposable Income by Age (Esri),2018


In [8]:
# Plain Python list of the analysis variable names to request from enrichment.
enrich_variable_list = mediandi_factors_df['analysisVariable'].tolist()
enrich_variable_list

['disposableincome.MEDDI_CY',
 'disposableincome.MEDDIA15CY',
 'disposableincome.MEDDIA25CY',
 'disposableincome.MEDDIA35CY',
 'disposableincome.MEDDIA45CY',
 'disposableincome.MEDDIA55CY',
 'disposableincome.MEDDIA65CY',
 'disposableincome.MEDDIA75CY']

In [9]:
from arcgis.gis import Item

# Build the Item from its id on the connected GIS, then take its first layer.
customer_item = Item(gis, customer_item_id)
customer_layer = customer_item.layers[0]
customer_layer

<FeatureLayer url:"https://services.arcgis.com/PMTtzuTB6WiPuNSv/arcgis/rest/services/Customers_Update/FeatureServer/0">

In [10]:
# Ask the layer for object ids only, then keep just the first three.
id_list = customer_layer.query(return_ids_only=True)
enrich_id_list = list(map(str, id_list['objectIds'][:3]))
# Comma-separated form of the ids, used as `object_ids` in the next query.
enrich_id_string = ','.join(enrich_id_list)
enrich_id_string

'1,2,3'

In [11]:
# Query only the selected object ids, then take the result's data frame via `.df`.
customer_features = customer_layer.query(object_ids=enrich_id_string)
customer_sdf = customer_features.df
customer_sdf

Unnamed: 0,CITY,CUSTOMER_CLASS,Customer_Spending,DMA,Distance,FIRSTNAME,Join_Count,LASTNAME,OBJECTID,PAYMETHOD,...,Store_ID,TARGET_FID,X_Long,Y_Lat,ZIP,ZIP4,description,test,time_of_day,SHAPE
0,Prattville,Steady,3527.8,Montgomery (Selma) AL,57691.556784,JIM,2,BROWN,1,MC,...,,1,-86.497305,32.474348,36067,2816,Island hemp skirt - The Island Hemp Skirt brin...,,,"{'x': -9628835.9624, 'y': 3825738.4985999987, ..."
1,Prattville,Steady,2667.1,Montgomery (Selma) AL,57691.556784,CARL,2,ATKINS,2,MC,...,,2,-86.478971,32.481213,36067,1814,Vintage logo pkt t-shirt - Keep it on the down...,,,"{'x': -9626795.0102, 'y': 3826644.4267000034, ..."
2,Prattville,Steady,2897.6,Montgomery (Selma) AL,57691.556784,JOHN,2,ASHBY,3,PP,...,,3,-86.457961,32.485113,36067,2110,Solimar pants - In case your travel plans coin...,,,"{'x': -9624456.2475, 'y': 3827159.0424999967, ..."


In [12]:
# Only the geometry column is sent as the study areas. Geometry is not requested
# back because it is already held locally in customer_sdf.
study_area_sdf = customer_sdf[['SHAPE']].copy()
enrich_df = geoenrichment.enrich(
    study_areas=study_area_sdf,
    analysis_variables=enrich_variable_list,
    return_geometry=False,
)

# Re-index by the returned 'ID' column and cast it to the customer index dtype
# so the index-based join in the next cell lines the rows up.
enrich_df = enrich_df.set_index('ID', drop=True)
enrich_df.index = enrich_df.index.astype(customer_sdf.index.dtype)

# Remove any column whose name contains 'OBJECTID'.
objectid_columns = [name for name in enrich_df.columns if 'OBJECTID' in name]
enrich_df = enrich_df.drop(objectid_columns, axis=1)
enrich_df

Unnamed: 0_level_0,HasData,MEDDIA15CY,MEDDIA25CY,MEDDIA35CY,MEDDIA45CY,MEDDIA55CY,MEDDIA65CY,MEDDIA75CY,MEDDI_CY,aggregationMethod,areaType,bufferRadii,bufferUnits,bufferUnitsAlias,sourceCountry
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
0,1,30717,51239,61686,70704,52912,42742,27632,51445,BlockApportionment:US.BlockGroups,RingBuffer,1,esriMiles,Miles,US
1,1,30717,50397,54461,57858,45039,39248,27789,46602,BlockApportionment:US.BlockGroups,RingBuffer,1,esriMiles,Miles,US
2,1,36263,47177,54538,57423,48567,46279,34101,48455,BlockApportionment:US.BlockGroups,RingBuffer,1,esriMiles,Miles,US


In [13]:
# Index-aligned left join: every customer row is kept and gains the enrichment columns.
customer_enrich_sdf = customer_sdf.join(enrich_df, how='left')
customer_enrich_sdf

Unnamed: 0,CITY,CUSTOMER_CLASS,Customer_Spending,DMA,Distance,FIRSTNAME,Join_Count,LASTNAME,OBJECTID,PAYMETHOD,...,MEDDIA55CY,MEDDIA65CY,MEDDIA75CY,MEDDI_CY,aggregationMethod,areaType,bufferRadii,bufferUnits,bufferUnitsAlias,sourceCountry
0,Prattville,Steady,3527.8,Montgomery (Selma) AL,57691.556784,JIM,2,BROWN,1,MC,...,52912,42742,27632,51445,BlockApportionment:US.BlockGroups,RingBuffer,1,esriMiles,Miles,US
1,Prattville,Steady,2667.1,Montgomery (Selma) AL,57691.556784,CARL,2,ATKINS,2,MC,...,45039,39248,27789,46602,BlockApportionment:US.BlockGroups,RingBuffer,1,esriMiles,Miles,US
2,Prattville,Steady,2897.6,Montgomery (Selma) AL,57691.556784,JOHN,2,ASHBY,3,PP,...,48567,46279,34101,48455,BlockApportionment:US.BlockGroups,RingBuffer,1,esriMiles,Miles,US
