In [1]:
from arcgis.geoenrichment._business_analyst import Country
import pandas as pd

from index_creation import config

In [2]:
# Build the Country object for the 'US' code; the bare name below displays its
# repr so the resolved dataset/source is visible in the cell output.
usa = Country('US')

usa

<Country - USA 2022 (local)>

In [3]:
# Lookup frame keyed by enrichment variable name, holding only its alias
# (renamed to `variable_alias`).
df_ev = (
    usa.enrich_variables[['name', 'alias']]
    .set_index('name')
    .rename(columns={'alias': 'variable_alias'})
)

df_ev.head()

Unnamed: 0_level_0,variable_alias
name,Unnamed: 1_level_1
THHBASE,2022 Tapestry Household Base
TADULTBASE,2022 Tapestry Adult Pop Base
MP03017h_B,2022 HH Owns Any Baby Furniture or Equipment
MP03017h_I,2022 Index: HH Owns Any Baby Furniture or Equi...
MP03018h_B,2022 HH Bought Baby Furniture or Equipment/6 Mo


In [4]:
# Flatten the config mapping {index_name: [variable_name, ...]} into one row
# per (index_name, variable_name) pair.
df_mv = pd.DataFrame(
    [
        (index_name, variable_name)
        for index_name, variable_names in config.meta_variables.items()
        for variable_name in variable_names
    ],
    columns=['index_name', 'variable_name'],
)

df_mv

Unnamed: 0,index_name,variable_name
0,income,HINC0_CY
1,income,HINC15_CY
2,income,HINC25_CY
3,income,HINC35_CY
4,income,HINC50_CY
...,...,...
84,housing_diversity,ACSUNT20
85,housing_diversity,ACSUNT50UP
86,housing_diversity,ACSUNTMOB
87,housing_diversity,ACSUNTOTH


In [5]:
# Attach the alias from `df_ev` to every configured variable and key the result
# by index name.
#
# `reset_index()` followed by the two-column selection rebuilds the original
# (index_name, variable_name) input on every execution, so re-running this cell
# is safe: without it a second run would try to join a frame that already has a
# `variable_alias` column and fail on the overlapping column name.
df_mv = (
    df_mv.reset_index()[['index_name', 'variable_name']]
    .join(df_ev, on='variable_name', how='left')
    .drop_duplicates()
    .set_index('index_name', drop=True)
)

df_mv

Unnamed: 0_level_0,variable_name,variable_alias
index_name,Unnamed: 1_level_1,Unnamed: 2_level_1
income,HINC0_CY,2022 HH Income <$15000
income,HINC15_CY,2022 HH Income $15000-24999
income,HINC25_CY,2022 HH Income $25000-34999
income,HINC35_CY,2022 HH Income $35000-49999
income,HINC50_CY,2022 HH Income $50000-74999
...,...,...
housing_diversity,ACSUNT20,2020 Housing: 20 to 49 Units in Structure (ACS...
housing_diversity,ACSUNT50UP,2020 Housing: 50+ Units in Structure (ACS 5-Yr)
housing_diversity,ACSUNTMOB,2020 Housing: Mobile Homes (ACS 5-Yr)
housing_diversity,ACSUNTOTH,2020 Housing: Boat/RV/Van/etc. (ACS 5-Yr)


In [8]:
from typing import Optional

from numpy import vectorize

# Element-wise `len`: numpy's `vectorize` wraps the built-in so it can be
# applied to every entry of an array-like in one call.
measurer = vectorize(len)

def df_to_rst(df, table_name: Optional[str] = 'Table Name') -> str:
    """Render a DataFrame as a reStructuredText ``list-table`` directive.

    The index becomes the first table column and its name (or an empty string
    when the index is unnamed) the first header cell. Every label and value is
    converted with ``str``.

    Args:
        df: Frame to render.
        table_name: Title placed after ``.. list-table::``. ``None`` or an
            empty string produces an untitled table.

    Returns:
        The directive text, without a trailing newline.
    """
    index_header = df.index.name if df.index.name else ''
    headers = [str(index_header)] + [str(column) for column in df.columns]

    # itertuples keeps each column's own dtype (iterrows upcasts a row that
    # mixes ints and floats, turning 1 into 1.0). index=True puts the row label
    # first and still yields one record per row for a frame with no columns.
    rows = [
        [str(cell) for cell in record]
        for record in df.itertuples(index=True, name=None)
    ]

    # Column widths come from the longest rendered data cell. With no rows the
    # header length is used instead, and every width is clamped to at least 1
    # because the directive only accepts positive integers.
    widths = [
        max(max((len(row[pos]) for row in rows), default=len(header)), 1)
        for pos, header in enumerate(headers)
    ]

    title = f' {table_name}' if table_name else ''
    lines = [
        f'.. list-table::{title}',
        '    :widths: ' + ' '.join(map(str, widths)),
        '    :header-rows: 1',
        '',
    ]

    # Header row first, then one list item per data row.
    for row in [headers] + rows:
        lines.append(f'    * - {row[0]}')
        lines.extend(f'      - {cell}' for cell in row[1:])

    return '\n'.join(lines)

In [12]:
# Print one list-table per index name, titled with that name.
for idx in df_mv.index.unique():
    # The list selector `[[idx]]` always returns a DataFrame. A scalar selector
    # returns a Series when only one row carries the label, and a Series has no
    # `set_index`, so an index with a single variable would fail.
    df_idx = df_mv.loc[[idx]].set_index('variable_name', drop=True)
    print(df_to_rst(df_idx, idx))
    print('\n')

.. list-table:: income
    :widths: 10 29
    :header-rows: 1

    * - variable_name
      - variable_alias
    * - HINC0_CY
      - 2022 HH Income <$15000
    * - HINC15_CY
      - 2022 HH Income $15000-24999
    * - HINC25_CY
      - 2022 HH Income $25000-34999
    * - HINC35_CY
      - 2022 HH Income $35000-49999
    * - HINC50_CY
      - 2022 HH Income $50000-74999
    * - HINC75_CY
      - 2022 HH Income $75000-99999
    * - HINC100_CY
      - 2022 HH Income $100000-149999
    * - HINC150_CY
      - 2022 HH Income $150000-199999
    * - HINC200_CY
      - 2022 HH Income $200000+


.. list-table:: home_value
    :widths: 10 34
    :header-rows: 1

    * - variable_name
      - variable_alias
    * - VAL0_CY
      - 2022 Home Value <$50000
    * - VAL50K_CY
      - 2022 Home Value $50K-99999
    * - VAL100K_CY
      - 2022 Home Value $100K-149999
    * - VAL150K_CY
      - 2022 Home Value $150K-199999
    * - VAL200K_CY
      - 2022 Home Value $200K-249999
    * - VAL250K_CY
      -