# 1. Setup

In [1]:
# Show the working directory: every '../data/...' path used below is relative to it.
!pwd

/home/umni2/a/umnilab/users/verma99/mk/spr_4711/code


In [2]:
from mobilkit.umni import *

# 2. Process data

## 2.1. Zones
Simplify the zone geometries so that they load and render faster in the dashboard, and attach each zone's population from `acs2019.csv`.

In [14]:
# Display labels for the scale codes found in the raw zones file.
scale_labels = D(COUNTY='County', TRACT='Tract', BG='Block Group')

# Load the zones, relabel their scale, then simplify each geometry
# (tolerance 0.005, expressed in the units of the layer's CRS).
zones_raw = gpd.read_parquet('../data/zones/zones2010.parquet')
zones_labeled = zones_raw.assign(scale=zones_raw['scale'].map(scale_labels))
zones_simple = zones_labeled.assign(
    geometry=zones_labeled.geometry.simplify(0.005))

# Only the population column of the ACS table is needed here.
acs_popu = pd.read_csv('../data/acs/acs2019.csv').astype(
    {'geoid': str, 'popu': np.int32})[['geoid', 'popu']]

# Join on the shared `geoid` column (default inner join), upper-case every
# column name except the active geometry column, and tag the CRS.
zones = (
    zones_simple
    .merge(acs_popu)
    .rename(columns=lambda col: col if col == 'geometry' else col.upper())
).set_crs(CRS_DEG).disp()

6,407 rows x 8 cols; Memory: 1.8 MiB; CRS: EPSG:4326


Unnamed: 0,GEOID,SCALE,COUNTY,CBSA,ALAND,AWATER,geometry,POPU
,<object>,<object>,<object>,<object>,<int64>,<int64>,<geometry>,<int32>
0.0,18083,County,Knox,,1336514262,20755159,"POLYGON ((-87.506221 38.733533, -87.495799 38....",37065


In [15]:
# Write the simplified zones as a zipped ESRI Shapefile for the dashboard.
zones.to_file('../data/dashboard/zones.shp.zip', driver='ESRI Shapefile')

## 2.2. SES
The column labels and descriptions were manually created in `data/acs/ses_cols.csv`.

In [5]:
# Lookup table: ACS column code -> display label, category and description.
# NOTE(review): `.disp(2)` appears to print a 2-row preview and return the frame — confirm in mobilkit.
ses_cols = pd.read_csv('../data/acs/ses_cols.csv').disp(2)

23 rows x 4 cols; Memory: 0.0 MiB


Unnamed: 0,code,label,category,description
,<object>,<object>,<object>,<object>
0.0,popu,Population,Demographics,Total population
1.0,pop_density,Population Density,Demographics,Population density (per sq. mi.)


In [6]:
# Build the long-format SES table for the dashboard: one row per
# (GEOID, indicator, raw value | percentile).
ses = (
    pd.read_csv('../data/acs/acs2019.csv').disp(0)
    .astype({'geoid': str})
    # Wide -> long. `var_name` must be a scalar: pandas >= 2.2 raises
    # ValueError for a list-like `var_name` when columns are not a MultiIndex.
    .melt(['scale', 'geoid'], var_name='indicator', value_name='value')
    # Attach label/category/description. This is the default inner join, so
    # indicator codes that are absent from `ses_cols` are dropped.
    .merge(ses_cols, left_on='indicator', right_on='code')
    .drop(columns=['code', 'indicator']).rename(columns={'label': 'indicator'})
    # Percentile rank (truncated to an int in 0-100) of each value among the
    # zones of the same scale for the same indicator; missing values get 0.
    .assign(percentile=lambda df: (100 * (
        df.groupby(['scale', 'indicator'])['value']
        .rank(pct=True))).fillna(0).astype(int))
    # Rename the two measure columns to the booleans False/True so that the
    # melt below produces the `is_pctile` flag directly.
    .rename(columns=D(value=False, percentile=True)).drop(columns='scale')
    .melt(['geoid', 'category', 'indicator', 'description'], var_name='is_pctile')
    .astype({'geoid': CAT, 'category': CAT, 'indicator': CAT, 'description': CAT, 'is_pctile': bool})
    .rename(columns=str.upper)
).disp()

6,407 rows x 25 cols; Memory: 1.5 MiB


Unnamed: 0,scale,geoid,popu,pop_density,p_minor,p_poc,p_lowedu,m_income,p_pov,p_pov150,p_pov200,p_snap,p_unemploy,p_noinsur,p_disabled,p_lowenglish,p_snglparent,p_crowded,p_renter,p_mobilehome,p_noveh,m_hhperveh,p_nowfh,p_transit,p_walkbike
,<object>,<int64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>


281,908 rows x 6 cols; Memory: 4.4 MiB


Unnamed: 0,GEOID,CATEGORY,INDICATOR,DESCRIPTION,IS_PCTILE,VALUE
,<category>,<category>,<category>,<category>,<bool>,<float64>
0.0,18069,Demographics,Population,Total population,False,36359.0


In [7]:
# Persist the long-format SES table for the dashboard as Parquet
# (an earlier version of this cell exported CSV instead).
ses.to_parquet('../data/dashboard/ses.parquet')

## 2.3. EJ

In [8]:
# Build the long-format EJ table for the dashboard: one row per
# (GEOID, indicator, raw value | percentile).
ejs = (
    pd.read_parquet('../data/ejs/ejs.parquet').disp(0)
    # Percentile rank (truncated to an int in 0-100) of each value among the
    # zones of the same scale for the same indicator; missing values get 0.
    # `scale` and `indicator` are categorical, so `observed=True` is passed
    # explicitly: it avoids the pandas FutureWarning about the changing default
    # and does not affect a transform such as `rank`.
    .assign(percentile=lambda df: (100 * (
        df.groupby(['scale', 'indicator'], observed=True)['value']
        .rank(pct=True))).fillna(0).astype(int))
    # Rename the two measure columns to the booleans False/True so that the
    # melt below produces the `is_pctile` flag directly.
    .rename(columns=D(value=False, percentile=True)).drop(columns='scale')
    .melt(['geoid', 'category', 'indicator', 'description'], var_name='is_pctile')
    .astype({'is_pctile': bool})
    .rename(columns=str.upper)
).disp()

64,170 rows x 6 cols; Memory: 1.5 MiB


Unnamed: 0,geoid,scale,category,indicator,description,value
,<category>,<category>,<category>,<category>,<category>,<float64>


128,340 rows x 6 cols; Memory: 2.4 MiB


Unnamed: 0,GEOID,CATEGORY,INDICATOR,DESCRIPTION,IS_PCTILE,VALUE
,<category>,<category>,<category>,<category>,<bool>,<float64>
0.0,180010301001,Transportation,Diesel PM,Diesel particulate matter level in air,False,0.24585


In [9]:
# Persist the long-format EJ table (raw values and percentiles) for the dashboard.
ejs.to_parquet('../data/dashboard/ej.parquet')

# 3. Miscellaneous

## 3.1. Long to wide
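Wide-format CSV exports (one row per zone, one column per indicator) of the EJ and SES tables, prepared for David.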

In [10]:
# Reshape the long EJ table to wide form: one row per GEOID and one column per
# indicator (duplicate pairs, if any, are combined by pivot_table's default mean).
ej_wide = pd.read_parquet('../data/ejs/ejs.parquet').pivot_table(
    values='value', index='geoid', columns='indicator')
ej_wide.disp().to_csv(U.mkfile('../data/dashboard/david/ej.csv'))

6,417 rows x 10 cols; Memory: 1.2 MiB


indicator,Air Toxics Cancer,Air Toxics Respiratory HI,Diesel PM,Ozone,PM2.5,Proximity to NPL,Proximity to RMP,Proximity to TSDF,Proximity to Traffic,Water Discharge
geoid,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>
18001,635.60506,7.94658,7.50542,1333.715654,259.173035,0.85877,21.12884,54.02471,6896.231984,0.042906


In [11]:
# The ACS table is already wide (one column per indicator), so it is copied
# to the export folder unchanged, without the index column.
acs_wide = pd.read_csv('../data/acs/acs2019.csv')
acs_wide.disp(0).to_csv(
    U.mkfile('../data/dashboard/david/ses.csv'), index=False)

6,407 rows x 25 cols; Memory: 1.5 MiB


Unnamed: 0,scale,geoid,popu,pop_density,p_minor,p_poc,p_lowedu,m_income,p_pov,p_pov150,p_pov200,p_snap,p_unemploy,p_noinsur,p_disabled,p_lowenglish,p_snglparent,p_crowded,p_renter,p_mobilehome,p_noveh,m_hhperveh,p_nowfh,p_transit,p_walkbike
,<object>,<int64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>
