## Imports

In [1]:
from utils import *

## Zones
Load the zones and attach their centroid coordinates (longitude and latitude), which are included in the exported tables.

In [2]:
# Zone attributes: population and labor force for each zone (GEOID)
zones = pd.read_parquet(
    'data/zones.parquet',
    columns=['geoid', 'level', 'city', 'popu', 'labor'])
# Zone centroids, with the x/y columns renamed to lon/lat
centroids = pd.read_parquet(
    'data/centroids.parquet', columns=['geoid', 'x', 'y'])
centroids = centroids.rename(columns=D(x='lon', y='lat'))
# Inner join: only zones that have a centroid are kept
zones = zones.merge(centroids, on='geoid').disp()

143,971 rows x 7 cols; Memory: 10.8 MiB


Unnamed: 0,geoid,level,city,popu,labor,lon,lat
,<object>,<category>,<category>,<int32>,<int32>,<float32>,<float32>
0.0,42007,County,Pittsburgh,164781,84153,-80.349266,40.682495


## Opportunities

In [3]:
# Opportunity counts, one row per zone (geoid, level) and opportunity kind
opport = pd.read_parquet('data/opportunities.parquet').disp()

1,595,036 rows x 4 cols; Memory: 40.2 MiB


Unnamed: 0,geoid,level,kind,opport
,<category>,<category>,<category>,<int32>
0.0,040019426001,BG,Jobs: All,4


## Impedance weights
The impedance function used in this study is the power exponential: $f(x)=\exp({\alpha x^\beta})$. The parameters $\alpha$ and $\beta$ were determined from the NHTS data (as part of the impedance paper under review in JTRG). These are carried over directly here:

In [4]:
# Power-exponential impedance parameters, one row per (travel mode, trip
# purpose). The weight of a trip with travel time x is exp(α · x^β); these
# values are merged onto the travel times by mode and purpose in `get_access`.
imped_params = Pdf([
    ('Drive',   'Jobs', -0.007127, 1.501453),
    ('Drive',   'POIs', -0.020097, 1.361630),
    ('Transit', 'Jobs', -0.000166, 2.096192),
    ('Transit', 'POIs', -0.002062, 1.608027),
    ('Walk',    'Jobs', -0.053588, 1.085346),
    ('Walk',    'POIs', -0.058269, 1.007449),
    ('Bike',    'Jobs', -0.007653, 1.591415),
    ('Bike',    'POIs', -0.028080, 1.145222),
], columns=['mode', 'purpose', 'α', 'β']).disp()

8 rows x 4 cols; Memory: 0.0 MiB


Unnamed: 0,mode,purpose,α,β
,<object>,<object>,<float64>,<float64>
0.0,Drive,Jobs,-0.007127,1.501453


## Travel times
The travel time tables are too large to fit in memory for all cities at once, so the travel times are loaded and access is computed one city at a time.

In [5]:
def get_times(city):
    """Load the travel times of one city at the BG, Tract and County levels.

    Reads every ``*_bg.parquet`` file in ``data/ttm/<city>``; the travel mode
    is the second underscore-separated token of the file name. Tract- and
    County-level times are the median of the block-group times between zones
    sharing the first 11 and 5 GEOID characters, respectively.

    Args:
        city: City name; it is lower-cased, periods are removed and spaces
            become hyphens to form the directory name.

    Returns:
        Table with categorical columns level, mode, src, trg and a float32
        `time` column in minutes.

    Raises:
        ValueError: If no travel time file is found for the city.
    """
    slug = city.lower().replace('.', '').replace(' ', '-')
    indir = Path(f'data/ttm/{slug}')
    # Sort the files so that the row order does not depend on the filesystem
    files = sorted(indir.glob('*_bg.parquet'))
    if not files:
        # Otherwise pd.concat fails with an opaque 'No objects to concatenate'
        raise ValueError(f'No travel time files (*_bg.parquet) in {indir}')
    times = []
    for f in files:
        mode = f.stem.split('_')[1].title()
        bg = pd.read_parquet(f, columns=['src', 'trg', 'time'])
        bg['time'] = bg['time'] / 60 # convert time from seconds to minutes
        times.append(bg.assign(mode=mode, level='BG'))
        for level, nchar in [('Tract', 11), ('County', 5)]:
            # Truncate the block group GEOIDs to the coarser level's prefix
            # without mutating the block-group table
            coarse = bg.assign(src=bg.src.str[:nchar], trg=bg.trg.str[:nchar])
            coarse = (coarse.groupby(['src', 'trg'])
                      .time.agg('median').reset_index())
            times.append(coarse.assign(mode=mode, level=level))
    times = pd.concat(times).reset_index(drop=True)
    cols = D(level=CAT, mode=CAT, src=CAT, trg=CAT, time=F32)
    return times[list(cols)].astype(cols)

# Example: load and preview the travel times of a single city
tt = get_times('austin').disp()

2,537,638 rows x 5 cols; Memory: 24.5 MiB


Unnamed: 0,level,mode,src,trg,time
,<category>,<category>,<category>,<category>,<float32>
0.0,BG,Bike,480219501011,480219501011,0.0


## Compute access
<!-- - Impedance weight: $$ w_{i,j}^{k,m} = f_{k,m}(t_{m,i,j}) = t_{m,i,j}^{\alpha_{k,m}} e^{\beta_{k,m} t_{m,i,j}} $$ -->
- Impedance weight: $$ w_{i,j}^{k,m} = f_{k,m}(t_{m,i,j}) = \exp\left(\alpha_{k,m}\cdot t_{m,i,j}^{\beta_{k,m}}\right) $$
- **Contour** (cumulative opportunities): $$ A_i^{k,m,t} = \sum_{j\in C_i(t)} o_j^k $$
- **Gravity**: $$ A_i^{k,m,t} = \sum_{j\in C_i(t)} o_j^k\cdot w_{i,j}^{k,m} $$
- **E2SFCA** (Enhanced 2-Step Floating Catchment Area): $$ A_i^{k,m,t} = \sum_{j\in C_i(t)} \frac{o_j^k}{\sum_{l\in C_j(t)} p_l^k\cdot w_{l,j}^{k,m}} $$
- **M2SFCA** (Modified 2-Step Floating Catchment Area): $$ A_i^{k,m,t} = \sum_{j\in C_i(t)} \frac{o_j^k\cdot w_{i,j}^{k,m}}{\sum_{l\in C_j(t)} p_l^k\cdot w_{l,j}^{k,m}} $$
- **XMFCA** (Multi-Modal Floating Catchment Area): $$ A_i^{k,m,t} = \sum_{j\in C_i(t)} \frac{o_j^k\cdot w_{i,j}^{k,m}}{\sum_{l\in C_j(t)} p_l^k\cdot \max_{m'} (w_{l,j}^{k,m'})} $$

In [6]:
# Catchment cutoffs: `get_access` keeps only origin-destination pairs whose
# travel time is within each threshold; they also suffix the export fields.
thresholds = (15, 30, 45, 60, 90) # travel time thresholds (minutes)

In [7]:
def get_access(city, times=None, zones=zones, opport=opport,
               params=imped_params, thresholds=thresholds):
    """Compute the accessibility measures for the zones of one city.

    For every travel time threshold, five measures (Contour, Gravity, E2SFCA,
    M2SFCA and XMFCA) are computed for each origin zone, opportunity kind and
    travel mode.

    Args:
        city: City name, used to filter `zones` and, when `times` is not a
            table, to load the travel times with `get_times`.
        times: Optional travel time table with columns level, mode, src, trg
            and time; a copy is used so that the input is not modified.
        zones: Zone table with at least geoid, level, city, popu and labor.
        opport: Opportunity table with columns geoid, level, kind and opport.
        params: Impedance parameters with columns mode, purpose, α and β.
        thresholds: Travel time cutoffs, in the same unit as `time`.

    Returns:
        Long-format table with columns measure, kind, mode, thresh, level,
        geoid and access; only finite, positive access values are kept.
    """
    od = times.copy() if isinstance(times, Pdf) else get_times(city)
    # Attach the opportunities of the destination zone. The purpose is the
    # part of `kind` before the colon and selects the impedance parameters.
    opport = (opport.rename(columns=D(geoid='trg'))
              .drop(columns='level').astype(D(trg=od.trg.dtype)))
    opport['purpose'] = opport.kind.str.split(':').str[0].astype(CAT)
    od = od.merge(opport, on='trg')
    params = params.astype(D(α=F32, β=F32, mode=od['mode'].dtype,
                             purpose=od.purpose.dtype))
    od = od.merge(params, on=['mode', 'purpose'])
    # Attach the population of the origin zone
    zones = (filt(zones, city=city).drop(columns='level')
             .rename(columns=D(geoid='src')).astype(D(src=od.src.dtype)))
    od = od.merge(zones, on='src')
    # Demand population: labor force for job opportunities, total population
    # for every other purpose
    iswork = (od.pop('purpose') == 'Jobs').astype(F32)
    od.popu = (iswork * od.pop('labor') + (1 - iswork) * od.popu).astype(F32)
    # Power-exponential impedance weight: exp(α · time^β)
    od['weight'] = np.exp(od.pop('α') * od['time'] ** od.pop('β'))
    od['demand'] = (od.popu * od.weight).astype(F32)
    od['supply'] = (od.opport * od.weight).astype(F32)
    # XMFCA uses the largest weight over the modes for a given opportunity
    # kind. The weight depends on the purpose, so `kind` must be part of the
    # grouping; otherwise the Jobs and POIs weights of a pair get mixed.
    od['max_wt'] = (od.groupby(['src', 'trg', 'kind'], observed=True)
                    .weight.transform('max').fillna(0))
    access = []
    for thresh in thresholds:
        df = od[od.time <= thresh].copy()
        df['max_demand'] = (df.popu * df.max_wt).astype(F32)
        # Step 1: total weighted demand reaching each destination
        grp_cols = ['trg', 'level', 'kind', 'mode']
        demand = (df.groupby(grp_cols, observed=True)
                  [['demand', 'max_demand']].sum().astype(F32).reset_index()
                  .astype({x: df[x].dtype for x in grp_cols}))
        df = df.drop(columns=['demand', 'max_demand'])
        df = df.merge(demand, on=grp_cols)
        # Step 2: supply-to-demand ratios, summed over the destinations
        df['E2SFCA'] = df.opport / df.demand
        df['M2SFCA'] = df.supply / df.demand
        df['XMFCA'] = df.supply / df.max_demand
        df = (df.groupby(['src', 'level', 'kind', 'mode'], observed=True)
              [['opport', 'supply', 'E2SFCA', 'M2SFCA', 'XMFCA']]
              .sum().astype(F32).reset_index().rename(columns=D(
                  src='geoid', opport='Contour', supply='Gravity')))
        df = df.melt(['geoid', 'level', 'kind', 'mode'],
                     var_name='measure', value_name='access')
        # Drop zero access and the infinities caused by zero demand
        df = df[(df.access > 0) & (df.access != np.inf)]
        access.append(df.assign(thresh=str(thresh)))
    cols = D(measure=CAT, kind=CAT, mode=CAT, thresh=CAT,
             level=CAT, geoid=CAT, access=F32)
    access = pd.concat(access).reset_index(drop=True).astype(cols)[list(cols)]
    return access

# xs = get_access('Buffalo').disp() # 25s

In [8]:
def compute_access(city, overwrite=True):
    """Compute the access table of a city and cache it as a parquet file.

    The table is stored at ``data/access/<city>.parquet``, where the city name
    is lower-cased, spaces become hyphens and periods are removed.

    Args:
        city: City name passed to `get_access`.
        overwrite: If False and the output file already exists, the stored
            table is returned instead of being recomputed.

    Returns:
        The access table of the city.
    """
    fname = city.lower().replace(' ', '-').replace('.', '')
    outpath = Path(f'data/access/{fname}.parquet')
    if outpath.exists() and not overwrite:
        return pd.read_parquet(outpath)
    t = dt.datetime.now()
    print('Starting:', city)
    xs = get_access(city)
    # Persist the result; without this the cache above is never populated
    outpath.parent.mkdir(parents=True, exist_ok=True)
    xs.to_parquet(outpath)
    print(f'Done: {city} ({dt.datetime.now() - t})')
    return xs

# Unique prefixes (text before the first underscore) of the entries in data/ttm
cities = sorted({entry.split('_')[0] for entry in os.listdir('data/ttm')})
# %time pqdm(cities, compute_access, n_jobs=10); # 28m59s
# %time compute_access('chicago') # 13m34s
# %time compute_access('los-angeles') # 28m53s
# %time compute_access('new-york') # 1h12m21s (2 batches)

## Export data
Convert the access data for each city and level into wide format to save space in CSV format. This involves converting all combinations of (metric, opportunity kind, mode, threshold) into one column each so that each row denotes a unique zone.

In [9]:
# Abbreviations used to build compact column names for the wide export
measures = D(Contour='Co', Gravity='Gr', E2SFCA='E2', M2SFCA='M2', XMFCA='XM')
# NOTE(review): Transit has impedance parameters but no abbreviation here;
# confirm that leaving it out of the export fields is intentional
modes = D(Drive='Dr', Bike='Bi', Walk='Wa')
kinds = {
    'Jobs: All':            'jobTot',
    'Jobs: Low edu':        'jobEdu',
    'Jobs: Low wage':       'jobWag',
    'Jobs: POC':            'jobPOC',
    'POIs: Total':          'poiTot',
    'POIs: Education':      'poiEdu',
    'POIs: Groceries':      'poiGro',
    'POIs: Medical':        'poiMed',
    'POIs: Social Support': 'poiSoc',
}
# One row per (measure, kind, mode, threshold) combination
fields = Pdf(
    list(it.product(measures, kinds, modes, map(str, thresholds))),
    columns=['measure', 'kind', 'mode', 'thresh'])
# Field name: <measure>_<kind>_<mode>_<threshold>, e.g. Co_jobTot_Dr_15
fields.insert(0, 'field', fields['measure'].map(measures).str.cat(
    [fields['kind'].map(kinds), fields['mode'].map(modes), fields['thresh']],
    sep='_'))
fields.disp(None).to_csv('data/access_columns.csv', index=False)

675 rows x 5 cols; Memory: 0.2 MiB


Unnamed: 0,field,measure,kind,mode,thresh
,<object>,<object>,<object>,<object>,<object>
0,Co_jobTot_Dr_15,Contour,Jobs: All,Drive,15
1,Co_jobTot_Dr_30,Contour,Jobs: All,Drive,30
2,Co_jobTot_Dr_45,Contour,Jobs: All,Drive,45
3,Co_jobTot_Dr_60,Contour,Jobs: All,Drive,60
...,...,...,...,...,...
670,XM_poiSoc_Wa_15,XMFCA,POIs: Social Support,Walk,15
671,XM_poiSoc_Wa_30,XMFCA,POIs: Social Support,Walk,30
672,XM_poiSoc_Wa_45,XMFCA,POIs: Social Support,Walk,45
673,XM_poiSoc_Wa_60,XMFCA,POIs: Social Support,Walk,60


In [10]:
# # total 4m14s
# pbar = tqdm(os.listdir('data/access'))
# for file in pbar:
#     city = file.removesuffix('.parquet')
#     pbar.set_description(city)
#     xs = pd.read_parquet('data/access/' + file)
#     xs = xs[xs.measure != '2SFCA']
#     xs.rename(columns=D(scale='level'), errors='ignore', inplace=1)
#     for level, df in xs.groupby('level', observed=1):
#         df = df.merge(fields, on=['measure', 'kind', 'mode', 'thresh'])
#         df = (df.pivot_table('access', 'geoid', 'field', observed=0)
#               .fillna(0).astype(F32))
#         df = zones[['geoid', 'lon', 'lat']].merge(df, on='geoid')
#         outdir = mkdir(f'data/export/{city}')
#         df.to_csv(outdir / f'{city}_{level.lower()}.csv', index=False)