# 1. Setup

In [1]:
from pqdm.processes import pqdm
from scipy.spatial import KDTree

from utils import *

# 2. Compute access

## 2.1. Load data

### 2.1.1. Zone boundaries

In [2]:
zoneMSA = pd.read_parquet(DATA / 'zones/zones_2020.parquet', filters=[
    ('top_msa', '==', True)], columns=['geoid', 'scale', 'state', 'msa', 'aland']).disp()

144,489 rows x 5 cols; Memory: 10.4 MiB


Unnamed: 0,geoid,scale,state,msa,aland
,<object>,<category>,<category>,<category>,<float32>
0.0,04013,County,AZ,Phoenix,9201.749023


In [3]:
zoneIN20 = pd.read_parquet(DATA / 'zones/zones_2020.parquet', filters=[
    ('state', '==', 'IN')], columns=['geoid', 'scale', 'county', 'aland']).disp()

7,078 rows x 4 cols; Memory: 0.7 MiB


Unnamed: 0,geoid,scale,county,aland
,<object>,<category>,<category>,<float32>
0.0,18001,County,Adams,338.924896


### 2.1.2. Population

In [4]:
popu = pd.read_parquet(DATA / 'ses/acs/acs_2021.parquet',
                       columns=['geoid', 'popu', 'labor']).disp()

322,202 rows x 3 cols; Memory: 26.0 MiB


Unnamed: 0,geoid,popu,labor
,<object>,<float64>,<float64>
0.0,01001,58239.0,27550.0


### 2.1.3. Opportunities

In [5]:
jobs = (pd.read_parquet(DATA / 'opport/job_totals_2021.parquet')
        .rename(columns=D(nJobs='opport')).assign(purpose='Work'))

In [6]:
pois = (pd.read_parquet(DATA / 'opport/poi_totals_2020.parquet')
        .rename(columns=D(nPois='opport')).assign(purpose='Non-work'))

In [7]:
opport = pd.concat([jobs, pois]).astype(
    D(geoid=CAT, scale=CAT, purpose=CAT, kind=CAT)).disp()

2,253,050 rows x 5 cols; Memory: 70.1 MiB


Unnamed: 0,geoid,scale,kind,opport,purpose
,<category>,<category>,<category>,<int32>,<category>
0.0,01001,County,All,12318,Work


In [8]:
opport.groupby(['purpose', 'kind']).size().pipe(lambda x: x[x > 0])

purpose   kind          
Non-work  Education         178400
          Groceries         153054
          Medical           178731
          Social Support    149092
          Total             317536
Work      All               322685
          Low edu           321444
          Low wage          319231
          POC               312877
dtype: int64

### 2.1.4. Impedance function

In [9]:
impedParams = pd.read_csv(DATA / 'impedance/fitted-params-2.csv').disp(None)

8 rows x 5 cols; Memory: 0.0 MiB


Unnamed: 0,mode,purpose,alpha,beta,r2
,<object>,<object>,<float64>,<float64>,<float64>
0.0,Drive,Work,-0.00747,1.489207,0.998287
1.0,Drive,Non-work,-0.021925,1.33594,0.995546
2.0,Transit,Work,-0.000169,2.092834,0.999156
3.0,Transit,Non-work,-0.002089,1.605494,0.998025
4.0,Walk,Work,-0.05766,1.062567,0.998082
5.0,Walk,Non-work,-0.062539,0.986936,0.998743
6.0,Bike,Work,-0.007948,1.581497,0.994165
7.0,Bike,Non-work,-0.029389,1.133436,0.992023


## 2.2. Travel times

### 2.2.1. Indiana (GDM)

In [10]:
ttIN = pd.read_parquet(DATA / 'od_cost/gdm_2020.parquet',
                       filters=[('time', '<=', 90 * 60)],
                       columns='scale mode src trg time'.split())
ttIN.time /= 60 # convert to minutes
sameZone = (ttIN[['scale', 'mode', 'src']].drop_duplicates()
            .assign(trg=lambda df: df.src, time=0.))
ttIN = pd.concat([ttIN, sameZone]).astype(
    D(scale=CAT, mode=CAT, src=CAT, trg=CAT, time=F32)).disp()

8,578,746 rows x 5 cols; Memory: 148.7 MiB


Unnamed: 0,scale,mode,src,trg,time
,<category>,<category>,<category>,<category>,<float32>
0.0,Tract,Transit,18105001304,18105001301,54.466667


### 2.2.2. Top 50 MSAs (OSRM)

In [11]:
def get_tt_msa(msa):
    res = []
    cols = D(src_geoid='src', trg_geoid='trg', duration='time')
    for mode in ['drive', 'bike', 'walk']:
        fpath = DATA / f'od_cost/osrm/msa/{msa}/bg_{mode}_2020.parquet'
        df = pd.read_parquet(fpath, columns=list(cols))
        df.duration /= 60 # convert to minutes
        res.append(df.rename(columns=cols).assign(mode=mode.title()))
    df = pd.concat(res).reset_index(drop=1)#.astype(D(mode=CAT))
    res = [df.assign(scale='BG')]
    for scale, nChar in D(County=5, Tract=11).items():
        d = df.assign(src=df.src.str[:nChar], trg=df.trg.str[:nChar])
        d = d.groupby(['src', 'trg', 'mode'])['time'].agg(np.median)
        res.append(d.reset_index().assign(scale=scale))
    df = pd.concat(res).reset_index(drop=1)
    catCols = ['scale', 'mode', 'src', 'trg']
    df = df.astype({x: CAT for x in catCols})[catCols + ['time']]
    return df

df = get_tt_msa('austin').disp()

2,537,638 rows x 5 cols; Memory: 24.6 MiB


Unnamed: 0,scale,mode,src,trg,time
,<category>,<category>,<category>,<category>,<float32>
0.0,BG,Drive,480219501011,480219501011,0.0


## 2.3. Compute accessibility

- Impedance weight: $$ w_{i,j}^{k,m} = f_{k,m}(c_{m,i,j}) = c_{m,i,j}^{\alpha_{k,m}} e^{\beta_{k,m} c_{m,i,j}} $$
- Contour: $$ A_i^{k,m,t} = \sum_{j\in C_i(t)} o_j^k $$
- Gravity: $$ A_i^{k,m,t} = \sum_{j\in C_i(t)} o_j^k\cdot w_{i,j}^{k,m} $$
- Competition: $$ A_i^{k,m,t} = \sum_{j\in C_i(t)} \frac{o_j^k\cdot w_{i,j}^{k,m}}{\sum_{l\in C_j(t)} p_j^k\cdot w_{i,j}^{k,m}} $$

In [12]:
thresholds = (15, 30, 45, 60, 90) # travel time thresholds (minutes)

In [13]:
def get_access(fname, times, opport=opport, popu=popu, params=impedParams,
               thresholds=thresholds, overwrite=False):
    outpath = DATA / f'access/{fname}.parquet'
    if (df := file_check(outpath, overwrite)) is not None: return df
    res = []
    times = times.astype(D(src=str, trg=str))
    opport_grps = opport.groupby(['kind', 'purpose'])
    for thresh in tqdm(thresholds):
        tt = times[times.time <= thresh].copy()
        for (scale, mode), od_ in tt.groupby(['scale', 'mode']):
            for (kind, purp), df in opport_grps:
                df = df[df['scale'] == scale]
                r = params.query(f'mode=="{mode}" & purpose=="{purp}"').iloc[0]
                od = od_[['src', 'trg', 'time']].copy()
                od['weight'] = np.exp(r.alpha * od.pop('time') ** r.beta)
                od = od.merge(df.rename(columns=D(geoid='trg'))[['trg', 'opport']])
                od['numer'] = od['opport'] * od['weight']
                xs = (od.groupby('src')[['opport', 'numer']].sum()
                      .rename(columns=D(opport='Contour', numer='Gravity'))
                      .astype(F32).rename_axis('geoid').reset_index()
                      .melt('geoid', var_name='measure', value_name='access'))
                fca = od.merge(popu, left_on='src', right_on='geoid')
                dmd_col = 'labor' if purp == 'Work' else 'popu'
                fca['denom'] = fca[dmd_col] * fca['weight']
                denom = fca.groupby('trg')['denom'].sum().reset_index()
                od = od.merge(denom, on='trg')
                od['ratio'] = od['numer'] / od['denom']
                fca = (od.groupby('src')['ratio'].sum().rename('access')
                       .astype(F32).rename_axis('geoid')
                       .reset_index().assign(measure='Competition'))
                res.append(pd.concat([xs, fca]).assign(
                    scale=scale, mode=mode, purpose=purp,
                    kind=kind, thresh=str(thresh)))
    xs = pd.concat(res).reset_index(drop=1)
    catCols = 'measure purpose kind mode thresh scale geoid'.split()
    xs = xs.astype({x: CAT for x in catCols} | D(access=F32))
    xs = xs[catCols + ['access']]
    xs.to_parquet(mkfile(outpath))
    return xs

# xs = get_access('base/msa/birmingham', overwrite=0); xs # 58s

### 2.3.1. Indiana (GDM)

In [14]:
xsIN = get_access('base/indiana', ttIN, overwrite=0).disp() # 3m18s

3,496,399 rows x 8 cols; Memory: 40.7 MiB


Unnamed: 0,measure,purpose,kind,mode,thresh,scale,geoid,access
,<category>,<category>,<category>,<category>,<category>,<category>,<category>,<float32>
0.0,Contour,Work,All,Bike,15,BG,180010301001,90.0


### 2.3.2. Top MSAs (OSRM)

In [15]:
def get_access_msa(msa):
    times = get_tt_msa(msa)
    return get_access(f'base/msa/{msa}', times)

msas = os.listdir(f'{DATA}/od_cost/osrm/msa')
# %time pqdm(msas, get_access_msa, n_jobs=25); # 2h30m2s

## Export for publication

In [16]:
def export_access_msa1(path):
    df = pd.read_parquet(path, columns=[
        'measure', 'kind', 'mode', 'thresh', 'geoid', 'access'])
    df.measure = df.measure.astype(str).map(
        D(Contour='C', Gravity='G', Competition='F'))
    df.measure += (df['mode'].str[0] + df['thresh'].astype(str))
    df.kind = df.kind.astype(str).map({v: k for k, v in D(
        J0='All', JE='Low edu', JR='POC', JW='Low wage',
        P0='Total', PE='Education', PG='Groceries',
        PM='Medical', PS='Social Support').items()})
    combs = [x + y for x, y in it.product(
        'CGF', 'D15 D30 D45 D60 W15 W30 B15 B30'.split())]
    df = df[df.measure.isin(combs)]
    df.measure += df.kind
    df = df.pivot_table('access', 'geoid', 'measure')
    df = df.fillna(0).astype(F32)
    return df

# x = export_access_msa1(DATA / 'access/base/msa/austin.parquet'); x

In [17]:
# 39s
# paths = list((DATA / 'access/base/msa').glob('*.parquet'))
# xsMSApub = pd.concat(pqdm(paths, export_access_msa1, n_jobs=40))
# xsMSApub = (zoneMSA.drop(columns='aland').merge(xsMSApub, on='geoid')
#             .sort_values(['scale', 'geoid']).reset_index(drop=1)
#             .rename(columns=D(geoid='fips', msa='city'))
#             .rename(columns=str.upper)).disp()
# xsMSApub.to_csv(DATA / 'access/export_us_msa_access.csv', index=False)
xsMSApub = pd.read_csv(DATA / 'access/export_us_msa_access.csv').disp()

140,890 rows x 220 cols; Memory: 258.0 MiB


Unnamed: 0,FIPS,SCALE,STATE,CITY,CB15J0,CB15JE,CB15JR,CB15JW,CB15P0,CB15PE,...,GW15PS,GW30J0,GW30JE,GW30JR,GW30JW,GW30P0,GW30PE,GW30PG,GW30PM,GW30PS
,<int64>,<object>,<object>,<object>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,...,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>
0.0,40130101022,BG,AZ,Phoenix,203.0,72.0,8.0,21.0,6.0,0.0,...,0.0,203.0,72.0,8.0,21.0,6.0,0.0,0.0,0.0,0.0


## 2.4. Maximum accessibility

In [18]:
maxSpeeds = {k: v * MPH2MPS for k, v in D(
    Drive=70, Walk=3.1, Bike=16, Transit=20).items()}
maxTime = 60 * 60 # maximum allowed time (seconds)

In [19]:
def get_max_tt(query, maxSpeeds=maxSpeeds, maxTime=maxTime):
    filters = [(k, '==', v) for k, v in query.items()]
    zones = gpd.read_parquet(
        DATA / 'zones/zones_2020.parquet',
        filters=filters, columns=['geoid', 'scale', 'geometry'])
    xy = zones.centroid.get_coordinates() # columns=['x','y']
    allZones = pd.concat([zones[['geoid', 'scale']], xy], axis=1)
    res = []
    for scale, zones in allZones.groupby('scale'):
        zones = zones.reset_index(drop=1)
        tree = KDTree(zones[['x', 'y']])
        for mode, maxSpeed in maxSpeeds.items():
            maxDist = maxTime * maxSpeed
            src, trg = [zones.loc[idx].reset_index(drop=1) for idx in 
                        tree.query_pairs(maxDist, output_type='ndarray').T]
            dist = ((src.x - trg.x) ** 2 + (src.y - trg.y) ** 2) ** 0.5
            time = dist / maxSpeed
            src, trg = src.geoid.values, trg.geoid.values
            nodes = list(set(src) | set(trg))
            times = pd.concat([Pdf(D(src=nodes, trg=nodes, time=0)),
                               Pdf(D(src=src, trg=trg, time=time)),
                               Pdf(D(src=trg, trg=src, time=time))])
            res.append(times.assign(scale=scale, mode=mode))
    times = pd.concat(res).reset_index(drop=1).astype(D(time=F32))
    times.time = (times.time / 60).astype(F32) # convert to minutes
    catCols = ['scale', 'mode', 'src', 'trg']
    times = times[catCols + ['time']].astype({x: CAT for x in catCols})
    return times

ttMaxIN = get_max_tt(D(state='IN')).disp() # 19s

9,821,504 rows x 5 cols; Memory: 95.1 MiB


Unnamed: 0,scale,mode,src,trg,time
,<category>,<category>,<category>,<category>,<float32>
0.0,BG,Drive,181270511021,181270511021,0.0


### 2.4.1. Indiana

In [20]:
xsMaxIN = get_access('max/indiana', ttMaxIN, overwrite=0).disp() # 6m19s

3,496,399 rows x 8 cols; Memory: 40.7 MiB


Unnamed: 0,measure,purpose,kind,mode,thresh,scale,geoid,access
,<category>,<category>,<category>,<category>,<category>,<category>,<category>,<float32>
0.0,Contour,Work,All,Bike,15,BG,180010301001,90.0


### 2.4.2. Top MSAs

In [21]:
# def get_max_access_msa(msa):
#     name = msa.replace('-', ' ').title()
#     times = get_max_tt(D(msa=name))
#     return get_access(f'max/msa/{msa}', times)

# msas = tqdm(sorted(os.listdir(f'{DATA}/od_cost/osrm/msa')))
# # x = get_max_access_msa('austin'); x # 2m39s
# print(f'Started: {dt.datetime.now()}')
# for msa in tqdm(msas):
#     msas.set_description(msa)
#     try:
#         get_max_access_msa(msa)
#     except Exception as e:
#         print(f'ERROR in {msa}: {e}')
# print(f'Ended: {dt.datetime.now()}')

## 2.5. Derived access

In [22]:
def get_derived_access(fname, overwrite=False):
    outpath = DATA / f'access/derived/{fname}.parquet'
    if (df := file_check(outpath, overwrite)) is not None: return df
    xs = pd.read_parquet(DATA / f'access/base/{fname}.parquet')
    xs.kind = xs.kind.astype(str) + xs.pop('purpose').astype(str).map(
        {'Work': ' jobs', 'Non-work': ' POIs'})
    grpCols = ['measure', 'mode', 'thresh', 'scale', 'geoid']
    xs = xs.pivot_table('access', grpCols, 'kind')
    xs = xs.fillna(0).reset_index().rename_axis(None, axis=1)
    xs = xs.rename(columns={'All jobs': 'Jobs', 'Total POIs': 'POIs'})
    xs['% Low-wage Jobs'] = xs.pop('Low wage jobs') / xs['Jobs'] * 100
    xs['% Jobs for Low-edu'] = xs.pop('Low edu jobs') / xs['Jobs'] * 100
    xs['% Jobs for POC'] = xs.pop('POC jobs') / xs['Jobs'] * 100
    xs['% Education Facilities'] = xs.pop('Education POIs') / xs['POIs'] * 100
    xs['% Grocery Stores'] = xs.pop('Groceries POIs') / xs['POIs'] * 100
    xs['% Medical Facilities'] = xs.pop('Medical POIs') / xs['POIs'] * 100
    xs['% Social Support'] = xs.pop('Social Support POIs') / xs['POIs'] * 100
    xs = xs.melt(grpCols, var_name='kind', value_name='access')
    xs = xs.astype({x: CAT for x in grpCols} | D(kind=CAT, access=F32))
    xs.to_parquet(mkfile(outpath))
    return xs

xsIN2 = get_derived_access('indiana').disp() # 7s

3,570,660 rows x 7 cols; Memory: 38.2 MiB


Unnamed: 0,measure,mode,thresh,scale,geoid,kind,access
,<category>,<category>,<category>,<category>,<category>,<category>,<float32>
0.0,Competition,Bike,15,BG,180010301001,Jobs,0.165441
