# 1. Setup

In [None]:
from pqdm.processes import pqdm
from scipy.spatial import KDTree

from utils import *

: 

## 1.1. Parameters

In [None]:
# travel time thresholds (in minutes)
thresholds = (15, 30, 45, 60, 90)

: 

In [None]:
# max. hypothetical speeds in mi/h
max_speeds = D(drive=60, bike=16, walk=4)

: 

# 2. Prepare data

## 2.1. General

### 2.1.1. States & counties

In [None]:
state_list = (pd.read_csv(DATA / 'zones/state_codes.csv')
              .sort_values('fips').head(51).pipe(lambda df: df[
                  ~df['name'].isin(['Alaska', 'Hawaii'])])).disp()

: 

In [None]:
us_counties = (gpd.read_parquet(DATA / 'zones/us/county_trim.parquet')
               .rename(columns=D(geoid='cnty', name='county'))
               [['cnty', 'county', 'geometry']].set_crs(CRS_M)).disp()

: 

In [None]:
us_states = gpd.read_parquet(DATA / 'zones/us/state_trim.parquet').set_crs(CRS_M).disp(0)

: 

### 2.1.2. Impedance function

In [None]:
df = pd.read_csv(DATA / 'impedance/fitted-params.csv')
df = df.query('mode != "Transit" & measure == "Duration" & parameter != "r2"')
df = df.pivot_table('value', ['mode', 'purpose'], 'parameter').reset_index()
df['mode'] = df['mode'].str.lower()
df['purpose'] = (df['purpose'].str.lower()
                 .str.replace('social/recreational', 'leisure')
                 .str.replace('other services', 'services'))
imped_params = df.rename_axis(None, axis=1).disp()

: 

### 2.1.3. Opportunities

In [None]:
opport = pd.read_parquet(DATA / 'opport/opportunities.parquet').disp()

: 

In [None]:
opport.groupby(['kind','is_work']).size()

: 

In [None]:
def get_access(level, region, mode, thresh, year=2020, max_access=False,
               opport=opport, max_speeds=max_speeds, params=imped_params):
    cat_cols = 'region mode thresh purpose kind geoid'.split()
    opport = (opport.groupby(['kind', 'is_work', 'geoid']).sum()
              .reset_index().astype(D(kind=str)).query('opport > 0'))
    try:
        if max_access:
            zones = gpd.read_parquet(
                DATA / f'zones/{level}_2020.parquet', filters=[
                    ('scale','==','bg'), ('region','==',region)]).to_crs(CRS_M)
            xy = zones.centroid.get_coordinates() # columns=['x','y']
            zones = pd.concat([zones[['geoid', 'geometry']], xy], axis=1)
            max_dist = (thresh * 60) * (max_speeds[mode] * MPH2MPS)
            tree = KDTree(zones[['x', 'y']])
            src, trg = [zones.loc[idx].reset_index(drop=1) for idx in 
                        tree.query_pairs(max_dist, output_type='ndarray').T]
            dist = ((src.x - trg.x) ** 2 + (src.y - trg.y) ** 2) ** 0.5
            time = (dist * M2MI / max_speeds[mode]) * 60 # time in minutes
            src, trg = src.geoid.values, trg.geoid.values
            nodes = list(set(src) | set(trg))
            time = (pd.concat([Pdf(D(src=nodes, trg=nodes, time=0)),
                                Pdf(D(src=src, trg=trg, time=time)),
                                Pdf(D(src=trg, trg=src, time=time))])
                    .astype(D(src=CAT, trg=CAT)))
            time = time.set_index(['src', 'trg'])['time'] / 60
        else:
            tt_file = DATA / f'od_cost/osrm/{level}/{region}/bg_{mode}_{year}.parquet'
            cols = D(src_geoid='src', trg_geoid='trg', duration='time')
            tt = pd.read_parquet(tt_file, columns=cols.keys()).rename(columns=cols)
            tt = tt.set_index(['src', 'trg'])['time'] / 60 # travel time in minutes
            time = tt[tt <= float(thresh)] # travel times for filtered OD pairs
        dfs = []
        for (is_work, kind), df in opport.groupby(['is_work', 'kind']):
            coef = params.query(f'mode=="{mode}"').set_index('purpose')
            row = coef.loc['work' if is_work else kind]
            od = np.exp(-row.alpha * time ** row.beta).rename('weight').reset_index()
            od = od.merge(df, left_on='trg', right_on='geoid')
            od['access'] = od['opport'] * od['weight']
            xs = od.groupby(['src', 'kind'])['access'].sum().reset_index()
            xs = xs.rename(columns=D(src='geoid'))
            xs = xs.assign(kind=kind, purpose='work' if is_work else 'nonwork')
            dfs.append(xs)
        xs = pd.concat(dfs).assign(region=region, mode=mode, thresh=str(thresh))
        xs = xs.astype(D(access=np.float32) | {x: CAT for x in cat_cols})
        xs = xs[cat_cols + ['access']]
    except Exception as e:
        print(type(e), e)
        xs = Pdf([], columns=cat_cols + ['access'])
    return xs

# x = get_access('state', 'arkansas', 'drive', 30).disp()
x = get_access('msa', 'austin', 'drive', 30).disp()
# xmax = get_access('msa', 'austin', 'drive', 60, max_access=True).disp()

: 

## 2.2. Compute accessibility
This only requires total jobs at the destination, not the OD flows.

In [13]:
def get_access_all(level, max_access=False, year=2020, thresholds=thresholds, 
                   njobs=16, overwrite=False):
    max_label = 'max_' if max_access else ''
    outpath = DATA / f'access/{max_label}{level}_{year}.parquet'
    if outpath.exists() and not overwrite:
        return pd.read_parquet(outpath)
    files = sorted(glob(f'{DATA}/od_cost/osrm/{level}/*/bg_*_{year}.parquet'))
    cases = sum([[D(level=level, region=f.split('/')[-2], thresh=t, year=year,
                    mode=f.split('/')[-1].split('_')[1], max_access=max_access)
                  for t in thresholds] for f in files], [])
    np.random.seed(1234)
    np.random.shuffle(cases) # shuffle to hopefully reduce parallel load
    print(len(cases))
    xs = pqdm(cases, get_access, n_jobs=njobs, argument_type='kwargs')
    cat_cols = 'region purpose kind mode thresh geoid'.split()
    xs = pd.concat(xs).reset_index(drop=1).astype(
        {x: CAT for x in cat_cols})[cat_cols + ['access']]
    xs.to_parquet(mkfile(outpath))
    return xs

xs_msa = get_access_all('msa', thresholds=thresholds, overwrite=0).disp() # t=17:55
# xs_max_msa = get_access_all('msa', max_access=True, thresholds=thresholds, overwrite=0).disp(0) # t=27:38
xs_st = get_access_all('state', thresholds=[60], overwrite=0).disp(0) # t=6:17
# xs_max_st = get_access_all('state', max_access=True, thresholds=[60], overwrite=0).disp(0) # t=15:14

12,956,475 rows x 7 cols; Memory: 172.8 MiB


Unnamed: 0,region,purpose,kind,mode,thresh,geoid,access
,<category>,<category>,<category>,<category>,<category>,<category>,<float32>
0.0,seattle,nonwork,leisure,bike,45,530330001011,363.534302


1,663,319 rows x 7 cols; Memory: 44.3 MiB


Unnamed: 0,region,purpose,kind,mode,thresh,geoid,access
,<category>,<category>,<category>,<category>,<category>,<category>,<float32>


## 2.3. Export data

In [14]:
def export_access(xs_st=xs_st, xs_msa=xs_msa, thresh=('30', '60'),
                  kinds=D(total='work', overall='poi',
                          shopping='shop', services='serv'),
                  counties=us_counties, state_codes=state_list,
                  overwrite=False):
    outpath = Path('../us_bg_access_data.csv')
    if outpath.exists() and not overwrite:
        return pd.read_csv(outpath)
    bg = xs_st[['geoid']].drop_duplicates()
    msa = xs_msa[['geoid', 'region']].drop_duplicates()
    msa['msa'] = msa.region.str.replace('-', ' ').str.title()
    bg = bg.merge(msa[['geoid', 'msa']], 'left')
    bg['cnty'], bg['fips'] = bg.geoid.str[:5], bg.geoid.str[:2].astype(int)
    bg = bg.merge(counties[['cnty', 'county']])
    bg = bg.merge(state_codes, on='fips').drop(columns='fips')
    bg = bg.rename(columns=D(code='state', geoid='fips'))
    bg = bg[['fips', 'state', 'county', 'msa']]
    xs = pd.concat([xs_msa.query('mode != "drive"'), xs_st])
    xs = xs.drop(columns=['purpose', 'region'])
    xs = xs[(xs.thresh.isin(thresh)) & (xs.kind.isin(kinds.keys()))]
    xs['purpose'] = xs.pop('kind').map(kinds).astype(str)
    xs = xs.pivot_table('access', 'geoid', ['mode', 'purpose', 'thresh'])
    xs.columns = map('_'.join, xs.columns)
    xs.columns = ['_'.join(x) for x in xs.columns]
    xs = bg.merge(xs, left_on='fips', right_index=True)
    xs = xs.sort_values('fips').reset_index(drop=1)
    xs.to_csv(mkfile(outpath), index=False)
    return xs

xs_export = export_access(overwrite=0).disp() # t=0:09

237,617 rows x 24 cols; Memory: 77.2 MiB


Unnamed: 0,fips,state,county,msa,bike_poi_30,bike_poi_60,bike_serv_30,bike_serv_60,bike_shop_30,bike_shop_60,...,drive_shop_60,drive_work_60,walk_poi_30,walk_poi_60,walk_serv_30,walk_serv_60,walk_shop_30,walk_shop_60,walk_work_30,walk_work_60
,<int64>,<object>,<object>,<object>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,...,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>
0.0,10010201001,AL,Autauga,,,,,,,,...,53.220245,58324.05,,,,,,,,
