## Imports and setup

In [3]:
# Grab data from ACS with cenpy (don't need to run every time!)
# NOTE: this cell uses `pd`, `gpd`, `tqdm`, `acs` and `variables`, all of which are created
# in the imports/setup cell -- run that cell first on a fresh kernel.
# Variables:
#    - total population (B01003_001E)
#    - total white (B02008_001E)
#    - total black (B02009_001E)
#    - total asian (B02011_001E)
#    - total hispanic (B03001_001E)
#    - household income (B19001_001E)
#    - median house price (B25077_001E)
#    - total tenure (B25032_001E)
#    - total owner occupied (B25032_002E)
#    - total renter occupied (B25032_007E)
#    - median contract rent (B25058_001E)
#    - total in labor force (B23025_002E)
#    - total employed (civilian) (B23025_004E)
#    - total unemployed (civilian) (B23025_005E)

states = pd.read_csv('../data/states.csv')['State'].values

# Fetch one county-level frame per state and concatenate them in a single call.
# `DataFrame.append` was removed in pandas 2.0, and calling it inside the loop copied the
# whole accumulated frame on every iteration.
state_frames = [
    acs.from_state(state=state, variables=variables, level='county')
    for state in tqdm(states)
]
# An empty state list still yields an (empty) GeoDataFrame, as the append loop did.
countydata = pd.concat(state_frames) if state_frames else gpd.GeoDataFrame()

countydata.to_csv('../data/ACS_countydata.csv')

100%|██████████| 51/51 [19:43<00:00, 23.21s/it]


In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from scipy.linalg import norm

from tqdm import tqdm
from cenpy.products import ACS
from spint.gravity import Gravity, Attraction, Production

# Shared handle and constants for the rest of the notebook.
acs = ACS()  # cenpy ACS product object used for the census queries
LA_FIPS = '06037'  # LA County FIPS code

# ACS variable codes requested for every county; the labels repeat the ones listed in the
# ACS download cell.
variables = [
    'B01003_001E',  # total population
    'B02008_001E',  # total white
    'B02009_001E',  # total black
    'B02011_001E',  # total asian
    'B03001_001E',  # total hispanic
    'B19001_001E',  # household income
    'B25077_001E',  # median house price
    'B25032_001E',  # total tenure
    'B25032_002E',  # total owner occupied
    'B25032_007E',  # total renter occupied
    'B25058_001E',  # median contract rent
    'B23025_002E',  # total in labor force
    'B23025_004E',  # total employed (civilian)
    'B23025_005E',  # total unemployed (civilian)
]

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=RuntimeWarning)

## Load data

In [None]:
# Flows: read the inbound and outbound LA County tables, keeping only the FIPS, total and
# margin-of-error columns.
flow_columns = ['State/County FIPS', 'Total', 'Margin of Error (+/-)']

# Apostrophes are stripped from every string cell, then the columns get direction-specific
# names (origin* / *_in for inflows, dest* / *_out for outflows).
inflows_raw = (
    pd.read_csv('../data/LACounty_ACS_2014_2018_All_IN.csv', usecols=flow_columns)
    .replace("'", "", regex=True)
    .rename(columns={'State/County FIPS': 'originFIPS', 'Total': 'total_in', 'Margin of Error (+/-)': 'inMOE'})
)
outflows_raw = (
    pd.read_csv('../data/LACounty_ACS_2014_2018_All_OUT.csv', usecols=flow_columns)
    .replace("'", "", regex=True)
    .rename(columns={'State/County FIPS': 'destFIPS', 'Total': 'total_out', 'Margin of Error (+/-)': 'outMOE'})
)

In [None]:
# County-level demographic data (and convert covariate columns)
# Blank cells are replaced with 0 so the float conversion below does not fail on ''.
# NOTE(review): this treats missing ACS values as zero -- confirm that is intended.
countydata = gpd.read_file('../data/ACS_countydata.csv', GEOM_POSSIBLE_NAMES="geometry", KEEP_GEOM_COLUMNS="NO").replace('', 0)

# Convert exactly the covariates in `variables`: these are the columns the ACS download
# cell requests and writes to this CSV, and the ones later handed to the models as
# `data[variables].values`. Converting a separate hard-coded list (B00002_001E,
# B25076_001E, B25078_001E, ...) referenced columns outside `variables` and left most of
# the model covariates unconverted.
countydata[variables] = countydata[variables].astype(float)

In [None]:
# Ping ACS for data for LA county.
# The request must use `variables`: the merge cell builds the origin covariates from
# `la_data[variables]`, which raises a KeyError if any of those codes was not requested.
la_data = acs.from_county("Los Angeles County, CA", variables=variables, level='county')

## Rearrange data to prep for analysis

In [None]:
# Merge all the data into one big dataframe to make it simpler:
# county attributes, plus outflows (matched on destFIPS), plus inflows (matched on originFIPS).
# Inner joins keep only counties present in all three tables.
county_out = pd.merge(countydata, outflows_raw, how='inner', left_on='GEOID', right_on='destFIPS')
county_out_in = pd.merge(county_out, inflows_raw, how='inner', left_on='GEOID', right_on='originFIPS')
# NOTE(review): set_crs only labels the geometries as EPSG:3395, it does not reproject --
# confirm the coordinates stored in the CSV really are in that CRS.
data = county_out_in.set_crs(epsg=3395)

# Repeat the LA covariates once per merged row so that we account for the single location properly.
n_flows = data.shape[0]
la_covars = np.tile(la_data[variables].values, (n_flows, 1))

In [None]:
# costs are all distances between LA and the out or in destination
# (straight-line distance between centroids, in the units of the geometry coordinates).
county_centroids = data.centroid
coords = np.column_stack((county_centroids.x.values, county_centroids.y.values))  # one (x, y) row per county

la_centroid = la_data.centroid
la_coords = np.array([la_centroid.x[0], la_centroid.y[0]])

# Row-wise Euclidean norm of the offset from the LA centroid.
cost = norm(la_coords - coords, axis=1)

## Calibrate model 

In [None]:
# Fit a spint Gravity model to the outflows: LA covariates as origin attributes, county
# covariates as destination attributes, centroid distance as cost with the 'exp' cost
# function, and no constant term.
# NOTE(review): la_covars repeats the same LA row for every flow, so the origin columns do
# not vary across observations -- confirm the origin coefficients are identifiable.
model = Gravity(
    flows=data['total_out'].values,
    o_vars=la_covars,
    d_vars=data[variables].values,
    cost=cost,
    cost_func='exp',
    constant=False,
)

In [None]:
# Coefficient labels: origin attrs, dest attrs, then distance. There is no intercept row
# because the model is built with constant=False.
# NOTE(review): presumably this matches the order of model.params -- confirm against spint.
origin_names = [code + '_o' for code in variables]
dest_names = [code + '_d' for code in variables]
paramnames = origin_names + dest_names + ['distance']

# Summary table of the estimates; left as the last expression so the notebook renders it.
pd.DataFrame(
    data={
        'paramname': paramnames,
        'paramval': model.params,
        'SE': model.std_err,
        'tvalue': model.tvalues,
        'pvalue': model.pvalues,
    }
)

In [None]:
# Goodness of fit of the gravity model: display its SRMSE attribute
# (presumably spint's standardized root mean square error -- confirm in the spint docs).
model.SRMSE

In [None]:
# Singly-constrained models for the LA flows.
# `total_out` holds flows leaving LA and `total_in` holds flows arriving in LA, so the
# constrained side of every flow is LA itself. Passing the other county's FIPS
# (`originFIPS` / `destFIPS`, which both equal GEOID after the merge) would give every
# observation its own category and saturate the model.
# NOTE(review): with a single origin/destination the fixed effect collapses to one
# constant -- confirm this is the intended specification.
la_ids = np.repeat(LA_FIPS, len(data))  # LA identifier, one entry per flow

# Origin-constrained model of outflows: destination attributes are the county covariates.
out_model = Production(flows=data['total_out'].values, origins=la_ids, d_vars=data[variables].values, cost=cost, cost_func='exp')
# Destination-constrained model of inflows: origin attributes are the county covariates.
in_model = Attraction(flows=data['total_in'].values, destinations=la_ids, o_vars=data[variables].values, cost=cost, cost_func='exp')