In [10]:
import sys
import math
from pathlib import Path


import statsmodels.formula.api as sm
import matplotlib.pyplot as plt
import geopandas as gpd
import pandas as pd

# Parent of the current working directory. It is put on sys.path so that
# modules stored there (e.g. arcgdfconvertor, imported just below) resolve.
DIR = Path('..')
# Guarded so that re-running this cell does not keep appending the same
# entry to sys.path.
if str(DIR) not in sys.path:
    sys.path.append(str(DIR))

import arcgdfconvertor as agc

# Input and output folders, both resolved relative to DIR.
# DATA_DIR is where the cells below read the OD summaries and Cities.gdb from.
# NOTE(review): OUT_DIR is not used in the cells visible here — confirm it is
# still needed further down.
DATA_DIR = DIR/'data/'
OUT_DIR = DIR/'output/'


# Enable IPython's autoreload extension in mode 2, which reloads imported
# modules before each cell runs so edits to local .py files are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Distance Decay (Hex)

In [51]:
# For every city, regress log accessibility on log income, log population and
# log jobs, once per travel-time threshold, and collect the coefficients and
# p-values of all fits into one table per city.
#
#   log(A_i) = b0 + b1*log(I_i) + b2*log(P_i) + b3*log(J_i) + e_i
model_tables = []
cities = ['Auckland', 'Brisbane', 'Perth', 'Vancouver']
# Thresholds 20, 25, ..., 60; each one selects a 'dist_decay_<threshold>' column.
travel_times = range(20, 65, 5)

for city in cities:
    # Distance-decay summary for this city. 'from' is read as text because it
    # is the right-hand key of the merge below.
    summary_csv = DATA_DIR/'OD_Summaries'/"{0}_Hex_dist_decay_summary.csv".format(city)
    access_df = pd.read_csv(summary_csv, index_col=0, dtype={'from': str})

    # Hex polygons with the census attributes, loaded from the geodatabase.
    hex_layer = DATA_DIR/'Cities.gdb'/'{0}_Hex_Polygon'.format(city)
    census_gdf = agc.fc_to_gdf(str(hex_layer))

    # Keep only hexes where income, population and jobs are all above 3;
    # this also guarantees every value passed to math.log below is positive.
    has_data = (
        (census_gdf['Median_Income'] > 3)
        & (census_gdf['POP'] > 3)
        & (census_gdf['POW'] > 3)
    )
    census_gdf = census_gdf[has_data].copy()

    # NOTE(review): POP and POW are halved before taking the log; the reason
    # is not visible in this cell — confirm it is intended.
    log_inputs = {
        'log_income': census_gdf['Median_Income'],
        'log_pop': census_gdf['POP']/2,
        'log_job': census_gdf['POW']/2,
    }
    for column, values in log_inputs.items():
        census_gdf[column] = values.map(math.log)

    # The outer join keeps rows that exist on only one side, with NaN in the
    # columns coming from the other side.
    # NOTE(review): presumably those rows are dropped by the formula API's
    # missing-value handling when the models are fitted — confirm.
    access_gdf = census_gdf.merge(
        access_df, left_on='Unique_ID', right_on='from', how='outer')

    # One fitted model per threshold, keyed by the threshold value.
    # math.log raises ValueError for values <= 0, so the accessibility
    # columns must be strictly positive wherever they are not NaN.
    reg_model = {}
    for cutoff in travel_times:
        access_gdf['log_access'] = access_gdf['dist_decay_{0}'.format(cutoff)].map(math.log)
        reg_model[cutoff] = sm.ols(
            formula="log_access ~ log_income + log_pop + log_job",
            data=access_gdf,
        ).fit()

    # Stack the per-threshold results: one row per threshold, coefficients on
    # the left ('_coef') and the matching p-values on the right ('_pValue').
    fits = list(reg_model.values())
    coeff_df = pd.concat(
        [fit.params.to_frame().T for fit in fits], ignore_index=True)
    coeff_df['travel_time'] = pd.Series(list(travel_times))
    pvalues_df = pd.concat(
        [fit.pvalues.to_frame().T for fit in fits], ignore_index=True)
    model_table = coeff_df.join(pvalues_df, lsuffix='_coef', rsuffix='_pValue')
    model_table['city'] = city
    model_tables.append(model_table)

# Displayed as the cell output: all cities stacked under a fresh 0..n-1 index.
pd.concat(model_tables, ignore_index=True)

Unnamed: 0,Intercept_coef,log_income_coef,log_pop_coef,log_job_coef,travel_time,Intercept_pValue,log_income_pValue,log_pop_pValue,log_job_pValue,city
0,1.396023,0.387489,0.065817,0.464618,20,0.5037329,0.02878084,0.08172161,1.300586e-26,Auckland
1,1.970917,0.40473,0.065162,0.440815,25,0.3377055,0.02038582,0.07995008,4.0968710000000003e-25,Auckland
2,2.552349,0.417308,0.065943,0.409018,30,0.2067268,0.01501084,0.07142251,6.420760000000001e-23,Auckland
3,3.421803,0.402524,0.066707,0.369288,35,0.08130103,0.01557142,0.06013986,1.991008e-20,Auckland
4,4.695728,0.350984,0.064397,0.325258,40,0.0119259,0.02613103,0.05585227,4.180953e-18,Auckland
5,6.235728,0.27376,0.05846,0.280881,45,0.0003246652,0.0601245,0.06023776,3.212915e-16,Auckland
6,7.799451,0.190415,0.050875,0.238049,50,6.995588e-07,0.1447855,0.06847212,9.14348e-15,Auckland
7,9.149729,0.120291,0.043653,0.197867,55,5.150799e-11,0.2910146,0.07336548,1.263648e-13,Auckland
8,10.157894,0.073861,0.037463,0.161629,60,5.615174e-17,0.4462211,0.0711436,9.891302e-13,Auckland
9,1.359334,0.270949,0.180065,0.668488,20,0.1737239,0.0412113,8.718298e-08,5.440689e-67,Brisbane
