In [1]:
import sys
import math
from pathlib import Path


import statsmodels.formula.api as sm
import matplotlib.pyplot as plt
import geopandas as gpd
import pandas as pd
import numpy as np

# Parent of the current working directory; every other path in this notebook is
# built relative to it.
DIR = Path('..')
# Put DIR on the module search path. This must happen before the local import
# below, which presumably lives in DIR.
sys.path.append(str(DIR))

# Project-local helper module (not a published package); the notebook calls
# agc.fc_to_gdf(...) from it to load feature classes.
import arcgdfconvertor as agc

# Input and output folders, relative to DIR.
DATA_DIR = DIR/'data/'
OUT_DIR = DIR/'output/'


# IPython autoreload in mode 2: re-import modules before each cell executes, so
# edits to local modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [30]:
# Cities to analyse. Each name is substituted verbatim into the input file and
# feature-class names, so spelling and capitalisation must match the data on disk.
cities = ['Auckland', 'Brisbane', 'Perth', 'Vancouver']

In [3]:
# Suffixes of the `dist_decay_<t>` columns processed below; they are also written
# to the `travel_time` output column (presumably minutes - TODO confirm).
TRAVEL_TIMES = range(20, 65, 5)
# A hexagon counts as transit deprived when its accessibility, expressed as a
# percentage of the city's total POW, is at or below this value.
DEPRIVED_MAX_PERCENT = 1
# Hexagons are kept only if Median_Income, POP and POW are all above this value.
MIN_CENSUS_VALUE = 3

access_pop_percentiles = []   # one frame per (city, travel time), concatenated at the end
transit_deprived_dict = {}    # city -> list of deprived-hexagon percentages, in TRAVEL_TIMES order

for city in cities:
    # Per-hexagon accessibility summary. 'from' is forced to str because it is
    # the join key against 'Unique_ID' below.
    access_df = pd.read_csv(
        DATA_DIR/'OD_Summaries'/"{0}_Hex_dist_decay_summary.csv".format(city),
        index_col=0, dtype={'from': str})
    census_gdf = agc.fc_to_gdf(
        str(DATA_DIR/'Cities.gdb'/'{0}_Hex_Polygon'.format(city)))

    # Drop hexagons with missing/negligible income, population or jobs.
    valid = (
        (census_gdf['Median_Income'] > MIN_CENSUS_VALUE)
        & (census_gdf['POP'] > MIN_CENSUS_VALUE)
        & (census_gdf['POW'] > MIN_CENSUS_VALUE)
    )
    census_gdf = census_gdf[valid].copy()

    # Inner join: only hexagons present in both sources survive.
    access_gdf = census_gdf.merge(
        access_df,
        left_on='Unique_ID',
        right_on='from',
        how='inner')

    total_hex = access_gdf.shape[0]
    if total_hex == 0:
        # Without this guard the percentage below fails with a bare
        # ZeroDivisionError that does not say which city or why.
        raise ValueError(
            "No hexagons left for {0} after filtering and joining the census "
            "data to the accessibility summary; check that 'Unique_ID' and "
            "'from' hold matching values.".format(city))

    # Both totals are strictly positive here: at least one row exists and every
    # row passed the POP/POW > MIN_CENSUS_VALUE filter.
    total_jobs = access_gdf['POW'].sum()
    total_pop = access_gdf['POP'].sum()

    decay_cols = {t: 'dist_decay_{0}'.format(t) for t in TRAVEL_TIMES}

    # Calculate transit deprivation.
    # TODO: the original author left a note here to "check the Brisbane glitch"
    # (a disabled export of access_gdf for inspection); still unresolved.
    transit_deprived_list = []
    for travel_time in TRAVEL_TIMES:
        col = decay_cols[travel_time]
        # Rescale in place to a percentage of the city's total POW.
        access_gdf[col] = access_gdf[col]/total_jobs*100

        deprived_hex = int((access_gdf[col] <= DEPRIVED_MAX_PERCENT).sum())
        transit_deprived_list.append(deprived_hex/total_hex*100)

    transit_deprived_dict[city] = transit_deprived_list

    # Calculate minimum transit accessibility: cumulative population share by
    # whole-percent accessibility bin.
    for travel_time in TRAVEL_TIMES:
        # astype(int) truncates the percentage to its integer part.
        access_gdf['bins'] = access_gdf[decay_cols[travel_time]].astype(int)
        # groupby sorts by key, so rows run from the lowest bin upwards and the
        # cumulative sum accumulates from least to most accessible.
        pop_by_bin = access_gdf.groupby('bins')['POP'].sum().reset_index()
        # NOTE: despite the column name this is a fraction in [0, 1], not a percentage.
        pop_by_bin['Percentage'] = pop_by_bin['POP']/total_pop
        pop_by_bin['cumsum'] = pop_by_bin['Percentage'].cumsum()
        pop_by_bin['city'] = city
        pop_by_bin['travel_time'] = travel_time
        access_pop_percentiles.append(pop_by_bin)

# Long table: one row per (city, travel_time, bin). Per-frame indices are kept,
# so index values repeat across frames.
min_access_df = pd.concat(access_pop_percentiles)
# Wide table: one column per city, one row per travel time in TRAVEL_TIMES order
# (default positional index 0..8).
deprivation_df = pd.DataFrame(transit_deprived_dict)
