In [None]:
import pandas as pd
import geopandas as gpd
import numpy as np

In [None]:
import math
import matplotlib.pyplot as plt
from scipy import stats
from functools import reduce

In [None]:
# Input files extracted from GIS: one OD (origin-destination) matrix of bus stops
# and POIs per distance buffer, identified by the numeric suffix, plus the trip table.
fp_dest100, fp_dest200, fp_dest300 = "OD_allPOI_100.csv", "OD_allPOI_200.csv", "OD_allPOI_300.csv"
fp_dest400, fp_dest500, fp_dest600 = "OD_allPOI_400.csv", "OD_allPOI_500.csv", "OD_allPOI_600.csv"
fp_trips = "trips.csv"

In [None]:
# Load the six per-buffer OD tables in ascending buffer order, then the trip table.
(df_DestBus100, df_DestBus200, df_DestBus300,
 df_DestBus400, df_DestBus500, df_DestBus600) = [
    pd.read_csv(od_path)
    for od_path in (fp_dest100, fp_dest200, fp_dest300, fp_dest400, fp_dest500, fp_dest600)
]
df_trips = pd.read_csv(fp_trips)

In [None]:
# Store the bus stop id as a string in every OD table and in the trip table,
# so that all tables later merged on dest_busid use the same key dtype.
for _table in (df_DestBus100, df_DestBus200, df_DestBus300,
               df_DestBus400, df_DestBus500, df_DestBus600, df_trips):
    _table['dest_busid'] = _table['dest_busid'].astype(str)

In [None]:
df_DestBus600.info()

In [None]:
#assign bus stops to their planning area
bus_stop = gpd.read_file('busStop_location_svy.shp')
planning_area = gpd.read_file('planning_area.shp')
busStop_area = gpd.sjoin(bus_stop, planning_area, op='within')
len(busStop_area) == len(bus_stop)  

In [None]:
busStop_area = busStop_area[['dest_busid','PLN_AREA_N']].copy().rename(columns={'PLN_AREA_N':'planning_area'})

In [None]:
#identify bus stop interchanges
interchange = pd.read_csv('interchanges_trips.csv')
interchange = interchange[['dest_busid','Has_Interchange']].copy()
interchange.dest_busid = interchange.dest_busid.astype(str)
busStop_area.dest_busid = busStop_area.dest_busid.astype(str)
busStop_area_xInterchange = pd.merge(busStop_area, interchange, on='dest_busid', how='left')
busStop_area_xInterchange = busStop_area_xInterchange.fillna(0)
busStop_area_xInterchange.Has_Interchange = busStop_area_xInterchange.Has_Interchange.astype(str)

In [None]:
#busStop_area_xInterchange.info()
#busStop_area_xInterchange[busStop_area_xInterchange['Has_Interchange']==1]
len(busStop_area.planning_area.unique())

In [None]:
def return_planning_areaXinterchange():
    '''
    Accessor for the module-level bus stop lookup table (planning area plus interchange flag).
    Output: the busStop_area_xInterchange dataframe itself, not a copy
    '''
    lookup_table = busStop_area_xInterchange
    return lookup_table

In [None]:
# Weight applied to each POI category when computing the overall access score (the six
# weights add up to 1). For an unweighted measurement set every weight to 1.
poi_weight = dict(
    child_care=0.07,
    daily_needs=0.27,
    culture=0.07,
    sports=0.28,
    health_care=0.17,
    social=0.14,
)


In [None]:
df_trips.info()

In [None]:
# Decay rate b of f(x) = e^(-b*x), which gives f(0) = 1. b is chosen so that f falls to 0.01
# at the maximum distance of 600, and is rounded to 4 decimals.
beta = round(-np.log(0.01) / 600, 4)

In [None]:
def decay_distance (beta, distance):
    '''
    Apply the exponential distance-decay function f(x) = e^(-beta * x).

    Input: a beta value (decay rate) and a dataframe series containing distance values
    Output: a series with the same index holding the decayed value e^(-beta * distance)
    of every entry
    '''

    # vectorised over the whole series instead of calling math.exp once per element
    return np.exp(-beta * distance)

In [None]:
def categorize_poi (df, name = ''):
    '''
    returns one column per category of poi holding its access score with weight 1
    (the sum of decayed_distance over the POIs of that category), one row per bus stop
    columns are named '<poi_group>_access_score<name>'; a bus stop without any POI of a
    category gets NaN in that column
    input: dataframe df with columns [dest_busid, poi_group, decayed_distance]; string name
    '''
    per_group = df.groupby(['dest_busid','poi_group'])['decayed_distance'].sum()
    # no .squeeze() here: on a result with a single (bus stop, category) pair it collapses the
    # series to a scalar, which has no .unstack()
    return per_group.unstack().add_suffix('_access_score'+name).reset_index()

In [None]:
def dest_summary_table (beta, df_DestBus, df_trips, out_name):
    
    '''
    Generate the accessibility score table by first calculating the access scores for the bus stops and 
    their POI and then merging with the trips data

    Inputs (in call order)
    1. beta: beta value for the decay distance function
    2. df_DestBus: dataframe containing the bus stops (dest_busid), their POIs (poi), the POI category
       (poi_group) and the distance between them (Total_Leng). The caller's dataframe is not modified.
    3. df_trips: trip table dataframe with the exit bus stops (dest_busid) and the trip columns; it is
       the left table of every merge
    4. out_name: appended string name that identifies the distance buffer. E.g '100' to generate
       a table for 100m buffer

    Also uses the module-level poi_weight mapping and the helpers decay_distance, categorize_poi and
    return_planning_areaXinterchange.

    The function returns a dataframe containing the df_trips columns plus, per bus stop: number of pois
    (poi_<out_name>), average distance (avg_distance_<out_name>), weighted access score
    (access_score_<out_name>), one weight-1 access score column per poi category, and the planning
    area / interchange columns. Missing values are filled with 0.
    
    '''

    #rename to show that the length is the raw length from GIS in meters
    # rename() without inplace returns a new dataframe, so every column added below lands on
    # this working copy and the dataframe passed in by the caller stays as it was
    od = df_DestBus.rename(columns={'Total_Leng':'raw_length'})

    #there was infinity in the value and because of these two zero POI
    #print(od[od['raw_length']==0])

    #od = od[od['raw_length']!=0].copy()
    
    #map POI weights to the POI categories (categories missing from poi_weight map to NaN)
    od['poi_weights'] = od['poi_group'].map(poi_weight)
    
    #apply distance decay for an accessibility score
    od['decayed_distance'] = decay_distance(beta, od['raw_length'])
    
    #calculating POI access_score
    od['poi_access'] = od['decayed_distance'] * od['poi_weights']
    
    per_stop = od.groupby('dest_busid')

    #calculating overall access score for a bus stop for all poi
    dest_access = per_stop['poi_access'].sum().reset_index().rename(columns={'poi_access':'access_score'})
    
    #calculating access score for a bus stop for different category of poi
    dest_access_cat = categorize_poi (od, name = out_name)

    #count number of POIs per bus stop
    dest_poi = per_stop['poi'].count().reset_index()
    
    #get average length per bus stop
    dest_dist = per_stop['raw_length'].mean().reset_index().rename(columns={'raw_length':'avg_distance'})
    
    #get planning area and interchange
    planning_area = return_planning_areaXinterchange()
    
    # left-merge every aggregate onto the trip table, one after the other, on dest_busid
    dfs = [df_trips,dest_poi, dest_dist, dest_access, dest_access_cat, planning_area]
    dest_summary = reduce(lambda left,right: pd.merge(left,right,on='dest_busid', how='left'), dfs)

    dest_summary['dest_busid'] = dest_summary['dest_busid'].astype(str)

    # tag the buffer-specific columns with the buffer name so tables of different buffers can be combined
    dest_summary = dest_summary.rename(columns={
        'poi': 'poi'+ '_'+ out_name,
        'avg_distance': 'avg_distance'+ '_'+ out_name,
        'access_score': 'access_score'+ '_'+ out_name})

    # bus stops without any POI in this buffer have no aggregate rows; their NaN become 0
    dest_summary = dest_summary.fillna(0)
    
    return dest_summary

In [None]:
dest100_summary = dest_summary_table (beta, df_DestBus100, df_trips, '100')
dest200_summary = dest_summary_table (beta, df_DestBus200, df_trips, '200')
dest300_summary = dest_summary_table (beta, df_DestBus300, df_trips, '300')
dest400_summary = dest_summary_table (beta, df_DestBus400, df_trips, '400')
dest500_summary = dest_summary_table (beta, df_DestBus500, df_trips, '500')
dest600_summary = dest_summary_table (beta, df_DestBus600, df_trips, '600')

In [None]:
#remove 919 bus stops having no POI within 600m
dest600_summary = dest600_summary[dest600_summary['poi_600']!=0] 
dest600_summary.info()

In [None]:
#merge all the distance buffer access dataframes for better descriptive analysis
dfs = [dest600_summary,dest500_summary,dest400_summary,dest300_summary,dest200_summary,dest100_summary]
dest_summary = reduce(lambda left,right: pd.merge(left,right,on='dest_busid', how='left'), dfs)

In [None]:
dest_summary.columns

In [None]:
dest_summary.columns = ['dest_busid', 'TripsPerDest', 'TripsPerOrig', 'poi_600',
       'avg_distance_600', 'access_score_600', 'child_care_access_score600',
       'culture_access_score600', 'daily_needs_access_score600',
       'health_care_access_score600', 'social_access_score600',
       'sports_access_score600', 'planning_area', 'Has_Interchange',
       'TripsPerDest_y', 'TripsPerOrig_y', 'poi_500', 'avg_distance_500',
       'access_score_500', 'child_care_access_score500',
       'culture_access_score500', 'daily_needs_access_score500',
       'health_care_access_score500', 'social_access_score500',
       'sports_access_score500', 'planning_area_y', 'Has_Interchange_y',
       'TripsPerDest_x', 'TripsPerOrig_x', 'poi_400', 'avg_distance_400',
       'access_score_400', 'child_care_access_score400',
       'culture_access_score400', 'daily_needs_access_score400',
       'health_care_access_score400', 'social_access_score400',
       'sports_access_score400', 'planning_area_x', 'Has_Interchange_x',
       'TripsPerDest_y', 'TripsPerOrig_y', 'poi_300', 'avg_distance_300',
       'access_score_300', 'child_care_access_score300',
       'culture_access_score300', 'daily_needs_access_score300',
       'health_care_access_score300', 'social_access_score300',
       'sports_access_score300', 'planning_area_y', 'Has_Interchange_y',
       'TripsPerDest_x', 'TripsPerOrig_x', 'poi_200', 'avg_distance_200',
       'access_score_200', 'child_care_access_score200',
       'culture_access_score200', 'daily_needs_access_score200',
       'health_care_access_score200', 'social_access_score200',
       'sports_access_score200', 'planning_area_x', 'Has_Interchange_x',
       'TripsPerDest_y', 'TripsPerOrig_y', 'poi_100', 'avg_distance_100',
       'access_score_100', 'child_care_access_score100',
       'culture_access_score100', 'daily_needs_access_score100',
       'health_care_access_score100', 'social_access_score100',
       'sports_access_score100', 'planning_area_y', 'Has_Interchange_y']

In [None]:
dest_summary = dest_summary[['dest_busid', 'TripsPerDest','planning_area', 'Has_Interchange', 'poi_100',
       'avg_distance_100', 'access_score_100', 'child_care_access_score100',
       'culture_access_score100', 'daily_needs_access_score100',
       'health_care_access_score100', 'social_access_score100',
       'sports_access_score100', 'poi_200',
       'avg_distance_200', 'access_score_200', 'child_care_access_score200',
       'culture_access_score200', 'daily_needs_access_score200',
       'health_care_access_score200', 'social_access_score200',
       'sports_access_score200', 'poi_300',
       'avg_distance_300', 'access_score_300', 'child_care_access_score300',
       'culture_access_score300', 'daily_needs_access_score300',
       'health_care_access_score300', 'social_access_score300',
       'sports_access_score300','poi_400',
       'avg_distance_400', 'access_score_400', 'child_care_access_score400',
       'culture_access_score400', 'daily_needs_access_score400',
       'health_care_access_score400', 'social_access_score400',
       'sports_access_score400', 'poi_500',
       'avg_distance_500', 'access_score_500', 'child_care_access_score500',
       'culture_access_score500', 'daily_needs_access_score500',
       'health_care_access_score500', 'social_access_score500',
       'sports_access_score500', 'poi_600',
       'avg_distance_600', 'access_score_600', 'child_care_access_score600',
       'culture_access_score600', 'daily_needs_access_score600',
       'health_care_access_score600', 'social_access_score600',
       'sports_access_score600']].copy()

In [None]:
dest_summary.to_csv('access_scores_all.csv', index=False)

In [None]:
#return nan values back to prepare for counting of bus stop with at least 1 POI
# replace() returns a new dataframe, so the result must be assigned back to take effect.
# np.nan (rather than None) keeps the numeric columns numeric for describe()/var()/count().
# NOTE(review): this blanks every numeric 0 in the table, including any 0 in TripsPerDest -
# confirm that is intended.
dest_summary = dest_summary.replace(0, np.nan)

In [None]:
dest_summary.info()

In [None]:
df_access_scores = dest_summary[['dest_busid', 'TripsPerDest', 'access_score_100', 'access_score_200', 'access_score_300',  'access_score_400', 'access_score_500', 'access_score_600']].copy()

In [None]:
df_access_scores.describe()

In [None]:
df_access_scores.var()

In [None]:
#if we want to remove interchange bus stops
inter_merge = pd.merge(df_access_scores, interchange, on='dest_busid', how='left')
df_NoInter_Access = inter_merge[inter_merge['Has_Interchange'] !=1].copy()
df_NoInter_Access = df_NoInter_Access[['dest_busid', 'TripsPerdest', 'access_score_100', 'access_score_200', 'access_score_300',  'access_score_400', 'access_score_500', 'access_score_600']].copy()
#inter_merge[inter_merge['Has_Interchange'] !=1].describe()

In [None]:
df_NoInter_Access.describe()

In [None]:
df_NoInter_Access.var()

In [None]:
#save the access scores in cumulative format still containing nan values for use in NBR for case 1 y = a + Bx
for _scores, _out_path in (
        (df_access_scores, 'access_scores_cum_weight.csv'),
        (df_NoInter_Access, 'access_scores_cum_NoInterchange_weight.csv')):
    _scores.to_csv(_out_path, index=False)

In [None]:
#extract bus stops with no poi
#no_poi = test2[test2['all_poi_600']==0]
#no_poi.to_csv('busStops_no_POIs_weight.csv', index=False)

In [None]:
#get cumulative counts of Bus Stop with at least one poi
# POI counts of the six buffers, relabelled '100m' ... '600m'
_poi_labels = {'poi_' + _dist: _dist + 'm' for _dist in ('100', '200', '300', '400', '500', '600')}
poi_only = dest_summary[list(_poi_labels)].rename(columns=_poi_labels)


In [None]:
def _scores_by_buffer(column_prefix):
    '''
    Return a new dataframe with the six dest_summary columns '<column_prefix>100' ...
    '<column_prefix>600', relabelled '100m' ... '600m'.
    '''
    relabel = {column_prefix + distance: distance + 'm'
               for distance in ('100', '200', '300', '400', '500', '600')}
    return dest_summary[list(relabel)].rename(columns=relabel)


# one frame per POI category: its access score in every buffer, one row per bus stop
childC_only = _scores_by_buffer('child_care_access_score')
culture_only = _scores_by_buffer('culture_access_score')
shop_only = _scores_by_buffer('daily_needs_access_score')
health_only = _scores_by_buffer('health_care_access_score')
social_only = _scores_by_buffer('social_access_score')
sports_only = _scores_by_buffer('sports_access_score')

In [None]:
def _stops_per_buffer(scores, count_label):
    '''
    Count the non-missing values in every buffer column of `scores` and return them as a
    two-column dataframe: 'distance' (the buffer label) and `count_label` (the count).
    count() only skips NaN, so a 0 entry is counted like any other value.
    '''
    counts = scores.count().reset_index()
    counts.columns = ['distance', count_label]
    return counts


# NOTE: each *_only name is rebound from the per-stop frame to its count table, so this
# cell must run exactly once after the cell that builds the per-stop frames.
poi_only = _stops_per_buffer(poi_only, 'Bstop_count_all')
childC_only = _stops_per_buffer(childC_only, 'Bstop_count_childC')
culture_only = _stops_per_buffer(culture_only, 'Bstop_count_culture')
shop_only = _stops_per_buffer(shop_only, 'Bstop_count_shop')
health_only = _stops_per_buffer(health_only, 'Bstop_count_health')
social_only = _stops_per_buffer(social_only, 'Bstop_count_social')
sports_only = _stops_per_buffer(sports_only, 'Bstop_count_sports')


In [None]:
# Side-by-side table of the counts: poi_only supplies the 'distance' labels and the overall
# count, every category table supplies only its count column; rows line up by position.
dfList = [
    poi_only,
    childC_only['Bstop_count_childC'],
    culture_only['Bstop_count_culture'],
    shop_only['Bstop_count_shop'],
    health_only['Bstop_count_health'],
    social_only['Bstop_count_social'],
    sports_only['Bstop_count_sports'],
]
busStop_counts = pd.concat(dfList, axis = 1).set_index('distance')

In [None]:
print(busStop_counts)

In [None]:
# grouped bars: one group per distance buffer, one bar per count column
ax = busStop_counts.plot.bar(figsize=(10,7))
ax.set_xlabel("Cummulative Distance buffers")
ax.set_ylabel("number of bus stops with at least 1 poi within buffer")

In [None]:
#get cumulative counts of Bus Stop with at least one poi
# poi_only is the count table here ('distance' / 'Bstop_count_all'); x='distance' puts the
# buffer labels on the x axis instead of the 0-5 row index
ax = poi_only.plot.bar(x='distance', colormap='Paired')
plt.xlabel("Cummulative Distance buffers")
plt.ylabel("number of bus stops with at least 1 poi within buffer")