Use virtual environment: ors_env

# <font color='purple'>Amenity Scores</font>
- Having quantified accessibility of amenities for each apartment, we rank the apartments for easier comparison.
- This ranking will serve as the basis for our apartment search tool

In [1]:
import geopandas as gpd
import pandas as pd
import numpy as np
import math

In [2]:
import os

# Every dataset path used below is relative to this project folder.
project_dir = r"C:\Users\sharo\Documents\Postgrad\My Data Science Portfolio\Transforming the Way We Search For Flats"
os.chdir(project_dir)

## Import Data

In [3]:
# Read the distance-to-amenities dataset (path is relative to the working directory set earlier).
amenities_path = "Clean Datasets/dist_to_amenities"
amenities = gpd.read_file(amenities_path)
amenities.head()

Unnamed: 0,blk_no,street,Latitude,Longitude,Postcode,n_supmkt_4,dist_supmk,n_food_400,n_mrt_400m,dist_mrt,dist_cbd,n_presch_4,n_sch_2km,prank_2km,grnarea_40,geometry
0,1,BEACH RD,1.303671,103.864479,190001,1,127.108874,155,0,441.695647,64.059681,1,4,146.0,5.580603,POINT (373669.075 144123.588)
1,1,BEDOK STH AVE 1,1.320852,103.933721,460001,2,115.48693,31,0,540.757663,7451.474775,4,18,6.0,2.071161,POINT (381374.284 146019.548)
2,1,CHAI CHEE RD,1.327969,103.922716,461001,1,399.600199,5,0,915.733304,6559.589365,3,12,21.0,0.324354,POINT (380150.144 146806.830)
3,1,CHANGI VILLAGE RD,1.38861,103.988093,500001,0,,54,0,,16243.891728,0,0,,4.586773,POINT (387427.101 153507.525)
4,1,DELTA AVE,1.292075,103.828584,160001,3,210.457681,64,0,673.165241,2137.148412,8,12,15.0,0.0,POINT (369674.526 142843.424)


In [4]:
#column names were cut to 10 characters when the dataset was saved; restore the full names
full_column_names = {
    'n_supmkt_4': 'n_supmkt_400',
    'dist_supmk': 'dist_supmkt',
    'n_presch_4': 'n_presch_400',
    'grnarea_40': 'grnarea_400',
}
amenities.rename(columns=full_column_names, inplace=True)

#geometry and coordinates are not needed for the scoring below
spatial_columns = ['geometry', 'Latitude', 'Longitude']
amenities.drop(columns=spatial_columns, inplace=True)
amenities.head()

Unnamed: 0,blk_no,street,Postcode,n_supmkt_400,dist_supmkt,n_food_400,n_mrt_400m,dist_mrt,dist_cbd,n_presch_400,n_sch_2km,prank_2km,grnarea_400
0,1,BEACH RD,190001,1,127.108874,155,0,441.695647,64.059681,1,4,146.0,5.580603
1,1,BEDOK STH AVE 1,460001,2,115.48693,31,0,540.757663,7451.474775,4,18,6.0,2.071161
2,1,CHAI CHEE RD,461001,1,399.600199,5,0,915.733304,6559.589365,3,12,21.0,0.324354
3,1,CHANGI VILLAGE RD,500001,0,,54,0,,16243.891728,0,0,,4.586773
4,1,DELTA AVE,160001,3,210.457681,64,0,673.165241,2137.148412,8,12,15.0,0.0


In [6]:
#save the file
# STOP! DO NOT OVERWRITE FILE!
# The CSV is written only when it is not already on disk, so re-running the
# notebook cannot clobber the saved copy. Delete the file manually to regenerate it.
raw_csv_path = "Clean Datasets/amenities_raw.csv"
if os.path.exists(raw_csv_path):
    print(f"{raw_csv_path} already exists - skipping save to avoid overwriting it")
else:
    amenities.to_csv(raw_csv_path, index=False)

## Compute Amenity Scores
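- Percentile scores are computed for every amenity metric so that I can choose which ones to use in Tableau.
- Scores run from 1 (best) to 100 (worst); blocks with no such amenity within the search radius get the worst score.
- Aggregated scores are not calculated here: the formula is simple, so it is left to Tableau.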

In [8]:
def percentile_ranking(column, small_to_big):
    """Convert a column of values into whole-number percentile ranks (1-100).

    Values are dense-ranked, so blocks with the same value share a rank and the
    rank always increases by 1 between groups (1,2,2,3 instead of 1,2,2,4).
    The rank is then expressed as a percentage of the number of distinct groups
    and rounded up to the nearest 1%.

    Parameters
    ----------
    column : pandas.Series
        Values to rank.
    small_to_big : bool
        True if the smallest value should get rank 1 (e.g. distances),
        False if the largest value should get rank 1 (e.g. counts).

    Returns
    -------
    pandas.Series
        Integer percentile ranks with the same index and name as ``column``.
        NAs always receive the last (worst) rank: they occur for min dist items
        when there weren't any of that amenity within the search radius.
    """
    dense_rank = column.rank(method='dense',
                             na_option='bottom',
                             ascending=small_to_big)

    # nothing to rank - return an empty integer column instead of failing on max()
    if dense_rank.empty:
        return dense_rank.astype('int64')

    dense_rank = dense_rank.astype('int64')
    n_groups = int(dense_rank.max())

    # Ceiling division done in integers. Multiplying a float fraction by 100 and
    # then calling ceil() turns exact percentiles such as 7/100 into
    # 7.000000000000001, which would wrongly round up to 8.
    return -((-100 * dense_rank) // n_groups)

In [12]:
score_1 = amenities.copy()

# One entry per amenity metric: (raw column, score column, small_to_big).
# small_to_big=False -> the largest value gets rank 1; True -> the smallest value gets rank 1.
ranking_specs = [
    ('n_supmkt_400', 'p_sup400',  False),  #number of supermarkets within 400m: most supermarkets gets rank 1
    ('dist_supmkt',  'p_supDist', True),   #distance to nearest supermarket: shortest distance gets rank 1
    ('n_food_400',   'p_fd400',   False),  #number of food establishments within 400m: most food est. gets rank 1
    ('n_mrt_400m',   'p_mrt400',  False),  #number of mrt stations within 400m: most mrt gets rank 1
    ('dist_mrt',     'p_mrtDist', True),   #distance to nearest mrt: shortest distance gets rank 1
    ('dist_cbd',     'p_cbdDist', True),   #distance to CBD: shortest distance gets rank 1
    ('n_presch_400', 'p_psc400',  False),  #number of preschools within 400m: most preschools gets rank 1
    ('n_sch_2km',    'p_sch2km',  False),  #number of schools within 2km: most schools gets rank 1
    ('prank_2km',    'p_pri2km',  True),   #ranking of best primary school within 2km: smallest ranking is the best
    ('grnarea_400',  'p_grn400',  False),  #green area within 400m: highest green area gets rank 1
]

for raw_col, score_col, ascending in ranking_specs:
    score_1[score_col] = percentile_ranking(score_1[raw_col], small_to_big=ascending)

# keep only the identifying columns and the percentile scores
score_1.drop(columns=[raw_col for raw_col, _, _ in ranking_specs], inplace=True)

score_1.head()

Unnamed: 0,blk_no,street,Postcode,p_sup400,p_supDist,p_fd400,p_mrt400,p_mrtDist,p_cbdDist,p_psc400,p_sch2km,p_pri2km,p_grn400
0,1,BEACH RD,190001,89,13,45,100,25,1,96,91,99,11
1,1,BEDOK STH AVE 1,460001,78,11,89,100,36,27,84,46,10,38
2,1,CHAI CHEE RD,461001,89,77,99,100,72,22,88,65,31,90
3,1,CHANGI VILLAGE RD,500001,100,100,81,100,100,89,100,100,100,13
4,1,DELTA AVE,160001,67,33,77,100,50,5,68,65,25,100


## Save

In [13]:
# STOP! DO NOT OVERWRITE FILE!
# The CSV is written only when it is not already on disk, so re-running the
# notebook cannot clobber the saved copy. Delete the file manually to regenerate it.
scores_csv_path = "Clean Datasets/amenities_scores.csv"
if os.path.exists(scores_csv_path):
    print(f"{scores_csv_path} already exists - skipping save to avoid overwriting it")
else:
    score_1.to_csv(scores_csv_path, index=False)