In [8]:
import datetime
import math
import matplotlib.pyplot as plt 
import numpy as np
import pandas as pd
import json
from bs4 import BeautifulSoup
import time
import zipfile
import requests
import io
from datetime import date
from pyura import Client
from requests import Session
from geopy.geocoders import Nominatim

## Import CSVs needed

In [27]:
bus_stops_url = 'https://raw.githubusercontent.com/nicolepng/BT4222/main/Data/bus_stops.csv' 
bus_stops = pd.read_csv(bus_stops_url)
bus_stops.drop(bus_stops.columns[0], axis=1, inplace=True)

In [38]:
amenities_url = 'https://raw.githubusercontent.com/nicolepng/BT4222/main/Data/ameneties_per_district.csv' 
ameneties_per_district = pd.read_csv(amenities_url)
ameneties_per_district.drop(ameneties_per_district.columns[0], axis=1, inplace=True)

In [80]:
avg_crime_locations_url = 'https://raw.githubusercontent.com/nicolepng/BT4222/main/Data/average_crimes_by_location_v3.csv' 
average_crimes_by_location_v3 = pd.read_csv(avg_crime_locations_url)

In [81]:
# swap dictionary mapping direction
postal_district = {k: oldk for oldk, oldv in district_postal.items() for k in oldv}

# create new column to obtain the 1st 2 characters of [Postal]
average_crimes_by_location_v3['postal prefix'] = average_crimes_by_location_v3['Postal'].astype(str).str[0:2]

# map postal to district code
average_crimes_by_location_v3['district']= average_crimes_by_location_v3['postal prefix'].map(postal_district) 

# get overall crime rate in each district
average_crimes_by_location_v3 = average_crimes_by_location_v3.groupby(['district']).agg({'Number':'sum'}).reset_index()

district_int = average_crimes_by_location_v3.district.astype(int)
district_int = pd.DataFrame(district_int)
average_crimes_by_location_v3['district'] = district_int

In [91]:
sentiment_url = 'https://raw.githubusercontent.com/nicolepng/BT4222/main/Data/combined_sentiment.csv' 
sentiment = pd.read_csv(sentiment_url)
sentiment = sentiment[['district_num','hwz_sentiment','year']]
sentiment.columns = ['district_num', 'weighted_sentiment', 'year']
sentiment

Unnamed: 0,district_num,weighted_sentiment,year
0,21,0.000000,2018
1,1,0.082516,2018
2,2,0.317045,2018
3,3,0.123678,2018
4,4,0.093259,2018
...,...,...,...
107,24,0.150795,2021
108,25,0.088860,2021
109,26,-0.025054,2021
110,27,0.084183,2021


## Functions needed to get input

In [9]:
# Get district number
def get_postal_onemap(place, district_postal):
    start_code= "https://developers.onemap.sg/commonapi/search?returnGeom=Y&getAddrDetails=Y&pageNum=1&searchVal="+ str("Ang Mo Kio Ave 5")
    s_response = requests.get(start_code)
    s_data = json.loads(s_response.text)
    postal = None
    count = 0
    while postal is None:
        count += 1
        if count == 10:
            postal = None
            break
        for i in range(len(s_data['results'])):
            postal = s_data['results'][i]['POSTAL']
            try:
                postal = int(postal)
            except:
                continue
    for district, sub_dist in district_postal.items():
        if str(postal)[:2] in sub_dist:
            dist = district
    try:
        return dist
    except:
        return -1

In [18]:
# get latitude and longitude
def get_lat_long(street):
    geolocator = Nominatim(user_agent="newtestuserbtproj")
    location = geolocator.geocode({"street": street, "country": "Singapore"})
    coordinates = [location.latitude, location.longitude]
    return coordinates

In [36]:
# get number of bus stops
# Formula to calculate distance 
from math import cos, sqrt
R = 6371000 #radius of the Earth in m
def distance(lon1, lat1, lon2, lat2):
    x = (lon2 - lon1) * cos(0.5*(lat2+lat1))
    y = (lat2 - lat1)
    return R * sqrt( x*x + y*y )

def num_of_bus_stops(lat, long):
    busStops = bus_stops.to_dict(orient='records')
    # threshold of within 1km
    numOfStops = []
    buslist = list(filter(lambda d: distance(d["Longitude"], d["Latitude"], long, lat) <= 1000, busStops))
    return len(buslist)

In [57]:
# get num of schools
def num_schools(district):
    sch_list = ameneties_per_district.loc[ameneties_per_district['district'] == district]['school'].item()
    num_sch = sch_list.strip('][').split(',') 
    if " SINGAPORE'" in num_sch:
        num_sch.remove(" SINGAPORE'")
    return len(num_sch)

In [63]:
# get num of supermarkets
def num_supermarkets(district):
    market_list = ameneties_per_district.loc[ameneties_per_district['district'] == district]['supermarkets'].item()
    num_market = market_list.strip('][').split(',') 
    return len(num_market)

In [64]:
# get num of hawker
def num_hawker(district):
    hawker_list = ameneties_per_district.loc[ameneties_per_district['district'] == district]['hawkercentre'].item()
    num_hawker = hawker_list.strip('][').split(',') 
    return len(num_hawker)

In [101]:
# get crime number 
def crime_num(district):
    return average_crimes_by_location_v3[average_crimes_by_location_v3.district == district].Number.item() 

In [102]:
# get sentiment score
def sentiment_score(district, year):
    return sentiment[(sentiment.district_num == district) & (sentiment.year == year)].weighted_sentiment.item()

In [None]:
# get floor range


## Pre-set Variables

In [13]:
district_postal = {
    "1" : ["01", "02", "03", "04", "05", "06"],
    "2" : ["07", "08"],
    "3" : ["14", "15", "16"],
    "4" : ["09", "10"],
    "5" : ["11", "12", '13'],
    "6" : ["17"],
    "7" : ["18", "19"],
    "8" : ["20", "21"],
    "9" : ["22", "23"],
    "10" : ["24", "25", "26", "27"],
    "11" : ["28", "29", "30"],
    "12" : ["31", "32", "33"],
    "13" : ["34", "35", "36", "37"],
    "14" : ["38", "39", "40", "41"],
    "15" : ["42", "43", "44", "45"],
    "16" : ["46", "47", "48"],
    "17" : ["49", "50", "81"],
    "18" : ["51", "52"],
    "19" : ["53", "54", "55", "82"],
    "20" : ["56", "57"],
    "21" : ["58", "59"],
    "22" : ["60", "61", "62", "63", "64"],
    "23" : ["65", "66", "67", "68"],
    "24" : ["69", "70", "71"],
    "25" : ["72", "73"],
    "26" : ["77", "78"],
    "27" : ["75", "76"],
    "28" : ["79", "80"]
}

In [20]:
district_mapping = {
    "1" : ["RAFFLES PLACE MRT STATION EXIT A", "CECIL BUILDING", "MARINA BAY SINGAPORE", "PEOPLE'S PARK COMPLEX"],
    "2" : ["ANSON ROAD", "TANJONG PAGAR MRT STATION EXIT A"],
    "3" : ["QUEENSTOWN MRT STATION EXIT A", "TIONG BAHRU MRT STATION EXIT A"],
    "4" : ["TELOK BLANGAH RISE MARKET", "HARBOURFRONT MRT STATION EXIT A"],
    "5" : ["PASIR PANJANG MRT STATION EXIT A", "CLEMENTI MRT STATION EXIT A"],
    "6" : ["HIGH STREET PLAZA", "CALTEX BEACH ROAD"],
    "7" : ["DBS MIDDLE ROAD 210", "GOLDEN MILE COMPLEX"],
    "8" : ["LITTLE INDIA MRT STATION EXIT A"],
    "9" : ["ORCHARD MRT STATION EXIT A", "THE CAIRNHILL", "GREAT WORLD MRT STATION"],
    "10" : ["BUKIT TIMAH ROAD", "HOLLAND VILLAGE MRT STATION", "TANGLIN MALL"],
    "11" : ["WATTEN ESTATE", "NOVENA MRT STATION EXIT A", "THOMSON PLAZA"],
    "12" : ["BALESTIER PLAZA", "TOA PAYOH MRT STATION EXIT A", "SERANGOON MRT STATION EXIT A"],
    "13" : ["MACPHERSON MRT STATION EXIT A", "BRADDELL MRT STATION EXIT A"],
    "14" : ["PAYA LEBAR MRT STATION EXIT A", "EUNOS MRT STATION EXIT A"],
    "15" : ["TANJONG KATONG GIRLS' SCHOOL", "KOON SENG PARK", "TANJONG KATONG MRT STATION EXIT A"],
    "16" : ["BEDOK MRT STATION EXIT A", "TANAH MERAH MRT STATION EXIT A", "SUNGEI BEDOK MRT STATION", "KEW DRIVE PLAYGROUND"],
    "17" : ["MARIAM WAY PLAYGROUND", "KEMBANGAN MRT STATION EXIT A"],
    "18" : ["TAMPINES MRT STATION EXIT A", "PASIR RIS MRT STATIONEXIT A"],
    "19" : ["SERANGOON GARDEN MARKET EXIT A", "HOUGANG MRT STATION EXIT A", "PUNGGOL MRT STATION EXIT A"],
    "20" : ["BISHAN MRT STATION EXIT A", "ANG MO KIO MRT STATION EXIT A"],
    "21" : ["ESSO UPPER BUKIT TIMAH A", "CLEMENTI PARK", "ULU PANDAN COMMUNITY CLUB"],
    "22" : ["JURONG EAST MRT STATION EXIT A", "BOON LAY MRT STATION EXIT A", "LAKESIDE MRT STATION EXIT A", "CHINESE GARDEN MRT STATION EXIT A"],
    "23" : ["HILLVIEW MRT STATION EXIT A", "GERMAN EUROPEAN SCHOOL SINGAPORE", "BUKIT PANJANG MRT STATION EXIT A1", "CHOA CHU KANG MRT STATION EXIT A"],
    "24" : ["LIM CHU KANG CAMP I", "GARDEN VALE @ TENGAH"],
    "25" : ["KRANJI MRT STATION EXIT A", "THE WOODGROVE"],
    "26" : ["UPPER THOMSON MRT STATION", "SPRINGLEAF GARDEN"],
    "27" : ["YISHUN MRT STATION EXIT A", "SEMBAWANG MRT STATION EXIT A"],
    "28" : ["THE SELETAR MALL"]
}

In [104]:
replace_floors = {
    '01 TO 03' : '01-05',
    '04 TO 06' : '01-05',
    '07 TO 09' : '06-10',
    '10 TO 12' : '11-15',
    '13 TO 15' : '11-15',
    '16 TO 18' : '16-20',
    '19 TO 21' : '16-20',
    '22 TO 24' : '21-25',
    '25 TO 27' : '26-30',
    '28 TO 30' : '26-30',
    '31 TO 33' : '31-35',
    '34 TO 36' : '31-35',
    '37 TO 39' : '36-40',
    '40 TO 42' : '41-45',
    '43 TO 45' : '41-45',
    '46 TO 48' : '46-50',
    
}

## Actual Code

district  
street             
propertyType        
remaining_lease   
price              
school             
hawkercentre        
supermarkets       
Bus Stops Nearby   
crime_number        
latitude          
longitude           
floor_area_sqm     
floor_range        
sentiment
Street given, propertyType given, remaining lease given, square feet also given

In [128]:
street = input("Enter Street Name: ")

propertyType = input("Enter Type of Property: ")

remaining_lease = int(input("Enter num of years left: "))

floor_area_sqm = input("Area of House (Square Metres): ")

floor_range = input("Enter Floor Range: ")

year = int(input("Current Year? "))

Enter Street Name: Jalan Khamis
Enter Type of Property: Semi-detached
Enter num of years left: 999
Area of House (Square Metres): 334.5
Enter Floor Range: -
Current Year? 2021


In [129]:
# Get all details 
district = get_postal_onemap(street, district_postal)
coordinates = get_lat_long(street)
latitude = coordinates[0]
longitude = coordinates[1]
school = num_schools(int(district))
hawkercentre = num_hawker(int(district))
supermarkets = num_supermarkets(int(district))
bus_stops_nearby = num_of_bus_stops(latitude, longitude)
crime_number = crime_num(int(district))
score = sentiment_score(int(district), year)
floor_range = replace_floors.get(floor_range)

AttributeError: 'float' object has no attribute 'district_num'

In [126]:
# Create temp dataframe to fit into model
temp_column_names = ['district', 'street', 'propertyType', 'remaining_lease',
                    'school', 'hawkercentre', 'supermarkets', 'Bus Stops Neaby',
                    'crime_number', 'latitude', 'longitude', 'floor_area_sqm', 'floor_range',
                    'sentiment']
temp = pd.DataFrame(columns = temp_column_names)

In [130]:
sentiment

0.08221698556073556

In [19]:
# get lat and long 
get_lat_long("Jalan Khamis")

[1.3538177, 103.8376954]

In [37]:
# get num of nearby bus stops
num_of_bus_stops(1.3538177,103.8376954)

0

In [118]:
district

37

In [89]:
crime(10)

35.42857142857143