In [20]:
import requests
from requests.exceptions import HTTPError
import pandas as pd
from pprint import pprint
from datetime import date
from dataprep.eda import create_report

# add headers
# cache calls

def datagovsg_api_call(url, sort = 'month desc', limit = '100', years=["2022"]):
    '''
    Function to build the API call and construct the pandas dataframe
    Inputs:
        url: url for API, with resource_id parameters
        sort: field, by ascending/desc
        limit: maximum entries
        years: list of years data required
    Returns a pandas dataframe of the data
    '''
    month_dict = '{"month":['
    for year in years:
        for month in range(1,13):
            month_dict = month_dict + f'"{year}-{str(month).zfill(2)}", '
    month_dict = month_dict[:-2] 
    month_dict = month_dict + ']}'
    url = url+f'&sort={sort}&limit={limit}&filters={month_dict}'
    print(f'API call = {url}')
    try:
        response = requests.get(url)
        response.raise_for_status()
        data = response.json()
        df = pd.DataFrame(data['result']['records'])
    except HTTPError as http_err:
        print(f'HTTP error occurred: {http_err}')
    except Exception as err:
        print(f'Other error occurred: {err}')
    else:
        return df

df = datagovsg_api_call('https://data.gov.sg/api/action/datastore_search?resource_id=f1765b54-a209-4718-8d38-a39237f502b3')
df

API call = https://data.gov.sg/api/action/datastore_search?resource_id=f1765b54-a209-4718-8d38-a39237f502b3&sort=month desc&limit=100&filters={"month":["2022-01", "2022-02", "2022-03", "2022-04", "2022-05", "2022-06", "2022-07", "2022-08", "2022-09", "2022-10", "2022-11", "2022-12"]}


Unnamed: 0,town,flat_type,flat_model,floor_area_sqm,street_name,resale_price,month,remaining_lease,lease_commence_date,storey_range,_id,block
0,ANG MO KIO,4 ROOM,Model A,95,ANG MO KIO ST 32,698000,2022-12,77 years 10 months,2001,07 TO 09,141222,353
1,BEDOK,3 ROOM,New Generation,67,BEDOK NTH ST 2,340000,2022-12,54 years 06 months,1978,01 TO 03,141258,138
2,ANG MO KIO,3 ROOM,New Generation,68,ANG MO KIO AVE 4,375000,2022-12,57 years 06 months,1981,07 TO 09,141190,155
3,ANG MO KIO,4 ROOM,Model A,90,ANG MO KIO ST 32,670000,2022-12,77 years 10 months,2001,16 TO 18,141221,353
4,BEDOK,2 ROOM,Improved,45,BEDOK NTH ST 3,293000,2022-12,62 years 04 months,1986,07 TO 09,141242,534
...,...,...,...,...,...,...,...,...,...,...,...,...
95,ANG MO KIO,4 ROOM,Model A,93,ANG MO KIO ST 51,958000,2022-12,95 years 04 months,2019,25 TO 27,141224,590A
96,ANG MO KIO,4 ROOM,Model A,93,ANG MO KIO ST 51,880000,2022-12,95 years 03 months,2019,07 TO 09,141226,590B
97,ANG MO KIO,4 ROOM,Model A,93,ANG MO KIO ST 51,910000,2022-12,95 years 03 months,2019,10 TO 12,141227,590B
98,ANG MO KIO,4 ROOM,Model A,90,ANG MO KIO ST 52,620000,2022-12,79 years 01 month,2002,01 TO 03,141230,596A


In [65]:
#create_report(df).show()

1. Town, keep but add new field (region) - str
2. Room types, change to numbers. Executive 4.5, multigeneration 6 - float
3. Examine flat model if similar to room types
4. Floor area keep - int - bin
5. Street name and block - api call into latitude longitude
6. Month - date
7. Storey range - int (every 3 stories is 1)
8. Lease commence - date, calculate remaining
9. _id change into index

In [21]:
def clean_df(df):
    '''
    function to clean the raw dataframe
    '''
    # dictionary for townships
    town_regions = {'Sembawang' : 'North',
                'Woodlands' : 'North',
                'Yishun' : 'North',
                'Ang Mo Kio' : 'North-East',
                'Hougang' : 'North-East',
                'Punggol' : 'North-East',
                'Sengkang' : 'North-East',
                'Serangoon' : 'North-East',
                'Bedok' : 'East',
                'Pasir Ris' : 'East',
                'Tampines' : 'East',
                'Bukit Batok' : 'West',
                'Bukit Panjang' : 'West',
                'Choa Chu Kang' : 'West',
                'Clementi' : 'West',
                'Jurong East' : 'West',
                'Jurong West' : 'West',
                'Tengah' : 'West',
                'Bishan' : 'Central',
                'Bukit Merah' : 'Central',
                'Bukit Timah' : 'Central',
                'Central Area' : 'Central',
                'Geylang' : 'Central',
                'Kallang/ Whampoa' : 'Central',
                'Marine Parade' : 'Central',
                'Queenstown' : 'Central',
                'Toa Payoh' : 'Central'}
    
    abbreviations = {'St' : 'Street', 'Nth' : 'North', 'Ave' : 'Avenue', 'Dr' : 'Drive', 'Rd' : 'Road'}

    def categorise_rooms(flat_type):
        '''
        Helper function for categorising number of rooms
        '''
        if flat_type[0] == 'E':
            return 4.5
        elif flat_type[0] == 'M':
            return 6.0
        else:
            return float(flat_type[0])
    
    # Start
    # set index to overall id
    df.set_index('_id', inplace=True)

    # Create feature "rooms", "max_storey"
    df['rooms'] = df['flat_type'].apply(categorise_rooms)
    df['max_storey'] = df['storey_range'].apply(lambda x: int(x[-2:]))

    # Change dtypes
    df['lease_commence_date'] = df['lease_commence_date'].astype('int')
    df['resale_price'] = df['resale_price'].astype('float')
    df['floor_area_sqm'] = df['floor_area_sqm'].astype('int')
    df['month'] = pd.to_datetime(df['month'], format="%Y-%m-%d")
    #df['remaining_lease_months'] = pd.period_range(start=df['remaining_lease'], end=date.today(), freq='M')

    # Change capitalization of strings
    for column in df.columns:
        if df[column].dtype == 'O':
            df[column] = df[column].str.title()
    for abbreviation, full in abbreviations.items():
        df['street_name'] = df['street_name'].str.replace(abbreviation, full)
    
    # Categorise town regions      
    df['region'] = df['town'].apply(lambda x: town_regions[x])

    # Getting latitude, longitude, postal code
    def get_lat_long(df):
        address = df['block'] + ', ' + df['street_name']
        try:
            call = f'https://developers.onemap.sg/commonapi/search?searchVal={address}&returnGeom=Y&getAddrDetails=Y'
            response = requests.get(call)
            response.raise_for_status()
            data = response.json()
            return data['results'][0]['LATITUDE'] + ',' + data['results'][0]['LONGITUDE'] + ' ' + data['results'][0]['POSTAL']
        except HTTPError as http_err:
            print(f'HTTP error occurred: {http_err}')
        except Exception as err:
            pprint(call)
            print(f'Other error occurred: {err}')

    df['position'] = df.apply(get_lat_long, axis=1)
    df['postal_code'] = df['position'].apply(lambda x: x.split()[1])
    df['lat_long'] = df['position'].apply(lambda x: x.split()[0])

    # Reorder columns
    df = df[['resale_price', 'month', 'region', 'town', 'rooms', 'max_storey', 'floor_area_sqm', 'remaining_lease',
             'lat_long', 'postal_code']]
    
             # Unused columns - 'block', 'street_name', 'lease_commence_date', 'flat_model', 'storey_range', 'flat_type'

    return df

In [22]:
df = clean_df(df)

In [25]:
df

Unnamed: 0_level_0,resale_price,month,region,town,rooms,max_storey,floor_area_sqm,remaining_lease,block,street_name,lease_commence_date,flat_model,storey_range,flat_type,position,postal_code,lat_long
_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
141222,698000.0,2022-12-01,North-East,Ang Mo Kio,4.0,9,95,77 Years 10 Months,353,Ang Mo Kio Street 32,2001,Model A,07 To 09,4 Room,"1.36406807879641,103.851612284582 560353",560353,"1.36406807879641,103.851612284582"
141258,340000.0,2022-12-01,East,Bedok,3.0,3,67,54 Years 06 Months,138,Bedok North Street 2,1978,New Generation,01 To 03,3 Room,"1.33001011188999,103.936343483972 460138",460138,"1.33001011188999,103.936343483972"
141190,375000.0,2022-12-01,North-East,Ang Mo Kio,3.0,9,68,57 Years 06 Months,155,Ang Mo Kio Avenue 4,1981,New Generation,07 To 09,3 Room,"1.37587723828643,103.840067482919 560155",560155,"1.37587723828643,103.840067482919"
141221,670000.0,2022-12-01,North-East,Ang Mo Kio,4.0,18,90,77 Years 10 Months,353,Ang Mo Kio Street 32,2001,Model A,16 To 18,4 Room,"1.36406807879641,103.851612284582 560353",560353,"1.36406807879641,103.851612284582"
141242,293000.0,2022-12-01,East,Bedok,2.0,9,45,62 Years 04 Months,534,Bedok North Street 3,1986,Improved,07 To 09,2 Room,"1.33303698213001,103.924812248346 460534",460534,"1.33303698213001,103.924812248346"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
141224,958000.0,2022-12-01,North-East,Ang Mo Kio,4.0,27,93,95 Years 04 Months,590A,Ang Mo Kio Street 51,2019,Model A,25 To 27,4 Room,"1.37122178516983,103.853853154442 561590",561590,"1.37122178516983,103.853853154442"
141226,880000.0,2022-12-01,North-East,Ang Mo Kio,4.0,9,93,95 Years 03 Months,590B,Ang Mo Kio Street 51,2019,Model A,07 To 09,4 Room,"1.37118642598512,103.852980282648 562590",562590,"1.37118642598512,103.852980282648"
141227,910000.0,2022-12-01,North-East,Ang Mo Kio,4.0,12,93,95 Years 03 Months,590B,Ang Mo Kio Street 51,2019,Model A,10 To 12,4 Room,"1.37118642598512,103.852980282648 562590",562590,"1.37118642598512,103.852980282648"
141230,620000.0,2022-12-01,North-East,Ang Mo Kio,4.0,3,90,79 Years 01 Month,596A,Ang Mo Kio Street 52,2002,Model A,01 To 03,4 Room,"1.37243023974423,103.850949173949 561596",561596,"1.37243023974423,103.850949173949"


In [74]:
df.dtypes

town                           object
flat_type                      object
flat_model                     object
floor_area_sqm                  int32
street_name                    object
resale_price                  float64
month                  datetime64[ns]
remaining_lease                object
lease_commence_date             int32
storey_range                   object
block                          object
rooms                         float64
max_storey                      int64
region                         object
dtype: object

In [None]:
try:
    response = requests.get(f"https://developers.onemap.sg/privateapi/routingsvc/route?start={start}&end={end}&routeType={routeType}&token={token}")
    response.raise_for_status()
    data = response.json()
    pprint(data)
except HTTPError as http_err:
    print(f'HTTP error occurred: {http_err}')
except Exception as err:
    print(f'Other error occurred: {err}')

In [44]:
street = 'Ang Mo Kio Street 32'
block = '353'
address = block + ', ' + street
print(address)
try:
    response = requests.get(f"https://developers.onemap.sg/commonapi/search?searchVal={address}&returnGeom=Y&getAddrDetails=Y")
    response.raise_for_status()
    data = response.json()
    pprint(data)
    print(data['results'][0]['LATITUDE'])
except HTTPError as http_err:
    print(f'HTTP error occurred: {http_err}')
except Exception as err:
    print(f'Other error occurred: {err}')

353, Ang Mo Kio Street 32
{'found': 1,
 'pageNum': 1,
 'results': [{'ADDRESS': '353 ANG MO KIO STREET 32 AMK HEIGHTS SINGAPORE '
                         '560353',
              'BLK_NO': '353',
              'BUILDING': 'AMK HEIGHTS',
              'LATITUDE': '1.36406807879641',
              'LONGITUDE': '103.851612284582',
              'LONGTITUDE': '103.851612284582',
              'POSTAL': '560353',
              'ROAD_NAME': 'ANG MO KIO STREET 32',
              'SEARCHVAL': 'AMK HEIGHTS',
              'X': '30035.8728063824',
              'Y': '38457.2411144041'}],
 'totalNumPages': 1}
1.36406807879641
