In [1]:
import os
import sys
import zipfile
import pandas as pd
import numpy as np
import plotly.express as px
import matplotlib.pyplot as plt

# Render matplotlib figures inside the notebook output.
%matplotlib inline

# Put the parent directory on the import path so that the local
# `src.data_preprocess` module below can be resolved.
sys.path.append('../')
from src.data_preprocess import DataPreprocessor

# Re-import edited modules automatically before each cell is executed, so
# changes to DataPreprocessor are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

## Download dataset

In [2]:
if not os.listdir('../input'):
    !kaggle competitions download -c cs5228-2022-semester-1-final-project -p ../input
    Dataset = "cs5228-2022-semester-1-final-project"
    with zipfile.ZipFile(f"../input/{Dataset}.zip","r") as z:
        z.extractall("../input")

In [3]:
# Print the path of every CSV file found under ../input (recursively).
for dirname, _, filenames in os.walk('../input'):
    for filename in filenames:
        # Match the extension including the dot; a bare 'csv' suffix would
        # also accept names that merely end in those three letters.
        if filename.endswith('.csv'):
            print(os.path.join(dirname, filename))

../input\example-submission.csv
../input\test.csv
../input\train.csv
../input\auxiliary-data\sg-commerical-centres.csv
../input\auxiliary-data\sg-mrt-stations.csv
../input\auxiliary-data\sg-primary-schools.csv
../input\auxiliary-data\sg-secondary-schools.csv
../input\auxiliary-data\sg-shopping-malls.csv
../input\auxiliary-data\sg-subzones.csv


### train.csv

In [4]:
# Load the training split and preview its first two rows.
train_df = pd.read_csv('../input/train.csv')
train_df.head(2)

Unnamed: 0,listing_id,title,address,property_name,property_type,tenure,built_year,num_beds,num_baths,size_sqft,...,furnishing,available_unit_types,total_num_units,property_details_url,lat,lng,elevation,subzone,planning_area,price
0,122881,hdb flat for sale in 866 yishun street 81,sembawang / yishun (d27),866 yishun street 81,hdb 4 rooms,,1988.0,3.0,2.0,1115,...,unspecified,,116.0,https://www.99.co/singapore/hdb/866-yishun-str...,1.414399,103.837196,0,yishun south,yishun,514500.0
1,259374,hdb flat for sale in 506b serangoon north aven...,hougang / punggol / sengkang (d19),hdb-serangoon estate,hdb,99-year leasehold,1992.0,4.0,2.0,1575,...,unspecified,"1, 2, 3, 4, 5, 6 br",,https://www.99.co/singapore/hdb/hdbserangoon-e...,1.372597,103.875625,0,serangoon north,serangoon,995400.0


In [5]:
# Quick profile of the raw training data: dimensions, missing values per
# column, and summary statistics of the numeric columns.
print(train_df.shape)
display(
    train_df.isna().sum(),
    train_df.describe(),
)

(20254, 21)


listing_id                  0
title                       0
address                     0
property_name               0
property_type               0
tenure                   1723
built_year                922
num_beds                   80
num_baths                 434
size_sqft                   0
floor_level             16746
furnishing                  0
available_unit_types     1441
total_num_units          5652
property_details_url        0
lat                         0
lng                         0
elevation                   0
subzone                   113
planning_area             113
price                       0
dtype: int64

Unnamed: 0,listing_id,built_year,num_beds,num_baths,size_sqft,total_num_units,lat,lng,elevation,price
count,20254.0,19332.0,20174.0,19820.0,20254.0,14602.0,20254.0,20254.0,20254.0,20254.0
mean,550763.206428,2010.833695,3.122931,2.643542,1854.364,376.253938,1.434282,103.855356,0.0,5228263.0
std,258874.420108,15.822803,1.281658,1.473835,13543.43,346.882474,1.558472,3.593441,0.0,277974800.0
min,100043.0,1963.0,1.0,1.0,0.0,4.0,1.239621,-77.065364,0.0,0.0
25%,326279.0,2000.0,2.0,2.0,807.0,106.0,1.307329,103.806576,0.0,819000.0
50%,551397.0,2017.0,3.0,2.0,1119.0,296.0,1.329266,103.841552,0.0,1680000.0
75%,774044.5,2023.0,4.0,3.0,1528.0,561.0,1.372461,103.881514,0.0,3242400.0
max,999944.0,2028.0,10.0,10.0,1496000.0,2612.0,69.486768,121.023232,0.0,39242430000.0


In [6]:
# Distribution of the raw target before any cleaning.
fig = px.histogram(train_df, x='price')
fig.show()

## Excessive outliers

In [7]:
# Drop extreme prices with the project helper (implemented in
# src.data_preprocess, not visible here) and re-plot the distribution.
# The shape printed in the next cell shows that 2 of the 20254 rows are removed.
train_df_clean = DataPreprocessor.remove_price_outlier(train_df)
fig = px.histogram(train_df_clean, x='price')
fig.show()

## Duplicated records
* same attribute records same price
* same attribute records different price => take average (+/- 200,000)

In [8]:
# Row/column count before de-duplication, for comparison with the next cell.
train_df_clean.shape

(20252, 21)

In [9]:
# Collapse duplicated listings with the project helper and show the new shape.
# NOTE(review): a later head(1) shows listing_id as a float placed next to
# price, which suggests it is aggregated together with price inside
# remove_duplicates — confirm in src.data_preprocess.
train_df_clean = DataPreprocessor.remove_duplicates(train_df_clean)
train_df_clean.shape

(16131, 21)

### test.csv

In [10]:
# Load the test split (no price column) and preview its first row.
test_df = pd.read_csv('../input/test.csv')
test_df.head(1)

Unnamed: 0,listing_id,title,address,property_name,property_type,tenure,built_year,num_beds,num_baths,size_sqft,floor_level,furnishing,available_unit_types,total_num_units,property_details_url,lat,lng,elevation,subzone,planning_area
0,777912,1 bed condo for sale in the gazania,17 how sun drive,the gazania,condo,freehold,2022.0,1.0,1.0,463,,unfurnished,"studio, 1, 2, 3, 4, 5 br",250.0,https://www.99.co/singapore/condos-apartments/...,1.344334,103.87869,0,upper paya lebar,serangoon


In [11]:
# Same profile as for the training data: dimensions, missing values per
# column, and summary statistics of the numeric columns.
print(test_df.shape)
display(
    test_df.isna().sum(),
    test_df.describe(),
)

(7000, 20)


listing_id                 0
title                      0
address                    2
property_name              0
property_type              0
tenure                   637
built_year               358
num_beds                  35
num_baths                152
size_sqft                  0
floor_level             5844
furnishing                 0
available_unit_types     520
total_num_units         1900
property_details_url       0
lat                        0
lng                        0
elevation                  0
subzone                   33
planning_area             33
dtype: int64

Unnamed: 0,listing_id,built_year,num_beds,num_baths,size_sqft,total_num_units,lat,lng,elevation
count,7000.0,6642.0,6965.0,6848.0,7000.0,5100.0,7000.0,7000.0,7000.0
mean,551687.994143,2010.823999,3.089591,2.624854,1709.027,373.181373,1.41664,103.853102,0.0
std,259038.092508,15.713629,1.276983,1.466597,1860.113894,333.976046,1.34329,3.408832,0.0
min,100108.0,1963.0,1.0,1.0,68.0,6.0,1.239621,-77.065364,0.0
25%,327927.0,2000.0,2.0,2.0,797.0,111.0,1.307189,103.806576,0.0
50%,549475.0,2017.0,3.0,2.0,1119.0,298.0,1.329266,103.842241,0.0
75%,775229.0,2023.0,4.0,3.0,1528.0,561.0,1.370798,103.879948,0.0
max,999981.0,2028.0,10.0,10.0,27500.0,2612.0,69.486768,121.023232,0.0


#### Data fields
* listing_id - unique identifier of the property listing
* title - title of the property listing (e.g., "2 bed condo for sale in 35 gilstead")
* address - address of the property (e.g., "124 punggol walk", "11 sengkang east avenue")
* property_name - name of the property (e.g., "redhill rise", "klimt cairnhill")
* property_type - type of the property (e.g., "condo", "hdb 2 rooms", "landed")
* tenure - tenure of the property (e.g., "freehold", "99-year leasehold")
* built_year - year when the property was built (e.g., 2014, 2021)
* num_beds - number of bedrooms (e.g., 1, 2, 3)
* num_baths - number of bathrooms (e.g., 1, 2, 3)
* size_sqft - floor area in square feet (e.g., 807, 657, 1628)
* floor_level - information about the floor level of the property (e.g., "high", "low")
* furnishing - information whether the property is furnished (e.g., "fully", "partial")
* available_unit_types - list of all types of units available in the property complex (e.g., "studio, 3, 4, 5 br")
* total_num_units - total number of units in the property complex (e.g., 115, 200)
* property_details_url - URL linking to more information about the property complex
* lat - latitude of property (e.g., 1.328805)
* lng - longitude of property (e.g., 103.74502)
* elevation - elevation of the property in meters (e.g., 10)
* subzone - subzone of the block containing the flat (e.g., "blangah rise", "marymount")
* planning_area - planning area of block containing the flat (e.g., "woodlands", "bukit merah")
* price - sales price in SGD

In [12]:
# Peek at one row of the cleaned training frame.
train_df_clean.head(1)

Unnamed: 0,title,address,property_name,property_type,tenure,built_year,num_beds,num_baths,size_sqft,floor_level,...,available_unit_types,total_num_units,property_details_url,lat,lng,elevation,subzone,planning_area,listing_id,price
0,1 bed condo for sale in 10 evelyn,10 evelyn road,10 evelyn,Condo,freehold,2022.0,1.0,1.0,495,,...,"studio, 1, 2, 3 br",56.0,https://www.99.co/singapore/condos-apartments/...,1.31629,103.840576,0,moulmein,novena,667980.0,1424800.0


In [13]:
# Walk over every column except the first one and print its name.
# The per-column histogram is kept below but disabled.
for column_name in train_df_clean.columns[1:]:
    print(column_name)
    # fig = px.histogram(train_df_clean, x=column_name)
    # fig.show()

address
property_name
property_type
tenure
built_year
num_beds
num_baths
size_sqft
floor_level
furnishing
available_unit_types
total_num_units
property_details_url
lat
lng
elevation
subzone
planning_area
listing_id
price


In [14]:
# Running set of column names earmarked for dropping; later cells add to it
# as each attribute is reviewed. The drop itself is not performed in this
# part of the notebook.
drop_attributes = {'listing_id'}

## auxiliary data

In [15]:
# Print the path and show the first row of every auxiliary CSV file.
for dirname, _, filenames in os.walk('../input/auxiliary-data/'):
    for filename in filenames:
        # Match the extension including the dot; a bare 'csv' suffix would
        # also accept names that merely end in those three letters.
        if not filename.endswith('.csv'):
            continue
        csv_path = os.path.join(dirname, filename)
        print(csv_path)
        aux_df = pd.read_csv(csv_path)
        display(aux_df.head(1))

../input/auxiliary-data/sg-commerical-centres.csv


Unnamed: 0,name,type,lat,lng,subzone,planning_area
0,Central Business District,CR,1.286768,103.854529,clifford pier,downtown core


../input/auxiliary-data/sg-mrt-stations.csv


Unnamed: 0,code,line,name,opening_year,lat,lng,subzone,planning_area
0,cc1,cc,dhoby ghaut,2010,1.298912,103.846293,dhoby ghaut,museum


../input/auxiliary-data/sg-primary-schools.csv


Unnamed: 0,name,lat,lng,subzone,planning_area
0,Admiralty Primary School,1.442941,103.800345,woodlands east,serangoon


../input/auxiliary-data/sg-secondary-schools.csv


Unnamed: 0,name,lat,lng,subzone,planning_area
0,Admiralty Secondary School,1.445912,103.802908,woodlands east,woodlands


../input/auxiliary-data/sg-shopping-malls.csv


Unnamed: 0,name,lat,lng,subzone,planning_area
0,10 AM,1.275568,103.863591,marina south,marina south


../input/auxiliary-data/sg-subzones.csv


Unnamed: 0,name,area_size,population,planning_area
0,ang mo kio town centre,0.3169,4810,ang mo kio


## 1. title
* no missing value
* containing following attributes (overlapped with other fields):
    1. property_type
    2. for sale
    3. location
* usage:
    1. not useful for modeling
    2. sanity check for other attributes (e.g., address)
    3. impute for other attributes
        * num_beds: 80 NaN
        * subzone/ planning_area: 113 NaN

In [16]:
# Run the project's title parser on both splits. The next cell reads
# title_property_type, title_n_beds and title_address, which are presumably
# added by this helper — confirm in src.data_preprocess.
train_df_clean = DataPreprocessor.preprocess_title(train_df_clean)
test_df = DataPreprocessor.preprocess_title(test_df)

['sale']
['sale']


In [17]:
# Show each title-derived column next to the original column it overlaps with.
# if hdb flat, num_beds can get from property_type
title_vs_original = (
    ('title_property_type', 'property_type'),
    ('title_n_beds', 'num_beds'),
    ('title_address', 'address'),
)
for derived_col, original_col in title_vs_original:
    display(train_df_clean.loc[:, [derived_col, original_col]].head(3))

Unnamed: 0,title_property_type,property_type
0,condo,Condo
1,condo,Condo
2,condo,apartment


Unnamed: 0,title_n_beds,num_beds
0,1,1.0
1,1,1.0
2,1,1.0


Unnamed: 0,title_address,address
0,10 evelyn,10 evelyn road
1,10 evelyn,10 evelyn road
2,10 evelyn,10 evelyn road


In [18]:
# The raw title is not kept as a feature.
drop_attributes.add('title')

## 2. address
* no missing value
* not useful itself as one attribute for model
* may be useful for imputing subzone/ planning_area: 113 NaN

In [19]:
# Distinct address strings in the cleaned training data.
train_df_clean['address'].unique()

array(['10 evelyn road', '10 shelford road', '18 woodsville close', ...,
       '32 middle road', '112 punggol walk', 'serangoon terrace'],
      dtype=object)

In [20]:
# The raw address is not kept as a feature.
drop_attributes.add('address')

## 3. property_name
* no missing value
* not useful itself as one attribute for model
* distinct property name can help to impute data (e.g., built year) => but too noisy!

In [21]:
# train_df_clean['property_name_clean'] = train_df_clean['property_name'].str.split('@').str[0].str.strip()
# print(str(train_df_clean['property_name'].unique().tolist()[:100]))
# print(str(train_df_clean['property_name_clean'].unique().tolist()[:100]))
# print(len(train_df_clean['property_name'].unique()))
# print(len(train_df_clean['property_name_clean'].unique()))

In [22]:
# The raw property name is not kept as a feature.
drop_attributes.add('property_name')

## 4. property_type
* useful as ordinal cat (possible ranking)

### dirty records
1. inconsistent letter case (e.g., "Condo" vs "condo")
2. hdb vs hdb {n} rooms vs Hdb Executive
3. good class bungalow vs bungalow?
4. different types of house, condo
5. walk-up? land only?

In [23]:
# Price spread per property_type label as it stands before
# preprocess_property_type is applied.
fig = px.box(train_df_clean, x="property_type", y="price")
fig.show()

In [34]:
# Normalise property_type with the project helper. The following cell uses
# property_type_clean and property_type_cat, presumably added here — confirm
# in src.data_preprocess.
train_df_clean = DataPreprocessor.preprocess_property_type(train_df_clean)#[['title_property_type','property_type','property_type_info']]

In [35]:
# Median price per cleaned property type, ascending; it fixes the left-to-right
# order of the boxes. 'price' is selected before .median() so that the text
# columns are not aggregated: on pandas >= 2.0 a frame-wide groupby median
# raises TypeError for non-numeric columns.
median_price_by_type = (
    train_df_clean.groupby('property_type_clean')['price'].median().sort_values()
)

fig = px.box(train_df_clean.sort_values('price'), x="property_type_clean", y="price")
fig.update_xaxes(categoryorder='array', categoryarray=median_price_by_type.index.to_list())
fig.show()

# Median price against the encoded category, to see how price varies with it.
median_price_by_cat = (
    train_df_clean.groupby('property_type_cat')['price'].median().sort_values().reset_index()
)
fig = px.scatter(median_price_by_cat, x="property_type_cat", y="price")
fig.show()

In [36]:
# Neither the raw nor the cleaned property type label is kept.
drop_attributes.update({'property_type', 'property_type_clean'})

## tenure
* missing 1595

In [37]:
# Number of listings without a tenure value.
# NOTE(review): the saved output of this cell is 0 although the section header
# reports 1595 missing values; the non-sequential execution counts suggest the
# cell was run on an already-processed frame — confirm with Restart & Run All.
train_df_clean['tenure'].isnull().sum()

0

In [38]:
# Price spread per tenure value.
fig = px.box(train_df_clean, x="tenure", y="price")
fig.show()

In [39]:
# Number of listings per tenure value.
# train_df_clean.groupby('tenure').count()
fig = px.histogram(train_df_clean, x='tenure')
fig.show()

![alt text](../ref/lease.png "tenure type")

In [40]:
# Clean the tenure column with the project helper, then plot price against it.
train_df_clean = DataPreprocessor.preprocess_tenure(train_df_clean)

# Median price per tenure value, ascending; it fixes the order of the boxes.
# 'price' is selected before .median() so that the text columns are not
# aggregated: on pandas >= 2.0 a frame-wide groupby median raises TypeError
# for non-numeric columns.
median_price_by_tenure = train_df_clean.groupby('tenure')['price'].median().sort_values()

fig = px.box(train_df_clean.sort_values('price'), x="tenure", y="price")
fig.update_xaxes(categoryorder='array', categoryarray=median_price_by_tenure.index.to_list())
fig.show()

# Median price against the encoded tenure category.
median_price_by_tenure_cat = (
    train_df_clean.groupby('tenure_cat')['price'].median().sort_values().reset_index()
)
fig = px.scatter(median_price_by_tenure_cat, x="tenure_cat", y="price")
fig.show()

In [41]:
# The tenure label column is earmarked for dropping.
drop_attributes.add('tenure')

## built_year
* missing 789

In [42]:
# Number of listings without a built_year.
train_df_clean['built_year'].isnull().sum()

789

In [43]:
# fig = px.scatter(train_df_clean[train_df_clean['lat']<10], x='lat', y='lng', color='built_year')
# fig.show()

### Imputation


#### same property has different built year (too noisy)

In [45]:
# Distinct built_year values per property name. groupby does not modify its
# input, so no defensive copy of train_df_clean is needed.
temp_df = train_df_clean.groupby(['property_name'])['built_year'].unique().reset_index()

# Keep names that have a missing built_year next to at least one other value.
# NaN is tested directly with pd.isna rather than by searching the array's
# string form, which depends on how numpy happens to print the array.
has_missing_year = temp_df['built_year'].apply(lambda years: pd.isna(years).any())
has_several_values = temp_df['built_year'].apply(len) > 1
temp_df[has_missing_year & has_several_values]

Unnamed: 0,property_name,built_year
1280,depot heights,"[2000.0, nan, 2006.0, 1976.0]"
2604,tiong bahru estate,"[nan, 1967.0]"


In [48]:
# Distinct built_year values per (property name, property type). groupby does
# not modify its input, so no defensive copy of train_df_clean is needed.
temp_df = train_df_clean.groupby(['property_name','property_type'])['built_year'].unique().reset_index()

# Keep groups that have a missing built_year next to at least one other value.
# NaN is tested directly with pd.isna rather than by searching the array's
# string form, which depends on how numpy happens to print the array.
has_missing_year = temp_df['built_year'].apply(lambda years: pd.isna(years).any())
has_several_values = temp_df['built_year'].apply(len) > 1
temp_df[has_missing_year & has_several_values]

Unnamed: 0,property_name,property_type,built_year
2156,depot heights,hdb,"[2000.0, nan, 2006.0, 1976.0]"
2159,depot heights,hdb 4 rooms,"[2000.0, nan]"


#### Using property_type(property_type_clean), lat, lng to allocate built-year groups
* using raw property_type => can allocate more [nan, built-year] pairs
  ~~~
    temp_df = train_df_clean.copy()
    temp_df['lat_2d'] = temp_df['lat'].round(2)
    temp_df['lng_2d'] = temp_df['lng'].round(2)

    temp_df2 = temp_df.groupby(['property_type','lat_2d', 'lng_2d']).apply(lambda x: x['built_year'].unique()).reset_index().rename(columns={0: 'built_year'})
    temp_df2[(temp_df2['built_year'].apply(lambda x: 'nan' in str(x))) & (temp_df2['built_year'].apply(lambda x: len(x)==2))].shape
    > (56, 4)

    temp_df2 = temp_df.groupby(['property_type_clean','lat_2d', 'lng_2d']).apply(lambda x: x['built_year'].unique()).reset_index().rename(columns={0: 'built_year'})
    temp_df2[(temp_df2['built_year'].apply(lambda x: 'nan' in str(x))) & (temp_df2['built_year'].apply(lambda x: len(x)==2))].shape
    >(41, 4)
  ~~~

In [91]:
# Working copy for the built_year imputation experiment.
# NOTE(review): the next cell starts by creating df_ in exactly the same way,
# so this cell looks redundant.
df_ = train_df_clean.copy()

In [154]:
# Impute built_year by grouping listings from the finest to broader lat/lng
# resolution and applying DataPreprocessor.impute_built_year_unify to each
# group. The helper lives in src.data_preprocess; judging from the original
# note it fills groups that have only one built_year available — confirm there.
df_ = train_df_clean.copy()
finest_res = 6   # largest number of decimals used when rounding lat/lng
n_passes = 3     # the full fine-to-coarse sweep is repeated this many times

# Group-key sets tried in order, from the most to the least specific.
grouping_levels = (
    ['lat_res', 'lng_res', 'property_type', 'subzone', 'num_beds', 'num_baths'],
    ['lat_res', 'lng_res', 'property_type'],
    ['lat_res', 'lng_res', 'property_type_clean'],
    ['lat_res', 'lng_res'],
)

def unifier_imputation(df, levels=grouping_levels):
    """Apply impute_built_year_unify once per grouping level, in order.

    df must already carry the 'lat_res' and 'lng_res' columns. Missing group
    keys are kept as their own groups (dropna=False). Returns the frame
    produced by the last groupby/apply.
    """
    for keys in levels:
        df = df.groupby(keys, dropna=False).apply(DataPreprocessor.impute_built_year_unify)
    return df

for pass_no in range(n_passes):
    # range(finest_res, 1, -1) rounds to finest_res, ..., 2 decimals.
    for res in range(finest_res, 1, -1):
        df_['lat_res'] = df_['lat'].round(res)
        df_['lng_res'] = df_['lng'].round(res)
        df_ = unifier_imputation(df_)

# NOTE: the original cell also kept a disabled coarse-to-fine variant that
# iterated over range(2, finest_res + 1) instead.

print(df_['built_year'].isna().sum())

202


In [151]:
# checking more resolutions
for res in range(2, finest_res+1):
    df_['lat_res'] = df_['lat'].round(res)
    df_['lng_res'] = df_['lng'].round(res)
    temp_df1 = df_.groupby(['lat_res', 'lng_res'], dropna=False).apply(lambda x: x['built_year'].unique()).reset_index().rename(columns={0: 'built_year'})
    display(temp_df1[(temp_df1['built_year'].apply(lambda x: 'nan' in str(x))) & (temp_df1['built_year'].apply(lambda x: len(x) == 2))])

Unnamed: 0,lat_res,lng_res,built_year


Unnamed: 0,lat_res,lng_res,built_year
68,1.276,103.835,"[1970.0, nan]"
1619,1.342,103.784,"[1985.0, nan]"


Unnamed: 0,lat_res,lng_res,built_year
82,1.2761,103.8354,"[1970.0, nan]"
1914,1.3423,103.7845,"[1985.0, nan]"


Unnamed: 0,lat_res,lng_res,built_year
83,1.27614,103.83537,"[1970.0, nan]"
1917,1.34234,103.78449,"[1985.0, nan]"


Unnamed: 0,lat_res,lng_res,built_year
83,1.276143,103.835368,"[1970.0, nan]"
1917,1.34234,103.784489,"[1985.0, nan]"


#### increase resolution adding 'furnishing','subzone'

In [67]:
temp_df1 = temp_df.groupby(['property_type','lat_2d', 'lng_2d'])\
    .apply(DataPreprocessor.impute_built_year_1)
temp_df1 = temp_df1.groupby(['property_type','lat_2d', 'lng_2d','furnishing','subzone']).apply(lambda x: x['built_year'].unique()).reset_index().rename(columns={0: 'built_year'})
temp_df1[(temp_df1['built_year'].apply(lambda x: 'nan' in str(x))) & (temp_df1['built_year'].apply(lambda x: len(x)==2))].head()

Unnamed: 0,property_type,lat_2d,lng_2d,furnishing,subzone,built_year
166,Condo,1.28,103.79,partial,pasir panjang 2,"[2024.0, nan]"
320,Condo,1.31,103.84,partial,moulmein,"[2023.0, nan]"
322,Condo,1.31,103.84,unfurnished,moulmein,"[2023.0, nan]"
851,Hdb,1.4,103.89,unspecified,matilda,"[2017.0, nan]"
1463,Semi-Detached House,1.36,103.87,unspecified,serangoon garden,"[nan, 2005.0]"


In [68]:
# Two imputation passes (coarse groups, then groups refined by furnishing and
# subzone). Expects temp_df to carry the lat_2d / lng_2d columns.
temp_df1 = temp_df.groupby(['property_type','lat_2d', 'lng_2d'])\
    .apply(DataPreprocessor.impute_built_year_1)\
    .groupby(['property_type','lat_2d', 'lng_2d','furnishing','subzone'])\
    .apply(DataPreprocessor.impute_built_year_1)

# List the refined groups that still hold exactly one known built_year plus a
# missing one.
temp_df1 = temp_df1.groupby(['property_type','lat_2d', 'lng_2d','furnishing','subzone'])['built_year'].unique().reset_index()
# NaN is tested directly with pd.isna rather than by searching the array's
# string form, which depends on how numpy happens to print the array.
has_missing_year = temp_df1['built_year'].apply(lambda years: pd.isna(years).any())
is_pair = temp_df1['built_year'].apply(len) == 2
temp_df1[has_missing_year & is_pair].head()

Unnamed: 0,property_type,lat_2d,lng_2d,furnishing,subzone,built_year


In [74]:
# temp_df1[(temp_df1['built_year'].apply(lambda x: 'nan' in str(x))) & (temp_df1['built_year'].apply(lambda x: len(x) > 2))]
# Two imputation passes (coarse groups, then groups refined by furnishing and
# subzone). Expects temp_df to carry the lat_2d / lng_2d columns.
temp_df1 = temp_df.groupby(['property_type','lat_2d', 'lng_2d'])\
    .apply(DataPreprocessor.impute_built_year_1)\
    .groupby(['property_type','lat_2d', 'lng_2d','furnishing','subzone'])\
    .apply(DataPreprocessor.impute_built_year_1)

# Re-group by property type and the unrounded coordinates, and list the
# locations that still hold exactly one known built_year plus a missing one.
temp_df1 = temp_df1.groupby(['property_type','lat', 'lng'])['built_year'].unique().reset_index()
# NaN is tested directly with pd.isna rather than by searching the array's
# string form, which depends on how numpy happens to print the array.
has_missing_year = temp_df1['built_year'].apply(lambda years: pd.isna(years).any())
is_pair = temp_df1['built_year'].apply(len) == 2
temp_df1[has_missing_year & is_pair]

Unnamed: 0,property_type,lat,lng,built_year
332,Condo,1.312942,103.841552,"[2023.0, nan]"
2095,bungalow,1.360245,103.865111,"[nan, 2011.0]"
2499,condo,1.312942,103.841552,"[nan, 2023.0]"
2818,condo,1.357168,103.878167,"[nan, 2018.0]"
5541,hdb 4 rooms,1.27533,103.807211,"[nan, 2017.0]"
6929,semi-detached house,1.329266,103.810931,"[nan, 1983.0]"
6944,semi-detached house,1.332302,103.806979,"[nan, 2011.0]"
6948,semi-detached house,1.33421,103.806576,"[nan, 2011.0]"
6968,semi-detached house,1.344384,103.773833,"[nan, 2005.0]"
7187,terraced house,1.387049,103.872999,"[2013.0, nan]"


In [56]:
# Count missing built_year values before and after running the packaged
# DataPreprocessor.preprocess_built_year on a fresh copy of the training data.
temp_df = train_df_clean.copy()
n_missing_before = temp_df['built_year'].isna().sum()
print(n_missing_before)

temp_df = DataPreprocessor.preprocess_built_year(temp_df)
n_missing_after = temp_df['built_year'].isna().sum()
print(n_missing_after)

789
577


* listing_id - unique identifier of the property listing
* title - title of the property listing (e.g., "2 bed condo for sale in 35 gilstead")
* address - address of the property (e.g., "124 punggol walk", "11 sengkang east avenue")
* property_name - name of the property (e.g., "redhill rise", "klimt cairnhill")
* property_type - type of the property (e.g., "condo", "hdb 2 rooms", "landed")
* tenure - tenure of the property (e.g., "freehold", "99-year leasehold")
* built_year - year when the property was built (e.g., 2014, 2021)
* num_beds - number of bedrooms (e.g., 1, 2, 3)
* num_baths - number of bathrooms (e.g., 1, 2, 3)
* size_sqft - floor area in square feet (e.g., 807, 657, 1628)
* floor_level - information about the floor level of the property (e.g., "high", "low")
* furnishing - information whether the property is furnished (e.g., "fully", "partial")
* available_unit_types - list of all types of units available in the property complex (e.g., "studio, 3, 4, 5 br")
* total_num_units - total number of units in the property complex (e.g., 115, 200)
* property_details_url - URL linking to more information about the property complex
* lat - latitude of property (e.g., 1.328805)
* lng - longitude of property (e.g., 103.74502)
* elevation - elevation of the property in meters (e.g., 10)
* subzone - subzone of the block containing the flat (e.g., "blangah rise", "marymount")
* planning_area - planning area of block containing the flat (e.g., "woodlands", "bukit merah")
* price - sales price in SGD