# Get Address

Get local street address from condo website.

We will link the condo name -> address -> transaction prices to generate a heat map. 


In [2]:
import string
import time
from urllib.parse import quote

import requests
from bs4 import BeautifulSoup

Let's see what happens when we request one condo page's contents

In [4]:
# Fetch the raw HTML of one condo page and preview its first 200 characters.
r = requests.get("https://www.99.co/singapore/condos-apartments/astrid-meadows")
r.text[:200]

'<!doctype html>\n<html lang="en-us" data-reactroot="" data-reactid="1" data-react-checksum="1968754706"><head data-reactid="2"><title data-react-helmet="true" data-reactid="3">Astrid Meadows Condo - Pr'

Use BeautifulSoup to parse the page contents

In [5]:
# Parse the fetched HTML with the html5lib parser and count the <h5> headings.
soup = BeautifulSoup(r.text, 'html5lib')
page_headings = soup.find_all('h5')
len(page_headings)

6

In [6]:
# Show every <h1> element, first as markup and then as its plain text.
title_headings = soup.find_all('h1')
for h in title_headings:
    print(h)
    print(h.text)

<h1 class="Heading__heading__2ncUp" data-reactid="284" itemprop="name">Astrid Meadows</h1>
Astrid Meadows


We can get the street address of the property from the page html

In [7]:
# Take the text of the second <h5> heading; on this page it holds the
# district, the street address and the property type.
address_str = page_headings[1].text
address_str

'District 10 - 38 Coronation Road West - Condo\xa0View on map'

In [8]:
# The heading text is '-'-separated; the street address is the second field.
address_parts = address_str.split('-')
street_address = address_parts[1].strip()
street_address

'38 Coronation Road West'

Now that we have the local street address, we can use the geocode.xyz service to get the condo's geographic coordinates.


In [9]:
# Percent-encode the address before placing it in the URL path so characters
# such as '#', '?', '&' or '/' cannot be mistaken for URL structure, then ask
# geocode.xyz for an XML reply restricted to Singapore (region=SG).
geocode_request = "https://geocode.xyz/%s?geoit=xml&region=SG" % quote(street_address, safe='')
r = requests.get(geocode_request)
r.text


'<?xml version="1.0" encoding="UTF-8" ?>\n<geodata>\n        <latt>1.31930</latt>\n        <longt>103.79462</longt>\n<elevation></elevation>\n\n<standard><addresst>CORONATION ROAD WEST</addresst><region>SG</region><postal>269257</postal><stnumber>38</stnumber><prov>SG</prov><city>Singapore</city><countryname>Singapore</countryname><confidence>1.00</confidence></standard>\n<alt>\n\n</alt>\n</geodata>\n'

In [10]:
# Parse the geocoder's XML reply and pull the coordinates out of <geodata>.
# NOTE(review): this rebinds `soup`, which previously held the parsed condo page.
soup = BeautifulSoup(r.text, 'html5lib')
props = soup.find_all('geodata')
latt = props[0].find('latt')
longt = props[0].find('longt')
# <latt>/<longt> carry the coordinates as text; convert them to floats.
longitude = float(longt.text)
latitude = float(latt.text)

In [11]:
# Display the parsed longitude.
longitude

103.79462

In [12]:
# Display the parsed latitude.
latitude

1.3193

## Putting It All Together

Now that we know the small pieces work, we can productionise them as small functions and combine those into a more powerful tool.

* get_geocode_from_address
  * input: address
  * output: latitude, longitude

Function to return latitude and longitude given an address

In [12]:
def get_geocode_from_address(address, timeout=30):
    """Look up the latitude and longitude of a Singapore street address.

    Sends the address to the geocode.xyz XML endpoint (region fixed to SG)
    and reads the <latt> and <longt> elements of the reply.

    Args:
        address: Street address string, e.g. '38 Coronation Road West'.
        timeout: Seconds to wait for the geocoding service before giving up.

    Returns:
        A (latitude, longitude) tuple of floats.

    Raises:
        ValueError: If the reply contains no <latt>/<longt> elements or their
            text is not a number.
        requests.RequestException: If the HTTP request itself fails.
    """
    # Percent-encode the address so characters such as '#', '?', '&' or '/'
    # cannot be mistaken for URL structure.
    geocode_request = "https://geocode.xyz/%s?geoit=xml&region=SG" % quote(address, safe='')
    r = requests.get(geocode_request, timeout=timeout)

    soup = BeautifulSoup(r.text, 'html5lib')
    geodata = soup.find('geodata')
    latt = geodata.find('latt') if geodata is not None else None
    longt = geodata.find('longt') if geodata is not None else None
    if latt is None or longt is None:
        # Fail with a message that names the address instead of an opaque
        # IndexError/AttributeError from the parsing steps.
        raise ValueError("no coordinates in geocode.xyz reply for address %r" % (address,))
    return (float(latt.text), float(longt.text))


* get_street_address_from_condo_page
  * input: parsed condo page (the soup returned by get_condo_page_soup_contents_from_condo_page)
  * output: (condo name, street address)

Functions to fetch a condo page given its link, and to return the condo name and street address found on that page

In [13]:
def get_condo_page_soup_contents_from_condo_page(condo_link, num_retries=3, retry_delay=5, timeout=30):
    """Fetch a condo page and return it parsed as a BeautifulSoup object.

    Args:
        condo_link: URL of the condo page.
        num_retries: Maximum number of fetch attempts.
        retry_delay: Seconds to wait between two attempts.
        timeout: Seconds to wait for each HTTP response.

    Returns:
        The page parsed with the html5lib parser, or None when no attempt
        produced a successful, non-empty response.
    """
    for attempt in range(num_retries):
        if attempt:
            # Pause only between attempts, never after the last one.
            time.sleep(retry_delay)
        try:
            r = requests.get(condo_link, timeout=timeout)
        except requests.RequestException:
            # Network-level failure: treat it like an empty reply and retry.
            continue
        # Require a successful status so an error page is not parsed as a condo page.
        if r.ok and len(r.text) > 1:
            return BeautifulSoup(r.text, 'html5lib')
    return None
    

In [33]:
def get_street_address_from_condo_page(condo_soup):
    """Extract the condo name and street address from a parsed condo page.

    The name is the text of the first <h1>. The address is the second field
    of the second <h5>, whose text looks like
    'District 10 - 38 Coronation Road West - Condo\\xa0View on map'.

    Args:
        condo_soup: Parsed condo page, or None if the page could not be fetched.

    Returns:
        A (condo_name, street_address) tuple of strings. Both are '' when the
        page is missing or has no <h1>; only the address is '' when the name
        is present but no address could be located.
    """
    if condo_soup is None:
        return ('', '')

    title_heading = condo_soup.find_all('h1')
    if len(title_heading) == 0:
        return ('', '')
    condo_name = title_heading[0].text

    page_headings = condo_soup.find_all('h5')
    if len(page_headings) < 2:
        return (condo_name, '')
    address_str = page_headings[1].text

    # Prefer the spaced ' - ' separator so a hyphen inside the address
    # (e.g. '12-14 Foo Road') is kept; fall back to a bare '-' otherwise.
    address_parts = address_str.split(' - ')
    if len(address_parts) < 2:
        address_parts = address_str.split('-')
    if len(address_parts) < 2:
        return (condo_name, '')
    return (condo_name, address_parts[1].strip())

# Smoke test against the live site: fetch one known condo page and check
# the name and address parsed from it.
condo_link = 'https://www.99.co/singapore/condos-apartments/astrid-meadows'
condo_soup = get_condo_page_soup_contents_from_condo_page(condo_link)
(condo_name, address) = get_street_address_from_condo_page(condo_soup)
assert(condo_name == 'Astrid Meadows')
assert(address == '38 Coronation Road West')

In [45]:
def get_avg_sale_price_from_condo_page(condo_soup):
    """Print how many transaction-stats containers the parsed page holds.

    Only the count of matching <div> elements is printed; nothing is
    returned. The string literal below keeps a sample of the markup the
    lookup targets.
    """
    stats_css_class = "TransactionsList__TransactionsStats__28YGE"
    stats_containers = condo_soup.find_all('div', class_=stats_css_class)
    container_count = len(stats_containers)
    print(container_count)

    # Sample markup kept for reference (a bare string expression with no effect):
    '''
    <div class="TransactionsList__KeyStats_Value__3Pvxw">S$1,708.0</div>
    
    <div class="TransactionsList__TransactionsStats__28YGE">
    <div class="TransactionsList__KeyStatsRent__1iwwN">
    <div class="TransactionsList__KeyStats_Header__1qADp">Avg.price (psf)</div>
    <div class="TransactionsList__KeyStats_Value__3Pvxw">S$1,708.0</div>
    <div class="TransactionsList__KeyStats_Timeframe__1R0Ty">last 6 months</div></div>
    <div class="TransactionsList__KeyStatsSale__37DhJ">
    <div class="TransactionsList__KeyStats_Header__1qADp">Avg.price (psf)</div>
    <div class="TransactionsList__KeyStats_Value__3Pvxw">S$1,690.0</div>
    <div class="TransactionsList__KeyStats_Timeframe__1R0Ty">last 1 year</div></div></div>
    '''
    
# Run the function on the condo page soup parsed earlier.
get_avg_sale_price_from_condo_page(condo_soup)
    

0


--

Let's read in the condo page links, then get their addresses and recent transaction data.

Should save condo data to sqlite, but let's keep it simple and just save each transaction one per row. 

CONDO_ID, NAME, ADDRESS, LONG, LAT, SALES ID, DATE, NUM_BRMS, PRICE, SQFT, PSF


* get_condo_page_links_from_saved_file
  * input: file of condo page links
  * output: array of condo page links

Function to read the list of condo webpage links and return the list as an array

In [16]:
def get_condo_page_links_from_saved_file(file_of_condo_links):
    """Read condo page links from a text file holding one link per line.

    Args:
        file_of_condo_links: Path of the text file to read.

    Returns:
        A list of link strings with surrounding whitespace (including the
        trailing newline) removed; blank lines are skipped.
    """
    with open(file_of_condo_links, 'r') as infile:
        stripped_lines = (line.strip() for line in infile)
        # Drop empty entries so callers never receive a blank "link".
        return [link for link in stripped_lines if link]

# test and assert: the saved file is expected to list more than 2300 condo links
condo_links = get_condo_page_links_from_saved_file('all_links.txt')
assert(len(condo_links)>2300)

Function to return the unique name key of a condo given its page link url

In [17]:
def get_condo_keyid_from_weburl(condo_link):
    """Return the condo key id, i.e. the last path segment of its page URL.

    Surrounding whitespace (such as a newline left over from reading the link
    from a file) and trailing slashes are ignored, so
    'https://www.99.co/singapore/condos-apartments/astrid-meadows/' and the
    same URL without the slash both give 'astrid-meadows'.

    Args:
        condo_link: URL of the condo page.

    Returns:
        The text after the last '/' of the cleaned URL, or the whole cleaned
        string when it contains no '/'.
    """
    trimmed_link = condo_link.strip().rstrip('/')
    return trimmed_link.rsplit('/', 1)[-1]

# Check the key extraction against a known condo URL.
condo_key = get_condo_keyid_from_weburl('https://www.99.co/singapore/condos-apartments/astrid-meadows')
assert(condo_key == 'astrid-meadows')

Let's try out getting the names and addresses for just five condos

In [19]:
# Number of condos to process in this trial run.
NUM_CONDOS_TO_TRY = 5
# Seconds to pause between condos, to go easy on the remote services.
REQUEST_PAUSE_SECONDS = 5

details = []
condo_links = get_condo_page_links_from_saved_file('all_links.txt')
# Slicing copes with a links file that holds fewer entries than requested.
for position, raw_link in enumerate(condo_links[:NUM_CONDOS_TO_TRY]):
    if position:
        # Pause between condos only; no wait is needed after the last one.
        time.sleep(REQUEST_PAUSE_SECONDS)

    condo_link = raw_link.strip()
    condo_key = get_condo_keyid_from_weburl(condo_link)

    condo_soup = get_condo_page_soup_contents_from_condo_page(condo_link)
    if condo_soup is None:
        # The fetch helper returns None once its retries are exhausted.
        print("skipping %s: page could not be fetched" % condo_key)
        continue

    (condo_name, address) = get_street_address_from_condo_page(condo_soup)
    if not address:
        print("skipping %s: no street address found" % condo_key)
        continue

    try:
        (latitude, longitude) = get_geocode_from_address(address)
    except (IndexError, AttributeError, ValueError) as err:
        # The geocoder reply had no usable coordinates; keep the other condos.
        print("skipping %s: could not geocode %r (%s)" % (condo_key, address, err))
        continue

    report_line = "%s,%s,%s,%s,%s" % (latitude, longitude, condo_key, condo_name, address)
    print(report_line)
    details.append(report_line)

# One report line per successfully geocoded condo.
with open("condo_report.txt", 'w') as outfile:
    outfile.writelines(det + '\n' for det in details)
    

1.3193,103.79462,astrid-meadows,Astrid Meadows,38 Coronation Road West
1.33058,103.86763,ao-jiang-apartments,Ao Jiang Apartments,15 Meyappa Chettiar Road
1.35001,103.88329,affluence-court,Affluence Court,8 Ah Soo Garden
1.32619,103.85107,ampas-apartment,Ampas Apartment,5 Jalan Ampas
1.33987,103.75607,acacia-lodge,Acacia Lodge,530 Bukit Batok Street 23
