In [95]:
import html
import os
import re
import sys
from getpass import getpass
from time import sleep

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Start up

In [2]:
# Chromedriver binaries bundled with the project, keyed by sys.platform.
chromedriver_paths = {
    'darwin': '../webdriver/chromedriver_mac',
    'win32': '../webdriver/chromedriver_win.exe',
}

# On any other platform (e.g. Linux) there is no bundled binary. Fall back to
# Selenium's default chromedriver lookup so that `driver` is always defined
# instead of failing later with a NameError.
chromedriver_path = chromedriver_paths.get(sys.platform)
if chromedriver_path is not None:
    driver = webdriver.Chrome(chromedriver_path)
else:
    driver = webdriver.Chrome()

In [3]:
# Load the Bungol landing page in the Selenium-controlled browser.
driver.get('https://bungol.ca')

In [4]:
# Open the login form.
login = driver.find_element_by_link_text('Login/Signup')
login.click()

username = driver.find_element_by_name('username')
password = driver.find_element_by_name('password')

# Credentials must never be stored in the notebook. Read them from the
# environment (BUNGOL_USERNAME / BUNGOL_PASSWORD) and fall back to an
# interactive prompt; getpass keeps the password out of the cell output.
# NOTE: the password that was previously committed in this cell should be
# considered compromised and rotated.
bungol_username = os.environ.get('BUNGOL_USERNAME') or input('Bungol username: ')
bungol_password = os.environ.get('BUNGOL_PASSWORD') or getpass('Bungol password: ')

username.send_keys(bungol_username)
password.send_keys(bungol_password)
# Pressing RETURN in the password field submits the form.
password.send_keys(Keys.RETURN)

# Gather properties

Choose an area and set the filter conditions in Bungol before running the cells below.

In [5]:
# Collect the listing links shown in the popup of every map marker.
urls = []

# Click the sidebar toggle unless its class already says 'leftSidebarClosed'
# (presumably so the sidebar does not cover any markers — TODO confirm).
sidebar_toggle = driver.find_element_by_id('leftSidebarClose')
if 'leftSidebarClosed' not in sidebar_toggle.get_attribute('class'):
    sidebar_toggle.click()

for marker in driver.find_elements_by_css_selector('div.leaflet-marker-icon'):
    # Hover over the marker, then pause one second before reading the popup.
    ActionChains(driver).move_to_element(marker).perform()
    sleep(1)

    popup = driver.find_element_by_id('popup')
    urls.extend(
        anchor.get_attribute('href')
        for anchor in popup.find_elements_by_tag_name('a')
    )

    # Empty the popup so links from this marker are not read again for the next one.
    driver.execute_script('arguments[0].innerHTML = ""', popup)

In [6]:
# Sanity check: number of listing URLs collected from the map popups.
len(urls)

143

In [112]:
# Visit every collected listing URL and scrape its details into one pandas
# Series per listing; the Series are accumulated in `details`.
details = []

# Variables whose format is <div id="#...">value</div>
plain_fields = {
    'Address': '#listingAddress',
    'PropertyType': '#listingPropertyType',
    'AvgHouseholdIncome': '#listingCommunityAverageIncome',
    'AvgHouseholdSize': '#listingCommunityHouseholdSize'
}

# Variables whose format is <div id="#...">label: value</div>
labelled_fields = {
    'ListPrice': '#listingListPrice',
    'ListingDate': '#listingContractDate',
    'SoldDate': '#listingEndDate',
    'SquareFootage': '#listingSummarySqFt',
    'MaintenanceFee': '#listingSummaryMaintenanceFees',
    'PropertyTax': '#listingSummaryTaxes',
}

# Cost counted for each expense when the condo fee does NOT cover it.
# NOTE(review): presumably monthly dollar estimates, since their sum is later
# added to MaintenanceFee to form MonthlyExpenses — confirm the figures.
uncovered_costs = {
    'Water': 38.47,
    'Heat': 104.9,
    'Insurance': 63.72,
    'CAC': 0,             # included in hydro
    'Hydro': 98.46,
    'Parking': 200
}

for i, url in enumerate(urls):
    # Progress report every 10 listings.
    if i % 10 == 0:
        print(f'{i+1} / {len(urls)}')

    driver.get(url)
    driver.find_element_by_link_text('Community').click()
    # Block (up to 5 s, polling every second) until the average-income element
    # contains a ',' — presumably the sign that the community data has loaded.
    wait = WebDriverWait(driver, 5, poll_frequency=1)
    wait.until(EC.text_to_be_present_in_element((By.ID, 'listingCommunityAverageIncome'), ','))

    # Name the parser explicitly: without it BeautifulSoup picks whichever
    # parser happens to be installed and emits a warning.
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    # An explicit dtype avoids the empty-Series default-dtype warning.
    listing = pd.Series(dtype=object)

    for variable, selector in plain_fields.items():
        listing[variable] = soup.select(selector)[0].get_text()

    for variable, selector in labelled_fields.items():
        parts = soup.select(selector)[0].get_text().split(':')
        # Anything that is not exactly "label: value" is recorded as missing.
        listing[variable] = parts[1] if len(parts) == 2 else None

    # Some special cases
    # Status text is either "<status> - <price>" or just "<status>".
    status_parts = soup.select('#listingStatus')[0].get_text().split(' - ')
    listing['Status'], listing['Price'] = (
        status_parts if len(status_parts) == 2 else (status_parts[0], None)
    )

    # Bed/bath text is expected to be "<bedrooms>|<bathrooms>|<parking spots>".
    bed_bath_parts = soup.select('#listingBedBath')[0].get_text().split('|')
    listing['Bedrooms'], listing['Bathrooms'], listing['ParkingSpots'] = (
        bed_bath_parts if len(bed_bath_parts) == 3 else (None, None, None)
    )

    # Trim whitespace on every scraped string (None becomes NaN here).
    listing = listing.str.strip()

    # Condo fee coverage, if applicable.
    # find_element_by_id raises NoSuchElementException when the element is
    # absent and never returns None, so presence is tested with the plural
    # finder, which returns an empty (falsy) list instead.
    if driver.find_elements_by_id('listingCondoFeesTable'):
        # Fresh copy per listing: covered items are zeroed out below.
        expenses = pd.Series(uncovered_costs)

        for item in ['Water', 'Heat', 'Insurance', 'CAC', 'Hydro', 'Parking', 'Taxes']:
            fee_cell = driver.find_element_by_id(f'listingCondoFeesTable{item}')
            # A cell carrying the 'bg-success' class marks the item as covered.
            isCovered = 'bg-success' in fee_cell.get_attribute('class')

            if item == 'Taxes':
                listing['IsTaxesIncluded'] = isCovered
            elif isCovered:
                expenses[item] = 0

        listing['UncoveredExpenses'] = expenses.astype('float').sum()

    listing['URL'] = url
    details.append(listing)

1 / 143
11 / 143
21 / 143
31 / 143
41 / 143
51 / 143
61 / 143
71 / 143
81 / 143
91 / 143
101 / 143
111 / 143
121 / 143
131 / 143
141 / 143


In [113]:
# Assemble the scraped Series into a table: one row per listing.
df = pd.DataFrame(details)

In [114]:
# Split addresses of the form "<unit> - <street address>" into two columns.
# Rows that do not match the pattern keep their original address and get an
# empty unit.
# NOTE(review): not idempotent — once the unit has been stripped, re-running
# this cell will most likely reset Unit to ''.
unit_and_street = (
    df['Address']
    .str.extract(r'(.\d+) - (\d.+)', expand=True)
    .rename(columns={0: 'Unit', 1: 'Address'})
)

df['Unit'] = unit_and_street['Unit'].fillna('')
df['Address'] = unit_and_street['Address'].fillna(df['Address'])

In [115]:
# Convert the scraped date strings to datetimes. The conversion is applied
# column by column (the default axis): applying it row by row calls
# pd.to_datetime once per listing and is needlessly slow.
date_cols = ['ListingDate', 'SoldDate']
df[date_cols] = df[date_cols].apply(pd.to_datetime)

# Dividing by a one-day Timedelta expresses the interval as a float number of days.
df['DaysOnMarket'] = (df['SoldDate'] - df['ListingDate']) / pd.Timedelta(days=1)

In [116]:
# Strip everything except digits and the decimal point (currency symbols,
# thousands separators, ...) and convert the result to numbers.
# regex=True is required: since pandas 2.0 str.replace treats the pattern as a
# literal string by default, which would leave the values untouched and make
# pd.to_numeric fail.
# NOTE: expects the columns to still hold the scraped strings; re-running the
# cell after conversion fails because numeric columns have no .str accessor.
numeric_cols = ['ListPrice', 'Price', 'MaintenanceFee', 'PropertyTax', 'AvgHouseholdIncome', 'AvgHouseholdSize']
df[numeric_cols] = (
    df[numeric_cols]
    .apply(lambda col: col.str.replace(r'[^0-9\.]', '', regex=True))
    .apply(pd.to_numeric)
)

In [117]:
# Per-person income of the community: household income divided by household size.
df['AvgPersonalIncome'] = df['AvgHouseholdIncome'].div(df['AvgHouseholdSize'])

The tax reported on the listing looks lower than it really is. We therefore take the *greater* of the reported property tax and an estimate computed from the list price: 0.51% of the list price (Toronto's property tax rate), divided by 12.

In [118]:
# Use whichever is larger: the tax scraped from the listing, or an estimate of
# 0.51% of the list price divided by 12. The row-wise max skips missing
# values, so a listing with only one of the two figures keeps that figure.
reported_tax = df['PropertyTax'].rename('ReportedTax')
calculated_tax = (df['ListPrice'] * 0.0051 / 12).rename('CalculatedTax')
df['PropertyTax'] = pd.concat([reported_tax, calculated_tax], axis=1).max(axis=1)

In [119]:
# Base expenses: the maintenance fee plus the expenses the fee does not cover.
# A missing value in either column leaves the total missing.
df['MonthlyExpenses'] = df['MaintenanceFee'].add(df['UncoveredExpenses'])

In [120]:
# Add the property tax for listings whose fee table says taxes are NOT
# included. Rows where IsTaxesIncluded is missing are left untouched.
# NOTE: not idempotent — each re-run of this cell adds the tax again.
taxes_not_included = df['IsTaxesIncluded'].eq(False)
df.loc[taxes_not_included, 'MonthlyExpenses'] = (
    df.loc[taxes_not_included, 'MonthlyExpenses']
    + df.loc[taxes_not_included, 'PropertyTax']
)

In [121]:
def parse_bedrooms(text):
    """Convert a scraped bedroom count to a number without using eval.

    The value is presumably either a plain count ("2") or a sum such as
    "2 + 1" (the original code evaluated it as a Python expression — TODO
    confirm the exact formats Bungol uses).

    Args:
        text: Scraped bedroom string, an already-parsed number, or a missing
            value (None / NaN).

    Returns:
        The sum of the '+'-separated integer terms, the input unchanged if it
        is already numeric, or NaN if the value is missing.

    Raises:
        ValueError: If the text is not a '+'-separated list of integers.
    """
    if pd.isna(text):
        return np.nan
    if isinstance(text, (int, float)):
        # Already parsed (e.g. the cell was re-run); nothing to do.
        return text
    try:
        return sum(int(term) for term in str(text).split('+'))
    except ValueError:
        raise ValueError(f'Unexpected bedroom count: {text!r}') from None


# eval() must not be run on text scraped from a web page: it would execute
# arbitrary code, and it also crashes on missing values.
df['Bedrooms'] = df['Bedrooms'].apply(parse_bedrooms)

cols = ['Bathrooms', 'ParkingSpots']
df[cols] = df[cols].apply(pd.to_numeric)

In [125]:
def make_hyperlink(cell):
    """Render a cell value as an HTML link labelled "Link" that opens in a new tab.

    The value is HTML-escaped before being placed in the href attribute: the
    URLs are scraped from a web page, and an unescaped quote or ampersand would
    break (or inject into) the generated markup.

    Args:
        cell: The URL to link to; converted to str before escaping.

    Returns:
        An ``<a>`` element as a string.
    """
    return f'<a target="_blank" href="{html.escape(str(cell), quote=True)}">Link</a>'


# Summary table: selected columns, cheapest monthly expenses first, with
# human-friendly number formats and clickable listing links.
display_cols = [
    'PropertyType', 'Unit', 'Address', 'URL', 'Status', 'SquareFootage',
    'ListPrice', 'MonthlyExpenses', 'Bedrooms', 'Bathrooms', 'ParkingSpots',
    'AvgPersonalIncome',
]

(
    df[display_cols]
    .sort_values('MonthlyExpenses')
    .reset_index(drop=True)
    .style.format({
        'ListPrice': '{:,.0f}',
        'PropertyTax': '{:,.2f}',
        'MonthlyExpenses': '{:,.2f}',
        'AvgPersonalIncome': '{:,.0f}',
        'URL': make_hyperlink
    })
)

Unnamed: 0,PropertyType,Unit,Address,URL,Status,SquareFootage,ListPrice,MonthlyExpenses,Bedrooms,Bathrooms,ParkingSpots,AvgPersonalIncome
0,Condo Apartment,#511,"2800 Keele St, North York",Link,For Sale,600-699,464900,466.93,2,1,1,25490
1,Condo Apartment,#619,"2800 Keele St, North York",Link,For Sale,600-699,539000,566.93,2,1,1,25490
2,Condo Apartment,#301,"2800 Keele St, North York",Link,For Sale,900-999,575000,566.93,2,2,1,25490
3,Condo Apartment,#715,"872 Sheppard Ave W, North York",Link,For Sale,800-899,549000,595.83,3,2,1,36643
4,Townhouse (Condo),#117,"19 Coneflower Cres, North York",Link,For Sale,700-799,495000,679.2,3,1,1,32424
5,Condo Apartment,#517,"2800 Keele St, North York",Link,For Sale,600-699,468000,681.83,2,1,1,25490
6,Condo Apartment,#1211,"233 Beecroft Rd, North York",Link,For Sale,600-699,483999,702.17,2,1,1,33848
7,Townhouse (Condo),#356,"3 Everson Dr, North York",Link,For Sale,800-899,509000,744.8,2,1,1,48726
8,Condo Apartment,#108,"26 Olive Ave, North York",Link,Offer Pending,600-699,480000,748.47,2,1,1,30682
9,Townhouse (Condo),b29,"108 Finch Ave W, North York",Link,For Sale,1000-1199,599000,750.83,2,2,1,26816


In [123]:
# .query('(ListPrice > 100000) & (Status == "For Sale") & (MonthlyExpenses < 800)') \