In [None]:
# standard library
import time

# data handling
import numpy as np
import pandas as pd

# selenium: browser automation
from selenium import webdriver
# exception raised when an element lookup finds nothing
from selenium.common.exceptions import NoSuchElementException
# service object that tells Selenium where the chromedriver binary lives
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
# explicit waits
from selenium.webdriver.support import expected_conditions as EC
# helper for <select> dropdowns
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait

# driver variant intended to cope with anti-bot services (captcha)
from undetected_chromedriver import Chrome

In [None]:
# Start Chrome through a local chromedriver binary and open the realtor.ca home page.
url = 'https://www.realtor.ca/'
# location of the chromedriver binary, relative to the kernel's working directory
path = 'chromedriver_mac64/chromedriver'
# Selenium 4 expects the driver location to be wrapped in a Service object;
# passing the path positionally was deprecated in Selenium 4 and later removed.
driver = webdriver.Chrome(service=Service(executable_path=path))
driver.get(url)
driver.maximize_window()

In [None]:
# Get the link to the Toronto real estate listings.
# The link may not be ready the instant the home page is requested, so wait
# (up to 10 seconds) until it is clickable instead of looking it up at once.
toronto_link = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable(
        (By.XPATH, "//a[@href='/on/toronto/real-estate']")))
# click to go to the Toronto listings page
toronto_link.click()

In [None]:
# Get the filter button of the listings page.
# The page is still loading right after the previous click, so wait (up to
# 10 seconds) until the button is clickable rather than failing immediately.
filter_button = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable(
        (By.XPATH, "//div[@id='mapSearchMoreBtn']")))
# open the filter panel
filter_button.click()

In [None]:
# Restrict the search to houses and run it.
# The filter panel is opened by the previous cell's click, so wait (up to
# 10 seconds) for the building-type <select> to be present in the DOM.
dropdown = Select(WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.ID, 'ddlBuildingType'))))
# select house
dropdown.select_by_visible_text('House')
# get the search button once it can be clicked
filter_search = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable(
        (By.XPATH, "//div[@id='mapMoreFiltersSearchBtn']")))
# apply the filter
filter_search.click()


In [None]:
# Work out how many result pages there are.
def _pagination_selects(drv):
    """Return the page-selector <select> elements once at least two of them
    exist, otherwise False so that WebDriverWait keeps polling."""
    found = drv.find_elements(
        By.XPATH, '//select[contains(@class, "ResultsPages")]')
    return found if len(found) > 1 else False


# get pagination; waiting (up to 10 seconds) avoids an IndexError on
# pagination[1] when the filtered results have not rendered yet
pagination = WebDriverWait(driver, 10).until(_pagination_selects)
# get the pages for the site from the second selector
# NOTE(review): index 1 is kept from the original code; presumably the page
# has two selectors and the second one is the one in use. Confirm on the site.
pages = pagination[1].find_elements(By.TAG_NAME, 'option')
# get the numerical value of the last page
last_page = int(pages[-1].text)

In [None]:
# Remember the handle of the results tab so that control can be handed back
# to it after a listing tab has been closed.
parent_window = driver.current_window_handle

# One independent, initially empty list per scraped field. The generator
# creates a fresh list for every name, so no two names share a list.
(price, address, mls_number,
 above_grade_bedrooms, below_grade_bedrooms, bathrooms,
 community_name, land_size, annual_prop_tax) = ([] for _ in range(9))

In [None]:
# Scrape every results page: open each listing in its own tab, read the
# detail fields, close the tab, then move on to the next results page.

# XPath of each detail field, paired with the list that collects its values.
# Every list receives exactly one entry per scraped listing, so the lists
# stay aligned row by row.
field_targets = (
    (price, '//div[contains(@ id, "listingPriceValue")]'),
    (address, '//h1[contains(@id, "listingAddress")]'),
    (mls_number, '//span[contains(@id, "MLNumberVal")]'),
    (above_grade_bedrooms, '//div[contains(@id, "AboveGrade")]'),
    (below_grade_bedrooms, '//div[contains(@id, "BelowGrade")]'),
    (bathrooms, '//div[@id = "propertyDetailsSectionVal_Total"]'),
    (community_name, '//div[contains(@id,"CommunityName")]'),
    (land_size, '//div[contains(@id, "LandSize")]'),
    (annual_prop_tax, '//div[contains(@id, "AnnualPropertyTaxes")]'),
)


def _text_or_nan(xpath):
    """Return the text of the first element matching `xpath` in the window
    the driver currently controls, or NaN when no such element exists."""
    try:
        return driver.find_element(By.XPATH, xpath).text
    except NoSuchElementException:
        # np.nan is the spelling that exists in every NumPy release;
        # the np.NaN alias was removed in NumPy 2.0
        return np.nan


# define start page
current_page = 1

# keep going until the last results page has been processed
while current_page <= last_page:

    # wait for 3 seconds so the results page can render
    time.sleep(3)

    # get container of real estate listings
    container = driver.find_element(By.ID, 'listInnerCon')
    # get each listing (direct <div> children of the container)
    listings = container.find_elements(By.XPATH, "./div")

    # iterate through each listing on the page
    for listing in listings:

        # find the listing button; a child <div> without a details link is
        # not a listing card, so skip it instead of aborting the whole run
        try:
            listing_button = listing.find_element(
                By.XPATH, './/a[contains(@class,"listingDetailsLink")]')
        except NoSuchElementException:
            continue
        # click listing button to open a new tab
        listing_button.click()

        # wait 10 seconds for the new tab to open
        time.sleep(10)

        # handles of the original tab and of the newly opened one
        handles = driver.window_handles
        for handle in handles:
            # only the tab that is not the results tab holds listing details
            if handle != parent_window:
                # switch control to that new tab
                driver.switch_to.window(handle)
                # wait for 3 seconds so the detail page can render
                time.sleep(3)

                # read every field; a missing field is stored as NaN
                for values, xpath in field_targets:
                    values.append(_text_or_nan(xpath))

                # close the listing tab and return to the results tab
                driver.close()
                driver.switch_to.window(parent_window)

    # increase current page by 1 (go to the next page)
    current_page += 1

    # nothing to navigate to once the last page has been scraped
    if current_page > last_page:
        break

    # find the next page button
    try:
        next_page_buttons = driver.find_elements(
            By.XPATH, '//div[contains(@class, "paginationLinkText")]')
        # NOTE(review): the second-to-last pagination control is presumably
        # the "next" button; confirm against the page markup
        next_page = next_page_buttons[-2]
        # click to go to the next page
        next_page.click()
    except Exception as exc:
        # Without a successful click the same page would be scraped again
        # and produce duplicate rows, so report the problem and stop here.
        print(f'Stopping after page {current_page - 1}: '
              f'could not open the next page ({exc!r})')
        break

In [None]:
# end driver: scraping is finished, so shut down the browser session
driver.quit()

In [None]:
# Combine the per-field lists into one table, one row per scraped listing.
# Keyword order below is the column order of the resulting frame.
real_estate_listings = pd.DataFrame(
    data=dict(
        mls_number=mls_number,
        price=price,
        address=address,
        above_grade_bedrooms=above_grade_bedrooms,
        below_grade_bedrooms=below_grade_bedrooms,
        bathrooms=bathrooms,
        community_name=community_name,
        land_size=land_size,
        annual_prop_tax=annual_prop_tax,
    )
)

In [None]:
# save the listings table to listings.csv (relative path), leaving out the index column
real_estate_listings.to_csv('listings.csv',index=False)