# RentPad Scraper

In [None]:
# Import required libraries
from pprint import pprint
import undetected_chromedriver as uc
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from selenium.common.exceptions import TimeoutException
from bs4 import BeautifulSoup

import googlemaps
import os
import time
import random
import csv
import pandas as pd
import sys

sys.path.append(os.path.abspath(".."))
from keys import googlemaps_api_key

In [None]:
# Google Maps API client used by enrich_listing().
# The value imported from keys.py is named as an API key; a plain key string
# has no geocode()/places_nearby()/place() methods, so it is wrapped in a
# googlemaps.Client. If keys.py already provides a non-string object (e.g. a
# ready-made client), it is used as-is.
gmaps = (
    googlemaps.Client(key=googlemaps_api_key)
    if isinstance(googlemaps_api_key, str)
    else googlemaps_api_key
)

In [None]:
# Browser settings for the scraping session.
options = uc.options.ChromeOptions()

# Open the window maximized and switch off Chrome's popup blocker
# (presumably so the window.open(...) tabs used for listing pages are not blocked).
for chrome_flag in ('--start-maximized', "--disable-popup-blocking"):
    options.add_argument(chrome_flag)

In [None]:
# Counter initialised here; none of the visible cells read it.
i = 0

# First results page of the RentPad Quezon City apartment listings.
URL = 'https://rentpad.com.ph/long-term-rentals/quezon-city/apartment'

# Start Chrome through undetected_chromedriver with the configured options,
# then navigate to the listings page.
driver = uc.Chrome(options=options, use_subprocess=False)
driver.get(URL)

In [None]:
def get_main_container():
    """Wait for the listings wrapper element and return it.

    Blocks for up to 10 seconds until the element with id
    ``view-tile-float-wrap`` is present in the DOM of the global ``driver``.
    The wait raises ``TimeoutException`` if the element never appears.
    """
    wrapper_locator = (By.ID, "view-tile-float-wrap")
    return WebDriverWait(driver, 10).until(
        EC.presence_of_element_located(wrapper_locator)
    )

In [None]:
# Google Places types probed around each listing; each one becomes a 0/1
# column in the returned dict.
_NEARBY_PLACE_TYPES = (
    "school", "hospital", "shopping_mall", "supermarket", "church",
    "park", "gym", "restaurant", "bank",
    "pharmacy", "police", "subway_station", "train_station", "university",
    "transit_station", "bus_station",
)


def enrich_listing(name, city="Quezon City, Philippines"):
    """Look up a listing on Google Maps and return location-based features.

    The listing is geocoded as ``"<name>, <city>"`` through the global
    ``gmaps`` client and the first geocoding result is used. Each call issues
    one geocode request, one nearby-search request per entry in
    ``_NEARBY_PLACE_TYPES`` (16), and one place-details request.

    Args:
        name: Listing name to geocode.
        city: Text appended to the name to form the geocoding query.

    Returns:
        A dict with ``Latitude``, ``Longitude``, ``Address``, ``City``,
        ``Province``, ``Region``, ``Rating``, ``ReviewsCount`` and one 0/1
        flag per place type (1 when at least one such place was found within
        500 m). Returns an empty dict when ``name`` is blank or the geocoder
        finds nothing.
    """
    # A blank name would turn the query into just ", <city>", which is not a
    # lookup of any listing; treat it like "no result" instead.
    if name is None or not str(name).strip():
        return {}

    geocode = gmaps.geocode(f"{name}, {city}")
    if not geocode:
        return {}
    best_match = geocode[0]

    location = best_match["geometry"]["location"]
    lat, lng = location["lat"], location["lng"]

    # The formatted address comes straight from the geocoding result
    # (no separate reverse-geocoding request is made).
    address = best_match.get("formatted_address", "")

    # Extract City/Municipality, Province, Region from the address components.
    city_name, province, region = None, None, None
    for comp in best_match.get("address_components", []):
        if "locality" in comp["types"]:
            city_name = comp["long_name"]
        elif "administrative_area_level_2" in comp["types"]:
            province = comp["long_name"]
        elif "administrative_area_level_1" in comp["types"]:
            region = comp["long_name"]

    # Presence flag for each place type within 500 m of the listing.
    nearby_results = {}
    for tag in _NEARBY_PLACE_TYPES:
        places = gmaps.places_nearby(location=(lat, lng), radius=500, type=tag)
        nearby_results[tag] = 1 if places.get("results") else 0

    # Rating and review count of the geocoded place. The details response may
    # lack a "result" entry, so fall back to an empty dict instead of raising
    # KeyError; both values are None when unavailable.
    details = gmaps.place(
        place_id=best_match["place_id"],
        fields=["rating", "user_ratings_total"],
    )
    place_info = details.get("result") or {}

    return {
        "Latitude": lat,
        "Longitude": lng,
        "Address": address,
        "City": city_name,
        "Province": province,
        "Region": region,
        "Rating": place_info.get("rating"),
        "ReviewsCount": place_info.get("user_ratings_total"),
        **nearby_results,
    }

In [None]:
def _two_cell_rows(table_html):
    """Yield the stripped text of both cells for every two-cell row in table_html."""
    for row in BeautifulSoup(table_html, 'html.parser').find_all('tr'):
        cells = row.find_all('td')
        if len(cells) == 2:
            yield cells[0].get_text(strip=True), cells[1].get_text(strip=True)


def _scrape_listing_tab():
    """Read the detail table and amenities of the listing open in the current tab.

    Returns:
        ``(details, amenities_str)`` where ``details`` maps each non-empty
        label (colons removed) to its non-empty value, and ``amenities_str``
        is the amenities joined with ``"; "`` or ``None`` when there are none.

    Raises:
        TimeoutException: if ``#table-listing-details`` does not appear
            within 20 seconds.
    """
    details_table = WebDriverWait(driver, 20).until(
        EC.presence_of_element_located((By.ID, "table-listing-details"))
    )

    details = {}
    for label, value in _two_cell_rows(details_table.get_attribute('outerHTML')):
        key = label.replace(":", "")
        if key and value:
            details[key] = value

    # The amenities section is optional, so this stays best-effort. Catching
    # Exception (rather than a bare except) keeps Ctrl-C / SystemExit working.
    try:
        amenities_table = driver.find_element(
            By.XPATH, "//b[text()='Amenities']/following::table[1]"
        )
        amenities = [
            text
            for _, text in _two_cell_rows(amenities_table.get_attribute("outerHTML"))
            if text
        ]
    except Exception:
        amenities = []

    amenities_str = "; ".join(amenities) if amenities else None
    return details, amenities_str


def get_condo_details():
    """Scrape every listing on the results page currently shown by ``driver``.

    For each listing tile the name, price and link are read from the results
    page; the link is then opened in a new tab to collect the detail table and
    amenities, and ``enrich_listing`` adds the Google Maps fields. The tab is
    always closed and focus returned to the results page, even when scraping
    a listing fails. A listing whose detail table does not load in time is
    skipped with a message instead of aborting the whole page.

    Returns:
        A list with one dict per scraped listing containing ``Name``,
        ``Price``, ``Link``, ``Amenities``, the detail-table fields and the
        ``enrich_listing`` fields (later groups override earlier keys of the
        same name).
    """
    # Fixed pause before reading the results page.
    time.sleep(5)

    condos = []
    main_window = driver.current_window_handle

    main_container = get_main_container()
    print(main_container)

    # Collect the link element of every listing tile on the page.
    condo_link_elements = WebDriverWait(driver, 10).until(
        EC.presence_of_all_elements_located(
            (By.CSS_SELECTOR, "div.view-tile-left-floater.listing-holder > a")
        )
    )

    print(f"Found {len(condo_link_elements)} condos on this page.")

    for condo in condo_link_elements:

        link = condo.get_attribute("href")
        print(f"Processing link: {link}")

        # Name and price live in the tile that contains the link.
        parent = condo.find_element(By.XPATH, "./ancestor::div[@class='view-tile-left-floater listing-holder']")
        name = parent.find_element(By.CSS_SELECTOR, "div[style*='min-height: 40px']").text.strip()
        price = parent.find_element(By.CSS_SELECTOR, "td[align='right'] span").text.strip()

        print(f"Name: {name}, Price: {price}")

        print("Opening:", link)

        # Open the listing in a new tab and switch to it.
        driver.execute_script("window.open(arguments[0]);", link)
        driver.switch_to.window(driver.window_handles[-1])

        try:
            details, amenities_str = _scrape_listing_tab()

            # Google Maps
            maps_details = enrich_listing(name)

            condos.append({
                'Name': name,
                'Price': price,
                'Link': link,
                'Amenities': amenities_str,
                **details,
                **maps_details
            })

            # Randomised pause while the listing tab is still open.
            time.sleep(random.uniform(2, 3))
        except TimeoutException:
            print(f"Timed out loading details for {link}; skipping this listing.")
        finally:
            # Always close the listing tab and return to the results page so a
            # failure cannot leave the driver focused on an orphaned tab.
            driver.close()
            driver.switch_to.window(main_window)

        print(condos)

    return condos

In [None]:
# Walk through the result pages, accumulating every scraped listing in
# all_condos until there is no clickable "next page" button.
all_condos = []
has_next_page = True

while has_next_page:
    # Scrape current page
    condos = get_condo_details()
    all_condos.extend(condos)
    print(f"Scraped {len(all_condos)} condos so far")

    try:
        # Look for a clickable next-page button (waits up to 5 seconds).
        next_page_button = WebDriverWait(driver, 5).until(
            EC.element_to_be_clickable((By.ID, 'btn-page-next'))
        )
    except TimeoutException:
        # No next page found, stop the loop
        print("No more pages.")
        has_next_page = False
        continue

    try:
        next_page_button.click()
        time.sleep(3)

        # Wait for the next page to load
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, "view-tile-float-wrap"))
        )
    except Exception as exc:
        # Still stop gracefully so all_condos is kept, but report the real
        # cause instead of claiming there are no more pages. Unlike a bare
        # except, this lets KeyboardInterrupt / SystemExit propagate.
        print(f"Stopping pagination after unexpected error: {exc!r}")
        has_next_page = False

In [None]:
# Build one table row per scraped listing and export it as CSV without the index column.
# NOTE(review): the destination is an absolute path on one user's machine — confirm
# the directory exists before running elsewhere.
df = pd.DataFrame(all_condos)
df.to_csv(
    path_or_buf='C:\\Users\\Rae\\Desktop\\condo-price-prediction\\Data\\rentpad_long_term_listings.csv',
    index=False,
)

In [None]:
# Bare expression: in a notebook this shows the listings DataFrame as the cell output.
df