In [None]:
#Import standard-library modules
import csv #csv for storing the scraped data
import os #os for creating the output directory before writing the csv
import time #time for various wait/sleep functions

import pandas as pd

#Import required selenium modules
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import NoSuchElementException, TimeoutException, StaleElementReferenceException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

#Import webdriver_manager for easy management (auto-updation, global path) of driver executable
from webdriver_manager.chrome import ChromeDriverManager

#Search-results page that will be scraped
url = "https://www.olx.co.id/bogor-kota_g4000021/q-rumah"

#Resolve the chromedriver executable path through webdriver-manager
chromedriver_path = ChromeDriverManager().install()
service = Service(executable_path=chromedriver_path)
#Start a Chrome webdriver instance and point it at the search page
driver = webdriver.Chrome(service=service)
driver.get(url)

#Load all results for the search query by clicking the "Load More" button
#until it no longer shows up (i.e. everything is loaded). Without this, only
#the first 20 listings will be scraped.
load_more_locator = (By.XPATH, '//*[@data-aut-id="btnLoadMore"]')
while True:
    try:
        #Wait until the button is clickable (not merely present in the DOM)
        #and click the element returned by the wait, instead of looking it
        #up a second time and risking a click on a not-yet-interactable node.
        load_more_button = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable(load_more_locator)
        )
        load_more_button.click()
        print("Clicked 'Load More'")
    except (NoSuchElementException, TimeoutException, StaleElementReferenceException):
        #Button did not become clickable within 10 seconds (or vanished): stop loading
        break

#FIND LISTINGS AND STORE TO A LIST
#Select the UL element containing the search result items directly by its
#data-aut-id attribute instead of scanning every <ul> on the page.
items_lists = driver.find_elements(By.CSS_SELECTOR, 'ul[data-aut-id="itemsList"]')
#If several lists match, the last one is used (same as the previous scan);
#None when the page has no results list at all.
car_listing_ul = items_lists[-1] if items_lists else None

if car_listing_ul is None:
    #Without a results list there is nothing to scrape; continue with zero
    #listings rather than failing with AttributeError on None.
    print("No listings list found")
    listings = []
else:
    #Finding all list items (li) inside the UL list
    listings = car_listing_ul.find_elements(By.TAG_NAME, "li")

def _text_or_blank(parent, css_selector, missing_message):
    """Return the text of the first element under *parent* matching *css_selector*.

    If no such element exists, print *missing_message* and return an empty
    string, so a missing field becomes an empty CSV cell instead of reusing
    the previous listing's value or leaving the variable undefined.
    """
    try:
        return parent.find_element(By.CSS_SELECTOR, css_selector).text
    except NoSuchElementException:
        print(missing_message)
        return ""


#Initialize empty list to store scraped data (for further writing on a csv file)
scraped_datas = []

#Loop through each list item and collect the required fields of every ad
for listing in listings:
    #Only list items marked as item boxes are ads; skip everything else
    if listing.get_attribute('data-aut-id') != 'itemBox':
        continue
    try:
        ad_link = listing.find_element(By.XPATH, "./a").get_attribute("href")
    except NoSuchElementException:
        #An ad without a link is skipped; the remaining listings are still scraped
        print("No ad_link")
        continue
    ad_price = _text_or_blank(listing, "span[data-aut-id='itemPrice']", "No ad_price")
    ad_title = _text_or_blank(listing, "span[data-aut-id='itemTitle']", "No ad_title")
    #Details go into ad_detail (previously they were stored in ad_location,
    #overwritten right after, and ad_detail was never defined)
    ad_detail = _text_or_blank(listing, "span[data-aut-id='itemDetails']", "No ad_detail")
    ad_location = _text_or_blank(listing, "span[data-aut-id='item-location']", "No ad_location")
    scraped_datas.append([ad_link, ad_price, ad_title, ad_detail, ad_location])

#WRITE SCRAPED LIST TO CSV
output_path = 'scraped_data/bogor-house-price-olx.csv'
#Create the output directory if needed, so a missing folder does not throw
#away the scraped data with a FileNotFoundError at the very end of the run.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
with open(output_path, 'w', newline='', encoding="utf-8") as file:
    writer = csv.writer(file)
    headers = ['Ad Link','Price', 'Ad Title', 'Ad Detail', 'Ad Location']
    writer.writerow(headers)
    #One CSV row per scraped listing, in the same column order as the headers
    writer.writerows(scraped_datas)

In [12]:
#Load the scraped listings CSV into a pandas DataFrame
df = pd.read_csv("scraped_data/bogor-house-price-olx.csv")

In [13]:
#Preview the first rows of the DataFrame
df.head()

Unnamed: 0,Ad Link,Price,Ad Title,Ad Detail,Ad Location
0,https://www.olx.co.id/item/rumah-adem-strategi...,Rp 4.900.000.000,Rumah adem strategis dekat tol di perumahan fa...,5 KT - 3 KM - 200 m2,"BOGOR BARAT - KOTA, BOGOR KOTA"
1,https://www.olx.co.id/item/sewa-unit-murah-bog...,Rp 3.450.000,Sewa unit murah bogor icon,1 KT - 1 KM - 26 m2,"BOGOR UTARA - KOTA, BOGOR KOTA"
2,https://www.olx.co.id/item/dijual-rumah-di-vil...,Rp 630.000.000,Dijual Rumah di Vila Bogor Indah 5,2 KT - 1 KM - 65 m2,"TANAH SEREAL, BOGOR KOTA"
3,https://www.olx.co.id/item/di-jual-rumah-baru-...,Rp 450.000.000,Di jual rumah baru siap huni,2 KT - 1 KM - 63 m2,"CIOMAS, BOGOR KAB."
4,https://www.olx.co.id/item/dijual-rumah-2-lant...,Rp 2.500.000.000,Dijual rumah 2 lantai siap huni di kota bogor,5 KT - 6 KM - 400 m2,"BOGOR SELATAN - KOTA, BOGOR KOTA"
