Below is a Python snippet that uses BeautifulSoup to scrape the price of a single property from Immoweb:

In [19]:
import requests
from bs4 import BeautifulSoup

def get_property_price(url, timeout=30):
    """
    Get the price of a property from the given URL.

    Args:
        url (str): The URL of the property page.
        timeout (float): Seconds to wait for the server before giving up.
            Default is 30.

    Returns:
        str: The text of the price element, stripped of surrounding
            whitespace and with newline characters removed.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        ValueError: If the page has no <p class="classified__price"> element.
    """
    # Without a timeout, requests waits indefinitely on an unresponsive server
    response = requests.get(url, timeout=timeout)

    # Fail on error pages instead of trying to parse them
    response.raise_for_status()

    # Parse the HTML content
    soup = BeautifulSoup(response.content, 'html.parser')

    # soup.find returns None when nothing matches, so check before reading .text
    price_element = soup.find('p', class_='classified__price')
    if price_element is None:
        raise ValueError(f"No price element found on page: {url}")

    return price_element.text.strip().replace('\n', '')

# Listing whose price we want to look up
url = "https://www.immoweb.be/nl/zoekertje/appartement/te-koop/gavere/9890/11140877"

# Fetch the price, then show it
price = get_property_price(url)
print(f"Price: {price}")


Price: € 419.760*                                                419760€


In [17]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def scrape_immoweb_data(url):
    """
    Print the rows of the first "classified-table" on an Immoweb classified page.

    Each data cell is printed as "<header>: <cell text>". Rows that have no
    <th> cell are printed with an empty header instead of aborting the scrape.

    Args:
        url (str): The URL of the Immoweb classified page.

    Returns:
        None
    """
    # Set up Selenium WebDriver (make sure to install the appropriate driver for your browser)
    driver = webdriver.Chrome()  # Change this to the appropriate driver for your browser

    try:
        # Load the page
        driver.get(url)

        # Wait up to 10 seconds for the table to be present
        table = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, 'classified-table'))
        )

        for row in table.find_elements(By.TAG_NAME, 'tr'):
            # find_elements returns an empty list when the row has no <th>,
            # whereas find_element would raise and end the whole loop.
            header_cells = row.find_elements(By.TAG_NAME, 'th')
            header = header_cells[0].text.strip() if header_cells else ""

            for column in row.find_elements(By.TAG_NAME, 'td'):
                # Print the text content of the cell along with its associated header
                print(f"{header}: {column.text.strip()}")

    except Exception as e:
        print("Error occurred:", e)

    finally:
        # Quit the WebDriver
        driver.quit()

# Classified page to inspect
url = "https://www.immoweb.be/en/classified/house/for-sale/anderlecht/1070/11150049"

# Print the table rows found on that page
scrape_immoweb_data(url=url)


Available as of: After signing the deed
Construction year: 1926
Number of floors: 2
Building condition: Just renovated
Street frontage width: 6 m
Number of frontages: 3
Covered parking spaces: 2
Outdoor parking spaces: 2
Surroundings type: Living area (residential, urban or rural)


In [13]:
import csv
from selenium import webdriver
from selenium.webdriver.common.by import By

def scrape_immoweb_data(url, csv_file):
    """
    Dump every "classified-table" row of one Immoweb page into a CSV file.

    Each CSV row starts with the row's header text (empty when the row has
    no <th>) followed by the stripped text of each <td> cell.

    Args:
        url (str): The URL of the Immoweb classified page.
        csv_file (str): Path of the CSV file to write; it is overwritten.

    Returns:
        None
    """
    # Launch the browser (swap in another driver class for a different browser)
    browser = webdriver.Chrome()

    try:
        browser.get(url)

        # newline='' leaves line endings to the csv module; output is UTF-8
        with open(csv_file, mode='w', newline='', encoding='utf-8') as out:
            csv_out = csv.writer(out)

            for tbl in browser.find_elements(By.CSS_SELECTOR, 'table.classified-table'):
                for tr in tbl.find_elements(By.TAG_NAME, 'tr'):
                    data_cells = tr.find_elements(By.TAG_NAME, 'td')

                    # First column: header text if the row has one, else blank
                    if tr.find_elements(By.TAG_NAME, 'th'):
                        label = tr.find_element(By.TAG_NAME, 'th').text.strip()
                    else:
                        label = ""

                    csv_out.writerow([label] + [td.text.strip() for td in data_cells])

    except Exception as err:
        print("Error occurred:", err)

    finally:
        # Always shut the browser down
        browser.quit()

# Classified page to export
url = "https://www.immoweb.be/en/classified/house/for-sale/anderlecht/1070/11150049"

# Destination CSV file
csv_file = "immoweb_data.csv"

# Scrape the page and write its table rows to the CSV file
scrape_immoweb_data(url=url, csv_file=csv_file)


In [20]:
from selenium import webdriver
from selenium.webdriver.common.by import By

def scrape_urls(start_page, last_page, num_urls=3):
    """
    Print property URLs from Immoweb search result pages.

    Pages start_page..last_page (inclusive) are loaded in order and the href
    of every 'a.card__title-link' element is printed until num_urls links
    have been printed.

    Args:
        start_page (int): The starting page number.
        last_page (int): The ending page number (inclusive).
        num_urls (int | None): Maximum number of URLs to print. Default is 3.
            Zero or a negative value prints nothing and does not start a
            browser; None means no limit.

    Returns:
        None
    """
    # A non-positive limit can never be reached by counting upwards, so stop
    # here rather than walking every page.
    if num_urls is not None and num_urls <= 0:
        return

    # Set up Selenium WebDriver
    driver = webdriver.Chrome()  # Change this to the appropriate driver for your browser

    try:
        url_count = 0  # Number of URLs printed so far
        for page in range(start_page, last_page + 1):
            # Construct the URL
            url = f'https://www.immoweb.be/nl/zoeken/huis-en-appartement/te-koop?countries=BE&page={page}&orderBy=relevance'

            # Load the page
            driver.get(url)

            # Print the link of every property card on this page
            for url_element in driver.find_elements(By.CSS_SELECTOR, 'a.card__title-link'):
                print(url_element.get_attribute('href'))
                url_count += 1
                if num_urls is not None and url_count >= num_urls:
                    # Leaves both loops at once; the finally block still runs
                    return

    except Exception as e:
        print("Error occurred:", e)

    finally:
        # Quit the WebDriver
        driver.quit()

# Demo: walk search pages 1 and 2, stopping at the default limit of 3 URLs
scrape_urls(start_page=1, last_page=2)


https://www.immoweb.be/nl/zoekertje/appartement/te-koop/evere/1140/11050526
https://www.immoweb.be/nl/zoekertje/appartement/te-koop/evere/1140/11012764
https://www.immoweb.be/nl/zoekertje/penthouse/te-koop/ukkel/1180/11154663


In [29]:
import csv
from selenium import webdriver
from selenium.webdriver.common.by import By

def scrape_immoweb_data(urls, csv_file):
    """
    Export the "classified-table" rows of several Immoweb pages to one CSV file.

    Pages are visited in the order given. Each CSV row holds the row header
    (blank when the row has no <th>) followed by the text of its <td> cells.

    Args:
        urls (list): URLs of the Immoweb classified pages to visit.
        csv_file (str): Path of the CSV file to write; it is overwritten.

    Returns:
        None
    """
    def _as_csv_row(tr):
        # Turn one <tr> element into [header, cell, cell, ...]
        tds = tr.find_elements(By.TAG_NAME, 'td')
        has_header = bool(tr.find_elements(By.TAG_NAME, 'th'))
        first = tr.find_element(By.TAG_NAME, 'th').text.strip() if has_header else ""
        return [first] + [td.text.strip() for td in tds]

    # Launch the browser (swap in another driver class for a different browser)
    browser = webdriver.Chrome()

    try:
        # One UTF-8 output file shared by all pages
        with open(csv_file, mode='w', newline='', encoding='utf-8') as out:
            csv_out = csv.writer(out)

            for page_url in urls:
                browser.get(page_url)

                for tbl in browser.find_elements(By.CSS_SELECTOR, 'table.classified-table'):
                    for tr in tbl.find_elements(By.TAG_NAME, 'tr'):
                        csv_out.writerow(_as_csv_row(tr))

    except Exception as err:
        print("Error occurred:", err)

    finally:
        # Always shut the browser down
        browser.quit()

# Listings to export
urls = [
    "https://www.immoweb.be/en/classified/house/for-sale/anderlecht/1070/11150049",
    "https://www.immoweb.be/en/classified/house/for-sale/schaerbeek/1030/11150110",
]

# Destination CSV file
csv_file = "immoweb_data.csv"

# Scrape every listing and write the rows to the CSV file
scrape_immoweb_data(urls=urls, csv_file=csv_file)


In [33]:
import time
import csv
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def scrape_immoweb_data(urls, csv_file):
    """
    Scrape data from Immoweb classified pages with headless Chrome and
    append it to a CSV file.

    Each CSV row holds the row header (blank when the row has no <th>)
    followed by the text of its <td> cells. The file is opened in append
    mode, so rows from earlier runs are kept and repeated runs accumulate.

    Args:
        urls (list): A list of URLs of Immoweb classified pages.
        csv_file (str): The name of the CSV file to append the data to.

    Returns:
        None
    """
    # Set up Chrome options for headless mode
    chrome_options = Options()
    chrome_options.add_argument("--headless")  # Run Chrome in headless mode

    # Set up Selenium WebDriver with Chrome options
    driver = webdriver.Chrome(options=chrome_options)  # Change this to the appropriate driver for your browser

    try:
        # Open the output once for the whole run instead of once per row
        with open(csv_file, mode='a', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)

            for url in urls:
                # Load the page
                driver.get(url)

                # Find all tables with class "classified-table"
                tables = driver.find_elements(By.CSS_SELECTOR, 'table.classified-table')

                for table in tables:
                    for row in table.find_elements(By.TAG_NAME, 'tr'):
                        cells = row.find_elements(By.TAG_NAME, 'td')

                        # One lookup covers both cases: an empty list means no header
                        header_cells = row.find_elements(By.TAG_NAME, 'th')
                        header = header_cells[0].text.strip() if header_cells else ""

                        writer.writerow([header] + [cell.text.strip() for cell in cells])

    except Exception as e:
        print("Error occurred:", e)

    finally:
        # Quit the WebDriver
        driver.quit()

# Listings to scrape in headless mode
urls = [
    "https://www.immoweb.be/en/classified/house/for-sale/anderlecht/1070/11150049",
    "https://www.immoweb.be/en/classified/house/for-sale/schaerbeek/1030/11150110",
]

# CSV file that receives the rows
csv_file = "immoweb_data.csv"

# Run the scrape and store the rows in the CSV file
scrape_immoweb_data(urls=urls, csv_file=csv_file)


In [38]:
import csv
from selenium import webdriver
from selenium.webdriver.common.by import By

def scrape_immoweb_data(urls, csv_file):
    """
    Print the text of every "classified-table" found on each Immoweb page.

    For each URL a "Tables found on page:" line is printed, followed by a
    numbered "Table N:" heading and the table's text for each table.

    Args:
        urls (list): URLs of the Immoweb classified pages to visit.
        csv_file (str): Accepted but not used by this version; nothing is
            written to disk.

    Returns:
        None
    """
    # Launch the browser (swap in another driver class for a different browser)
    browser = webdriver.Chrome()

    try:
        for page_url in urls:
            browser.get(page_url)

            # Look the tables up first so a failed lookup prints no page heading
            found = browser.find_elements(By.CSS_SELECTOR, 'table.classified-table')

            print(f"Tables found on page: {page_url}")
            for number, tbl in enumerate(found, start=1):
                print(f"Table {number}:")
                # .text is the visible text; use get_attribute('innerHTML') for markup
                print(tbl.text)

    except Exception as err:
        print("Error occurred:", err)

    finally:
        # Always shut the browser down
        browser.quit()

# Listings whose tables should be printed
urls = [
    "https://www.immoweb.be/en/classified/house/for-sale/anderlecht/1070/11150049",
    "https://www.immoweb.be/en/classified/house/for-sale/schaerbeek/1030/11150110",
]

# CSV file name (still passed to the function)
csv_file = "immoweb_data.csv"

# Run the scrape
scrape_immoweb_data(urls=urls, csv_file=csv_file)


Tables found on page: https://www.immoweb.be/en/classified/house/for-sale/anderlecht/1070/11150049
Table 1:
Available as of After signing the deed
Construction year 1926
Number of floors 2
Building condition Just renovated
Street frontage width 6 m
Number of frontages 3
Covered parking spaces 2
Outdoor parking spaces 2
Surroundings type Living area (residential, urban or rural)
Table 2:
Living area 107 m²
square meters
Living room surface 30 m²
square meters
Dining room Yes
How many fireplaces? 1
Kitchen type USA hyper equipped
Kitchen surface 9 m²
square meters
Bedrooms 3
Bedroom 1 surface 9 m²
square meters
Bedroom 2 surface 16 m²
square meters
Bedroom 3 surface 17 m²
square meters
Dressing room No
Bathrooms 1
Toilets 2
Laundry room Yes
Office No
Professional space No
Basement surface 9 m²
square meters
Attic surface 17 m²
square meters
Isolated Yes
Armored door No
Table 3:
Surface of the plot 250 m²
square meters
Land is facing street No
Wooded land No
Plot at rear Yes
Flat land Yes

This script will write the text content of each table found on the specified URLs to a .txt file named immoweb_data.txt. Each table's content will be separated by newline characters for better readability. Adjust the file paths and URLs as needed.

In [39]:
from selenium import webdriver
from selenium.webdriver.common.by import By

def scrape_immoweb_data(urls, txt_file):
    """
    Write the text of every "classified-table" on each Immoweb page to a text file.

    For each URL the file receives a "Tables found on page:" line, then a
    "Table N:" heading and the table text (followed by a blank line) for
    each table found.

    Args:
        urls (list): URLs of the Immoweb classified pages to visit.
        txt_file (str): Path of the text file to write; it is overwritten.

    Returns:
        None
    """
    # Launch the browser (swap in another driver class for a different browser)
    browser = webdriver.Chrome()

    try:
        with open(txt_file, 'w', encoding='utf-8') as out:
            for page_url in urls:
                browser.get(page_url)

                # Look the tables up before writing the page heading
                found = browser.find_elements(By.CSS_SELECTOR, 'table.classified-table')

                print(f"Tables found on page: {page_url}", file=out)
                for number, tbl in enumerate(found, start=1):
                    print(f"Table {number}:", file=out)
                    # Table text, then one blank line as a separator
                    print(tbl.text, end="\n\n", file=out)

    except Exception as err:
        print("Error occurred:", err)

    finally:
        # Always shut the browser down
        browser.quit()

# Listings to export
urls = [
    "https://www.immoweb.be/en/classified/house/for-sale/anderlecht/1070/11150049",
    "https://www.immoweb.be/en/classified/house/for-sale/schaerbeek/1030/11150110",
]

# Destination text file
txt_file = "immoweb_data.txt"

# Scrape every listing and write the table text to the file
scrape_immoweb_data(urls=urls, txt_file=txt_file)


This script will create multiple threads, each responsible for scraping data from one URL. It uses a lock to synchronize file writes to ensure that data is written correctly to the text file. The total execution time of the script is printed at the end. Adjust the URLs and file paths as needed.

In [None]:
import threading
import time
from selenium import webdriver
from selenium.webdriver.common.by import By

def scrape_immoweb_data(url, txt_file, lock):
    """
    Scrape data from an Immoweb classified page and append it to a text file.

    The page is loaded in its own Chrome instance. All table text is read
    from the browser first; the lock is then held only while the collected
    text is appended to the file.

    Args:
        url (str): The URL of the Immoweb classified page.
        txt_file (str): The name of the text file to append the data to.
        lock (threading.Lock): A lock to synchronize file writes.

    Returns:
        None
    """
    # Set up Selenium WebDriver
    driver = webdriver.Chrome()  # Change this to the appropriate driver for your browser

    try:
        # Load the page
        driver.get(url)

        # Find all tables with class "classified-table"
        tables = driver.find_elements(By.CSS_SELECTOR, 'table.classified-table')

        # Read the text from the browser before taking the lock, so other
        # threads are not blocked while this one talks to its WebDriver.
        chunks = [f"Tables found on page: {url}\n"]
        for idx, table in enumerate(tables, 1):
            chunks.append(f"Table {idx}:\n")
            chunks.append(table.text + "\n\n")

        # Append this page's output in a single write while holding the lock
        with lock:
            with open(txt_file, 'a', encoding='utf-8') as file:
                file.write("".join(chunks))

    except Exception as e:
        print("Error occurred:", e)

    finally:
        # Quit the WebDriver
        driver.quit()

def main():
    """Scrape each listing in its own thread and report the elapsed wall-clock time."""
    # Listings to scrape, one worker thread per URL
    urls = [
        "https://www.immoweb.be/en/classified/house/for-sale/anderlecht/1070/11150049",
        "https://www.immoweb.be/en/classified/house/for-sale/schaerbeek/1030/11150110",
    ]

    # Output file shared by all workers
    txt_file = "immoweb_data.txt"

    # Guards writes to the shared output file
    lock = threading.Lock()

    start_time = time.time()

    # Build one worker per URL, then launch them in order
    workers = [
        threading.Thread(target=scrape_immoweb_data, args=(page_url, txt_file, lock))
        for page_url in urls
    ]
    for worker in workers:
        worker.start()

    # Block until every worker has finished
    for worker in workers:
        worker.join()

    elapsed = time.time() - start_time

    # Report how long the whole run took
    print(f"Total execution time: {elapsed:.2f} seconds")


# Run the threaded scrape when this code is executed as the main module
if __name__ == "__main__":
    main()
