In [1]:
# to extract all property URLs (a real browser is needed because the pages rely on JavaScript)
from selenium import webdriver 

# to access the html content of a single property url
import requests 

# to select parts of an XML or HTML text using CSS or XPath and extract data from it
from parsel import Selector 


# 1) Collect the listing URLs of houses and apartments with WebDriver
#    (333 result pages per property type, roughly 30 listings per page).

# Local import so this cell stays self-contained: Selenium 4 takes the driver
# path through a Service object (the `executable_path=` keyword was removed).
from selenium.webdriver.chrome.service import Service


def collect_listing_urls(driver, property_type, first_page=1, last_page=333):
    """Return the listing URLs found on each immoweb search-result page.

    Parameters
    ----------
    driver : selenium WebDriver
        An already started browser session used to load every result page.
    property_type : str
        Path segment of the search URL, e.g. 'house' or 'apartment'.
    first_page, last_page : int
        Inclusive range of result pages to visit.

    Returns
    -------
    list[list[str]]
        One inner list per visited page, holding the hrefs matched on it
        (an empty list when nothing matched).
    """
    # XPath of the listing links inside the search-result list.
    listing_xpath = '//*[@id="main-content"]/li//h2//a/@href'

    urls_per_page = []
    for page in range(first_page, last_page + 1):
        apikey = str(page) + '&orderBy=relevance'
        url = ('https://www.immoweb.be/en/search/' + property_type
               + '/for-sale?countries=BE&page=' + apikey)

        driver.get(url)

        # NOTE(review): page_source is read right after get() returns; the
        # implicit wait only applies to element lookups, so confirm that the
        # result list is already rendered at this point.
        sel = Selector(text=driver.page_source)
        urls_per_page.append(sel.xpath(listing_xpath).extract())
    return urls_per_page


def write_listing_urls(path, urls_per_page, mode):
    """Write every URL of a per-page URL matrix to `path`, one URL per line.

    `mode` is passed to open(): 'w' to start a new file, 'a' to append.
    """
    with open(path, mode) as file:
        for page_urls in urls_per_page:
            for url in page_urls:
                file.write(url + '\n')


driver = webdriver.Chrome(service=Service('../web_drivers/chromedriver.exe'))

try:
    # The implicit wait is a session-wide setting, so it is set once
    # instead of on every loop iteration.
    driver.implicitly_wait(10)

    # The urls of the houses, stored page by page (like a matrix).
    houses_url = collect_listing_urls(driver, 'house')

    # Store all houses urls in a csv file
    write_listing_urls('../csv_files/houses_apartments_urls.csv', houses_url, 'w')

    # The urls of the apartments, stored page by page in "apartments_url".
    apartments_url = collect_listing_urls(driver, 'apartment')

    # As with houses, store all apartments urls in the same csv file
    write_listing_urls('../csv_files/houses_apartments_urls.csv', apartments_url, 'a')
finally:
    # Always close the browser, even when a page load or a write fails.
    driver.quit()

ModuleNotFoundError: No module named 'selenium'

In [None]:
class House:
    """Thin wrapper around the dictionary describing one listing."""

    # (flag key, label) pairs in priority order: the first truthy flag wins.
    _SALE_TYPE_FLAGS = (
        ('isPublicSale', 'Public Sale'),
        ('isNotarySale', 'Notary Sale'),
        ('isAnInteractiveSale', 'Interactive Sale'),
    )

    def __init__(self, house_dict):
        """Keep a reference to the listing dictionary (it is not copied)."""
        self.house_dict = house_dict

    def type_sale(self):
        """Return the sale type label derived from ``house_dict['flags']``.

        Returns
        -------
        str or None
            'Public Sale', 'Notary Sale' or 'Interactive Sale' for the first
            truthy flag in that order; None when no flag is set, when the
            'flags' entry is missing or None, or when the data is malformed
            (in which case a message is printed).
        """
        try:
            # `or {}` also covers an explicit None under 'flags'.
            flags = self.house_dict.get('flags') or {}
            for flag_key, label in self._SALE_TYPE_FLAGS:
                if flags.get(flag_key):
                    return label
        except (AttributeError, TypeError) as e:
            # house_dict or its 'flags' entry is not dict-like.
            print(f"Unexpected error: {e}")
        return None


In [None]:
# Sample listing payload in which only the public-sale flag is switched on.
house_data = dict(
    flags=dict(
        isPublicSale=True,
        isNotarySale=False,
        isAnInteractiveSale=False,
    ),
)

house = House(house_dict=house_data)
print(house.type_sale())  # Output: Public Sale


In [3]:
import requests
from bs4 import BeautifulSoup

def get_type_of_sale(url, timeout=10, headers=None):
    """Fetch a listing page and return its sale type, if one is found.

    Parameters
    ----------
    url : str
        Address of the listing page.
    timeout : float or tuple, optional
        Passed to ``requests.get`` so a stalled connection cannot block the
        notebook forever (requests has no timeout by default).
    headers : dict, optional
        Extra HTTP headers for the request, e.g. a 'User-Agent'. The site
        answered the default client with HTTP 403 in a recorded run, so
        callers may need to supply browser-like headers.

    Returns
    -------
    str or None
        The text of the first matching element with the 'Sale type: '
        prefix removed, or None when nothing matched or any error occurred
        (the error is printed, not raised).
    """
    try:
        # Send a GET request to the URL
        response = requests.get(url, timeout=timeout, headers=headers)
        response.raise_for_status()  # Check for HTTP errors

        # Parse the HTML content with BeautifulSoup
        soup = BeautifulSoup(response.content, 'html.parser')

        # NOTE(review): the tag/class below is an example selector; inspect
        # the page source to confirm it still matches the real markup.
        for info in soup.find_all('li', class_='classified__transaction'):
            if 'Sale type' in info.text:
                return info.text.strip().replace('Sale type: ', '')

        return None

    except requests.RequestException as e:
        # Covers connection errors, timeouts and HTTP error statuses.
        print(f"Request error: {e}")
        return None
    except Exception as e:
        print(f"Unexpected error: {e}")
        return None

# URL of the specific listing
url = 'https://www.immoweb.be/en/classified/house/for-sale/ans/4430/11479751'
type_of_sale = get_type_of_sale(url)
print(f"Type of Sale: {type_of_sale}")


Request error: 403 Client Error: Forbidden for url: https://www.immoweb.be/en/classified/house/for-sale/ans/4430/11479751
Type of Sale: None


In [2]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options

# Path to your WebDriver
webdriver_path = '../web_drivers/chromedriver.exe'

# Set up Chrome options (optional)
chrome_options = Options()
chrome_options.add_argument("--headless")  # Run in headless mode

# Create a new instance of the Chrome driver
service = Service(webdriver_path)
driver = webdriver.Chrome(service=service, options=chrome_options)

# URL of the property page to inspect
url = 'https://www.immoweb.be/en/classified/house/for-sale/ans/4430/11479751'

try:
    # Navigate inside the try block so the browser is closed even when
    # the page load itself fails.
    driver.get(url)

    # Locate the sale type element on the page. find_elements returns an
    # empty list when nothing matches, whereas find_element would raise
    # NoSuchElementException and skip the "None" fallback below.
    sale_type = None
    sale_type_elements = driver.find_elements(By.XPATH, "//span[contains(text(),'Type of sale')]/following-sibling::span")
    sale_type_element = sale_type_elements[0] if sale_type_elements else None
    if sale_type_element is not None:
        sale_type = sale_type_element.text
    print(f"Type of sale: {sale_type}")

finally:
    # Close the browser
    driver.quit()


ModuleNotFoundError: No module named 'selenium'