In [3]:
from bs4 import BeautifulSoup
import requests

def extract_listing_info(html_content):
    """
    Extracts information about property listings from HTML content.

    Args:
        html_content: The HTML content of the page containing the listings.

    Returns:
        A list of dictionaries, where each dictionary represents a property listing
        and contains the extracted information.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    listings = []

    listing_cards = soup.find_all('div', {'data-id': 'listing-card-container'})
    
    print(listing_cards)

    for card in listing_cards:
        listing = {}

        # ID
        listing['id'] = card.parent.get('id').replace('listing-list-card-', '')

        # Price
        price_element = card.find('b', class_='relative leading-[140%]')
        if price_element:
            listing['price'] = price_element.text.strip()

        # Location
        location_element = card.find('div', class_='w-full relative leading-[170%]')
        if location_element:
            listing['location'] = location_element.text.strip()

        # Type
        type_element = card.find('b', class_='relative leading-[160%] truncate')
        if type_element:
            listing['type'] = type_element.text.strip()

        # Area
        area_element = card.find_all('b', class_='relative leading-[160%] whitespace-nowrap')[0]
        if area_element:
            listing['area'] = area_element.text.strip()

        # Bedrooms
        bedrooms_element = card.find_all('b', class_='relative leading-[160%] whitespace-nowrap')[1]
        if bedrooms_element:
            listing['bedrooms'] = bedrooms_element.text.strip()

        # Bathrooms
        bathrooms_element = card.find_all('b', class_='relative leading-[160%] whitespace-nowrap')[2]
        if bathrooms_element:
            listing['bathrooms'] = bathrooms_element.text.strip()

        # Agent
        agent_element = card.find('img', class_='w-[3rem] relative rounded h-[3rem] object-cover object-top')
        if agent_element:
            listing['agent'] = agent_element.get('alt')

        # Image URL
        img_element = card.find('img', class_='opacity-100 absolute block top-1/2 left-1/2 w-full h-full -translate-y-1/2 -translate-x-1/2 transition-opacity duration-200 object-center object-cover')
        if img_element:
            listing['image_url'] = img_element.get('data-src')

        # Virtual Tour (check for the presence of the "Visita Virtual" element)
        virtual_tour_element = card.find('b', class_='relative leading-[160%]')
        listing['virtual_tour'] = 'Yes' if virtual_tour_element and virtual_tour_element.text.strip() == 'Visita Virtual' else 'No'

        # Link
        link_element = card.find('a', {'data-id': 'listing-card-link'})
        if link_element:
            listing['link'] = "https://www.idealista.pt" + link_element.get('href')

        listings.append(listing)

    return listings

# Example usage (if you have the HTML content in a file):
# with open("your_html_file.html", "r", encoding="utf-8") as f:
#     html_content = f.read()



In [4]:
# Example usage (if you want to fetch the HTML content from a URL):
url = 'https://www.remax.pt/pt/arrendar/imoveis/habitacao/lisboa/lisboa/r/t,preco__1200?s=%7B%22rg%22%3A%22Lisboa%22%7D&p=1&o=-PublishDate'
response = requests.get(url)
response.encoding = 'utf-8' # Ensure correct encoding
html_content = response.text

listings_data = extract_listing_info(html_content)

# Print the extracted data (or process it further as needed)
for listing in listings_data:
    print(listing)

[]
