In [3]:
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd

## List of all areas

In [49]:
# Area-guide index; the scraping loop further down reads its "url" and
# "img_name" columns.
df_areas = pd.read_csv(
    "bayut_area_guides.csv",
)

In [None]:
# Preview the first two rows of the area index to confirm the file loaded.
df_areas.head(2)

In [None]:
# Start the Chrome session (via undetected_chromedriver) that every scraping
# cell below shares through the module-level `driver`.
options = uc.ChromeOptions()
options.headless = True  # Run in headless mode
driver = uc.Chrome(options=options)
# `headers` is intentionally not initialised here: it is defined in its own
# cell, and resetting it to [] in this cell would silently empty the section
# list whenever the driver is restarted.

In [32]:
# Prefixes of the `id` attributes that mark each section of an area-guide
# page. They are used both to locate a section (`div[id^="<prefix>"]`) and to
# detect where the next section begins.
headers = [
    "about-",
    "in-a-nutshell",
    "community-overview",
    "properties-in",
    "transportation-and-parking-spaces",
    "supermarkets-near",
    "mosques-near",
    "other-places-of-worship",
    "schools-near",
    "clinics-and-hospitals-near",
    "nearby-areas",
    "malls-near",
    "restaurants-near",
    "beaches-near",
    "leisure-activities-and-notable-landmarks",
    "outdoor-activities,-fitness-and-beauty",
    "community-events",
    "social-community-groups",
    "things-to-consider",
    "location",
]

In [None]:
# Accumulator for the per-area records (one dict per area guide).
data = list()

In [35]:
def get_markdown(section, headers):
    """Collect the text of the sibling elements that follow a section heading.

    Starting at the element right after ``section``, walks forward through
    ``nextElementSibling`` and gathers each sibling's ``.text``. The walk stops
    when it reaches an element whose ``id`` starts with one of ``headers``
    (i.e. the next section begins), when there are no more siblings, or when
    reading an element raises. Elements whose class contains
    ``markdown-elements`` are always treated as content.

    Relies on the module-level ``driver`` to evaluate the sibling lookup.

    Args:
        section: WebElement of the section heading to start from.
        headers: Iterable of id prefixes that mark the start of a section;
            leading/trailing ``*`` characters on a prefix are ignored.

    Returns:
        list[str]: Text of every collected sibling, in document order. Empty
        when the heading has no following sibling.
    """
    next_sibling_js = 'return arguments[0].nextElementSibling'
    markdown_content = []
    current = driver.execute_script(next_sibling_js, section)

    # `current` is None once the last sibling has been consumed (or when the
    # heading has no sibling at all).
    while current is not None:
        try:
            # get_attribute may return None for a missing attribute; normalise
            # to '' so the membership / prefix checks below cannot raise.
            css_class = current.get_attribute('class') or ''
            if 'markdown-elements' not in css_class:
                current_id = current.get_attribute('id') or ''
                starts_next_section = bool(current_id) and any(
                    current_id.startswith(header.strip('*')) for header in headers
                )
                if starts_next_section:
                    break

            markdown_content.append(current.text)
            current = driver.execute_script(next_sibling_js, current)
        except Exception as e:
            # Best effort: keep what has been collected so far.
            print(f"Error: {e}")
            break

    return markdown_content

## Scraping logic for info on areas

In [None]:
# Visit every area-guide page listed in df_areas and build one record per
# area: {"area": <img_name>, "<header>": <section text or "">, ...}.
data = []
wait = WebDriverWait(driver, 20)  # loop-invariant, so create it once
for _, row in df_areas.iterrows():
    # Single quotes inside the replacement field keep this f-string valid on
    # Python versions before 3.12 (which reject reusing the outer quote).
    url = f"https://www.bayut.com{row['url']}"
    driver.get(url)
    # Wait for the root container of the page to be present before querying it.
    body = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, 'div#__next')))
    print(row["url"])  # progress indicator

    obj = {"area": row["img_name"]}
    for header in headers:
        try:
            # A section heading is the first div whose id starts with the header prefix.
            section = body.find_element(By.CSS_SELECTOR, f'div[id^="{header}"]')
            obj[header] = "\n".join(get_markdown(section, headers))
        except Exception:
            # Not every guide has every section; record it as empty so all
            # records share the same keys.
            print(f"Could not find section for header: {header}")
            obj[header] = ""
    data.append(obj)

In [39]:
# Turn the list of per-area dicts collected in `data` into a DataFrame.
df_areas_info = pd.DataFrame(data)

In [61]:
# Persist the scraped area information; index=False keeps the positional
# index out of the CSV.
df_areas_info.to_csv('bayut_areas_info.csv', index=False)

In [None]:
# Sanity check: (rows, columns) of the scraped area table.
df_areas_info.shape

In [None]:
# Inspect the first scraped record.
df_areas_info.head(1)

### Listings of sales

In [43]:
# Accumulator for the scraped sale listings (one dict per listing card).
listings_sales = list()

In [44]:

# Scrape the Dubai Marina "for sale" result pages into `listings_sales`,
# one dict per listing card.

LISTINGS_URL = "https://www.bayut.com/for-sale/property/dubai/dubai-marina/"
LAST_PAGE = 7  # result pages 1..7 are scraped

# (record key, CSS selector relative to the listing card, badge label to drop)
# Order matters: it is the key order of every record.
LISTING_FIELDS = [
    ("title", "h2[id='active_title']", None),
    ("price", "span[aria-label='Price']", None),
    ("type", "span[aria-label='Type']", None),
    ("beds", "span[aria-label='Beds']", None),
    ("studio", "span[aria-label='Studio']", None),
    ("baths", "span[aria-label='Baths']", None),
    ("area", "span[aria-label='Area']", None),
    ("location", "div[aria-label='Location']", None),
    ("handover", "div[aria-label='Handover Card Badge']", "HANDOVER"),
    ("payment", "div[aria-label='Payment Plan Hoverable Card Badge']", "PAYMENT PLAN"),
    ("offplan", "div[aria-label='Off-plan badge']", None),
    ("verified_plan", "div[aria-label='Property Verified Button']", None),
    ("agent_plan", "div[aria-label='TruBroker']", None),
]


def _text_or_empty(item, selector):
    """Return the text of the first match of ``selector`` under ``item``, or ""."""
    # find_elements returns an empty list instead of raising when nothing
    # matches, so an optional field never aborts the scrape.
    matches = item.find_elements(By.CSS_SELECTOR, selector)
    return matches[0].text if matches else ""


def _strip_label(text, label):
    """Drop a leading ``label`` from badge text and collapse it onto one line."""
    text = text.strip()
    # str.strip(label) would remove any of the label's *characters* from both
    # ends; only the literal prefix must go.
    if text.startswith(label):
        text = text[len(label):]
    return text.replace('\n', '').strip()


def _parse_listing(item):
    """Build the record dict for one listing card; missing fields become ""."""
    record = {}
    for key, selector, label in LISTING_FIELDS:
        value = _text_or_empty(item, selector)
        record[key] = _strip_label(value, label) if label else value
    return record


# Start from an empty list so re-running this cell does not duplicate rows.
listings_sales = []
wait = WebDriverWait(driver, 20)
for page in range(1, LAST_PAGE + 1):
    # Page 1 lives at the base URL; later pages append "page-<n>/".
    url = LISTINGS_URL if page == 1 else f"{LISTINGS_URL}page-{page}/"
    driver.get(url)
    print(page)
    # NOTE(review): "e20beb46" looks like a build-generated class name and may
    # change when the site is redeployed — confirm before relying on it.
    ul = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, 'ul[class="e20beb46"]')))
    rows = ul.find_elements(By.CSS_SELECTOR, "li[role='article']")
    for item in rows:
        obj = _parse_listing(item)
        print(obj["title"])
        listings_sales.append(obj)

1
Vacant on Transfer | Fully Upgraded | Palm Views
Iconic Skyline | 3 Bedroom | 80/20 Payment Plan
EXCLUSIVE | HIGH-FLOOR | PARTIAL SEA VIEW | FURNISHED
High Floor | Vacant | Multiple Options
Luxury 2 Bedroom Duplex | Vacant | Marina View
VOT | Balcony | Marina View | Corner Unit
Exclusive | Well Kept | High Floor
LOW FLOOR | GOLF VIEW | FURNISHED
Vacant Now | Huge Terrace | Full View
Fully Furnished | Low Floor | Vacant
Full Sea Views | Vacant Now | Furnished
Exclusive| Marina View| Vacant I Store in Basement
Exclusive | Upgraded Unit | Duplex
STUNNING SEA VIEWS | HIGH FLOOR 01 SERIES | TURN-KEY
Golf Course Views | VOT | 3 Bedrooms + Maid
Branded Residences | HighFloor | Vacant
Private Jacuzzi | High Floor | Rare | Reformed
Multiple Options I Best Location | Vacant
Full Marina View | Vacant | Converted 3 Bed
Exclusive | Near Metro | Furnished | Good ROI
New Listing | Full Views | Sea and Marina
Marina View | Luxurious | Great Price
Marina Views | Vacant | With Study
Sea View |Upgraded

In [45]:
# Number of listing records collected across all scraped pages.
len(listings_sales)

168

In [46]:
# Show only the first few records; dumping the whole list floods the output.
listings_sales[:3]

[{'title': 'Vacant on Transfer | Fully Upgraded | Palm Views',
  'price': '2,875,000',
  'type': 'Apartment',
  'beds': '2',
  'studio': '',
  'baths': '2',
  'area': '1,183 sqft',
  'location': 'Princess Tower, Dubai Marina, Dubai',
  'handover': '',
  'payment': '',
  'offplan': '',
  'verified_plan': 'on 21st of April 2025',
  'agent_plan': 'TruBroker™'},
 {'title': 'Iconic Skyline | 3 Bedroom | 80/20 Payment Plan',
  'price': '4,575,888',
  'type': 'Apartment',
  'beds': '3',
  'studio': '',
  'baths': '3',
  'area': '1,990 sqft',
  'location': 'Marina Cove, Dubai Marina, Dubai',
  'handover': 'Q4 2029',
  'payment': '80/20',
  'offplan': 'Off-Plan',
  'verified_plan': 'on',
  'agent_plan': ''},
 {'title': 'EXCLUSIVE | HIGH-FLOOR | PARTIAL SEA VIEW | FURNISHED',
  'price': '1,230,000',
  'type': 'Apartment',
  'beds': '1',
  'studio': '',
  'baths': '1',
  'area': '989 sqft',
  'location': 'Marina 101, Dubai Marina, Dubai',
  'handover': '',
  'payment': '',
  'offplan': '',
  'ver

In [47]:
# Tabulate the scraped sale listings and write them to disk.
# NOTE(review): the variable name says "townhouses", but the records come from
# `listings_sales` and the file is named for Dubai Marina — consider renaming
# once any later users of this name are checked.
df_listing_sales_townhouses = pd.DataFrame(listings_sales)
# index=False matches the area-info export and avoids writing the positional
# index as an unnamed first column.
df_listing_sales_townhouses.to_csv("bayut_listing_sales_dubai_marina.csv", index=False)