# Web Scraping Real Estate Websites

## 1. Real Estate Data Scraping – Hilal Properties

### 1.1 Load Required Libraries

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

### 1.2 Define Target URLs and Headers

In [2]:
# Send a desktop-browser User-Agent string with every request
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}

# Site root and the paginated "for rent" search URL; `{}` receives the page number
BASE_URL = "https://hilalprp.com.om"
URL_TEMPLATE = f"{BASE_URL}/properties-search/page/{{}}/?status=for-rent"
MAX_PAGES = 100  # upper bound on the number of result pages requested

# Column-oriented accumulator: one independent, initially empty list per output column
properties = {
    column: []
    for column in (
        "Title",
        "Location",
        "Bedrooms",
        "Bathrooms",
        "Price",
        "Size",
        "Listing_Type",
    )
}


### 1.3 Loop Through Pages and Scrape Listings

In [3]:
# Empty every column first: `properties` lives at notebook level, so without
# this reset a second run of the cell would append a duplicate copy of every
# listing (the loop always restarts from page 1).
for column in properties.values():
    column.clear()

# Seconds to wait on any single HTTP request. requests applies no timeout by
# default, so a stalled connection would otherwise block this cell forever.
REQUEST_TIMEOUT = 30

for page in range(1, MAX_PAGES + 1):
    url = URL_TEMPLATE.format(page)
    print(f"\nScraping page {page}: {url}")

    # A failed listing page (network error, timeout, or HTTP error status)
    # ends the crawl; rows collected so far are kept.
    try:
        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except Exception as e:
        print(f"Failed to fetch page {page}: {e}")
        break

    soup = BeautifulSoup(response.text, "html.parser")
    cards = soup.find_all("article", class_="rh_list_card")

    # A page without listing cards is treated as the end of the results.
    if not cards:
        print("No more listings found.")
        break

    for card in cards:
        # Title: text of the first <h3> in the card
        title_tag = card.find("h3")
        title = title_tag.get_text(strip=True) if title_tag else "N/A"

        # Price: drop the "OMR" marker and thousands separators, keep the rest as text
        price_tag = card.find("p", class_="price")
        price = price_tag.get_text(strip=True).replace("OMR", "").replace(",", "").strip() if price_tag else "N/A"

        # Listing type (text of the card's status badge)
        status_tag = card.find("span", class_="status")
        listing_type = status_tag.get_text(strip=True) if status_tag else "N/A"

        # Meta info: each meta <div> pairs a label span with a value span;
        # the label text decides which column the value belongs to.
        meta_dict = {"Bedrooms": "N/A", "Bathrooms": "N/A", "Size": "N/A"}
        meta_wrap = card.find_all("div", class_="rh_prop_card__meta")

        for item in meta_wrap:
            label = item.find("span", class_="rh_meta_titles")
            value = item.find("span", class_="figure")
            if label and value:
                label_text = label.get_text(strip=True).lower()
                val_text = value.get_text(strip=True)
                if "bedroom" in label_text:
                    meta_dict["Bedrooms"] = val_text
                elif "bathroom" in label_text:
                    meta_dict["Bathrooms"] = val_text
                elif "area" in label_text or "size" in label_text or "sqmt" in label_text:
                    meta_dict["Size"] = val_text

        # Location is read from the detail page: the first link whose href
        # contains "/property-city/". This lookup is best-effort; any failure
        # (including a timeout) leaves the location as "N/A" and the row is
        # still recorded.
        location = "N/A"
        detail_link_tag = card.find("a", href=True)  # first link in the card
        if detail_link_tag:
            detail_url = detail_link_tag['href']
            try:
                detail_resp = requests.get(detail_url, headers=headers, timeout=REQUEST_TIMEOUT)
                detail_resp.raise_for_status()
                detail_soup = BeautifulSoup(detail_resp.content, 'html.parser')
                location_tag = detail_soup.find("a", href=lambda x: x and "/property-city/" in x)
                if location_tag:
                    location = location_tag.get_text(strip=True)
            except Exception as e:
                print(f"Error fetching detail page: {e}")

        # Append one value to every column so all lists stay the same length
        properties["Title"].append(title)
        properties["Location"].append(location)
        properties["Bedrooms"].append(meta_dict["Bedrooms"])
        properties["Bathrooms"].append(meta_dict["Bathrooms"])
        properties["Price"].append(price)
        properties["Size"].append(meta_dict["Size"])
        properties["Listing_Type"].append(listing_type)

    time.sleep(1)  # polite pause between pages


Scraping page 1: https://hilalprp.com.om/properties-search/page/1/?status=for-rent

Scraping page 2: https://hilalprp.com.om/properties-search/page/2/?status=for-rent

Scraping page 3: https://hilalprp.com.om/properties-search/page/3/?status=for-rent

Scraping page 4: https://hilalprp.com.om/properties-search/page/4/?status=for-rent

Scraping page 5: https://hilalprp.com.om/properties-search/page/5/?status=for-rent

Scraping page 6: https://hilalprp.com.om/properties-search/page/6/?status=for-rent

Scraping page 7: https://hilalprp.com.om/properties-search/page/7/?status=for-rent

Scraping page 8: https://hilalprp.com.om/properties-search/page/8/?status=for-rent

Scraping page 9: https://hilalprp.com.om/properties-search/page/9/?status=for-rent

Scraping page 10: https://hilalprp.com.om/properties-search/page/10/?status=for-rent

Scraping page 11: https://hilalprp.com.om/properties-search/page/11/?status=for-rent

Scraping page 12: https://hilalprp.com.om/properties-search/page/12/?st

### 1.4 Create DataFrame and Preview Results

In [17]:
# Build the Hilal DataFrame from the column lists filled by the scraping loop,
# then preview the first rows
df_Hilal = pd.DataFrame(data=properties)
print("Hilal Properties DataFrame")
df_Hilal.head(n=5)

Hilal Properties DataFrame


Unnamed: 0,Title,Location,Bedrooms,Bathrooms,Price,Size,Listing_Type
0,2-BEDROOM APARTMENT,Al Ansab,2.0,2.0,300,,For Rent
1,2-BEDROOM APARTMENT,Shatti Al Qurum,2.0,2.0,500,,For Rent
2,4+1 BEDROOM TWIN VILLA,Madinat Qaboos (MQ),4.0,5.0,1500,,For Rent
3,COMMERCIAL SHOP,Ghala,,,370,39.0,For Rent
4,COMMERCIAL OFFICE SPACE,Bausher,,2.0,4,,For Rent


### 1.5 Save Data to CSV File

In [12]:
# Write the Hilal listings to disk, leaving out the DataFrame index column
df_Hilal.to_csv(path_or_buf="hilal_rental_data.csv", index=False)
print("\nSaved to hilal_rental_data.csv")



Saved to hilal_rental_data.csv


## 2. Real Estate Data Scraping – Dubizzle website

### 2.1 Define Target URLs and Headers

In [13]:
# Site root and the User-Agent header sent with the Dubizzle requests
main_url = "https://www.dubizzle.com.om"
headers = {"User-Agent": "Mozilla/5.0"}

# Column-oriented storage: one independent, initially empty list per scraped field
dubizzle_data = {
    field: []
    for field in ("property_name", "price", "location", "area", "bathrooms", "beds")
}


### 2.2 Loop Through Pages and Scrape Listings 

In [24]:
# Empty every column first: `dubizzle_data` lives at notebook level, so without
# this reset a second run of the cell would append a duplicate copy of every
# listing (the loop always restarts from page 1).
for column in dubizzle_data.values():
    column.clear()

DUBIZZLE_MAX_PAGES = 104  # highest result page requested

# Seconds to wait on any single HTTP request. requests applies no timeout by
# default, so a stalled connection would otherwise block this cell forever.
REQUEST_TIMEOUT = 30

for page_number in range(1, DUBIZZLE_MAX_PAGES + 1):
    current_page_url = f"{main_url}/en/properties/properties-for-rent/?page={page_number}"
    print(f"Scraping page {page_number}: {current_page_url}")

    # Stop cleanly on network errors, timeouts and HTTP error statuses.
    # Without raise_for_status() an error page would be parsed like a normal
    # page and misreported below as "No listings found".
    try:
        response = requests.get(current_page_url, headers=headers, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Failed to fetch page {page_number}: {e}")
        break

    soup = BeautifulSoup(response.content, 'html.parser')

    # A page without listing cards is treated as the end of the results.
    cards = soup.find_all('li', attrs={"aria-label": "Listing"})
    if not cards:
        print("No listings found on this page. Stopping.")
        break

    for card in cards:
        title = card.find('h2', class_='_562a2db2')
        dubizzle_data['property_name'].append(title.text.strip() if title else 'No title')

        price = card.find('div', attrs={'aria-label': 'Price'})
        dubizzle_data['price'].append(price.text.strip() if price else 'Price not mentioned')

        # NOTE(review): the preview output shows location values ending in a
        # "•" separator; the text is stored unchanged here so the CSV content
        # stays as before — confirm whether it should be stripped.
        location = card.find('span', class_='f7d5e47e')
        dubizzle_data['location'].append(location.text.strip() if location else 'Location not mentioned')

        # Area, bathrooms and beds share one markup pattern: a wrapper span
        # identified by aria-label that contains the value in an inner span.
        for field, aria_label in (("area", "Area"), ("bathrooms", "Bathrooms"), ("beds", "Beds")):
            wrapper = card.find('span', attrs={'aria-label': aria_label})
            value_tag = wrapper.find('span', class_='_3e1113f0') if wrapper else None
            dubizzle_data[field].append(value_tag.text.strip() if value_tag else 'Not specified')

    time.sleep(1.5)  # pause between result pages

Scraping page 1: https://www.dubizzle.com.om/en/properties/properties-for-rent/?page=1
Scraping page 2: https://www.dubizzle.com.om/en/properties/properties-for-rent/?page=2
Scraping page 3: https://www.dubizzle.com.om/en/properties/properties-for-rent/?page=3
Scraping page 4: https://www.dubizzle.com.om/en/properties/properties-for-rent/?page=4
Scraping page 5: https://www.dubizzle.com.om/en/properties/properties-for-rent/?page=5
Scraping page 6: https://www.dubizzle.com.om/en/properties/properties-for-rent/?page=6
Scraping page 7: https://www.dubizzle.com.om/en/properties/properties-for-rent/?page=7
Scraping page 8: https://www.dubizzle.com.om/en/properties/properties-for-rent/?page=8
Scraping page 9: https://www.dubizzle.com.om/en/properties/properties-for-rent/?page=9
Scraping page 10: https://www.dubizzle.com.om/en/properties/properties-for-rent/?page=10
Scraping page 11: https://www.dubizzle.com.om/en/properties/properties-for-rent/?page=11
Scraping page 12: https://www.dubizzle.

### 2.3 Create DataFrame and Preview Results

In [25]:
# Assemble the scraped Dubizzle columns into a DataFrame and preview the first rows
df_Dubizzle = pd.DataFrame(data=dubizzle_data)
print("Dubizzle DataFrame: \n")
df_Dubizzle.head(n=5)

Dubizzle DataFrame: 



Unnamed: 0,property_name,price,location,area,bathrooms,beds
0,MADINAT AL ILAM | EXCELLENT 4+1 BR TOWNHOUSE,OMR 650,"Madinat As Sultan Qaboos, Muscat•",300 SQM,6,4
1,AL KHUWAIR | WELL MAINTAINED 2 BHK FLAT,OMR 280,"Al Khuwair, Muscat•",105 SQM,2,2
2,Elegant 4 BHK Villa for rent @ Qurum,OMR 650,"Qurum, Muscat•",300 SQM,5,4
3,ANSAB | 8 BEDROOM VILLA,OMR 400,"Ansab, Muscat•",300 SQM,9,8
4,ADV905*4BHK Villa for rent in Madinat Illam in...,OMR 750,"Qurum, Muscat•",300 SQM,4,4


### 2.4 Save Data to CSV File

In [27]:
# Write the Dubizzle listings to disk, leaving out the DataFrame index column
df_Dubizzle.to_csv(path_or_buf="dubizzle_properties_for_rent.csv", index=False)
print("\nSaved to dubizzle_properties_for_rent.csv")



Saved to dubizzle_properties_for_rent.csv
