### Import Required Libraries

In [1]:
import time
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup

### Scraping Loop

In [2]:
# Base URL and pagination setup
BASE_URL = "https://hilalprp.com.om"
URL_TEMPLATE = BASE_URL + "/properties-search/page/{}/?status=for-rent"
MAX_PAGES = 18

# Seconds to wait on any single request. requests applies no timeout by
# default, so without this a stalled connection would hang the cell forever.
REQUEST_TIMEOUT = 30

# Browser-like User-Agent sent with every request (search and detail pages).
REQUEST_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
}

# Data storage structure: one list per output column, all kept the same length.
hilal_data = {
    'property_title': [],
    'location': [],
    'number_of_rooms': [],
    'price': [],
    'size': []
}

# Kept as an alias of the 'location' column (same list object) so the name
# stays available; locations are appended together with the other columns,
# which keeps every column the same length even if the run is interrupted.
locations_list = hilal_data['location']
current_page = 1


def fetch_soup(session, url):
    """Download `url` with `session` and return the parsed BeautifulSoup tree.

    Raises requests.exceptions.RequestException on connection errors,
    timeouts, or a non-2xx status (via raise_for_status).
    """
    response = session.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return BeautifulSoup(response.content, 'html.parser')


def parse_listing_card(listing):
    """Extract the card-level fields from one `article.rh_list_card` element.

    Returns a tuple `(fields, listing_url)`: `fields` is a dict with the keys
    'property_title', 'number_of_rooms', 'price' and 'size' (each a stripped
    string or None when the element is missing), and `listing_url` is the href
    of the first link in the card, or None if the card has no link.
    """
    # Extract property title
    title_tag = listing.find("h3")
    title = title_tag.get_text(strip=True) if title_tag else None

    # Extract price
    price_tag = listing.find('p', class_='price')
    price = price_tag.text.strip() if price_tag else None

    # Extract figures: rooms and possibly size
    figure_tags = listing.find_all('span', class_='figure')

    # Number of rooms is taken positionally from the first span.figure.
    # NOTE(review): for cards without a bedroom figure the first figure may be
    # a different metric -- verify against the site's markup.
    number_of_rooms = figure_tags[0].text.strip() if figure_tags else None

    # Property size is the figure whose sibling label mentions "sqmt"
    size = None
    for fig in figure_tags:
        label_tag = fig.find_next_sibling('span', class_='label')
        if label_tag and 'sqmt' in label_tag.text.lower():
            size = fig.text.strip()
            break

    link_tag = listing.find('a', href=True)
    listing_url = link_tag['href'] if link_tag else None

    fields = {
        'property_title': title,
        'number_of_rooms': number_of_rooms,
        'price': price,
        'size': size,
    }
    return fields, listing_url


def fetch_location(session, listing_url):
    """Return the location text from a listing's detail page, or None.

    The location is the text of the first link whose href contains
    '/property-city/'. Best effort: a missing URL, a failed request, or a page
    without such a link all yield None; request failures are reported rather
    than silently swallowed.
    """
    if not listing_url:
        return None

    # urljoin leaves absolute URLs untouched and resolves relative ones
    # against the site root, with or without a leading slash.
    full_url = urljoin(BASE_URL + "/", listing_url)
    try:
        detail_soup = fetch_soup(session, full_url)
    except requests.exceptions.RequestException as e:
        # Only network/HTTP errors are caught, so interrupting the kernel
        # (KeyboardInterrupt) still stops the loop.
        print(f"Failed to fetch detail page {full_url}: {e}")
        return None

    location_tag = detail_soup.find('a', href=lambda href: href and '/property-city/' in href)
    return location_tag.text.strip() if location_tag else None


# Start scraping. One Session carries the headers for every request and
# reuses the underlying connection.
with requests.Session() as session:
    session.headers.update(REQUEST_HEADERS)

    while True:
        url = URL_TEMPLATE.format(current_page)
        print(f"Fetching page {current_page}: {url}")

        try:
            soup = fetch_soup(session, url)
        except requests.exceptions.RequestException as e:
            print(f"Failed to fetch page {current_page}: {e}")
            break

        listings = soup.find_all('article', class_='rh_list_card')

        if not listings:
            print(f"No listings found on page {current_page}. Stopping.")
            break

        print(f"{len(listings)} listings found on page {current_page}")

        for listing in listings:
            fields, listing_url = parse_listing_card(listing)
            location = fetch_location(session, listing_url)

            # Append all five columns only after the detail request, so an
            # interruption cannot leave the lists with different lengths.
            for column, value in fields.items():
                hilal_data[column].append(value)
            hilal_data['location'].append(location)

        current_page += 1
        if current_page > MAX_PAGES:
            print("Reached the maximum number of pages.")
            break

        # Pause between result pages to avoid hammering the server.
        time.sleep(1)

print("Scraping complete.")

Fetching page 1: https://hilalprp.com.om/properties-search/page/1/?status=for-rent
10 listings found on page 1
Fetching page 2: https://hilalprp.com.om/properties-search/page/2/?status=for-rent
10 listings found on page 2
Fetching page 3: https://hilalprp.com.om/properties-search/page/3/?status=for-rent
10 listings found on page 3
Fetching page 4: https://hilalprp.com.om/properties-search/page/4/?status=for-rent
10 listings found on page 4
Fetching page 5: https://hilalprp.com.om/properties-search/page/5/?status=for-rent
10 listings found on page 5
Fetching page 6: https://hilalprp.com.om/properties-search/page/6/?status=for-rent
10 listings found on page 6
Fetching page 7: https://hilalprp.com.om/properties-search/page/7/?status=for-rent
10 listings found on page 7
Fetching page 8: https://hilalprp.com.om/properties-search/page/8/?status=for-rent
10 listings found on page 8
Fetching page 9: https://hilalprp.com.om/properties-search/page/9/?status=for-rent
10 listings found on page 9
F

In [3]:
import pandas as pd

# Turn the per-column lists gathered by the scraping cell into a table
# (one row per listing) and preview the first ten rows.
df_hilal = pd.DataFrame.from_dict(hilal_data)
df_hilal.head(10)

Unnamed: 0,property_title,location,number_of_rooms,price,size
0,2-BEDROOM APARTMENT,Al Ansab,2.0,OMR300,
1,2-BEDROOM APARTMENT,Shatti Al Qurum,2.0,OMR500,
2,4+1 BEDROOM TWIN VILLA,Madinat Qaboos (MQ),4.0,"OMR1,500",
3,COMMERCIAL SHOP,Ghala,39.0,OMR370,
4,COMMERCIAL OFFICE SPACE,Bausher,2.0,OMR4,
5,"2, 3 & 4-BEDROOM COMPOUND VILLAS",Rusayl,234.0,OMR650,
6,COMMERCIAL SPACE,Shatti Al Qurum,,OMR6,
7,6+1 BEDROOM TWIN VILLA,Bausher,6.0,"OMR1,000",
8,5+1 BEDROOM TWIN VILLA,Al Ansab,5.0,OMR750,
9,RETAIL SPACES,Al Khuwair,,OMR7,


In [4]:
# Distinct raw values scraped into the 'size' column. Bracket access is
# required: `df_hilal.size` is the DataFrame attribute holding the element
# count, not this column.
df_hilal['size'].unique()

array([None, '+-130', '+-85', '+-300', '+-340', '+-350', '+-400'],
      dtype=object)

In [5]:
# Count the listings for which no size value was scraped.
null_count = df_hilal['size'].isna().sum()
print("Number of null values in 'size':", null_count)

Number of null values in 'size': 167


In [6]:
# (rows, columns) of the scraped table: one row per listing, five columns.
df_hilal.shape

(173, 5)

In [7]:
# Distinct location names taken from the listings' detail pages. None marks
# listings where the card had no link, the detail request failed, or the page
# had no '/property-city/' link.
df_hilal['location'].unique()

array(['Al Ansab', 'Shatti Al Qurum', 'Madinat Qaboos (MQ)', 'Ghala',
       'Bausher', 'Rusayl', 'Al Khuwair', None, 'Azaiba', 'Maabelah',
       'Al Ghoubrah', 'Darsait', 'Al Mawaleh', 'Madinat Al llam',
       'Muscat Hills', 'The Wave Muscat', 'Qurum', 'Al Hail', 'Al Khoudh',
       'Ruwi', 'Salalah', 'MUSCAT BAY', 'Al Mouj', 'Mutrah', 'Al Awabi',
       'Seeb', 'Muscat', 'Nizwa'], dtype=object)

In [8]:
# Repeats the earlier shape check right before export; no cell in between
# modifies df_hilal, so the result is unchanged.
df_hilal.shape

(173, 5)

In [9]:
# Single source for the output file name so the write call and the
# confirmation message cannot drift apart.
OUTPUT_CSV = 'hilal_rent_listings.csv'

# index=False omits the row index column; 'utf-8-sig' writes a UTF-8 byte
# order mark at the start of the file.
df_hilal.to_csv(OUTPUT_CSV, index=False, encoding='utf-8-sig')
print(f"Data saved to '{OUTPUT_CSV}'")

Data saved to 'hilal_rent_listings.csv'
