# Dubizzle Web Scraping Process

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
import time

base_url = "https://www.dubizzle.com.om"
current_page_url = f"{base_url}/en/properties/properties-for-rent/"

# Seconds to wait on a single page request before giving up, so a stalled
# connection cannot hang the cell indefinitely.
REQUEST_TIMEOUT_S = 30
# Pause between consecutive page requests, in seconds.
PAGE_DELAY_S = 1

# Column-oriented accumulator: every listing card appends exactly one value to
# EVERY list, so the lists stay the same length and row i describes one card.
all_dubizzle_data = {
    'dub_property_name': [],
    'dub_rent_price': [],
    'dub_prop_location': [],
    'dub_prop_area': [],
    'dub_prop_bathrooms': [],
    'dub_prop_beds': []
}


def _text_or(node, fallback):
    """Return the stripped text of ``node``, or ``fallback`` when ``node`` is None."""
    return node.text.strip() if node is not None else fallback


def _labelled_value(card, aria_label, fallback='not mentioned'):
    """Return the value nested in the card's span with the given ``aria-label``.

    Falls back to ``fallback`` when either the labelled span or its inner
    value span is absent, instead of failing on a missing inner element.
    """
    container = card.find('span', attrs={'aria-label': aria_label})
    if container is None:
        return fallback
    return _text_or(container.find('span', class_='_3e1113f0'), fallback)


def _parse_card(card):
    """Extract one listing card into a dict keyed by ``all_dubizzle_data`` columns."""
    # NOTE(review): the class names below look build-generated and may change
    # when the site is redeployed -- confirm the selectors before a fresh run.
    return {
        # A placeholder is recorded when the title is missing; skipping the
        # append would shift every later title up by one row.
        'dub_property_name': _text_or(
            card.find('h2', class_='_562a2db2'), 'title not mentioned'),
        'dub_rent_price': _text_or(
            card.find('span', class_='ddc1b288'), 'price not mentioned'),
        'dub_prop_location': _text_or(
            card.find('span', class_='f7d5e47e'), 'no location mentioned'),
        'dub_prop_area': _labelled_value(card, 'Area'),
        'dub_prop_bathrooms': _labelled_value(card, 'Bathrooms'),
        'dub_prop_beds': _labelled_value(card, 'Beds'),
    }


# URLs already fetched; used to stop if pagination ever points backwards.
visited_urls = set()

while True:
    visited_urls.add(current_page_url)

    response = requests.get(current_page_url, timeout=REQUEST_TIMEOUT_S)
    # Fail loudly on HTTP errors rather than parsing an error page, which
    # would look like "no listings, no next page" and silently truncate the data.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    for card in soup.find_all('li', attrs={"aria-label": "Listing"}):
        for column, value in _parse_card(card).items():
            all_dubizzle_data[column].append(value)

    # First pagination anchor whose visible text is purely numeric.
    next_link = soup.find('a', class_='_7b3d179e', href=True, string=lambda x: x and x.isdigit())
    if not next_link:
        print("No next page found.")
        break

    next_page_url = requests.compat.urljoin(base_url, next_link['href'])
    if next_page_url in visited_urls:
        # Safety net: the selector matches any numbered link, so refuse to
        # re-scrape a page (which would duplicate rows and could loop forever).
        print("Next page was already scraped; stopping.")
        break

    current_page_url = next_page_url
    time.sleep(PAGE_DELAY_S)

No next page found.


In [2]:
# Build the listings table from the column-oriented dict filled by the
# scraping cell: each key becomes a column, each list position a row.
df_dubizzle = pd.DataFrame.from_dict(all_dubizzle_data)

# Bare last expression so the notebook renders the frame with its rich repr.
df_dubizzle

Unnamed: 0,dub_property_name,dub_rent_price,dub_prop_location,dub_prop_area,dub_prop_bathrooms,dub_prop_beds
0,SR-HK-595 Fully furnished Townhouse for rent i...,OMR 800,"The Wave (Almouj), Muscat•",200 SQM,3,2
1,محلات للايجار في العامرات,OMR 280,"Al Amarat, Muscat•",48 SQM,not mentioned,not mentioned
2,MADINAT QABOOS | MODERN 3 BR APARTMENT,OMR 800,"Madinat As Sultan Qaboos, Muscat•",125 SQM,2,3
3,MADINAT SULTAN QABOOS | BEAUTIFUL 1 BR APARTMENT,OMR 500,"Madinat As Sultan Qaboos, Muscat•",85 SQM,2,1
4,"Branded Residence, spacious apartment with Gol...","OMR 2,500","The Wave (Almouj), Muscat•",264 SQM,5,3
...,...,...,...,...,...,...
4360,AZAIBA | BEAUTIFUL 4+1 BR VILLA,OMR 630,"Azaiba, Muscat•",300 SQM,5,4
4361,"ADV901**A beautiful, Maintained Villa 4BHK+Mai...",OMR 700,"Qurum, Muscat•",365 SQM,5,4
4362,ADW02***Well-Maintained 2BHK Fully Furnited Fl...,OMR 700,"The Wave (Almouj), Muscat•",110 SQM,3,2
4363,ADV936** 4bhk + Maid's villa for rent in Qurum,"OMR 1,200","Qurum, Muscat•",420 SQM,5,4
