In [1]:
from bs4 import BeautifulSoup as bs
import re
import requests
import pandas as pd

# Scrape property listings for Hyderabad from housing.com, one results page
# at a time, extract a handful of fields from each listing with regexes, and
# write everything to "hyderabad_real_estate.csv".
#
# NOTE(review): the five find_all() calls below return independent, parallel
# lists that are paired up by position. If any listing on a page lacks one of
# the elements, the fields of later listings shift against each other. The
# class names are taken as-is from the site markup and may change over time.

# Store all data rows as dictionaries
all_rows = []

# Seconds to wait for the server before giving up on a page, so one stalled
# connection cannot hang the whole run.
REQUEST_TIMEOUT = 30

# Patterns are loop-invariant, so compile them once up front.
PRICE_RE = re.compile(r"₹[\d,.]+\s*[A-Za-z]*")
# \d+ (not \d) so that e.g. "10 BHK" is not truncated to "0 BHK".
BHK_RE = re.compile(r"\d+\sBHK")
TYPE_RE = re.compile(r"(Apartment|Flat|Independent Houses|Villa|PGs)")
SIZE_RE = re.compile(r"\d[\d,]*\s+sq\.ft")
STATUS_RE = re.compile(r"(Under Construction|Ready to Move)")
DELIVERY_RE = re.compile(r"[A-Za-z]+,?\s+\d{4}")


def _first_match(pattern, text):
    """Return the text of the first match of *pattern* in *text*, or None."""
    match = pattern.search(text)
    return match.group() if match else None


# Loop through 100 pages
for page in range(1, 101):
    print(f"Scraping page {page}...")
    url = f"https://housing.com/in/buy/hyderabad/hyderabad?page={page}"

    # A network error on one page must not discard the rows already
    # collected from earlier pages, so treat it like a bad status code.
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        print(f"Failed to retrieve page {page} ({exc}), skipping...")
        continue

    if response.status_code != 200:
        print(f"Failed to retrieve page {page}, skipping...")
        continue

    soup = bs(response.text, 'html.parser')

    titles = soup.find_all('div', class_='title-style')
    subtitles = soup.find_all('h2', class_='subtitle-style')
    prices = soup.find_all('div', class_='T_blackText _c8dlk8 _7l1ulh T_descriptionStyle _t91dk0 _r31e5h _g3exct _csbfng _bx1t02')
    areas = soup.find_all('div', class_="_c81fwx _cs1nn1 _g38jkm _fr1ti3 _vy1wug _7ls3je _h312gs _kbqmn7od T_dataPointStyle")
    sellers = soup.find_all('div', class_=['sellerName-label', 'seller-subtitle'])

    # zip() stops at the shortest list, which avoids index errors when the
    # per-field element counts on a page differ.
    listings = zip(titles, subtitles, prices, areas)
    for i, (title_el, subtitle_el, price_el, area_el) in enumerate(listings):
        name = title_el.text.strip()
        subtitle = subtitle_el.text.strip()
        price = price_el.text.strip()
        area_text = area_el.text.strip()

        # Everything after the first "in " of the subtitle is the locality.
        location = subtitle.split("in ", 1)[1].strip() if "in " in subtitle else None

        # Sellers are optional: there may be fewer seller elements than listings.
        listed_by = sellers[i].text.strip() if i < len(sellers) else None

        all_rows.append({
            "Name": name,
            "Location": location,
            "BHK": _first_match(BHK_RE, subtitle),
            "Type": _first_match(TYPE_RE, subtitle),
            # None (instead of a crash) when the price text has no ₹ amount.
            "Price": _first_match(PRICE_RE, price),
            "Size": _first_match(SIZE_RE, area_text),
            "Status": _first_match(STATUS_RE, area_text),
            "Delivery Date": _first_match(DELIVERY_RE, area_text),
            "Listed By": listed_by,
        })

# Convert to DataFrame
df = pd.DataFrame(all_rows)

# Save and preview
df.to_csv("hyderabad_real_estate.csv", index=False)
print("Scraping completed and saved to CSV!")

Scraping page 1...
Scraping page 2...
Scraping page 3...
Scraping page 4...
Scraping page 5...
Scraping page 6...
Scraping page 7...
Scraping page 8...
Scraping page 9...
Scraping page 10...
Scraping page 11...
Scraping page 12...
Scraping page 13...
Scraping page 14...
Scraping page 15...
Scraping page 16...
Scraping page 17...
Scraping page 18...
Scraping page 19...
Scraping page 20...
Scraping page 21...
Scraping page 22...
Scraping page 23...
Scraping page 24...
Scraping page 25...
Scraping page 26...
Scraping page 27...
Scraping page 28...
Scraping page 29...
Scraping page 30...
Scraping page 31...
Scraping page 32...
Scraping page 33...
Scraping page 34...
Scraping page 35...
Scraping page 36...
Scraping page 37...
Scraping page 38...
Scraping page 39...
Scraping page 40...
Scraping page 41...
Scraping page 42...
Scraping page 43...
Scraping page 44...
Scraping page 45...
Scraping page 46...
Scraping page 47...
Scraping page 48...
Scraping page 49...
Scraping page 50...
Scraping 