In [9]:
import requests
from bs4 import BeautifulSoup
import re
import numpy as np
import pandas as pd
import time


# Search-results page for ready-to-move flats in Pune. Pages after the first
# are addressed by appending "?page=N" to this URL.
base_url = 'https://www.magicbricks.com/ready-to-move-flats-in-pune-pppfs'

# Browser-like headers sent with every page request.
headers = {
    # NOTE(review): Content-Type describes a request body and a GET has none,
    # so this header is probably unnecessary -- confirm before removing.
    'Content-Type': 'text/html; charset=UTF-8',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
    # Advertise only encodings that requests can always decode. Brotli ("br")
    # is decoded only when an optional brotli package is installed; without
    # it a brotli-compressed reply would reach the HTML parser as undecoded
    # bytes and no listings would be found.
    'Accept-Encoding': 'gzip, deflate',
}

# Column name -> list of scraped values. Every listing appends exactly one
# entry to each list, so the lists stay aligned row by row.
all_data = {
    column: []
    for column in (
        'Area',
        'Property',
        'Furnished Status',
        'Bathroom',
        'Society',
        'Price',
        'Floor',
        'Status',
    )
}

# Upper bound on the number of result pages to request.
num_pages = 50
# Seconds to wait for the server before giving up on a page. Without a
# timeout a stalled connection would block the whole run indefinitely.
REQUEST_TIMEOUT = 30

# Pause between consecutive page requests, in seconds.
PAGE_DELAY_SECONDS = 2

# Field patterns, compiled once instead of once per listing. They are applied
# to the flattened, space-separated text of a listing card.
_AREA_RE = re.compile(r"Area\s*(\d+\.?\d*)\s*sqft")
# \d+ (not \d) so multi-digit counts such as "10 BHK" are not truncated.
_PROPERTY_RE = re.compile(r"(\d+ BHK)")
_FURNISHING_RE = re.compile(
    r"Furnishing\s*(Furnished|Semi-Furnished|Unfurnished)\s*(?=Society|Bathroom|Carpet|$)"
)
_BATHROOM_RE = re.compile(r"Bathroom\s*(\d+)")
_SOCIETY_RE = re.compile(
    r"Society\s*([A-Za-z0-9\s]+?)(?=\s*(?:Ownership|Bathroom|Car Parking|$))"
)
_PRICE_RE = re.compile(r"₹\s*(\d+\.?\d*\s*(?:Lac|Cr))")
_FLOOR_RE = re.compile(r"Floor\s*(Ground|\d+)\s*out\s*of\s*(\d+)")
_STATUS_RE = re.compile(r"Status\s*(Ready to Move|Under Construction|New Launch)")


def _first_group(pattern, text):
    """Return group 1 of the first match of *pattern* in *text*, else NaN."""
    match = pattern.search(text)
    return match.group(1) if match else np.nan


def _parse_listing(text):
    """Extract one listing's fields from its flattened card text.

    Args:
        text: Space-separated text content of a single listing card.

    Returns:
        A dict with the same keys as ``all_data``. A field whose pattern does
        not match is ``np.nan``.
    """
    bathroom = _BATHROOM_RE.search(text)
    society = _SOCIETY_RE.search(text)
    floor = _FLOOR_RE.search(text)
    return {
        'Area': _first_group(_AREA_RE, text),
        'Property': _first_group(_PROPERTY_RE, text),
        'Furnished Status': _first_group(_FURNISHING_RE, text),
        'Bathroom': f"Bathroom {bathroom.group(1)}" if bathroom else np.nan,
        'Society': society.group(1).strip() if society else np.nan,
        'Price': _first_group(_PRICE_RE, text),
        'Floor': f"{floor.group(1)} out of {floor.group(2)}" if floor else np.nan,
        'Status': _first_group(_STATUS_RE, text),
    }


# Fetch each results page in turn and append every listing found on it to the
# column lists in all_data. Stops early at the first page with no listings.
for page in range(1, num_pages + 1):
    url = f"{base_url}?page={page}" if page > 1 else base_url
    print(f"Scraping page {page}: {url}")

    # Only the network calls can raise RequestException, so keep the try
    # body limited to them.
    try:
        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Error scraping page {page}: {e}")
        # Still pause after a failure so the next page is not requested
        # immediately after an error.
        time.sleep(PAGE_DELAY_SECONDS)
        continue

    soup = BeautifulSoup(response.text, 'html.parser')
    sections = soup.find_all('div', class_='mb-srp__list')

    if not sections:
        print(f"No listings found on page {page}. Stopping.")
        break

    for section in sections:
        text = section.get_text(separator=" ", strip=True)

        # Show the raw text of the very first listing to help debug patterns.
        if page == 1 and not all_data['Area']:
            print("Sample raw text for first listing:", text[:500] + "...")

        record = _parse_listing(text)
        for column, value in record.items():
            all_data[column].append(value)

        if pd.isna(record['Price']):
            print(f"No price found for listing on page {page}: {text[:200]}...")

    print(f"Page {page} scraped successfully. Found {len(sections)} listings.")
    time.sleep(PAGE_DELAY_SECONDS)

# Assemble the per-column lists into a table and remember its row count
df = pd.DataFrame(data=all_data)
total_rows = df.shape[0]

# Show the first five rows, followed by the overall row count
print("\nDataFrame Preview:")
print(df.head(n=5))
print(f"\nTotal listings scraped: {total_rows}")

# Write the table to CSV without the index column, then confirm
df.to_csv(path_or_buf='magicbricks_Ready_to_Move_Flats_50_Pages.csv', index=False)
print(f"Data saved to 'magicbricks_Ready_to_Move_Flats_50_Pages.csv' with {total_rows} listings.")

Scraping page 1: https://www.magicbricks.com/ready-to-move-flats-in-pune-pppfs
Sample raw text for first listing: 13 + Photos Posted: May 29, '25 Builder: Sudarshan Group 2 BHK Ready to Occupy Flat for sale in Sudarshan Paradise Pimple Nilakh, Pimpri Chinchwad Sudarshan Paradise Carpet Area 807 sqft Status Ready to Move Transaction New Property Furnishing Unfurnished Society Sudarshan Paradise Car Parking 1 Covered, Bathroom 2 The modern amenities provided by this society will surely offer you a high class lifestyle with your beloved family members 2 BHK flat is offered for sale in Pimple Nilakh, Pune. It h...
Page 1 scraped successfully. Found 30 listings.
Scraping page 2: https://www.magicbricks.com/ready-to-move-flats-in-pune-pppfs?page=2
Page 2 scraped successfully. Found 30 listings.
Scraping page 3: https://www.magicbricks.com/ready-to-move-flats-in-pune-pppfs?page=3
Page 3 scraped successfully. Found 30 listings.
Scraping page 4: https://www.magicbricks.com/ready-to-move-flats-i