In [None]:
import re
import csv
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# A captured line may be terminated by a newline OR by the end of the text:
# the card text is stripped before matching, so the last line has no "\n".
_LINE_END = r"(?:\n|$)"

# (CSV column, compiled pattern) pairs, in output-column order. Group 1 of
# each pattern is the field value. Compiled once instead of once per card.
_FIELD_PATTERNS = tuple(
    (column, re.compile(pattern))
    for column, pattern in (
        # The certificate number is the word following "# ".
        ("Rera_Certificate_No", r"# (\w+)"),
        # The text right after the certificate number.
        ("Project Name", r"# \w+\s+(.+?)" + _LINE_END),
        # Greedy ".+" consumes the text after the certificate number so the
        # capture lands on the line that follows it.
        ("Developer", r"# \w+\s+.+\s+(.+?)" + _LINE_END),
        # The line immediately preceding the "Find Route" label.
        ("Location", r"\n\s+(.+?)\s*\n\s*Find Route"),
        ("State", r"State\s+(.+?)" + _LINE_END),
        ("Pincode", r"Pincode\s+(\d+)"),
        ("District", r"District\s+(.+?)" + _LINE_END),
        ("Last Modified", r"Last Modified\s+(.+?)" + _LINE_END),
    )
)


def parse_projects(html):
    """Extract project records from the HTML of a search-results page.

    Every element whose class attribute is exactly
    "row shadow p-3 mb-5 bg-body rounded" is treated as one project card.
    The card's text is flattened to newline-separated pieces and each field
    is pulled out of that text with a regular expression.

    Args:
        html: Markup of the page to parse.

    Returns:
        A list with one dict per card, in the order ``find_all`` returns
        them. Each dict has the keys "Rera_Certificate_No", "Project Name",
        "Developer", "Location", "State", "Pincode", "District" and
        "Last Modified". A value is the captured text with surrounding
        whitespace removed, or "N/A" when the field's pattern does not match.
    """
    soup = BeautifulSoup(html, 'html.parser')
    project_elements = soup.find_all(class_="row shadow p-3 mb-5 bg-body rounded")

    project_data = []
    for element in project_elements:
        raw_text = element.get_text(separator="\n").strip()

        record = {}
        for column, pattern in _FIELD_PATTERNS:
            match = pattern.search(raw_text)
            record[column] = match.group(1).strip() if match else "N/A"
        project_data.append(record)

    return project_data


# Rows collected from every results page visited.
all_project_data = []

driver = webdriver.Chrome()
try:
    driver.get("https://maharera.maharashtra.gov.in/projects-search-result")

    while True:
        # Parse whatever page the browser is currently showing.
        # NOTE(review): assumes next_button.click() below does not return
        # until the following page has loaded; otherwise this could re-read
        # the previous page. Confirm against the site's pagination.
        html = driver.page_source
        project_data = parse_projects(html)
        all_project_data.extend(project_data)

        try:
            # Wait up to 20 seconds for a clickable "next" link.
            next_button = WebDriverWait(driver, 20).until(
                EC.element_to_be_clickable((By.XPATH, "//a[@class = 'next']"))
            )
            # Move the pointer onto the link before clicking it.
            webdriver.ActionChains(driver).move_to_element(next_button).perform()
            next_button.click()
        except Exception as e:
            # Best-effort pagination: any failure to reach the next page
            # (including the wait timing out) ends the crawl.
            print("No more pages or an error occurred: ", e)
            break
finally:
    # Always close the browser, even if loading or parsing raised, so a
    # failed run does not leave a Chrome process behind.
    driver.quit()


# Destination path for the exported rows.
csv_file = "projects.csv"

# Column order of the CSV; these are the keys of each collected row dict.
headers = [
    "Rera_Certificate_No",
    "Project Name",
    "Developer",
    "Location",
    "State",
    "Pincode",
    "District",
    "Last Modified",
]

# newline='' lets the csv module control line endings itself.
with open(csv_file, 'w', newline='', encoding='utf-8') as out:
    csv_writer = csv.DictWriter(out, fieldnames=headers)
    csv_writer.writeheader()
    for record in all_project_data:
        csv_writer.writerow(record)

print(f"Data has been saved to {csv_file}")
