In [2]:
from bs4 import BeautifulSoup
import os
import pandas as pd
from tqdm import tqdm

# Column-oriented accumulator: one list per output CSV column. All lists must
# stay the same length for pd.DataFrame(data) to succeed, so a file's values
# are only ever appended as one complete row (see the processing loop below).
data = {'Society_Name': [], 'Locality': [], 'Price': [], 'Average Price': [], 'BHK Type': [], 'Area': [], 'Possession Status': []}

base_dir = r"C:\Users\priya\Magicbricks_1_data"

# Placeholder stored when an element is absent from the HTML.
_MISSING = "N/A"
# CSS class of the value <div> inside a summary-list entry.
_SUMMARY_VALUE_CLASS = "mb-srp__card__summary--value"


def _text_or_missing(tag):
    """Return the stripped text of *tag*, or "N/A" when *tag* is None."""
    return _MISSING if tag is None else tag.get_text(strip=True)


def _extract_row(soup):
    """Build one record, keyed like ``data``, from a parsed HTML document.

    Only the first matching element of each kind is used. Every lookup is
    None-safe: a missing element yields "N/A" for that column instead of
    raising, so the returned dict always has a value for every column.
    """
    # The card title feeds both the locality and the BHK type.
    title_tag = soup.find("h2", class_="mb-srp__card--title")
    if title_tag is None:
        locality = bhk_type = _MISSING
    else:
        title_text = title_tag.get_text(strip=True)
        # Locality is whatever follows the last " in " of the title.
        locality = title_text.split(" in ")[-1]
        # BHK type is whatever precedes the first "Flat" of the title.
        bhk_type = title_text.split("Flat")[0].strip()

    # Area is the first value in the summary list; possession status is the
    # value nested under the entry tagged data-summary="status".
    area_tag = status_value_tag = None
    summary_list = soup.find("div", class_="mb-srp__card__summary__list")
    if summary_list is not None:
        area_tag = summary_list.find("div", class_=_SUMMARY_VALUE_CLASS)
        status_tag = summary_list.find("div", {'data-summary': 'status'})
        if status_tag is not None:
            status_value_tag = status_tag.find("div", class_=_SUMMARY_VALUE_CLASS)

    return {
        'Society_Name': _text_or_missing(soup.find("div", class_="mb-srp__card__developer")),
        'Locality': locality,
        'Price': _text_or_missing(soup.find("div", class_="mb-srp__card__price--amount")),
        'Average Price': _text_or_missing(soup.find("div", class_="mb-srp__card__price--size")),
        'BHK Type': bhk_type,
        'Area': _text_or_missing(area_tag),
        'Possession Status': _text_or_missing(status_value_tag),
    }


if not os.path.exists(base_dir):
    print(f"Directory not found: {base_dir}")
else:
    # Collect every file under base_dir, recursing into subdirectories.
    files = []
    for root, dirs, files_in_dir in os.walk(base_dir):
        for file in files_in_dir:
            files.append(os.path.join(root, file))

    for file_path in tqdm(files, desc="Processing Files", unit="file"):
        try:
            with open(file_path, "r", encoding='utf-8') as f:
                html_doc = f.read()
            # Parse after the file is closed; the handle is no longer needed.
            soup = BeautifulSoup(html_doc, 'html.parser')
            row = _extract_row(soup)
        except Exception as e:
            # Best-effort batch run: report the unreadable/unparseable file
            # and move on. Nothing has been appended yet, so the columns
            # stay aligned.
            print(f"Error processing file {file_path}: {e}")
            continue

        # Append the complete row in one step so no column can end up
        # shorter than the others.
        for column, value in row.items():
            data[column].append(value)

    df = pd.DataFrame(data)
    csv_path = "magicbricks_data.csv"

    try:
        df.to_csv(csv_path, index=False)
        print(f"CSV file created successfully: {csv_path}")
    except Exception as e:
        print(f"Error saving CSV file: {e}")

    print(df)


Processing Files: 100%|██████████████████████████████████████████████████████████| 2454/2454 [00:36<00:00, 66.41file/s]

CSV file created successfully: magicbricks_data.csv
                   Society_Name  \
0                  Marina Skies   
1                Lumbini Elysee   
2              Hivision Heights   
3              Hallmark Treasor   
4              Lansum EL Dorado   
...                         ...   
2449               Park Infinia   
2450             Imperial Ville   
2451                   The Icon   
2452  Chordias Gulmohar Heights   
2453        Manglam Garden City   

                                               Locality     Price  \
0                                Hitech City, Hyderabad  ₹2.10 Cr   
1          Financial District, Nanakram Guda, Hyderabad  ₹2.25 Cr   
2                                   Kompally, Hyderabad  ₹1.23 Cr   
3                   Narsingi, Outer Ring Road Hyderabad  ₹1.63 Cr   
4                   Narsingi, Outer Ring Road Hyderabad  ₹1.65 Cr   
...                                                 ...       ...   
2449                    Park Infinia, Raja P


