In [None]:
!pip install openpyxl




In [None]:
import pandas as pd
import numpy as np
import re

In [None]:
# Location of the raw residential listings export; change if needed
file_path = "/content/Residential_raw.csv"

# Read the raw CSV into the working DataFrame used by every later cell
df = pd.read_csv(filepath_or_buffer=file_path)

In [None]:
# Normalise the column labels one step at a time so later cells can use
# predictable snake_case names.
df.columns = df.columns.str.strip()            # drop surrounding whitespace
df.columns = df.columns.str.lower()            # lower-case every label
df.columns = df.columns.str.replace(" ", "_")  # spaces become underscores


In [None]:
def clean_price(x):
    """Convert a raw price entry into a plain rupee amount.

    The text is stripped of thousands separators and the rupee sign, the
    first number in it is read, and that number is scaled by the first
    Indian unit token found (lakh / lac / l -> 1,00,000;
    crore / cr -> 1,00,00,000). Text without a unit token is returned as
    the bare number.

    Parameters
    ----------
    x : Any
        Raw cell value, e.g. "45 Lakh", "1.2 Cr", "₹ 45,00,000".
        Missing values (None / NaN) are accepted.

    Returns
    -------
    float or None
        The price in rupees, or None when the value is missing or
        contains no number.
    """
    if pd.isna(x):
        return None

    text = str(x).replace(",", "").replace("₹", "").strip().lower()

    # A number must contain at least one digit. The previous pattern
    # ([\d\.]+) also matched a lone ".", so inputs such as "Rs. 50 Lakh"
    # crashed in float(".").
    number = re.search(r"\d+(?:\.\d+)?|\.\d+", text)
    if number is None:
        return None
    value = float(number.group())

    # Unit tokens must not be embedded in a longer word; substring checks
    # mis-read "place" (contains "lac") or a trailing "final"/"total"
    # (ends with "l") as lakh. A digit may directly precede the unit
    # ("50l", "1.5cr"), hence letter-only lookarounds instead of \b.
    unit = re.search(r"(?<![a-z])(lakhs?|lacs?|l|crores?|crs?)(?![a-z])", text)
    if unit is None:
        return value  # plain numeric value

    if unit.group(1).startswith("c"):
        return value * 10000000  # 1 crore = 1,00,00,000
    return value * 100000  # 1 lakh = 1,00,000

# Parse every raw price entry into a numeric rupee amount
df["price_clean"] = df["price"].apply(func=clean_price)

# ===============================
# STEP 4 — CLEAN TOTAL AREA
# ===============================
def clean_area(x):
    """Extract the first numeric value from a raw area entry.

    Parameters
    ----------
    x : Any
        Raw cell value, e.g. "1,200 sq.ft". Missing values (None / NaN)
        are accepted.

    Returns
    -------
    float or None
        The first number found in the text with thousands separators
        removed, or None when the value is missing or contains no
        number. Units are not interpreted; the number is returned as-is.
    """
    if pd.isna(x):
        return None

    text = str(x).lower().replace(",", "")

    # Require at least one digit. The previous pattern ([\d\.]+) also
    # matched a lone ".", so entries like "approx. 1200 sq.ft" crashed in
    # float(".").
    match = re.search(r"\d+(?:\.\d+)?|\.\d+", text)
    return float(match.group()) if match else None

# Parse every raw area entry into a plain number
df["total_area_clean"] = df["total_area"].apply(func=clean_area)

In [None]:
# Tidy the location text: trim surrounding whitespace and title-case it.
# astype(str) turns a missing value into the literal string "nan", which
# title-casing would then export as the fake place name "Nan"; the final
# .where() masks rows that were missing in the raw column back to NaN.
df["location_clean"] = (
    df["location"]
      .astype(str)
      .str.strip()
      .str.title()
      .where(df["location"].notna())
)

In [None]:
# Flatten each description onto one line and trim surrounding whitespace.
# astype(str) turns a missing value into the literal string "nan"; the
# final .where() masks rows that were missing in the raw column back to
# NaN so they are not exported as the text "nan".
df["description_clean"] = (
    df["description"]
      .astype(str)
      .str.replace("\n", " ")
      .str.strip()
      .where(df["description"].notna())
)

In [None]:
# Price per unit of area. An area of 0 would make the division produce
# inf, so non-positive areas are masked to NaN first and the ratio for
# those rows comes out as missing instead.
df["price_per_sqft_clean"] = (
    df["price_clean"]
    / df["total_area_clean"].where(df["total_area_clean"] > 0)
)

In [None]:
# Keep only the columns wanted in the export, then drop the "_clean"
# suffix so the cleaned values take over the original column names.
final_df = (
    df[[
        "name",
        "property_title",
        "price_clean",
        "location_clean",
        "total_area_clean",
        "price_per_sqft_clean",
        "description_clean",
        "baths",
        "balcony",
    ]]
    .rename(columns={
        "price_clean": "price",
        "location_clean": "location",
        "total_area_clean": "total_area",
        "price_per_sqft_clean": "price_per_sqft",
        "description_clean": "description",
    })
)


In [None]:
# Force the three numeric columns to a numeric dtype; anything that cannot
# be parsed becomes NaN (errors="coerce").
for numeric_col in ("price", "total_area", "price_per_sqft"):
    final_df[numeric_col] = pd.to_numeric(final_df[numeric_col], errors="coerce")


In [None]:
# Write the cleaned table to disk without the row index
output_path = "/content/residential_cleaned.csv"
final_df.to_csv(path_or_buf=output_path, index=False)

print("Cleaning complete!")
print("Saved file to:", output_path)

# Preview the first 15 cleaned rows as the cell output
final_df.head(n=15)

Cleaning complete!
Saved file to: /content/residential_cleaned.csv


Unnamed: 0,name,property_title,price,location,total_area,price_per_sqft,description,baths,balcony
0,Casagrand ECR 14,"4 BHK Flat for sale in Kanathur Reddikuppam, C...",19900000.0,"Kanathur Reddikuppam, Chennai",2583.0,7704.219899,Best 4 BHK Apartment for modern-day lifestyle ...,4,Yes
1,"Ramanathan Nagar, Pozhichalur,Chennai",10 BHK Independent House for sale in Pozhichal...,22500000.0,"Ramanathan Nagar, Pozhichalur,Chennai",7000.0,3214.285714,Looking for a 10 BHK Independent House for sal...,6,Yes
2,DAC Prapthi,"3 BHK Flat for sale in West Tambaram, Chennai",10000000.0,"Kasthuribai Nagar, West Tambaram,Chennai",1320.0,7575.757576,"Property for sale in Tambaram, Chennai. This 3...",3,No
3,"Naveenilaya,Chepauk, Triplicane,Chennai",7 BHK Independent House for sale in Triplicane...,33300000.0,"Naveenilaya,Chepauk, Triplicane,Chennai",4250.0,7835.294118,Entire Building for sale with 7 units of singl...,5,Yes
4,VGN Spring Field Phase 1,"2 BHK Flat for sale in Avadi, Chennai",4800000.0,"Avadi, Chennai",960.0,5000.0,"Property for sale in Avadi, Chennai. This 2 BH...",3,Yes
5,KG Earth Homes,"2 BHK Flat for sale in Siruseri, Chennai",4000000.0,"Siruseri, Chennai",940.0,4255.319149,"Price negotiable. Big hall, big balcony, gated...",3,No
6,"THIRAN FLATS ,Gowrivakkam, Sembakkam,Chennai","2 BHK Flat for sale in Sembakkam, Chennai",6000000.0,"Thiran Flats ,Gowrivakkam, Sembakkam,Chennai",880.0,6818.181818,2 BHK Apartment for sale in Chennai. This prop...,3,Yes
7,TK Jasmine Grove,3 BHK Independent House for sale in Mahindra W...,7235000.0,"Mahindra World City, Chennai",1700.0,4255.882353,A 3 BHK Independent House for sale in Mahindra...,3,Yes
8,Avenue,"2 BHK Flat for sale in West Tambaram, Chennai",4200000.0,"Brindavan Colony, West Tambaram,Chennai",840.0,5000.0,Check out this 2 BHK Apartment for sale in Tam...,3,No
9,Guru Kothai Aparts,"1 BHK Flat for sale in Chromepet, Chennai",3000000.0,"New Colony, Chromepet,Chennai",535.0,5607.476636,We are the proud owners of this 1 bhk apartmen...,2,No
