In [6]:
import pandas as pd

# Read the trimmed listings and normalise the column names in one chain.
# rename() leaves the frame untouched for any key that is not an existing column.
df = pd.read_csv("./cleaned_real_estate_data_trimmed.csv").rename(
    columns={
        'price_per_sqft_num': 'Price_per_Sqft',
        'price_num': 'Price',
        'Locality': 'Area_Name',
    }
)

# Only derive the area when the file did not ship a 'Sqft' column:
# area = total price / price per square foot
if 'Sqft' not in df:
    df['Sqft'] = df['Price'].div(df['Price_per_Sqft'])

# Locality -> factor used as a multiplier in the rent formula.
# Keys must be lowercase: the lookup below lowercases Main_Locality before
# matching, so a capitalised key (the former 'Katargam') would never be hit.
# NOTE(review): the factor looks like an annual fraction of the price, since the
# rent formula divides by 12 - confirm. Also confirm the spellings here match
# the values that actually occur in Main_Locality.
locality_factor = {
    'vesu': 0.040,
    'citylight': 0.038,
    'bhatar road': 0.036,
    'pal': 0.034,
    'adajan': 0.032,
    'katargam': 0.028,
    'varacha': 0.026,
    'udhna': 0.024,
    'dindoli': 0.022
}

# Apply locality factor, default = 0.03.
# The lookup key is trimmed and lowercased so "Katargam", " katargam " and
# "katargam" all resolve to the same entry; the Main_Locality column itself is
# left unchanged. Missing or unlisted localities fall back to the default.
df['Locality_Factor'] = (
    df['Main_Locality']
    .astype(str).str.strip().str.lower()
    .map(locality_factor)
    .fillna(0.03)
)

# Define adjustment factor function
def get_adjustment(bhk):
    """Return the rent multiplier for a BHK value.

    Args:
        bhk: Any value describing the unit. It is converted with ``str()``
            and upper-cased, so ints (2), floats (3.0) and labels
            ("3 BHK", "Office Space") are all accepted.

    Returns:
        float: 1.5 if the text contains "OFFICE". Otherwise the first whole
        number found in the text decides: 1.1 for 3-4, 1.2 for 5 or more,
        and 1.0 for everything else (0-2, or no number at all, e.g. NaN).
    """
    bhk_str = str(bhk).upper()
    if 'OFFICE' in bhk_str:
        return 1.5

    # Read the first run of digits as a single number. Checking for individual
    # digit characters misclassifies values such as 10 or "12 BHK" (they
    # contain a '1'/'2') and 5.2 (contains a '2').
    digits = ''
    for ch in bhk_str:
        if ch in '0123456789':
            digits += ch
        elif digits:
            break
    if not digits:
        return 1.0

    rooms = int(digits)
    if rooms >= 5:
        return 1.2
    if rooms >= 3:
        return 1.1
    return 1.0

# Per-row multiplier derived from the BHK column
df['Adjustment_Factor'] = df['BHK'].apply(get_adjustment)

# Rent = Sqft x Price_per_Sqft x Locality_Factor / 12 x Adjustment_Factor,
# evaluated left to right and rounded to 2 decimals
df['Rent'] = (
    df['Sqft'] * df['Price_per_Sqft'] * df['Locality_Factor'] / 12
    * df['Adjustment_Factor']
).round(2)

# Preview the inputs next to the computed rent
print(
    df[[
        'BHK',
        'Main_Locality',
        'Sqft',
        'Price_per_Sqft',
        'Locality_Factor',
        'Adjustment_Factor',
        'Rent',
    ]].head()
)

# Persist the enriched frame (row index omitted) and confirm
df.to_csv("house_rent_predicted.csv", index=False)
print("✅ Rent column generated and saved to 'house_rent_predicted.csv'")


   BHK Main_Locality         Sqft  Price_per_Sqft  Locality_Factor  \
0    2           pal  1173.684211          3800.0            0.034   
1    2      palanpur  1250.000000          3600.0            0.030   
2    3          vesu  1300.117925          3392.0            0.040   
3    2      palanpur  1181.018395          3751.0            0.030   
4    2      palanpur  1250.000000          3200.0            0.030   

   Adjustment_Factor      Rent  
0                1.0  12636.67  
1                1.0  11250.00  
2                1.1  16170.00  
3                1.0  11075.00  
4                1.0  10000.00  
✅ Rent column generated and saved to 'house_rent_predicted.csv'


In [7]:
# Reload house_rent_predicted.csv (the file saved by the rent cell) into a
# separate frame and preview its first rows
data = pd.read_csv('./house_rent_predicted.csv')
data.head()

Unnamed: 0,square_feet,BHK,Price_per_Sqft,Price,Main_Locality,Sqft,Locality_Factor,Adjustment_Factor,Rent
0,1173,2,3800.0,4460000,pal,1173.684211,0.034,1.0,12636.67
1,1250,2,3600.0,4500000,palanpur,1250.0,0.03,1.0,11250.0
2,700,3,3392.0,4410000,vesu,1300.117925,0.04,1.1,16170.0
3,1180,2,3751.0,4430000,palanpur,1181.018395,0.03,1.0,11075.0
4,720,2,3200.0,4000000,palanpur,1250.0,0.03,1.0,10000.0


In [8]:
# Show the column labels of the reloaded frame
data.columns

Index(['square_feet', 'BHK', 'Price_per_Sqft', 'Price', 'Main_Locality',
       'Sqft', 'Locality_Factor', 'Adjustment_Factor', 'Rent'],
      dtype='object')

In [13]:
# Drop these four columns from the frame.
# Reassigning (rather than inplace=True) together with errors='ignore' keeps
# this cell safe to re-run: on a second run the columns are already gone and
# the call is a no-op instead of raising KeyError.
data = data.drop(
    columns=['square_feet', 'Price', 'Locality_Factor', 'Adjustment_Factor'],
    errors='ignore',
)


In [14]:
# Preview the first rows of the frame after the column drop
data.head()

Unnamed: 0,BHK,Price_per_Sqft,Main_Locality,Sqft,Rent
0,2,3800.0,pal,1173.684211,12636.67
1,2,3600.0,palanpur,1250.0,11250.0
2,3,3392.0,vesu,1300.117925,16170.0
3,2,3751.0,palanpur,1181.018395,11075.0
4,2,3200.0,palanpur,1250.0,10000.0


In [16]:
# Save the trimmed frame as house_rent_final.csv, omitting the row index
data.to_csv('house_rent_final.csv', index=False)