# In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt
import seaborn as sns

# Step 1: Read the cleaned housing dataset from disk
data = pd.read_csv('New_Housing_clean_data .csv')

# Step 2: Impute missing values.
# Each numeric column is filled with its own column mean; the categorical
# 'status' column is filled with the literal placeholder string 'None'.
for _numeric_col in ('area', 'bhk', 'bathroom', 'age'):
    data[_numeric_col] = data[_numeric_col].fillna(data[_numeric_col].mean())
data['status'] = data['status'].fillna('None')

# Step 3: Separate the feature matrix from the target vector
X = data.loc[:, ['area', 'status', 'bhk', 'bathroom', 'age']]
y = data['price']

# Step 4: One-hot encode 'status'. drop_first=True omits the first category,
# which therefore acts as the all-zeros baseline.
X_encoded = pd.get_dummies(X, columns=['status'], drop_first=True)

# Step 5: Fit the linear regression on every row (no train/test split is made)
model = LinearRegression().fit(X_encoded, y)

# Function to predict house value
def predict_house_value(area, status, bhk, bathroom, age):
    """Print the recorded or model-predicted price for a house.

    If the module-level ``data`` frame contains a row whose ``area``,
    ``status``, ``bhk``, ``bathroom`` and ``age`` all equal the arguments, the
    first such row's location, builder and price are printed, followed by the
    other location whose model prediction is closest to that price and the
    three ``area`` values with the highest mean price in the same location.
    Otherwise the price is predicted with the module-level ``model`` and the
    location of the dataset row with the closest prediction is printed.

    Relies on the module-level names ``data``, ``X_encoded`` and ``model``.

    Args:
        area: Area of the house.
        status: Status string; compared verbatim with ``data['status']``.
        bhk: Number of BHK.
        bathroom: Number of bathrooms.
        age: Age of the house.

    Returns:
        None. All results are written to standard output.
    """
    # Build a one-row frame holding the query
    input_df = pd.DataFrame({
        'area': [area],
        'status': [status],
        'bhk': [bhk],
        'bathroom': [bathroom],
        'age': [age],
    })

    # Impute missing inputs with the dataset's column means (including 'age',
    # which was previously filled with 0) and 'None' for a missing status.
    for column in ('area', 'bhk', 'bathroom', 'age'):
        input_df[column] = input_df[column].fillna(data[column].mean())
    input_df['status'] = input_df['status'].fillna('None')

    # One-hot encode 'status' WITHOUT drop_first. On a single row drop_first
    # would discard the only dummy column, so every status would collapse to
    # the baseline category. Reindexing to the training columns then removes
    # the baseline dummy (leaving all status dummies at 0) and zero-fills the
    # categories this row does not belong to. A status value that has no
    # matching training column is likewise treated as the baseline.
    input_encoded = pd.get_dummies(input_df, columns=['status'])
    input_encoded = input_encoded.reindex(columns=X_encoded.columns, fill_value=0)

    # Check if data is available for the given input combination
    exact_matches = data[
        (data['area'] == area)
        & (data['status'] == status)
        & (data['bhk'] == bhk)
        & (data['bathroom'] == bathroom)
        & (data['age'] == age)
    ]

    if exact_matches.empty:
        # No data available in the dataset for the given input combination
        print("No data available for the given input combination.")
        print("Calculating predicted house value...")

        # predict() returns a one-element array for the one-row input; take
        # the scalar so the subtraction below is plain Series-minus-scalar.
        predicted_value = float(model.predict(input_encoded)[0])
        print("Predicted House Value: ₹", round(predicted_value))

        # Find the dataset row whose predicted price is closest to ours
        nearby_location = data.copy()
        nearby_location['predicted_value'] = model.predict(X_encoded)
        nearby_location['difference'] = (
            nearby_location['predicted_value'] - predicted_value
        ).abs()
        nearby_location = nearby_location.sort_values('difference').head(1)

        print("\nMost Nearby Location:")
        print("Location:", nearby_location['location'].iloc[0])
        print("---------------------------------")
        return

    # Data available in the dataset for the given input combination
    location = exact_matches['location'].iloc[0]
    builder = exact_matches['builder'].iloc[0]
    house_value = exact_matches['price'].iloc[0]
    print("House found with the given input combination:")
    print("Location:", location)
    print("Builder:", builder)
    print("House Value: ₹", round(house_value))

    # Find the other location whose predicted price is closest to this one.
    # The model must be fed the encoded features (X_encoded), not raw `data`,
    # which still carries the un-encoded 'status' column. X_encoded keeps the
    # row index of `data`, so the same boolean mask selects matching rows.
    other_rows = data['location'] != location
    candidates = data[other_rows].copy()  # copy: we add columns below
    if candidates.empty:
        # Every row shares this location, so there is nothing to compare with
        print("\nNo other location available for comparison.")
    else:
        candidates['predicted_value'] = model.predict(X_encoded[other_rows])
        candidates['matching_score'] = (
            candidates['predicted_value'] - house_value
        ).abs()
        best_match = candidates.sort_values('matching_score').head(1)

        print("\nMost Matching Location:")
        print("Location:", best_match['location'].iloc[0])
        print("Approximate House Value: ₹", round(best_match['predicted_value'].iloc[0]))
    print("---------------------------------")

    # Suggest nearby matching areas with approximate price: the three 'area'
    # values with the highest mean price inside the matched location.
    nearby_area_data = data[data['location'] == location]
    nearby_area_data = nearby_area_data.groupby('area').agg({'price': 'mean'}).reset_index()
    nearby_area_data = nearby_area_data.sort_values('price', ascending=False).head(3)

    print("\nNearby Matching Areas:")
    for _, row in nearby_area_data.iterrows():
        print("Area:", row['area'])
        print("Approximate Price: ₹", round(row['price']))
        print("---------------------------------")

# Ask for user inputs
area = float(input("Enter the area of the house: "))
# Strip surrounding whitespace from the status: it is compared by exact string
# equality against the dataset and used verbatim for one-hot encoding, so a
# stray leading/trailing space would silently fail to match any category.
# (float()/int() already ignore such padding for the numeric inputs.)
status = input("Enter the status of the house (Ready to move or Under Construction): ").strip()
bhk = float(input("Enter the number of BHK (Bedrooms, Hall, Kitchen): "))
bathroom = float(input("Enter the number of bathrooms: "))
age = int(input("Enter the age of the house: "))

# Predict or calculate house value based on input combination
predict_house_value(area, status, bhk, bathroom, age)
