In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Load and prepare the data
data = pd.read_csv('House_Data.csv')

# Convert numeric columns to float, replacing non-numeric values with NaN
numeric_columns = ['total_sqft', 'bath', 'balcony', 'price']
for col in numeric_columns:
    data[col] = pd.to_numeric(data[col], errors='coerce')

# Remove rows with NaN values
data = data.dropna()

# Separate features and target
X = data.drop('price', axis=1)
y = data['price']

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the preprocessing for the numeric and categorical features
numeric_features = ['total_sqft', 'bath', 'balcony']
categorical_features = X.select_dtypes(include=['object']).columns.tolist()

preprocessor = ColumnTransformer(
    transformers=[
        ('num', 'passthrough', numeric_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ])


# Define the model pipeline
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', RandomForestRegressor(n_estimators=100, random_state=42))
])

# Fit the model
model.fit(X_train, y_train)


# Function to get user input and predict price for a new house
def get_user_input():
    area_type = input("Area Type (e.g., Super built-up Area): ")
    availability = input("Availability (e.g., Ready To Move): ")
    location = input("Location (e.g., Electronic City Phase II): ")
    size = input("Size (e.g., 2 BHK): ")
    society = input("Society: ")
    total_sqft = float(input("Total Square Feet: "))
    bath = float(input("Number of Bathrooms: "))
    balcony = float(input("Number of Balconies: "))
    
    # Create a DataFrame from user input
    user_input = pd.DataFrame({
        'area_type': [area_type],
        'availability': [availability],
        'location': [location],
        'size': [size],
        'society': [society],
        'total_sqft': [total_sqft],
        'bath': [bath],
        'balcony': [balcony]
    })
    
    return user_input

# Get user input for a new house and predict the price
new_house = get_user_input()
predicted_price = model.predict(new_house)
print(f"Predicted Price: {predicted_price[0]:.2f}")

Area Type (e.g., Super built-up Area):  posh
Availability (e.g., Ready To Move):  dec-12
Location (e.g., Electronic City Phase II):  sarjapura
Size (e.g., 2 BHK):  3 BHK
Society:  
Total Square Feet:  2300
Number of Bathrooms:  2
Number of Balconies:  1


Predicted Price: 140.99
