In [1]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:

import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

file_path = '/content/drive/MyDrive/MachineLearning Project/Gurgaon_RealEstate.csv'
data = pd.read_csv(file_path)

categorical_features = data.select_dtypes(include='object').columns.tolist()
numerical_features = data.select_dtypes(exclude='object').columns.tolist()

data_copy = data.copy()

imputer = SimpleImputer(strategy='most_frequent')
data_copy[numerical_features] = imputer.fit_transform(data_copy[numerical_features])

encoder = LabelEncoder()
for col in categorical_features:
    data_copy[col] = encoder.fit_transform(data_copy[col].astype(str))

scaler = StandardScaler()
data_copy[numerical_features] = scaler.fit_transform(data_copy[numerical_features])

target = 'price'

X = data_copy.drop(columns=[target])
y = data_copy[target]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

model = RandomForestRegressor(n_estimators=100, max_depth=5)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"MSE = {mse:.4f}, R^2 = {r2:.4f}")

while True:
    choice = input("\nDo you want to predict on new data? (yes/no): ").lower()
    if choice == 'yes':
        new_data = {}
        for feature in X.columns:
            value = input(f"Enter {feature}: ").strip()
            new_data[feature] = [value]

        new_data_df = pd.DataFrame(new_data)

        for col in categorical_features:
            if col in new_data_df.columns:
                new_data_df[col] = new_data_df[col].apply(lambda x: x if x in encoder.classes_ else 'UNKNOWN')
                new_data_df[col] = encoder.transform(new_data_df[col].astype(str))

        for col in numerical_features:
            if col in new_data_df.columns:
                value = new_data_df[col].iloc[0]
                if value == '':
                    new_data_df[col] = np.nan
                else:
                    new_data_df[col] = scaler.transform(np.array([[float(value)]]))

        prediction = model.predict(new_data_df.fillna(data_copy[numerical_features].median()))
        print(f"\nPredicted price: {prediction[0]}")
    elif choice == 'no':
        print("Exiting...")
        break
    else:
        print("Invalid choice. Please enter 'yes' or 'no'.")


MSE = 0.0743, R^2 = 0.9186
