In [1]:
import pandas as pd
from sklearn.compose import TransformedTargetRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor





In [2]:
# Absolute, machine-specific location of the cleaned Mumbai housing CSV.
# NOTE(review): edit this constant when running the notebook on another machine.
DATA_CSV_PATH = r'C:\Users\Krishna\Documents\codes\House Price Prediction Mumbai\data\mumbai_cleaned.csv'

# Load the whole dataset into a DataFrame used by every later cell.
df = pd.read_csv(DATA_CSV_PATH)

In [3]:
# Preview the first five rows to sanity-check the loaded columns.
df.head(n=5)

Unnamed: 0,price,area,Bedrooms,Bathrooms,Furnished_status
0,22400000.0,629.0,2,2,1
1,35000000.0,974.0,3,2,1
2,31700000.0,968.0,3,3,1
3,18700000.0,629.0,2,2,1
4,13500000.0,1090.0,2,2,1


In [4]:
# Model inputs: every column of the frame except the price
features = df.drop(columns=['price'])

# Regression target: the price column on its own
target = df['price']

# Show which columns ended up as model inputs
print(features.columns)

Index(['area', 'Bedrooms', 'Bathrooms', 'Furnished_status'], dtype='object')


In [5]:


# Hold out 17% of the rows for evaluation; the fixed seed makes the
# partition identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.17,
    random_state=42,
)

# Report the (rows, columns) of each partition
print("Training set shape:", X_train.shape)
print("Test set shape:", X_test.shape)


Training set shape: (4943, 4)
Test set shape: (1013, 4)


In [6]:

def linear_regression(X_train, X_test, y_train, y_test, X_pred):
    """Fit a linear regression and score it on the held-out split.

    Parameters
    ----------
    X_train, y_train
        Training features and targets handed to ``fit``.
    X_test, y_test
        Held-out features and targets used to score the fitted model.
    X_pred
        User-provided feature rows to generate predictions for.

    Returns
    -------
    tuple
        ``(r2, y_pred_user)``: the value of ``r2_score`` for the test-set
        predictions (an R² score, not a mean squared error) and the
        model's predictions for ``X_pred``.
    """
    regressor = LinearRegression()
    regressor.fit(X_train, y_train)

    # Predictions for the evaluation rows and for the caller's rows
    test_predictions = regressor.predict(X_test)
    user_predictions = regressor.predict(X_pred)

    # Goodness of fit on the held-out data
    r2 = r2_score(y_test, test_predictions)

    return r2, user_predictions


In [7]:


def decision_tree_regressor(X_train, X_test, y_train, y_test, X_pred, random_state=42):
    """Tune a decision tree with a grid search and score the best one.

    A 5-fold ``GridSearchCV`` (selecting by negative mean squared error)
    searches ``max_depth``, ``min_samples_split`` and ``min_samples_leaf``;
    the refitted best estimator is then used for all predictions.

    Parameters
    ----------
    X_train, y_train
        Training features and targets used for the grid search.
    X_test, y_test
        Held-out features and targets used to score the best estimator.
    X_pred
        User-provided feature rows to generate predictions for.
    random_state
        Seed passed to ``DecisionTreeRegressor`` so repeated runs give the
        same tree. Defaults to 42, the seed used for the train/test split.

    Returns
    -------
    tuple
        ``(r2, y_pred_user)``: the value of ``r2_score`` for the test-set
        predictions (an R² score, not a mean squared error) and the best
        estimator's predictions for ``X_pred``.
    """
    # Define the hyperparameters grid
    param_grid = {
        'max_depth': [None, 5, 10, 15],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4]
    }

    # Seed the tree: without it, ties between equally good splits are
    # broken randomly and the reported score changes from run to run.
    dt_regressor = DecisionTreeRegressor(random_state=random_state)

    # Create GridSearchCV object
    grid_search = GridSearchCV(estimator=dt_regressor, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)

    # Fit the model on the training data with hyperparameter tuning
    grid_search.fit(X_train, y_train)

    # Best estimator found by the search
    best_regressor = grid_search.best_estimator_

    # Predict the target variable for the test data
    y_pred = best_regressor.predict(X_test)

    # Predict the target variable for the user-provided data
    y_pred_user = best_regressor.predict(X_pred)

    # Calculate the R² score for the test data
    r2 = r2_score(y_test, y_pred)

    return r2, y_pred_user


In [8]:

def random_forest_regressor(X_train, X_test, y_train, y_test, X_pred, random_state=42):
    """Fit a random forest and score it on the held-out split.

    Parameters
    ----------
    X_train, y_train
        Training features and targets handed to ``fit``.
    X_test, y_test
        Held-out features and targets used to score the fitted model.
    X_pred
        User-provided feature rows to generate predictions for.
    random_state
        Seed passed to ``RandomForestRegressor`` so repeated runs build the
        same forest. Defaults to 42, the seed used for the train/test split.

    Returns
    -------
    tuple
        ``(r2, y_pred_user)``: the value of ``r2_score`` for the test-set
        predictions (an R² score, not a mean squared error) and the
        model's predictions for ``X_pred``.
    """
    # Seed the forest: an unseeded forest gives a different score and
    # different user predictions on every run.
    model = RandomForestRegressor(random_state=random_state)

    # Fit the model on the training data
    model.fit(X_train, y_train)

    # Predict the target variable for the test data
    y_pred = model.predict(X_test)

    # Predict the target variable for the user-provided data
    y_pred_user = model.predict(X_pred)

    # R² score of the test-set predictions
    r2 = r2_score(y_test, y_pred)

    return r2, y_pred_user


In [9]:

def support_vector_regressor(X_train, X_test, y_train, y_test, X_pred):
    """Tune a support vector regressor over kernels and score the best one.

    SVR is not scale-invariant, so the features are standardised inside a
    pipeline and the target is standardised through
    ``TransformedTargetRegressor``. Predictions are transformed back
    automatically, so scores and returned predictions stay in the original
    target units. A 5-fold ``GridSearchCV`` (selecting by negative mean
    squared error) chooses between the linear, poly and rbf kernels.

    Parameters
    ----------
    X_train, y_train
        Training features and targets used for the grid search.
    X_test, y_test
        Held-out features and targets used to score the best estimator.
    X_pred
        User-provided feature rows to generate predictions for.

    Returns
    -------
    tuple
        ``(r2, y_pred_user)``: the value of ``r2_score`` for the test-set
        predictions (an R² score, not a mean squared error) and the best
        estimator's predictions for ``X_pred``.
    """
    # Scale the inputs as part of the model so every CV fold is scaled
    # using statistics from its own training portion only.
    scaled_svr = make_pipeline(StandardScaler(), SVR())

    # Scale the target as well: with SVR's default C and epsilon, raw
    # prices in the millions yield near-constant predictions.
    model = TransformedTargetRegressor(regressor=scaled_svr, transformer=StandardScaler())

    # Define the hyperparameters grid; the key addresses the ``svr`` step
    # of the pipeline held in the wrapper's ``regressor`` attribute.
    param_grid = {
        'regressor__svr__kernel': ['linear', 'poly', 'rbf']
    }

    # Create GridSearchCV object
    grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)

    # Fit the model on the training data with hyperparameter tuning
    grid_search.fit(X_train, y_train)

    # Best estimator found by the search
    best_regressor = grid_search.best_estimator_

    # Predict the target variable for the test data
    y_pred = best_regressor.predict(X_test)

    # Predict the target variable for the user-provided data
    y_pred_user = best_regressor.predict(X_pred)

    # R² score of the test-set predictions
    r2 = r2_score(y_test, y_pred)

    return r2, y_pred_user
