In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

In [3]:
from google.colab import files
# Upload the CSV file. The returned object is kept in `uploaded`; a later cell
# reads its first key and passes it to pd.read_csv as the file to load.
# NOTE(review): depends on the google.colab package, so this cell presumably
# only runs inside a Colab runtime — confirm before running it elsewhere.
uploaded = files.upload()

Saving House_Price_Prediction.csv to House_Price_Prediction.csv


In [4]:
# The upload result is keyed by file name: take the first key and parse that
# file into the working DataFrame.
uploaded_names = list(uploaded)
df = pd.read_csv(uploaded_names[0])

# Preview the top rows as a quick sanity check on the parsed columns
df.head()

Unnamed: 0,Bedroom,Hall,Kitchen,Bathrooms,Balcony,Size (sq ft),Location,Price
0,3,1,1,2,1,1292,Rural,350000
1,4,1,1,2,1,1614,Urban,500000
2,2,1,1,1,1,968,Suburban,220000
3,5,2,1,3,1,2152,Rural,650000
4,3,1,1,2,1,1184,Urban,380000


In [5]:
# Pull the target out first, then keep every remaining column as a feature
target_col = 'Price'
y = df[target_col]                 # what the model learns to predict
X = df.drop(columns=[target_col])  # everything except the target

In [6]:
# Feature preprocessing: numeric columns are standardised, 'Location' is
# one-hot encoded. Only the columns named here are routed to a transformer.
numeric_cols = ['Bedroom', 'Hall', 'Kitchen', 'Bathrooms', 'Balcony', 'Size (sq ft)']
categorical_cols = ['Location']

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_cols),
        ('cat', OneHotEncoder(), categorical_cols),
    ]
)


In [7]:
# Apply transformations: fit the scaler/encoder on the feature frame and keep
# the transformed feature matrix.
# NOTE(review): the fit uses every row of X, including the rows that the later
# train_test_split call holds out as the test set, so test rows contribute to
# the fitted scaling/encoding parameters.
X_processed = preprocessor.fit_transform(X)

In [8]:
# Step 3: Split the data into training and testing sets (80% train, 20% test)
# The split is done on the RAW features so the preprocessor can be fitted on the
# training rows only. Fitting it on the full dataset (as the earlier
# fit_transform(X) call does) lets test-set statistics leak into the scaling
# parameters. The same random_state on the same number of rows yields the same
# row partition as splitting the pre-transformed matrix would.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Refit on the training rows only; from here on `preprocessor` holds the
# train-only statistics, which is also what later transform() calls will use.
X_train = preprocessor.fit_transform(X_train_raw)

# Test rows are only transformed, never fitted on.
# NOTE: with OneHotEncoder's default settings this raises if a 'Location' value
# appears in the test rows but not in the training rows.
X_test = preprocessor.transform(X_test_raw)

In [9]:
# Step 4: Build an ordinary least-squares regressor and fit it on the training split
# (fit() returns the estimator itself, so construction and training can be chained)
model = LinearRegression().fit(X_train, y_train)
model

In [10]:
# Step 5: Make predictions on the test set
# y_pred is what the evaluation step compares against y_test.
y_pred = model.predict(X_test)

In [11]:
# Step 6: Score the test-set predictions against the true prices
# mse: mean squared error; r2: coefficient of determination
mse, r2 = (
    mean_squared_error(y_test, y_pred),
    r2_score(y_test, y_pred),
)

In [12]:
# Report both evaluation metrics, one per line
print(
    f"Mean Squared Error: {mse}",
    f"R-squared: {r2}",
    sep="\n",
)

Mean Squared Error: 1371457635.3090575
R-squared: 0.9080220029014285


In [16]:
# Step 7: User Input for New House
# Collect the features of one house interactively, run them through the fitted
# preprocessor and model, and print the predicted price. Invalid replies are
# re-prompted instead of aborting the cell with a traceback.


def _prompt_int(prompt):
    """Ask `prompt` repeatedly until the reply parses as a non-negative int.

    Returns:
        The parsed integer (>= 0).
    """
    while True:
        reply = input(prompt).strip()
        try:
            value = int(reply)
        except ValueError:
            print(f"'{reply}' is not a whole number, please try again.")
            continue
        if value < 0:
            print("Please enter a value of 0 or more.")
            continue
        return value


def _prompt_location(prompt, known_locations):
    """Ask `prompt` until the reply matches a known location, ignoring case.

    Args:
        prompt: Text shown to the user.
        known_locations: Location names the fitted encoder was trained on.

    Returns:
        The matching location spelled exactly as in `known_locations`, so the
        encoder recognises it.
    """
    by_lowercase = {name.lower(): name for name in known_locations}
    while True:
        reply = input(prompt).strip()
        canonical = by_lowercase.get(reply.lower())
        if canonical is not None:
            return canonical
        print(f"Unknown location '{reply}'. Choose one of: {', '.join(known_locations)}")


print("Please enter the details of the new house:")

# User input for house features
bedroom = _prompt_int("Enter number of bedrooms: ")
hall = _prompt_int("Enter number of halls: ")
kitchen = _prompt_int("Enter number of kitchens: ")
bathrooms = _prompt_int("Enter number of bathrooms: ")
balcony = _prompt_int("Enter number of balconies: ")
size_sqft = _prompt_int("Enter size of the house in square feet: ")

# The encoder rejects categories it did not see while fitting, so validate the
# reply against the categories stored on the fitted 'cat' transformer.
known_locations = [
    str(category)
    for category in preprocessor.named_transformers_['cat'].categories_[0]
]
location = _prompt_location("Enter location (Rural, Urban, Suburban): ", known_locations)

# Create a single-row DataFrame using the same column names the preprocessor
# was configured with
new_house = pd.DataFrame(
    [[bedroom, hall, kitchen, bathrooms, balcony, size_sqft, location]],
    columns=['Bedroom', 'Hall', 'Kitchen', 'Bathrooms', 'Balcony', 'Size (sq ft)', 'Location'],
)

# Apply the already-fitted transformations (transform only, no refit)
new_house_processed = preprocessor.transform(new_house)

# Predict the price for the new house
predicted_price = model.predict(new_house_processed)

# Display the predicted price
print(f"Predicted Price for the new house: ${predicted_price[0]:,.2f}")


Please enter the details of the new house:
Enter number of bedrooms: 2
Enter number of halls: 1
Enter number of kitchens: 1
Enter number of bathrooms: 2
Enter number of balconies: 2
Enter size of the house in square feet: 1200
Enter location (Rural, Urban, Suburban): Rural
Predicted Price for the new house: $345,539.10


In [18]:
import pickle

# Persist the fitted model and the fitted preprocessor (scaler and encoder),
# each in its own pickle file in the working directory.
artifacts = {
    'house_price_model.pkl': model,
    'preprocessor.pkl': preprocessor,
}
for artifact_path, artifact in artifacts.items():
    with open(artifact_path, 'wb') as out_file:
        pickle.dump(artifact, out_file)

print("\nModel and preprocessor saved successfully!")



Model and preprocessor saved successfully!
