In [10]:
import pandas as pd

data = pd.read_csv("house_price_dataset.csv.csv")
data.head()

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,1,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,...,0,,,,0,2,2008,WD,Normal,208500
1,2,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,...,0,,,,0,5,2007,WD,Normal,181500
2,3,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,...,0,,,,0,9,2008,WD,Normal,223500
3,4,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,...,0,,,,0,2,2006,WD,Abnorml,140000
4,5,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,...,0,,,,0,12,2008,WD,Normal,250000


In [11]:
X = data[['GrLivArea', 'BedroomAbvGr', 'FullBath']]
y = data['SalePrice']

X.head(), y.head()

(   GrLivArea  BedroomAbvGr  FullBath
 0       1710             3         2
 1       1262             3         2
 2       1786             3         2
 3       1717             3         1
 4       2198             4         2,
 0    208500
 1    181500
 2    223500
 3    140000
 4    250000
 Name: SalePrice, dtype: int64)

In [12]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [13]:
from sklearn.linear_model import LinearRegression

model = LinearRegression()
model.fit(X_train, y_train)

In [15]:
y_pred = model.predict(X_test)

In [16]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

print("MAE:", mean_absolute_error(y_test, y_pred))
print("RMSE:", np.sqrt(mean_squared_error(y_test, y_pred)))
print("R2 Score:", r2_score(y_test, y_pred))

MAE: 35788.061292436294
RMSE: 52975.71771338122
R2 Score: 0.6341189942328371


In [17]:
new_house = [[2000, 3, 2]]  # 2000 sq ft, 3 bedrooms, 2 bathrooms
predicted_price = model.predict(new_house)

print("Predicted House Price:", predicted_price[0])

Predicted House Price: 240377.51479736282




In [18]:
results = pd.DataFrame({
    'Actual_Price': y_test.values,
    'Predicted_Price': y_pred
})

results.head(10)


Unnamed: 0,Actual_Price,Predicted_Price
0,154500,113410.672553
1,325000,305081.877759
2,115000,135904.78563
3,159000,205424.675641
4,315500,227502.68349
5,75500,121157.480796
6,311500,205577.980566
7,146000,183787.203783
8,84500,121157.480796
9,135500,147219.222332


In [20]:
# Take input from user
sqft = float(input("Enter house area in square feet: "))
bedrooms = int(input("Enter number of bedrooms: "))
bathrooms = int(input("Enter number of bathrooms: "))

# Create DataFrame with correct feature names
user_house = pd.DataFrame(
    [[sqft, bedrooms, bathrooms]],
    columns=['GrLivArea', 'BedroomAbvGr', 'FullBath']
)

# Predict price
predicted_price = model.predict(user_house)

print("\nPredicted House Price:", round(predicted_price[0], 2))



Enter house area in square feet: 15000
Enter number of bedrooms: 3
Enter number of bathrooms: 2

Predicted House Price: 1592719.51
