In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [2]:
# Read the raw house-price records from the CSV file into a DataFrame.
csv_path = 'house_prices.csv'
data = pd.read_csv(csv_path)

In [3]:
# Show the first five rows as a quick sanity check of the loaded columns.
data.head(n=5)

Unnamed: 0,HouseID,Location,Size_sqft,Bedrooms,Bathrooms,YearBuilt,HasGarage,HasGarden,ProximityToCityCenter_miles,Price
0,H001,Suburban,2000,3,2,1995,Yes,Yes,5,350000
1,H002,Urban,850,2,1,2010,No,No,1,220000
2,H003,Rural,2500,4,3,1985,Yes,Yes,20,275000
3,H004,Suburban,1500,3,2,2000,Yes,No,8,300000
4,H005,Urban,1200,2,2,2015,No,Yes,3,320000


In [4]:
# Preprocess the data
# Encode the categorical columns (Location, HasGarage, HasGarden) as integers.
#
# The codes are written out explicitly instead of being inferred from whatever
# values happen to be present in the CSV. That keeps them stable (a missing or
# extra category can no longer silently shift the numbering) and guarantees
# they agree with the hard-coded codes used when building a house to predict.
# NOTE(review): Location is a nominal category; an integer code makes a linear
# model treat it as ordered. One-hot encoding would likely be a better fit, but
# it would change the feature columns the rest of the notebook expects.
CATEGORY_CODES = {
    'Location': {'Rural': 0, 'Suburban': 1, 'Urban': 2},
    'HasGarage': {'No': 0, 'Yes': 1},
    'HasGarden': {'No': 0, 'Yes': 1},
}

for column, codes in CATEGORY_CODES.items():
    values = data[column]

    # Re-running this cell must be a no-op: skip a column that already holds
    # nothing but its integer codes.
    if values.isin(list(codes.values())).all():
        continue

    # Fail loudly on anything the code table does not cover (typos, new
    # categories, missing values) rather than producing NaN or shifted codes.
    unexpected = set(values.unique()) - set(codes)
    if unexpected:
        raise ValueError(
            f"Unexpected values in column {column!r}: {sorted(map(str, unexpected))}"
        )

    data[column] = values.map(codes)

In [5]:
# Derive the age of each house, in years, relative to the reference year 2024.
reference_year = 2024
data['HouseAge'] = reference_year - data['YearBuilt']

In [6]:
# Remove the identifier column and the raw build year; HouseAge (computed in
# the previous cell) replaces YearBuilt as a feature.
unused_columns = ['HouseID', 'YearBuilt']
data = data.drop(unused_columns, axis=1)

In [7]:
# Show the first three rows after encoding and column removal.
data.head(n=3)

Unnamed: 0,Location,Size_sqft,Bedrooms,Bathrooms,HasGarage,HasGarden,ProximityToCityCenter_miles,Price,HouseAge
0,1,2000,3,2,1,1,5,350000,29
1,2,850,2,1,0,0,1,220000,14
2,0,2500,4,3,1,1,20,275000,39


In [8]:
# Separate the target (Price) from the features, then hold out 30% of the rows
# for testing. The fixed random_state makes the split reproducible.
y = data['Price']
X = data.drop(columns=['Price'])
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [9]:
# Fit a linear regression on the training portion of the data.
model = LinearRegression()
model.fit(X=X_train, y=y_train)

In [10]:
# Score the fitted model on the held-out test rows: mean absolute error,
# mean squared error and R-squared, in that order.
y_pred = model.predict(X_test)
mae, mse, r2 = (
    metric(y_test, y_pred)
    for metric in (mean_absolute_error, mean_squared_error, r2_score)
)

In [11]:
# Report the three test-set metrics, one per line.
print(
    f'Mean Absolute Error: {mae}',
    f'Mean Squared Error: {mse}',
    f'R-squared: {r2}',
    sep='\n',
)

Mean Absolute Error: 29800.609560888628
Mean Squared Error: 1248643071.6132915
R-squared: 0.6019380068357008


In [12]:
# Describe a single new house to price, as a one-row DataFrame.
# Categorical fields use the same integer codes as the training data, and the
# keys are listed in the same order as the training feature columns.
new_house_record = {
    'Location': 1,  # Suburban
    'Size_sqft': 1800,
    'Bedrooms': 3,
    'Bathrooms': 2,
    'HasGarage': 1,  # Yes
    'HasGarden': 1,  # Yes
    'ProximityToCityCenter_miles': 7,
    'HouseAge': 27,  # 2024 - 1997
}
new_house = pd.DataFrame([new_house_record])

In [15]:
# Run the fitted model on the one-row frame; predict returns an array, so the
# single prediction is element 0. Printed as dollars with thousands separators.
predicted_price = model.predict(X=new_house)
print(f'Predicted House Price: ${predicted_price[0]:,.2f}')

Predicted House Price: $308,092.12
