# **Pakistan House Price Prediction**
---

## **Importing Data**

In [3]:
# importing data and libraries
import numpy as np
import pandas as pd

# disable warnings
import warnings
warnings. filterwarnings('ignore')

In [4]:
# reading data
df = pd.read_csv('house_prices.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,property_type,price,location,city,baths,purpose,bedrooms,Area_in_Marla
0,0,Flat,10000000,G-10,Islamabad,2,For Sale,2,4.0
1,1,Flat,6900000,E-11,Islamabad,3,For Sale,3,5.6
2,2,House,16500000,G-15,Islamabad,6,For Sale,5,8.0
3,3,House,43500000,Bani Gala,Islamabad,4,For Sale,4,40.0
4,4,House,7000000,DHA Defence,Islamabad,3,For Sale,3,8.0


## **Data Cleaning and Preprocessing**

In [5]:
# dropping unecessary columns
df.drop(["Unnamed: 0"], axis = 1, inplace = True)

### **Checking for null values**

In [6]:
df.isna().sum()

Unnamed: 0,0
property_type,0
price,0
location,0
city,0
baths,0
purpose,0
bedrooms,0
Area_in_Marla,0


### **Dropping duplicate values**

In [7]:
df = df.drop_duplicates().reset_index(drop = True)

### **Feature Engineering**

In [8]:
# converting marla to area in sq ft
df['area'] = df['Area_in_Marla'] * 272.25
df.drop('Area_in_Marla',axis=1, inplace = True)

df.head()

Unnamed: 0,property_type,price,location,city,baths,purpose,bedrooms,area
0,Flat,10000000,G-10,Islamabad,2,For Sale,2,1089.0
1,Flat,6900000,E-11,Islamabad,3,For Sale,3,1524.6
2,House,16500000,G-15,Islamabad,6,For Sale,5,2178.0
3,House,43500000,Bani Gala,Islamabad,4,For Sale,4,10890.0
4,House,7000000,DHA Defence,Islamabad,3,For Sale,3,2178.0


In [9]:
# re-arranging features
df = df[["property_type", "location", "city", "purpose", "baths", "bedrooms", "area", "price"]]
df.columns = ["type", "location", "city", "purpose", "baths", "beds", "area", "price"]

df.head()

Unnamed: 0,type,location,city,purpose,baths,beds,area,price
0,Flat,G-10,Islamabad,For Sale,2,2,1089.0,10000000
1,Flat,E-11,Islamabad,For Sale,3,3,1524.6,6900000
2,House,G-15,Islamabad,For Sale,6,5,2178.0,16500000
3,House,Bani Gala,Islamabad,For Sale,4,4,10890.0,43500000
4,House,DHA Defence,Islamabad,For Sale,3,3,2178.0,7000000


### **Categorizing Features**

In [10]:
# categorical columns
cat_cols = ["type", "location", "city", "purpose"]

# numerical columns
num_cols = ["area", "baths", "beds"]

In [11]:
from sklearn.preprocessing import LabelEncoder

# initialize label encoder
encoder = LabelEncoder()

# apply label encoder to each column individually
for column in df[cat_cols]:
    df[column] = encoder.fit_transform(df[column])

df.head()

Unnamed: 0,type,location,city,purpose,baths,beds,area,price
0,1,452,1,1,2,2,1089.0,10000000
1,1,382,1,1,3,3,1524.6,6900000
2,2,457,1,1,6,5,2178.0,16500000
3,2,198,1,1,4,4,10890.0,43500000
4,2,327,1,1,3,3,2178.0,7000000


In [12]:
from sklearn.preprocessing import StandardScaler

# initialize the standard scaler
scaler = StandardScaler()

# apply the standard scaler to each column individually
for column in num_cols:
    # reshape the column to a 2D array
    df[column] = scaler.fit_transform(df[[column]])

# display the first few rows of the dataframe
df.head()

Unnamed: 0,type,location,city,purpose,baths,beds,area,price
0,1,452,1,1,-0.980008,-1.023639,-0.563772,10000000
1,1,382,1,1,-0.305714,-0.25647,-0.367333,6900000
2,2,457,1,1,1.717169,1.277868,-0.072673,16500000
3,2,198,1,1,0.36858,0.510699,3.856117,43500000
4,2,327,1,1,-0.305714,-0.25647,-0.072673,7000000


### **Splitting Data into Feature and Target Variable**

In [13]:
X = df.drop('price', axis = 1)
y = df['price']

## **Machine Learning**

### **Splitting Data into Test and Train**

In [14]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=True)

### **1. Decision Tree Regressor**

In [15]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Train the DecisionTreeRegressor model
dt_model = DecisionTreeRegressor(random_state=42)
dt_model.fit(X_train, y_train)

# Predict on the test set
y_pred = dt_model.predict(X_test)

# Evaluate the model
mae_dt = mean_absolute_error(y_test, y_pred)
mse_dt = mean_squared_error(y_test, y_pred)
rmse_dt = np.sqrt(mse_dt)
r2_dt = r2_score(y_test, y_pred)

print(f'Mean Absolute Error: {mae_dt}')
print(f'Mean Squared Error: {mse_dt}')
print(f'Root Mean Squared Error: {rmse_dt}')
print(f'R-squared: {r2_dt}')

Mean Absolute Error: 2264026.1122815553
Mean Squared Error: 19021607635927.957
Root Mean Squared Error: 4361376.805084371
R-squared: 0.8248913036534149


### **2. Random Forest Regressor**

In [16]:
from sklearn.ensemble import RandomForestRegressor

# Train the RandomForestRegressor model
rfr_model = RandomForestRegressor(random_state=42)
rfr_model.fit(X_train, y_train)

# Predict on the test set
y_pred = rfr_model.predict(X_test)

# Evaluate the model
mae_rfr = mean_absolute_error(y_test, y_pred)
mse_rfr = mean_squared_error(y_test, y_pred)
rmse_rfr = np.sqrt(mse_rfr)
r2_rfr = r2_score(y_test, y_pred)

print(f'Mean Absolute Error: {mae_rfr}')
print(f'Mean Squared Error: {mse_rfr}')
print(f'Root Mean Squared Error: {rmse_rfr}')
print(f'R-squared: {r2_rfr}')

Mean Absolute Error: 2020928.1178923105
Mean Squared Error: 13856885858336.379
Root Mean Squared Error: 3722483.829157137
R-squared: 0.8724365855652694


### **3. Gradient Boosting Regressor**

In [17]:
from sklearn.ensemble import GradientBoostingRegressor

# Train the GradientBoostingRegressor model
gbr_model = GradientBoostingRegressor(random_state=42)
gbr_model.fit(X_train, y_train)

# Predict on the test set
y_pred_gbr = gbr_model.predict(X_test)

# Evaluate the model
mae_gbr = mean_absolute_error(y_test, y_pred_gbr)
mse_gbr = mean_squared_error(y_test, y_pred_gbr)
rmse_gbr = np.sqrt(mse_gbr)
r2_gbr = r2_score(y_test, y_pred_gbr)

print(f'Gradient Boosting Regressor Results:')
print(f'Mean Absolute Error: {mae_gbr}')
print(f'Mean Squared Error: {mse_gbr}')
print(f'Root Mean Squared Error: {rmse_gbr}')
print(f'R-squared: {r2_gbr}')

Gradient Boosting Regressor Results:
Mean Absolute Error: 2845897.956820398
Mean Squared Error: 21277327403593.613
Root Mean Squared Error: 4612735.349398837
R-squared: 0.8041256483313544


### **4. XGB Regressor**

In [18]:
import xgboost as xgb

# Train the XGBoost Regressor model
xgb_model = xgb.XGBRegressor(objective='reg:squarederror', random_state=42)
xgb_model.fit(X_train, y_train)

# Predict on the test set
y_pred_xgb = xgb_model.predict(X_test)

# Evaluate the model
mae_xgb = mean_absolute_error(y_test, y_pred_xgb)
mse_xgb = mean_squared_error(y_test, y_pred_xgb)
rmse_xgb = np.sqrt(mse_xgb)
r2_xgb = r2_score(y_test, y_pred_xgb)

print(f'XGBoost Regressor Results:')
print(f'Mean Absolute Error: {mae_xgb}')
print(f'Mean Squared Error: {mse_xgb}')
print(f'Root Mean Squared Error: {rmse_xgb}')
print(f'R-squared: {r2_xgb}')

XGBoost Regressor Results:
Mean Absolute Error: 2171751.5
Mean Squared Error: 13702086524928.0
Root Mean Squared Error: 3701632.953836455
R-squared: 0.8738616704940796


# **Testing with Example**

In [19]:
test = X_test.iloc[211].values.reshape(1, -1)

# printing predicted fares
print(f'House Price predicted with Decision Tree Regressor: {(xgb_model.predict(test)).astype(int)}')
print(f'House Price predicted with Random Forest Regressor: {(rfr_model.predict(test)).astype(int)}')
print(f'House Price predicted with Gradient Boost Regressor: {(gbr_model.predict(test)).astype(int)}')
print(f'House Price predicted with XGBoost Regressor: {(xgb_model.predict(test)).astype(int)}')
print('--------------------------------------------------------------')
print(f'The actual House Price is: {y_test.iloc[211]}')

House Price predicted with Decision Tree Regressor: [47284]
House Price predicted with Random Forest Regressor: [26521]
House Price predicted with Gradient Boost Regressor: [-883369]
House Price predicted with XGBoost Regressor: [47284]
--------------------------------------------------------------
The actual House Price is: 32000


In [20]:
print(f'The property price in the desired location will range from Rs. {y_test.iloc[211] - 0.13*y_test.iloc[211]} to Rs. {y_test.iloc[211] + 0.13*y_test.iloc[211]}')

The property price in the desired location will range from Rs. 27840.0 to Rs. 36160.0


In [23]:
import pickle

# Save the XGBoost model
with open("model.pkl", "wb") as f:
    pickle.dump(xgb_model, f)

print("✅ Model saved as model.pkl")


✅ Model saved as model.pkl
