# Feature Engineering for the Merged Dataset

## 1. Load the required libraries and the CSV file

In [13]:
from sklearn.preprocessing import MinMaxScaler, StandardScaler 
import pandas as pd
import numpy as np
# Read the merged, cleaned dataset from the notebook's working directory.
df = pd.read_csv(filepath_or_buffer="Merged_Cleaned_Dataset.csv")

## 2. Price per square meter (infinite results from division by zero are replaced with NaN)

In [14]:
# Element-wise price / size. A zero size yields +/-inf (or NaN for 0/0) rather
# than raising, so the infinities are converted to NaN before the column is stored.
raw_ratio = df['price'].div(df['size'])
df['price_per_sqm'] = raw_ratio.replace([np.inf, -np.inf], np.nan)

## 3. Drop rows with missing values in the numeric feature columns before scaling


In [17]:
# Columns that will be fed to the scalers.
numeric_features = ['price', 'size', 'bedrooms', 'bathrooms', 'price_per_sqm']

# Keep only rows where every numeric feature is present; work on an independent
# copy so later column additions do not touch `df`.
rows_complete = df[numeric_features].notna().all(axis=1)
df_scaled = df.loc[rows_complete].copy()

## 4. Apply MinMaxScaler

In [18]:
# Fit a min-max scaler on the numeric features and keep the result as a frame
# that shares df_scaled's index, with each column name suffixed by '_minmax'.
minmax_scaler = MinMaxScaler()
minmax_values = minmax_scaler.fit_transform(df_scaled[numeric_features])
minmax_columns = [name + '_minmax' for name in numeric_features]
minmax_scaled = pd.DataFrame(
    minmax_values,
    index=df_scaled.index,
    columns=minmax_columns,
)

## 5. Apply StandardScaler

In [19]:
# Fit a standard scaler on the numeric features and keep the result as a frame
# that shares df_scaled's index, with each column name suffixed by '_std'.
standard_scaler = StandardScaler()
standard_values = standard_scaler.fit_transform(df_scaled[numeric_features])
standard_columns = [name + '_std' for name in numeric_features]
standard_scaled = pd.DataFrame(
    standard_values,
    index=df_scaled.index,
    columns=standard_columns,
)


## 6. Combine scaled data with original

In [20]:
# Join the retained rows with both scaled feature sets, aligned on the row index.
# This cell reassigns df_scaled from itself, so running it a second time would
# append another copy of every *_minmax / *_std column. Dropping any scaled
# columns that are already present makes the cell safe to re-run; on a first
# run nothing is dropped and the result is the plain concatenation.
scaled_frames = [minmax_scaled, standard_scaled]
stale_columns = [
    name
    for frame in scaled_frames
    for name in frame.columns
    if name in df_scaled.columns
]
df_scaled = pd.concat(
    [df_scaled.drop(columns=stale_columns), *scaled_frames],
    axis=1,
)

## 7. Final Preview of the output

In [21]:
# Show the first five rows: original columns followed by the *_minmax and *_std columns.
df_scaled.head(n=5)

Unnamed: 0,property_name,location,bedrooms,bathrooms,price,size,price_per_sqm,price_minmax,size_minmax,bedrooms_minmax,bathrooms_minmax,price_per_sqm_minmax,price_std,size_std,bedrooms_std,bathrooms_std,price_per_sqm_std
0,2-BEDROOM APARTMENT,Al Ansab,2,2.0,300.0,120.0,2.5,0.002041,0.00011,0.166667,0.166667,0.002069,-0.055697,-0.070984,-0.398518,-0.398518,-0.065876
1,2-BEDROOM APARTMENT,Shatti Al Qurum,2,2.0,500.0,120.0,4.166667,0.003421,0.00011,0.166667,0.166667,0.003448,0.023518,-0.070984,-0.398518,-0.398518,0.004288
2,4+1 BEDROOM TWIN VILLA,Madinat Qaboos (MQ),5,5.0,1500.0,120.0,12.5,0.010318,0.00011,0.416667,0.416667,0.010345,0.419594,-0.070984,1.115989,1.115989,0.355111
4,COMMERCIAL OFFICE SPACE,Bausher,2,2.0,4.0,120.0,0.033333,0.0,0.00011,0.166667,0.166667,2.8e-05,-0.172936,-0.070984,-0.398518,-0.398518,-0.16972
5,"2, 3 & 4-BEDROOM COMPOUND VILLAS",Rusayl,3,3.0,650.0,120.0,5.416667,0.004455,0.00011,0.25,0.25,0.004483,0.082929,-0.070984,0.106318,0.106318,0.056912


In [22]:
# First five rows of the unscaled frame; rows with missing values have not been dropped from `df`.
df.head(n=5)

Unnamed: 0,property_name,location,bedrooms,bathrooms,price,size,price_per_sqm
0,2-BEDROOM APARTMENT,Al Ansab,2,2.0,300.0,120.0,2.5
1,2-BEDROOM APARTMENT,Shatti Al Qurum,2,2.0,500.0,120.0,4.166667
2,4+1 BEDROOM TWIN VILLA,Madinat Qaboos (MQ),5,5.0,1500.0,120.0,12.5
3,COMMERCIAL SHOP,Ghala,2,,370.0,39.0,9.487179
4,COMMERCIAL OFFICE SPACE,Bausher,2,2.0,4.0,120.0,0.033333


In [23]:
# Last five rows of the unscaled frame.
df.tail(n=5)

Unnamed: 0,property_name,location,bedrooms,bathrooms,price,size,price_per_sqm
4489,Room for bachelor,"Azaiba, Muscat•",1,1.0,100.0,10.0,10.0
4490,Sharing room,"Al Ghubrah, Muscat•",2,2.0,60.0,20.0,3.0
4491,شقق وغرف للايجار,"Al Khuwair, Muscat•",1,1.0,200.0,50.0,4.0
4492,شقة للايجار الدور الثاني العامرات… (للتواصل وا...,"Al Amarat, Muscat•",3,3.0,130.0,131.0,0.992366
4493,صلالة صحنوت,"Salalah, Dhofar•",5,5.0,140.0,300.0,0.466667


In [25]:
# Write the frame to CSV without the row index.
# NOTE(review): this saves `df` (original columns plus price_per_sqm, including
# rows with missing values), not `df_scaled`, so none of the *_minmax / *_std
# columns are persisted -- confirm that this is the intended final dataset.
df.to_csv(path_or_buf="Final_Dataset.csv", index=False)