**Import necessary Libraries**

In [1]:
import os
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
import pickle as pkl

**Initialize RandomForestRegressor**

In [2]:
rfc = RandomForestRegressor()

**Load the Dataset**

In [3]:
data = pd.read_excel("Cleaned-Delhi-Prices.xlsx")

**Encode 'Location' column**

In [4]:
le = LabelEncoder()
data['Location'] = le.fit_transform(data['Location'])

**Log-transform 'Price' column**

In [5]:
data['Price'] = np.log(data['Price'])

**Split into `features (x)` and `target variable (y)`**

In [6]:
x = data.drop('Price', axis='columns')
y = data['Price']

**Handle outliers in the 'Area' column**

In [7]:
q1 = x['Area'].quantile(0.25)
q3 = x['Area'].quantile(0.75)
iqr = q3 - q1
u = q3 + 1.5 * iqr
l = q1 - 1.5 * iqr
out1 = x[x['Area'] < l].values
out2 = x[x['Area'] > u].values
x['Area'].replace(out1, l, inplace=True)
x['Area'].replace(out2, u, inplace=True)

**Handle outliers in the 'Price' column**

In [8]:
q1 = y.quantile(0.25)
q3 = y.quantile(0.75)
iqr = q3 - q1
u = q3 + 1.5 * iqr
l = q1 - 1.5 * iqr
out1 = y[y < l].values
out2 = y[y > u].values
y.replace(out1, l, inplace=True)
y.replace(out2, u, inplace=True)

**Split the data into Training and Testing sets**

In [9]:
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=0, train_size=0.3)

**Train the RandomForestRegressor model**

In [10]:
rfc.fit(x_train, y_train)

**Make predictions on the Test Set**

In [11]:
y_pred = rfc.predict(x_test)

**Evaluate the Model**

In [12]:
print("R-squared Score:", r2_score(y_test, y_pred))

R-squared Score: 0.3844963003651065


**Save the Trained Model to Pickle File**

In [13]:
pkl.dump(rfc, open('model.pkl', 'wb'))