In [1]:
import pandas as pd

In [2]:
df = pd.read_csv("magic_file.csv")

In [3]:
df.head()

Unnamed: 0,Title,Price,Price per sqft,Carpet area (sqft),Super area (sqft),Location,Project,BHK
0,"4 BHK Flat for Sale in Rahul Arcus, Baner, Pune",26700000.0,9780,2100,0,Baner,Rahul Arcus,4
1,"2 BHK Flat for Sale in Vishal Leela Heights, W...",8200000.0,7455,786,0,Wakad,Vishal Leela Heights,2
2,"4 BHK Flat for Sale in Amar Landmark, Baner, Pune",90000000.0,19569,3407,0,Baner,Amar Landmark,4
3,"3 BHK Flat for Sale in Godrej Hillside, Mahalu...",10000000.0,7692,980,0,Mahalunge,Godrej Hillside,3
4,"1 BHK Flat for Sale in Paranjape Blue Ridge, H...",4500000.0,7858,440,0,Hinjawadi,Paranjape Blue Ridge,1


In [4]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

In [5]:
X = df[['BHK', 'Carpet area (sqft)','Price' ]]
y = df['Price per sqft']

# Spliting data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [6]:
# Linear regression
linear_model = LinearRegression()
linear_model.fit(X_train_scaled, y_train)

df['Linear Regression Predictions'] = linear_model.predict(scaler.transform(X))
print("_______________________________________________________________________________________")
print("Mean Square Error",mean_squared_error(y_test, linear_model.predict(X_test_scaled)))
print("R2 score",r2_score(y_test, linear_model.predict(X_test_scaled)))
print("_______________________________________________________________________________________")
df.head()

_______________________________________________________________________________________
Mean Square Error 4169455.1334506585
R2 score 0.6964328765461882
_______________________________________________________________________________________


Unnamed: 0,Title,Price,Price per sqft,Carpet area (sqft),Super area (sqft),Location,Project,BHK,Linear Regression Predictions
0,"4 BHK Flat for Sale in Rahul Arcus, Baner, Pune",26700000.0,9780,2100,0,Baner,Rahul Arcus,4,9633.902227
1,"2 BHK Flat for Sale in Vishal Leela Heights, W...",8200000.0,7455,786,0,Wakad,Vishal Leela Heights,2,7183.351981
2,"4 BHK Flat for Sale in Amar Landmark, Baner, Pune",90000000.0,19569,3407,0,Baner,Amar Landmark,4,22264.640433
3,"3 BHK Flat for Sale in Godrej Hillside, Mahalu...",10000000.0,7692,980,0,Mahalunge,Godrej Hillside,3,7705.22589
4,"1 BHK Flat for Sale in Paranjape Blue Ridge, H...",4500000.0,7858,440,0,Hinjawadi,Paranjape Blue Ridge,1,6568.006878


In [7]:
# Random Forest
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train_scaled, y_train)

df['Random Forest Predictions'] = rf_model.predict(scaler.transform(X))
print("_______________________________________________________________________________________")
print("Mean Square error",mean_squared_error(y_test, rf_model.predict(X_test_scaled)))
print("R2 score",r2_score(y_test, rf_model.predict(X_test_scaled)))
print("_______________________________________________________________________________________")
df.head()

_______________________________________________________________________________________
Mean Square error 0.0
R2 score 1.0
_______________________________________________________________________________________


Unnamed: 0,Title,Price,Price per sqft,Carpet area (sqft),Super area (sqft),Location,Project,BHK,Linear Regression Predictions,Random Forest Predictions
0,"4 BHK Flat for Sale in Rahul Arcus, Baner, Pune",26700000.0,9780,2100,0,Baner,Rahul Arcus,4,9633.902227,9780.0
1,"2 BHK Flat for Sale in Vishal Leela Heights, W...",8200000.0,7455,786,0,Wakad,Vishal Leela Heights,2,7183.351981,7455.0
2,"4 BHK Flat for Sale in Amar Landmark, Baner, Pune",90000000.0,19569,3407,0,Baner,Amar Landmark,4,22264.640433,19569.0
3,"3 BHK Flat for Sale in Godrej Hillside, Mahalu...",10000000.0,7692,980,0,Mahalunge,Godrej Hillside,3,7705.22589,7692.0
4,"1 BHK Flat for Sale in Paranjape Blue Ridge, H...",4500000.0,7858,440,0,Hinjawadi,Paranjape Blue Ridge,1,6568.006878,7858.0


In [8]:
# Decision Tree
dt_model = DecisionTreeRegressor(random_state=42)
dt_model.fit(X_train_scaled, y_train)
df['Decision Tree Predictions'] = dt_model.predict(scaler.transform(X))
print("_______________________________________________________________________________________")
print("Mean Square error",mean_squared_error(y_test, dt_model.predict(X_test_scaled)))
print("R2 score",r2_score(y_test, dt_model.predict(X_test_scaled)))
print("_______________________________________________________________________________________")
df.head()

_______________________________________________________________________________________
Mean Square error 0.0
R2 score 1.0
_______________________________________________________________________________________


Unnamed: 0,Title,Price,Price per sqft,Carpet area (sqft),Super area (sqft),Location,Project,BHK,Linear Regression Predictions,Random Forest Predictions,Decision Tree Predictions
0,"4 BHK Flat for Sale in Rahul Arcus, Baner, Pune",26700000.0,9780,2100,0,Baner,Rahul Arcus,4,9633.902227,9780.0,9780.0
1,"2 BHK Flat for Sale in Vishal Leela Heights, W...",8200000.0,7455,786,0,Wakad,Vishal Leela Heights,2,7183.351981,7455.0,7455.0
2,"4 BHK Flat for Sale in Amar Landmark, Baner, Pune",90000000.0,19569,3407,0,Baner,Amar Landmark,4,22264.640433,19569.0,19569.0
3,"3 BHK Flat for Sale in Godrej Hillside, Mahalu...",10000000.0,7692,980,0,Mahalunge,Godrej Hillside,3,7705.22589,7692.0,7692.0
4,"1 BHK Flat for Sale in Paranjape Blue Ridge, H...",4500000.0,7858,440,0,Hinjawadi,Paranjape Blue Ridge,1,6568.006878,7858.0,7858.0


In [9]:
# Gradient Boosting
gb_model = GradientBoostingRegressor(n_estimators=100, random_state=42)
gb_model.fit(X_train_scaled, y_train)
df['Gradient Boosting Predictions'] = gb_model.predict(scaler.transform(X))
print("_______________________________________________________________________________________")
print("Mean Square error",mean_squared_error(y_test, gb_model.predict(X_test_scaled)))
print("R2 score",r2_score(y_test, gb_model.predict(X_test_scaled)))
print("_______________________________________________________________________________________")
df.head()

_______________________________________________________________________________________
Mean Square error 13791.517153567669
R2 score 0.9989958757064481
_______________________________________________________________________________________


Unnamed: 0,Title,Price,Price per sqft,Carpet area (sqft),Super area (sqft),Location,Project,BHK,Linear Regression Predictions,Random Forest Predictions,Decision Tree Predictions,Gradient Boosting Predictions
0,"4 BHK Flat for Sale in Rahul Arcus, Baner, Pune",26700000.0,9780,2100,0,Baner,Rahul Arcus,4,9633.902227,9780.0,9780.0,9893.922849
1,"2 BHK Flat for Sale in Vishal Leela Heights, W...",8200000.0,7455,786,0,Wakad,Vishal Leela Heights,2,7183.351981,7455.0,7455.0,7456.187382
2,"4 BHK Flat for Sale in Amar Landmark, Baner, Pune",90000000.0,19569,3407,0,Baner,Amar Landmark,4,22264.640433,19569.0,19569.0,19513.181441
3,"3 BHK Flat for Sale in Godrej Hillside, Mahalu...",10000000.0,7692,980,0,Mahalunge,Godrej Hillside,3,7705.22589,7692.0,7692.0,7557.195652
4,"1 BHK Flat for Sale in Paranjape Blue Ridge, H...",4500000.0,7858,440,0,Hinjawadi,Paranjape Blue Ridge,1,6568.006878,7858.0,7858.0,7813.151868


In [10]:
df.to_csv('prediction_test_data.csv', index=False)

In [11]:
pd.read_csv('prediction_test_data.csv').head()

Unnamed: 0,Title,Price,Price per sqft,Carpet area (sqft),Super area (sqft),Location,Project,BHK,Linear Regression Predictions,Random Forest Predictions,Decision Tree Predictions,Gradient Boosting Predictions
0,"4 BHK Flat for Sale in Rahul Arcus, Baner, Pune",26700000.0,9780,2100,0,Baner,Rahul Arcus,4,9633.902227,9780.0,9780.0,9893.922849
1,"2 BHK Flat for Sale in Vishal Leela Heights, W...",8200000.0,7455,786,0,Wakad,Vishal Leela Heights,2,7183.351981,7455.0,7455.0,7456.187382
2,"4 BHK Flat for Sale in Amar Landmark, Baner, Pune",90000000.0,19569,3407,0,Baner,Amar Landmark,4,22264.640433,19569.0,19569.0,19513.181441
3,"3 BHK Flat for Sale in Godrej Hillside, Mahalu...",10000000.0,7692,980,0,Mahalunge,Godrej Hillside,3,7705.22589,7692.0,7692.0,7557.195652
4,"1 BHK Flat for Sale in Paranjape Blue Ridge, H...",4500000.0,7858,440,0,Hinjawadi,Paranjape Blue Ridge,1,6568.006878,7858.0,7858.0,7813.151868
