In [2]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import RidgeCV
from LLS_main import LLS

In [3]:
# Read the raw listings from the CSV next to the notebook and preview the first rows.
csv_path = "HousePrice.csv"
data = pd.read_csv(csv_path)
data.head()

Unnamed: 0,Area,Room,Parking,Warehouse,Elevator,Address,Price,Price(USD)
0,63,1,True,True,True,Shahran,1850000000.0,61666.67
1,60,1,True,True,True,Shahran,1850000000.0,61666.67
2,79,2,True,True,True,Pardis,550000000.0,18333.33
3,95,2,True,True,True,Shahrake Qods,902500000.0,30083.33
4,123,2,True,True,True,Shahrake Gharb,7000000000.0,233333.33


In [4]:
# Multiplier applied to "Price(USD)" to build the "Price(USD)_2023" column.
# NOTE(review): presumably an exchange-rate/inflation adjustment to 2023 values —
# confirm the source of this factor.
USD_2023_FACTOR = 1.633

data["Price(USD)_2023"] = data["Price(USD)"].multiply(USD_2023_FACTOR)

# Render "Price" with thousands separators and no decimals. This turns the column
# into strings, so only do it while the column is still numeric: formatting an
# already-formatted string with '{:,.0f}' raises ValueError, which made this cell
# fail whenever it was executed a second time on the same kernel.
if pd.api.types.is_numeric_dtype(data["Price"]):
    data["Price"] = data["Price"].map('{:,.0f}'.format)

data

Unnamed: 0,Area,Room,Parking,Warehouse,Elevator,Address,Price,Price(USD),Price(USD)_2023
0,63,1,True,True,True,Shahran,1850000000,61666.67,100701.67211
1,60,1,True,True,True,Shahran,1850000000,61666.67,100701.67211
2,79,2,True,True,True,Pardis,550000000,18333.33,29938.32789
3,95,2,True,True,True,Shahrake Qods,902500000,30083.33,49126.07789
4,123,2,True,True,True,Shahrake Gharb,7000000000,233333.33,381033.32789
...,...,...,...,...,...,...,...,...,...
3260,63,1,True,True,False,Feiz Garden,1890000000,63000.00,102879.00000
3261,86,2,True,True,True,Southern Janatabad,3500000000,116666.67,190516.67211
3262,83,2,True,True,True,Niavaran,6800000000,226666.67,370146.67211
3263,105,2,True,True,True,Dorous,5600000000,186666.67,304826.67211


In [5]:
# Sum the 2023 USD prices per address and keep the five largest totals,
# with "Address" restored as a regular column.
MEH = (
    data.groupby(["Address"])["Price(USD)_2023"]
    .sum()
    .sort_values(ascending=False)
    .head()
    .reset_index()
)
MEH

Unnamed: 0,Address,Price(USD)_2023
0,Gheitarieh,77050170.0
1,Niavaran,72431060.0
2,Farmanieh,62994880.0
3,Saadat Abad,60266410.0
4,Shahrake Gharb,37211880.0


In [6]:
# Pairwise correlation of the numeric and boolean columns only.
# The frame also holds string columns ("Address", and "Price" once it has been
# formatted); recent pandas versions no longer drop those silently inside
# DataFrame.corr() and raise instead, so select the usable columns explicitly.
data.select_dtypes(include=["number", "bool"]).corr()

Unnamed: 0,Area,Room,Parking,Warehouse,Elevator,Price(USD),Price(USD)_2023
Area,1.0,0.670137,0.199765,0.07308,0.047987,0.72099,0.72099
Room,0.670137,1.0,0.279249,0.126244,0.183216,0.569476,0.569476
Parking,0.199765,0.279249,1.0,0.428073,0.429527,0.192993,0.192993
Warehouse,0.07308,0.126244,0.428073,1.0,0.204242,0.109675,0.109675
Elevator,0.047987,0.183216,0.429527,0.204242,1.0,0.110417,0.110417
Price(USD),0.72099,0.569476,0.192993,0.109675,0.110417,1.0,1.0
Price(USD)_2023,0.72099,0.569476,0.192993,0.109675,0.110417,1.0,1.0


In [16]:
# Feature matrix: one row per listing, one column per selected feature
# (stacked as rows first, then transposed).
feature_columns = ["Area", "Room", "Parking", "Elevator"]
X = np.array([data[column] for column in feature_columns]).T

# Target kept two-dimensional (single column) by selecting with a list.
target_frame = data[["Price(USD)_2023"]]
Y = np.array(target_frame)

In [8]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=40,
)

In [9]:
# Train the custom LLS model (imported from LLS_main) on the training split,
# then predict on the held-out features.
lls = LLS()
lls.fit(X_train, Y_train)
y_pred = lls.predict(X_test)

In [10]:
# Score the custom LLS model on the test split; the metric is selected by name
# and evaluated in the order mae, mse, rmse.
MAE_lls, MSE_lls, RMSE_lls = (
    lls.evaluate(X_test, Y_test, metric_name)
    for metric_name in ("mae", "mse", "rmse")
)

for _label, _score in (
    ("LLS MAE =", MAE_lls),
    ("LLS MSE =", MSE_lls),
    ("LLS RMSE =", RMSE_lls),
):
    print(_label, _score)

LLS MAE = 161371.087771871
LLS MSE = 92193498222.78737
LLS RMSE = 303633.82259357633


In [11]:
# Same train/test split, fitted with scikit-learn's LinearRegression for comparison.
lls_sk = LinearRegression()
lls_sk.fit(X_train, Y_train)
y_pred_sk = lls_sk.predict(X_test)

In [12]:
# Test-set errors of the scikit-learn LinearRegression model.
MAE_linreg = mean_absolute_error(Y_test, y_pred_sk)
MSE_linreg = mean_squared_error(Y_test, y_pred_sk)
RMSE_linreg = np.sqrt(MSE_linreg)  # RMSE is the square root of the MSE

# Printed one per line in the order MAE, MSE, RMSE.
for _score in (MAE_linreg, MSE_linreg, RMSE_linreg):
    print(_score)

152796.901678096
84232115103.3494
290227.69527277956


In [13]:
# Ridge regression with scikit-learn's RidgeCV (default settings) on the same split.
ridge = RidgeCV()
ridge.fit(X_train, Y_train)
Y_pred_ridge = ridge.predict(X_test)

In [14]:
# Test-set errors of the RidgeCV model.
MAE_ridge = mean_absolute_error(Y_test, Y_pred_ridge)
MSE_ridge = mean_squared_error(Y_test, Y_pred_ridge)
RMSE_ridge = np.sqrt(MSE_ridge)  # RMSE is the square root of the MSE

for _label, _score in (
    ("RidgeCV MAE =", MAE_ridge),
    ("RidgeCV MSE =", MSE_ridge),
    ("RidgeCV RMSE =", RMSE_ridge),
):
    print(_label, _score)

RidgeCV MAE = 152564.49227009708
RidgeCV MSE = 84201064451.13292
RidgeCV RMSE = 290174.1967355694
