In [89]:
import pandas as pd

In [90]:
# Read the CSV (path is relative to the notebook's working directory) into a DataFrame.
DATA_PATH = "housePrice.csv"
df = pd.read_csv(DATA_PATH)

In [91]:
# Peek at five randomly chosen rows of the freshly loaded frame.
df.sample(n=5)

Unnamed: 0,Area,Room,Parking,Warehouse,Elevator,Address,Price,Price(USD)
1176,90,2,True,True,True,Qalandari,6560000000.0,218666.67
3195,75,2,False,False,False,Parand,395000000.0,13166.67
2171,3600,2,False,False,False,Shahryar,9720000000.0,324000.0
3056,270,4,True,True,False,Jordan,13000000000.0,433333.33
979,145,3,True,True,True,Punak,8990000000.0,299666.67


In [92]:
# Count missing values per column.
df.isna().sum()

Area           0
Room           0
Parking        0
Warehouse      0
Elevator       0
Address       23
Price          0
Price(USD)     0
dtype: int64

In [93]:
# Remove the Address column (the only one reporting missing values) and the Price
# column, leaving Price(USD) as the only price column.
# `df` is rebound rather than mutated in place, and errors="ignore" makes a second
# execution of this cell a no-op instead of a KeyError on the already-removed columns.
df = df.drop(columns=["Address", "Price"], errors="ignore")
df.sample(5)

Unnamed: 0,Area,Room,Parking,Warehouse,Elevator,Price(USD)
2027,56,2,True,True,True,50000.0
409,48,1,False,True,False,32666.67
103,40,0,False,False,False,8266.67
1271,200,3,True,True,True,166666.67
2252,60,2,False,True,False,21666.67


In [94]:
# Re-check missing values per column after removing columns.
df.isna().sum()

Area          0
Room          0
Parking       0
Warehouse     0
Elevator      0
Price(USD)    0
dtype: int64

In [95]:
# Encode the three True/False amenity columns as 1/0 integers.
for flag_column in ("Parking", "Warehouse", "Elevator"):
    df[flag_column] = df[flag_column].astype(int)
df.sample(n=5)

Unnamed: 0,Area,Room,Parking,Warehouse,Elevator,Price(USD)
2752,110,2,1,1,1,168333.33
1705,47,1,1,1,1,70000.0
1820,100,2,1,1,1,93333.33
688,145,4,1,1,1,170000.0
1583,66,2,1,0,1,52666.67


In [96]:
# Show the column labels currently present in the frame.
df.columns

Index(['Area', 'Room', 'Parking', 'Warehouse', 'Elevator', 'Price(USD)'], dtype='object')

In [97]:
# One line per column: label, number of missing values, dtype.
for column_name, column_values in df.items():
    print(f'{column_name}, {column_values.isnull().sum()}, {column_values.dtype}')

Area, 0, object
Room, 0, int64
Parking, 0, int64
Warehouse, 0, int64
Elevator, 0, int64
Price(USD), 0, float64


In [98]:
# Area is stored with object dtype; parse it as numbers and turn anything that
# cannot be parsed into NaN.
# NOTE(review): if the unparseable entries are only formatted numbers (e.g. with
# thousands separators), those rows are lost once NaNs are dropped — confirm.
area_numeric = pd.to_numeric(df["Area"], errors="coerce")
df["Area"] = area_numeric

In [99]:
# Spot-check five random rows after the Area conversion.
df.sample(n=5)

Unnamed: 0,Area,Room,Parking,Warehouse,Elevator,Price(USD)
2603,57.0,2,0,0,0,36333.33
942,77.0,2,1,1,1,71666.67
361,76.0,2,1,1,0,85666.67
2671,65.0,2,1,1,1,83333.33
502,90.0,2,1,1,1,15166.67


In [100]:
# Remove, in place, every row that contains at least one NaN.
df.dropna(axis="index", how="any", inplace=True)

In [101]:
# Split the frame into the predictor columns and the regression target.
feature_columns = ["Area", "Room", "Parking", "Warehouse", "Elevator"]
target_column = "Price(USD)"
x = df[feature_columns]
y = df[target_column]

In [102]:
# Five random rows of the feature matrix.
x.sample(n=5)

Unnamed: 0,Area,Room,Parking,Warehouse,Elevator
2210,75.0,2,1,1,1
784,78.0,2,1,1,0
3080,64.0,2,0,1,0
155,65.0,2,1,1,1
2630,66.0,2,0,0,0


In [103]:
# Five random values of the target series.
y.sample(n=5)

2806     30000.00
1164    146666.67
2924    266666.67
1532     18666.67
1518      8333.33
Name: Price(USD), dtype: float64

In [104]:
from sklearn.model_selection import train_test_split

In [105]:
# Hold out a quarter of the rows for evaluation; the fixed seed keeps the split repeatable.
split_parts = train_test_split(x, y, random_state=1, test_size=0.25)
xtrain, xtest, ytrain, ytest = split_parts

In [106]:
from sklearn.linear_model import LinearRegression

In [107]:
# Ordinary least-squares baseline with library-default settings.
lr = LinearRegression()

In [108]:
# Fit the linear baseline on the training split.
lr.fit(X=xtrain, y=ytrain)

In [109]:
# R² of the linear baseline on the held-out split.
lr.score(X=xtest, y=ytest)

0.569281513282758

In [110]:
from sklearn.linear_model import LinearRegression, Lasso, Ridge

In [111]:
# Fresh instances of the three linear models; Lasso and Ridge share one
# iteration cap for their solvers.
MAX_ITERATIONS = 10_000_000
lr = LinearRegression()
llr = Lasso(max_iter=MAX_ITERATIONS)
rlr = Ridge(max_iter=MAX_ITERATIONS)

In [112]:
from sklearn.model_selection import GridSearchCV

In [113]:
# Search space for a grid search over the three linear models.
# Each entry maps a short label to an unfitted estimator ('model') and the
# parameter grid to try for it ('params').
model = {
    'lr': {
        'model': LinearRegression(),
        'params': {
            # `normalize` was removed from LinearRegression in scikit-learn 1.2 and
            # is rejected as an invalid parameter by GridSearchCV; search over
            # `fit_intercept` instead. Scale features in a Pipeline if
            # normalisation is needed.
            'fit_intercept': [True, False]
        }
    },
    'llr': {
        'model': Lasso(),
        'params': {
            'max_iter': [10000000],
            'alpha': [0.5, 2.0],
            'selection': ['cyclic', 'random']
        }
    },
    'rlr': {
        'model': Ridge(),
        'params': {
            'max_iter': [10000000],
            'alpha': [0.5, 2.0],
            'solver': ['auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga']
        }
    }
}

In [114]:
from sklearn.model_selection import ShuffleSplit, cross_val_score

In [115]:
# Cross-validation scheme: five independent random 80/20 splits with a fixed seed.
ss = ShuffleSplit(
    n_splits=5,
    random_state=1,
    test_size=0.2,
)

In [116]:
# Per-split scores of the plain linear model under the `ss` splitter.
cross_val_score(estimator=lr, X=x, y=y, cv=ss)

array([0.56616734, 0.61659496, 0.36338252, 0.53269317, 0.53432509])

In [117]:
# Per-split scores of the Lasso model under the `ss` splitter.
cross_val_score(estimator=llr, X=x, y=y, cv=ss)

array([0.56616789, 0.61659585, 0.36337955, 0.53269367, 0.53432696])

In [118]:
# Per-split scores of the Ridge model under the `ss` splitter.
cross_val_score(estimator=rlr, X=x, y=y, cv=ss)

array([0.56618281, 0.61662327, 0.36335385, 0.53270015, 0.5343466 ])

In [119]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
# Non-linear regressors to compare against the linear models.
# The tree and the forest are seeded (same seed as the data splits) so their
# scores are reproducible across kernel restarts; unseeded they vary run to run.
dtr = DecisionTreeRegressor(random_state=1)
rfr = RandomForestRegressor(random_state=1)
# NOTE(review): SVR and KNeighborsRegressor are distance/kernel based and are fit
# on unscaled columns here — consider wrapping them in a scaling Pipeline.
svr = SVR()
knr = KNeighborsRegressor()

In [120]:
# Per-split scores of the decision tree under the `ss` splitter.
cross_val_score(estimator=dtr, X=x, y=y, cv=ss)

array([0.42094532, 0.62084157, 0.56889559, 0.4531208 , 0.7015515 ])

In [121]:
# Per-split scores of the random forest under the `ss` splitter.
cross_val_score(estimator=rfr, X=x, y=y, cv=ss)

array([0.564171  , 0.66011314, 0.50393126, 0.55976485, 0.66977199])

In [122]:
# Per-split scores of the support-vector regressor under the `ss` splitter.
cross_val_score(estimator=svr, X=x, y=y, cv=ss)

array([-0.09102954, -0.07048212, -0.09588801, -0.08515065, -0.11036035])

In [123]:
# Per-split scores of the k-nearest-neighbours regressor under the `ss` splitter.
cross_val_score(estimator=knr, X=x, y=y, cv=ss)

array([0.54700487, 0.56207886, 0.3888361 , 0.5514604 , 0.64728184])

In [124]:
# Fit the decision tree on the training split.
dtr.fit(X=xtrain, y=ytrain)

In [125]:
# R² of the decision tree on the held-out split.
dtr.score(X=xtest, y=ytest)

0.4643338729490296

In [126]:
# Fit the Lasso model on the training split.
llr.fit(X=xtrain, y=ytrain)

In [127]:
# R² of the Lasso model on the held-out split.
llr.score(X=xtest, y=ytest)

0.5692824618235701

In [128]:
# Fit the random forest on the training split.
rfr.fit(X=xtrain, y=ytrain)

In [129]:
# R² of the random forest on the held-out split.
rfr.score(X=xtest, y=ytest)

0.5944885101057877