# Loading DataSet

In [117]:
import pandas as pd

In [118]:
df = pd.read_csv('All_CSV/Housing.csv')

In [119]:
df.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished


# Splitting 

In [120]:
from sklearn.model_selection import train_test_split as split 

In [121]:
train_x, test_x, train_y, test_y = split(df.drop(columns=['price']), df['price'], test_size=0.2)

In [122]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 545 entries, 0 to 544
Data columns (total 13 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   price             545 non-null    int64 
 1   area              545 non-null    int64 
 2   bedrooms          545 non-null    int64 
 3   bathrooms         545 non-null    int64 
 4   stories           545 non-null    int64 
 5   mainroad          545 non-null    object
 6   guestroom         545 non-null    object
 7   basement          545 non-null    object
 8   hotwaterheating   545 non-null    object
 9   airconditioning   545 non-null    object
 10  parking           545 non-null    int64 
 11  prefarea          545 non-null    object
 12  furnishingstatus  545 non-null    object
dtypes: int64(6), object(7)
memory usage: 55.5+ KB


In [123]:
columns = ['mainroad', 'guestroom', 'basement',  'hotwaterheating', 'airconditioning', 'prefarea', 'furnishingstatus']

# Encoding

In [124]:
from sklearn.preprocessing import OneHotEncoder 
from sklearn.compose import ColumnTransformer 

In [125]:
transformer = ColumnTransformer(transformers=[
    ('encode', OneHotEncoder(sparse_output=False, drop='first'), columns)
], remainder='passthrough')

In [126]:
train_x = transformer.fit_transform(train_x)
test_x = transformer.transform(test_x)

# Algorithm

In [135]:
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor 
from sklearn.linear_model import Ridge

In [136]:
model_linear_regression = LinearRegression()
model_ridge_regression = Ridge(alpha=0.2)
model_decision_regressor = DecisionTreeRegressor(max_depth=4)
model_random_forest = RandomForestRegressor(max_depth=5)

In [137]:
model_linear_regression.fit(train_x, train_y)
model_decision_regressor.fit(train_x, train_y)
model_ridge_regression.fit(train_x, train_y)
model_random_forest.fit(train_x, train_y)

# Prediction

In [138]:
predict = model_linear_regression.predict(test_x)
predict_ridge = model_ridge_regression.predict(test_x)
predict_decision = model_decision_regressor.predict(test_x)
predict_random_forest = model_random_forest.predict(test_x)

In [131]:
from sklearn.metrics import r2_score as score

In [132]:
score(test_y, predict)

0.6965931722317967

In [133]:
score(test_y, predict_ridge)

0.6964963863972747

In [134]:
score(test_y, predict_decision)

0.5947074864626662

In [139]:
score(test_y, predict_random_forest)

0.6542979913999749