In [56]:
# Importing necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score



In [32]:
# Load the housing dataset from CSV and preview the first rows.
# NOTE(review): the absolute path looks environment-specific — confirm it
# exists wherever this notebook is re-run.
DATA_PATH = '/content/Housing (1).csv'
data = pd.read_csv(DATA_PATH)
data.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished


In [33]:
# Display the loaded frame; the rendered output elides the middle rows.
data

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished
...,...,...,...,...,...,...,...,...,...,...,...,...,...
540,1820000,3000,2,1,1,yes,no,yes,no,no,2,no,unfurnished
541,1767150,2400,3,1,1,no,no,no,no,no,0,no,semi-furnished
542,1750000,3620,2,1,1,yes,no,no,no,no,0,no,unfurnished
543,1750000,2910,3,1,1,no,no,no,no,no,0,no,furnished


In [34]:
# Plain-text preview of the first five rows of the dataset.
print(data.head(n=5))

      price  area  bedrooms  bathrooms  stories mainroad guestroom basement  \
0  13300000  7420         4          2        3      yes        no       no   
1  12250000  8960         4          4        4      yes        no       no   
2  12250000  9960         3          2        2      yes        no      yes   
3  12215000  7500         4          2        2      yes        no      yes   
4  11410000  7420         4          1        2      yes       yes      yes   

  hotwaterheating airconditioning  parking prefarea furnishingstatus  
0              no             yes        2      yes        furnished  
1              no             yes        3       no        furnished  
2              no              no        2      yes   semi-furnished  
3              no             yes        3      yes        furnished  
4              no             yes        2       no        furnished  


In [35]:
# Separate the regression target from the predictor columns.
target_col = 'price'
y = data[target_col]
x = data.drop(columns=[target_col])

In [36]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
split_options = {'test_size': 0.2, 'random_state': 42}
x_train, x_test, y_train, y_test = train_test_split(x, y, **split_options)


In [37]:
# Partition the predictor columns by dtype so each group can receive its own
# preprocessing. Columns of any other dtype end up in neither list.
numeric_dtypes = ['int64', 'float64']
categorical_dtypes = ['object']

numerical_cols = x.select_dtypes(include=numeric_dtypes).columns
categorical_cols = x.select_dtypes(include=categorical_dtypes).columns

In [38]:
# Scaler for the numeric columns: centre each feature and scale it to unit
# variance (both are the StandardScaler defaults, spelled out here).
numerical_transformer = StandardScaler(with_mean=True, with_std=True)


In [39]:
# One-hot encoder for the categorical columns. 'ignore' makes categories that
# were not seen during fit encode as all zeros instead of raising.
encoder_options = {'handle_unknown': 'ignore'}
categorical_transformer = OneHotEncoder(**encoder_options)


In [40]:
# Route each column group to its transformer. The list order ('num' first,
# then 'cat') is also the order of the transformed output columns.
column_steps = [
    ('num', numerical_transformer, numerical_cols),
    ('cat', categorical_transformer, categorical_cols),
]
preprocessar = ColumnTransformer(transformers=column_steps)

In [65]:
from sklearn.pipeline import Pipeline

In [67]:
# Chain preprocessing and the linear regressor so both are fitted and applied
# together through a single estimator.
pipeline_steps = [
    ('preprocessor', preprocessar),
    ('regressor', LinearRegression()),
]
model = Pipeline(steps=pipeline_steps)

In [69]:
# Fit the preprocessing steps and the regressor on the training split only.
model.fit(X=x_train, y=y_train)

In [70]:
# Predict prices for the held-out rows using the fitted pipeline.
y_pred = model.predict(X=x_test)

In [76]:
# Score the held-out predictions: squared error, its square root (same units
# as the target), and the coefficient of determination.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

metric_report = (
    ('Mean Squared Error', mse),
    ('Root Mean Squared Error', rmse),
    ('R-squared', r2),
)
for metric_label, metric_value in metric_report:
    print(f'{metric_label}: {metric_value}')

Mean Squared Error: 1754318687330.6682
Root Mean Squared Error: 1324506.9600914402
R-squared: 0.6529242642153175


In [79]:
# Report the fitted regressor's coefficients, labelled by transformed feature.
regressor = model.named_steps['regressor']
if hasattr(regressor, 'coef_'):
    # Take the output column names from the fitted ColumnTransformer itself so
    # the labels always follow the same order as the coefficient vector,
    # rather than rebuilding that order by hand from the column lists.
    prefixed_names = model.named_steps['preprocessor'].get_feature_names_out()
    # Names come back as "<transformer>__<feature>" (e.g. "num__area"); drop
    # the transformer prefix so labels read "area", "mainroad_yes", ...
    feature_names = [name.split('__', 1)[-1] for name in prefixed_names]
    coefficients = regressor.coef_
    feature_importance = pd.Series(coefficients, index=feature_names)
    # Sorted by signed coefficient value (largest positive first), not by
    # absolute magnitude.
    feature_importance = feature_importance.sort_values(ascending=False)
    print("Feature Importance:")
    print(feature_importance)


Feature Importance:
bathrooms                          521879.027748
area                               519552.416340
airconditioning_yes                395713.367922
stories                            349251.438906
hotwaterheating_yes                342324.942669
prefarea_yes                       314945.282657
basement_yes                       195125.588091
parking                            192005.953667
mainroad_yes                       183959.973840
furnishingstatus_furnished         180175.626604
guestroom_yes                      115805.018588
bedrooms                            57349.559419
furnishingstatus_semi-furnished     53293.809019
guestroom_no                      -115805.018588
mainroad_no                       -183959.973840
basement_no                       -195125.588091
furnishingstatus_unfurnished      -233469.435624
prefarea_no                       -314945.282657
hotwaterheating_no                -342324.942669
airconditioning_no                -395713.367922
