In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

%matplotlib inline

In [None]:
# Read the housing CSV from the Kaggle input directory into a DataFrame,
# then preview the first rows as the cell's output.
housing_csv = '/kaggle/input/housing-price-prediction/Housing.csv'
df = pd.read_csv(housing_csv)
df.head()

In [None]:
# Data preprocessing: count the missing values in each column.
df.isna().sum()

In [None]:
# Convert categorical columns to numeric indicator columns.
# Furnishing status is unordered (no clear rank), so label encoding could
# mislead the model by implying an order; one-hot encoding is used instead.
# drop_first=True drops the first level of each encoded column to avoid
# redundant (perfectly collinear) indicators.
df = pd.get_dummies(data=df, drop_first=True)
df.head()

In [None]:
# Separate the target column from the feature matrix:
# X holds every column except 'price', y holds 'price' itself.
target_col = 'price'
X = df.drop(columns=target_col)
y = df[target_col]

In [None]:
# Hold out 20% of the rows for testing; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Fit an ordinary least-squares linear regression on the training split.
# The fit call stays as the last expression so the cell still displays
# the fitted estimator.
model = LinearRegression()
model.fit(X=X_train, y=y_train)

In [None]:
# Predict on the held-out split and score the predictions against the
# true test targets.
y_pred = model.predict(X_test)

mae = mean_absolute_error(y_test, y_pred)  # mean absolute error
mse = mean_squared_error(y_test, y_pred)   # mean squared error
r2 = r2_score(y_test, y_pred)              # coefficient of determination

# Report each metric on its own line as "<label>: <value>".
for label, value in (("MAE", mae), ("MSE", mse), ("R2 Score", r2)):
    print(f"{label}: {value}")

In [None]:
# Predictions vs. actual: scatter the test targets against the model's
# predictions and overlay the y = x reference line (a perfect model would
# put every point on it).
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(y_test, y_pred, alpha=0.7)

# The reference diagonal spans the full range of the target column.
price_lo, price_hi = y.min(), y.max()
ax.plot([price_lo, price_hi], [price_lo, price_hi], 'k--', lw=2)

ax.set_xlabel("Actual Price")
ax.set_ylabel("Predicted Price")
ax.set_title("Actual vs Predicted Prices")
plt.show()

In [None]:
# Coefficient interpretation: pair each feature name with its fitted
# regression coefficient in a single-column table and print it.
coeff = pd.DataFrame({'Coefficient': model.coef_}, index=X.columns)
print(coeff)