# Importing Libraries

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Importing Dataset

In [None]:
# Load the housing table, then separate it positionally: the first column
# becomes the regression target and all remaining columns become features.
dataset = pd.read_csv("Housing.csv")
feature_frame = dataset.iloc[:, 1:]
target_column = dataset.iloc[:, 0]
X, y = feature_frame.values, target_column.values

# Checking Dataset for missing values

In [None]:
# Count missing entries per column; the trailing expression is what the
# cell displays.
missing_per_column = dataset.isnull().sum()
missing_per_column

Unnamed: 0,0
price,0
area,0
bedrooms,0
bathrooms,0
stories,0
mainroad,0
guestroom,0
basement,0
hotwaterheating,0
airconditioning,0


# Encoding Categorical Data

In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder

# Integer-code the listed feature columns in place, one column at a time
# (presumably the yes/no columns -- confirm against the dataset).
# The single encoder is refit on every pass, so once the loop ends it only
# holds the classes of the last column processed.
le = LabelEncoder()
label_cols = [4, 5, 6, 7, 8, 10]

for col_idx in label_cols:
    encoded_values = le.fit_transform(X[:, col_idx])
    X[:, col_idx] = encoded_values

# One-hot encode column 11 and pass every other column through untouched.
# NOTE: ColumnTransformer emits the encoder's output columns first and the
# passthrough columns after them, so feature positions shift after this step.
onehot_step = ('encoder', OneHotEncoder(), [11])
ct = ColumnTransformer(transformers=[onehot_step], remainder='passthrough')
X = np.array(ct.fit_transform(X))

# Splitting the Dataset into Training Set and Test Set

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; the fixed seed makes the split
# reproducible across runs.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

# Feature Scaling

In [None]:
from sklearn.preprocessing import StandardScaler
# Standardise the continuous `area` feature (original feature column 0).
#
# The ColumnTransformer fitted in the encoding step places its one-hot output
# columns first and appends the passthrough columns after them, so column 0
# of X_train is a one-hot dummy rather than `area`. Scaling `[:, 0:1]`
# therefore standardised a dummy variable and left `area` on its raw scale.
# The position of the first passthrough column is derived from the fitted
# encoder instead of being hard-coded, so it stays correct whatever the
# number of categories is.
#
# The scaler is fit on the training split only and then applied to the test
# split, so no test-set statistics leak into training.
# (Ordinary least squares with an intercept is invariant to rescaling a
# single feature, so the downstream predictions are unchanged up to
# floating-point error.)
sc = StandardScaler()
n_onehot_cols = sum(len(cats) for cats in ct.named_transformers_['encoder'].categories_)
area_col = n_onehot_cols  # first passthrough column == original feature 0
area_slice = slice(area_col, area_col + 1)  # keep 2-D shape for the scaler
X_train[:, area_slice] = sc.fit_transform(X_train[:, area_slice])
X_test[:, area_slice] = sc.transform(X_test[:, area_slice])

# Training the Multiple Linear Regression Model

In [None]:
from sklearn.linear_model import LinearRegression
# Fit a linear regression on the training split. `fit` returns the estimator
# itself, so the trailing expression displays the fitted model.
regressor = LinearRegression().fit(X_train, y_train)
regressor

# Predicting the Values on Test Set

In [None]:
# Predict on the held-out rows and print the predictions beside the true
# targets (left column: predicted, right column: actual).
y_pred = regressor.predict(X_test)

# Global NumPy print setting: show two decimals in printed arrays.
np.set_printoptions(precision=2)

predicted_column = y_pred.reshape(-1, 1)
actual_column = y_test.reshape(-1, 1)
print(np.concatenate((predicted_column, actual_column), axis=1))

[[ 5164653.9   4060000.  ]
 [ 7224722.3   6650000.  ]
 [ 3109863.24  3710000.  ]
 [ 4612075.33  6440000.  ]
 [ 3294646.26  2800000.  ]
 [ 3532275.1   4900000.  ]
 [ 5611774.57  5250000.  ]
 [ 6368145.99  4543000.  ]
 [ 2722856.96  2450000.  ]
 [ 2629405.62  3353000.  ]
 [ 9617039.5  10150000.  ]
 [ 2798087.3   2660000.  ]
 [ 3171096.77  3360000.  ]
 [ 3394639.09  3360000.  ]
 [ 3681088.65  2275000.  ]
 [ 5263187.75  2660000.  ]
 [ 3035963.48  2660000.  ]
 [ 4786122.8   7350000.  ]
 [ 4349551.92  2940000.  ]
 [ 3572362.1   2870000.  ]
 [ 5774875.21  6720000.  ]
 [ 5886993.58  5425000.  ]
 [ 2730836.2   1890000.  ]
 [ 4727316.47  5250000.  ]
 [ 5244847.53  4193000.  ]
 [ 7555324.22 12250000.  ]
 [ 3220790.85  3080000.  ]
 [ 5191898.8   5110000.  ]
 [ 8143726.91  9800000.  ]
 [ 3398814.1   2520000.  ]
 [ 6490693.05  6790000.  ]
 [ 3315105.91  3500000.  ]
 [ 6708457.37  6650000.  ]
 [ 4201738.21  2940000.  ]
 [ 3557571.07  3325000.  ]
 [ 5836974.5   4200000.  ]
 [ 4808660.67  4900000.  ]
 

# Evaluating the model

In [None]:
from sklearn.metrics import mean_squared_error, r2_score

# Score the test-set predictions: mean squared error, its square root
# (RMSE, in the same units as the target), and the R² score.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

for metric_label, metric_value in (
    ("Mean Squared Error:", mse),
    ("Root Mean Squared Error:", rmse),
    ("R² Score:", r2),
):
    print(metric_label, metric_value)

Mean Squared Error: 1754318687330.7742
Root Mean Squared Error: 1324506.9600914803
R² Score: 0.6529242642152966
