## Importing the Libraries

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

## Importing the Dataset

In [None]:
# Read the housing-rent records from the CSV file in the working directory.
dataset = pd.read_csv(filepath_or_buffer='HousingRentDataset.csv')
# Preview the first five rows to confirm the columns were parsed as expected.
dataset.head(n=5)

Unnamed: 0,Size,BHK,Bathroom,City,FurnishingStatus,AreaType,Rent
0,1100,2,2,Kolkata,Unfurnished,Super Area,10000
1,800,2,1,Kolkata,Semi-Furnished,Super Area,20000
2,1000,2,1,Kolkata,Semi-Furnished,Super Area,17000
3,800,2,1,Kolkata,Unfurnished,Super Area,10000
4,850,2,1,Kolkata,Unfurnished,Carpet Area,7500


In [None]:
# Per the author's earlier `dataset.isna().sum()` check, the dataset has no
# missing values, so no imputation step is performed.
# Largest value found in the 'Bathroom' column.
dataset.loc[:, 'Bathroom'].max()

10

In [None]:
# Select the predictors by name. The previous `iloc[:, :]` took every column,
# which included the saved index ('Unnamed: 0'), 'AreaType', and the target
# 'Rent' itself (target leakage), and left string columns un-encoded.
# Column order matters: the two categorical features must end up at positions
# 3 and 4, which is where the encoding step looks for them.
X = dataset[['Size', 'BHK', 'Bathroom', 'City', 'FurnishingStatus']].values
# Target vector: the 'Rent' column.
y = dataset['Rent'].values

In [None]:
# Print the raw feature matrix to check which columns were selected.
print(X)

[[1100 2 2 'Kolkata' 'Unfurnished']
 [800 2 1 'Kolkata' 'Semi-Furnished']
 [1000 2 1 'Kolkata' 'Semi-Furnished']
 ...
 [1750 3 3 'Hyderabad' 'Semi-Furnished']
 [1500 3 2 'Hyderabad' 'Semi-Furnished']
 [1000 2 2 'Hyderabad' 'Unfurnished']]


In [None]:
# Print the target vector taken from the dataset.
print(y)

[10000 20000 17000 ... 35000 45000 15000]


## Encoding the Categorical Data

In [None]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

# One-hot encode columns 3 and 4 of X; every other column passes through as is.
# Several columns can be encoded at once by giving ColumnTransformer either
#   1. a list of column indices, e.g. [3, 4, 5], or
#   2. a slice, e.g. slice(3, 6).
#
# drop='first' omits one dummy per categorical column. Keeping all of them makes
# the dummies of each feature sum to 1, i.e. perfectly collinear with the
# intercept, which makes the linear-regression coefficients blow up.
# sparse_threshold=0 forces a dense array so later indexing/scaling cells work.
transformer = ColumnTransformer(
    transformers=[('encode', OneHotEncoder(drop='first'), [3, 4])],
    remainder='passthrough',
    sparse_threshold=0,
)
X = transformer.fit_transform(X)

In [None]:
# Number of feature columns after encoding.
X.shape[1]

12

## Splitting the Dataset into Training and Test Sets

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as the test set; the fixed random_state keeps the
# split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=0,
    test_size=0.2,
)

In [None]:
# Display the training feature matrix produced by the split.
X_train

array([[0.0, 0.0, 0.0, ..., 700, 2, 2],
       [0.0, 0.0, 0.0, ..., 650, 2, 2],
       [1.0, 0.0, 0.0, ..., 1200, 3, 2],
       ...,
       [1.0, 0.0, 0.0, ..., 1100, 2, 2],
       [0.0, 0.0, 1.0, ..., 1800, 2, 2],
       [0.0, 0.0, 1.0, ..., 1000, 2, 1]], dtype=object)

In [None]:
# Display the held-out test feature matrix produced by the split.
X_test

array([[0.0, 0.0, 0.0, ..., 410, 1, 1],
       [0.0, 0.0, 0.0, ..., 1300, 2, 2],
       [0.0, 0.0, 0.0, ..., 900, 2, 2],
       ...,
       [0.0, 0.0, 1.0, ..., 1000, 2, 2],
       [1.0, 0.0, 0.0, ..., 700, 2, 2],
       [0.0, 0.0, 0.0, ..., 1050, 2, 2]], dtype=object)

In [None]:
# Display the training targets produced by the split.
y_train

array([20000, 38000, 16000, ..., 22000, 30000, 20000])

In [None]:
# Display the held-out test targets produced by the split.
y_test

array([  50000,   15000,   12000,   13000,   48000,   17000,   18000,
         12000,   14000,   75000,    8500,   60000,    9000,   15000,
         15000,   12000,    8000,   13500,   14500,   12000,   50000,
         20000,   11000,  130000,    7000,   70000,   60000,   46000,
         17000,   30000,    5000,   45000,    8000,   31000,   60000,
         35000,    8500,   18000,   22000,    4500,   18500,   18000,
         15000,    5000,   15000,   35000,   28000,    5000,   20000,
         80000,    7000,  150000,   83000,   10000,   25000,    9000,
         16000,   17000,   20000,   17000,  100000,   14500,   25000,
         15000,   12000,   25000,   18000,   75000,   55000,  200000,
         32000,  150000,  200000,   16000,   10000,   47000,   12000,
         12500,   35000,  150000,   14000,   32000,   40000,   10000,
         10000,    7500,   15000,   15000,    8500,   17000,   15000,
          8000,    9500,    9000,   50000,    3000,   16000,   15000,
          6000,   10

## Feature Scaling

In [None]:
# Turn the target into a single-column 2-D array, the layout the scaler in the
# next step is fitted on.
y_train = y_train.reshape(-1, 1)

In [None]:
from sklearn.preprocessing import StandardScaler

# Two independent scalers: one for the features and one for the target, so that
# predictions can be mapped back to the original scale with sc_y.
sc_X, sc_y = StandardScaler(), StandardScaler()

# Fit on the training data only, then replace the training arrays with their
# standardized versions.
# NOTE: this cell overwrites its own inputs, so running it a second time would
# re-fit the scalers on already-standardized data. Re-run from the split cell.
X_train = sc_X.fit(X_train).transform(X_train)
y_train = sc_y.fit(y_train).transform(y_train)

In [None]:
# Display the training features after standardization.
X_train

array([[-0.482, -0.482, -0.381, ..., -0.415, -0.093,  0.048],
       [-0.482, -0.482, -0.381, ..., -0.495, -0.093,  0.048],
       [ 2.076, -0.482, -0.381, ...,  0.378,  1.122,  0.048],
       ...,
       [ 2.076, -0.482, -0.381, ...,  0.219, -0.093,  0.048],
       [-0.482, -0.482,  2.625, ...,  1.329, -0.093,  0.048],
       [-0.482, -0.482,  2.625, ...,  0.061, -0.093, -1.086]])

In [None]:
# Display the training targets after standardization.
y_train

array([[-0.242],
       [ 0.081],
       [-0.313],
       ...,
       [-0.206],
       [-0.062],
       [-0.242]])

## Training the Multiple Linear Regression Model on the training set

In [None]:
from sklearn.linear_model import LinearRegression

# Fit a linear regression on the standardized training features and target.
regressor = LinearRegression()
regressor.fit(X=X_train, y=y_train)
# Alternative model kept for reference (RBF-kernel support vector regression):
# from sklearn.svm import SVR
# regressor = SVR(kernel='rbf')
# regressor.fit(X_train, y_train)

LinearRegression()

## Predicting the Test Set Results

In [None]:
# Scale the test features with the scaler fitted on the training data, predict
# in standardized units, then convert the predictions back to the original
# target scale.
sc_y.inverse_transform(
    regressor.predict(
        sc_X.transform(X_test)
    ).reshape(-1, 1)
)

array([[ 4.651e+04],
       [ 1.797e+04],
       [ 4.891e+03],
       [ 1.405e+03],
       [ 4.934e+04],
       [ 2.951e+04],
       [ 3.823e+04],
       [ 8.160e+03],
       [ 2.298e+04],
       [ 7.462e+04],
       [-6.004e+03],
       [ 8.421e+04],
       [ 7.506e+03],
       [ 3.670e+04],
       [ 7.942e+03],
       [ 1.993e+04],
       [-7.739e+02],
       [ 1.884e+04],
       [ 2.516e+04],
       [ 4.891e+03],
       [ 6.155e+04],
       [ 9.032e+03],
       [ 1.906e+04],
       [ 1.326e+05],
       [-1.428e+03],
       [ 6.743e+04],
       [ 9.532e+04],
       [ 5.653e+04],
       [ 2.385e+04],
       [ 3.736e+04],
       [ 7.506e+03],
       [ 6.939e+04],
       [-1.189e+04],
       [ 7.309e+04],
       [ 7.723e+04],
       [ 3.017e+04],
       [ 1.731e+04],
       [ 2.036e+04],
       [ 2.625e+04],
       [ 5.335e+02],
       [ 1.034e+04],
       [ 8.596e+03],
       [ 3.932e+04],
       [-5.786e+03],
       [ 1.906e+04],
       [ 6.111e+04],
       [ 4.673e+04],
       [ 9.76

In [None]:
# Display the actual test targets again, for comparison with the predictions.
y_test

array([  50000,   15000,   12000,   13000,   48000,   17000,   18000,
         12000,   14000,   75000,    8500,   60000,    9000,   15000,
         15000,   12000,    8000,   13500,   14500,   12000,   50000,
         20000,   11000,  130000,    7000,   70000,   60000,   46000,
         17000,   30000,    5000,   45000,    8000,   31000,   60000,
         35000,    8500,   18000,   22000,    4500,   18500,   18000,
         15000,    5000,   15000,   35000,   28000,    5000,   20000,
         80000,    7000,  150000,   83000,   10000,   25000,    9000,
         16000,   17000,   20000,   17000,  100000,   14500,   25000,
         15000,   12000,   25000,   18000,   75000,   55000,  200000,
         32000,  150000,  200000,   16000,   10000,   47000,   12000,
         12500,   35000,  150000,   14000,   32000,   40000,   10000,
         10000,    7500,   15000,   15000,    8500,   17000,   15000,
          8000,    9500,    9000,   50000,    3000,   16000,   15000,
          6000,   10

In [None]:
# Predict on the scaled test features and convert back to the original scale.
y_pred = sc_y.inverse_transform(
    regressor.predict(sc_X.transform(X_test)).reshape(-1, 1)
)
# Global NumPy setting: arrays printed from here on show 3 decimal places.
np.set_printoptions(precision=3)
# Side-by-side view: predicted value (left column) vs. actual value (right column).
print(np.concatenate((y_pred.reshape(-1, 1), y_test.reshape(-1, 1)), axis=1))

[[46510.421 50000.   ]
 [17965.502 15000.   ]
 [ 4891.493 12000.   ]
 ...
 [36269.114 14000.   ]
 [17965.502 10000.   ]
 [14043.299 10000.   ]]


## Getting the Final Multiple Linear Regression Equation (Coefficients and Intercept)

In [None]:
# Intercept of the fitted model. The target was standardized before fitting,
# so this value is in standardized units, not in the original target units.
regressor.intercept_

array([0.023])

In [None]:
# Coefficients of the fitted model, one per column of the encoded and
# standardized feature matrix.
regressor.coef_

array([[-2.943e+12, -2.943e+12, -2.504e+12, -2.922e+12, -2.354e+12,
        -3.021e+12, -1.820e+13, -2.617e+13, -2.557e+13,  3.706e-01,
         4.871e-02,  1.865e-01]])

## Evaluating the Model with the R² Score

In [None]:
from sklearn.metrics import r2_score

# Coefficient of determination (R^2) of the predictions against the actual
# test targets, both in the original (unscaled) units.
r2_score(y_true=y_test, y_pred=y_pred)

0.16316856946628355