In [46]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import mean_absolute_error as mae
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import r2_score as r2

In [6]:
# Read the housing CSV into a DataFrame.
# NOTE(review): this is an absolute path (looks like a Colab working directory) —
# confirm it exists in the environment where the notebook is re-run.
csv_path = '/content/BostonHousing.csv'
house_price_dataset = pd.read_csv(csv_path)

In [7]:
# Preview the first five rows to sanity-check the columns that were loaded.
house_price_dataset.head(n=5)

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,b,lstat,price
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2


In [9]:
# Show the dataset's dimensions as a (rows, columns) tuple.
house_price_dataset.shape

(506, 14)

In [10]:
# Count the missing values in every column (0 means the column is complete).
house_price_dataset.isna().sum()

Unnamed: 0,0
crim,0
zn,0
indus,0
chas,0
nox,0
rm,0
age,0
dis,0
rad,0
tax,0


In [8]:
# Summary statistics (count, mean, std, min, quartiles, max) for each numeric column.
house_price_dataset.describe()

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,b,lstat,price
count,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0
mean,3.613524,11.363636,11.136779,0.06917,0.554695,6.284634,68.574901,3.795043,9.549407,408.237154,18.455534,356.674032,12.653063,22.532806
std,8.601545,23.322453,6.860353,0.253994,0.115878,0.702617,28.148861,2.10571,8.707259,168.537116,2.164946,91.294864,7.141062,9.197104
min,0.00632,0.0,0.46,0.0,0.385,3.561,2.9,1.1296,1.0,187.0,12.6,0.32,1.73,5.0
25%,0.082045,0.0,5.19,0.0,0.449,5.8855,45.025,2.100175,4.0,279.0,17.4,375.3775,6.95,17.025
50%,0.25651,0.0,9.69,0.0,0.538,6.2085,77.5,3.20745,5.0,330.0,19.05,391.44,11.36,21.2
75%,3.677083,12.5,18.1,0.0,0.624,6.6235,94.075,5.188425,24.0,666.0,20.2,396.225,16.955,25.0
max,88.9762,100.0,27.74,1.0,0.871,8.78,100.0,12.1265,24.0,711.0,22.0,396.9,37.97,50.0


In [12]:
# Separate the predictors from the target: every column except 'price'
# becomes a feature, and 'price' itself is the value to be predicted.
Y = house_price_dataset['price']
X = house_price_dataset.drop(columns=['price'])

In [15]:
# Print the feature frame followed by its (rows, columns) shape.
for item in (X, X.shape):
    print(item)

        crim    zn  indus  chas    nox     rm   age     dis  rad  tax  \
0    0.00632  18.0   2.31     0  0.538  6.575  65.2  4.0900    1  296   
1    0.02731   0.0   7.07     0  0.469  6.421  78.9  4.9671    2  242   
2    0.02729   0.0   7.07     0  0.469  7.185  61.1  4.9671    2  242   
3    0.03237   0.0   2.18     0  0.458  6.998  45.8  6.0622    3  222   
4    0.06905   0.0   2.18     0  0.458  7.147  54.2  6.0622    3  222   
..       ...   ...    ...   ...    ...    ...   ...     ...  ...  ...   
501  0.06263   0.0  11.93     0  0.573  6.593  69.1  2.4786    1  273   
502  0.04527   0.0  11.93     0  0.573  6.120  76.7  2.2875    1  273   
503  0.06076   0.0  11.93     0  0.573  6.976  91.0  2.1675    1  273   
504  0.10959   0.0  11.93     0  0.573  6.794  89.3  2.3889    1  273   
505  0.04741   0.0  11.93     0  0.573  6.030  80.8  2.5050    1  273   

     ptratio       b  lstat  
0       15.3  396.90   4.98  
1       17.8  396.90   9.14  
2       17.8  392.83   4.03  
3  

In [17]:
# Print the target Series (the 'price' column) to eyeball its values.
print(Y)

0      24.0
1      21.6
2      34.7
3      33.4
4      36.2
       ... 
501    22.4
502    20.6
503    23.9
504    22.0
505    11.9
Name: price, Length: 506, dtype: float64


In [28]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# shuffle (and therefore the split) the same on every run.
split_parts = train_test_split(X, Y, test_size=0.2, random_state=2)
X_train, X_test, Y_train, Y_test = split_parts

In [29]:
# Confirm which container type each feature split is held in.
for feature_split in (X_train, X_test):
    print(type(feature_split))

<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>


In [31]:
# Learn the scaling statistics from the training features only, then apply
# that same fitted scaler to both splits so the test set never influences
# the fitted parameters.
scaler = StandardScaler()
scaler.fit(X_train)
training_scaled = scaler.transform(X_train)
testing_scaled = scaler.transform(X_test)

In [37]:
# Show the scaled training array, a visual divider, then the scaled test array.
divider = "\n-----------------*-----------------\n"
for chunk in (training_scaled, divider, testing_scaled):
    print(chunk)

[[-0.37743861 -0.47456976 -0.5837312  ...  0.52359086  0.45391267
  -0.83410575]
 [-0.38918729 -0.47456976 -0.78968387 ...  0.33155209  0.45391267
  -0.56340806]
 [-0.39619249  3.16608882 -1.39883965 ... -0.14854484 -0.13442027
   0.02927739]
 ...
 [-0.3783975  -0.47456976 -0.24869553 ...  0.33155209  0.45391267
  -0.10179728]
 [-0.3258116  -0.47456976 -0.47350302 ...  1.19572656  0.44029484
  -0.6061498 ]
 [-0.13206441 -0.47456976  1.18572129 ... -1.82888409 -0.60795914
  -0.23144722]]

-----------------*-----------------

[[ 0.27566504 -0.47456976  0.97106639 ...  0.81164902  0.42114476
  -0.34684992]
 [-0.26800163 -0.47456976  1.18572129 ... -1.82888409 -0.11654687
  -0.08612531]
 [-0.38933552  3.16608882 -0.93617133 ...  0.33155209  0.45391267
  -1.3056896 ]
 ...
 [-0.2964127  -0.47456976  1.52075696 ...  1.29174595  0.45391267
   0.80005342]
 [-0.39680164  2.25592418 -1.22914625 ... -1.39679686  0.23900624
  -1.18886218]
 [-0.37905876 -0.47456976 -0.5837312  ...  0.52359086  0.453

In [38]:
# Fit a linear regression on the scaled training features; fit() returns the
# estimator itself, so the fitted model can be bound in a single statement.
model = LinearRegression().fit(training_scaled, Y_train)
model

In [51]:
# Predict on the scaled training features first, then on the scaled test features.
training_prediction, testing_prediction = (
    model.predict(scaled_features)
    for scaled_features in (training_scaled, testing_scaled)
)

In [52]:
def _regression_scores(y_true, y_pred):
    """Return the (MAE, MSE, R2) triple comparing ``y_pred`` against ``y_true``."""
    return mae(y_true, y_pred), mse(y_true, y_pred), r2(y_true, y_pred)


# Score the training fit and the held-out predictions with the same metrics.
train_mae, train_mse, train_r2 = _regression_scores(Y_train, training_prediction)
test_mae, test_mse, test_r2 = _regression_scores(Y_test, testing_prediction)

In [55]:
# Build the formatted metric lines for each split up front...
train_lines = [
    f"\n TRAIN MSE: {train_mse}",
    f"\n TRAIN MAE: {train_mae}",
    f"\n TRAIN R2: {train_r2}",
]
test_lines = [
    f"\n TEST MSE: {test_mse}",
    f"\n TEST MAE: {test_mae}",
    f"\n TEST R2: {test_r2}",
]

# ...then emit them in order, with a divider between the train and test sections.
for report_line in [*train_lines, "\n---------------*----------------", *test_lines]:
    print(report_line)


 TRAIN MSE: 22.95369357611288

 TRAIN MAE: 3.384457539916795

 TRAIN R2: 0.7285831776605591

---------------*----------------

 TEST MSE: 18.495420122448383

 TEST MAE: 3.113043746893426

 TEST R2: 0.7789207451814419
