### Import Libraries

In [43]:
from linearregressionmodel import *
import pandas as pd
from sklearn.metrics import r2_score
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

read data 

In [44]:
data = pd.read_csv('house_price_regression_dataset.csv')
data

Unnamed: 0,Square_Footage,Num_Bedrooms,Num_Bathrooms,Year_Built,Lot_Size,Garage_Size,Neighborhood_Quality,House_Price
0,1360,2,1,1981,0.599637,0,5,2.623829e+05
1,4272,3,3,2016,4.753014,1,6,9.852609e+05
2,3592,1,2,2016,3.634823,0,9,7.779774e+05
3,966,1,2,1977,2.730667,1,8,2.296989e+05
4,4926,2,1,1993,4.699073,0,8,1.041741e+06
...,...,...,...,...,...,...,...,...
995,3261,4,1,1978,2.165110,2,10,7.014940e+05
996,3179,1,2,1999,2.977123,1,10,6.837232e+05
997,2606,4,2,1962,4.055067,0,2,5.720240e+05
998,4723,5,2,1950,1.930921,0,7,9.648653e+05


In [45]:
x = data.drop(columns=['House_Price'])
y = data['House_Price']
x, y

(     Square_Footage  Num_Bedrooms  Num_Bathrooms  Year_Built  Lot_Size  \
 0              1360             2              1        1981  0.599637   
 1              4272             3              3        2016  4.753014   
 2              3592             1              2        2016  3.634823   
 3               966             1              2        1977  2.730667   
 4              4926             2              1        1993  4.699073   
 ..              ...           ...            ...         ...       ...   
 995            3261             4              1        1978  2.165110   
 996            3179             1              2        1999  2.977123   
 997            2606             4              2        1962  4.055067   
 998            4723             5              2        1950  1.930921   
 999            3268             4              2        1983  3.108790   
 
      Garage_Size  Neighborhood_Quality  
 0              0                     5  
 1            

### Split the data to training set and test set

In [46]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)
x_train, y_train

(     Square_Footage  Num_Bedrooms  Num_Bathrooms  Year_Built  Lot_Size  \
 453            3116             4              3        2001  1.830445   
 520            1750             5              2        1976  2.255541   
 126            1806             1              2        2001  2.576853   
 278            4514             4              3        1993  1.247098   
 514            2352             3              2        1991  3.569689   
 ..              ...           ...            ...         ...       ...   
 508            3940             2              1        1958  3.217086   
 75              564             5              3        2018  2.844829   
 624            4800             1              1        1974  4.582620   
 309            1074             4              2        1982  2.487859   
 167            4082             3              3        1973  0.861000   
 
      Garage_Size  Neighborhood_Quality  
 453            1                     3  
 520          

### Standardization

In [47]:
x_train = (x_train - np.min(x_train))/(np.max(x_train) - np.min(x_train))
x_train

Unnamed: 0,Square_Footage,Num_Bedrooms,Num_Bathrooms,Year_Built,Lot_Size,Garage_Size,Neighborhood_Quality
453,0.623325,0.0008,0.0006,0.400280,0.000366,0.0002,0.0006
520,0.350070,0.0010,0.0004,0.395279,0.000451,0.0000,0.0002
126,0.361272,0.0002,0.0004,0.400280,0.000515,0.0000,0.0004
278,0.902981,0.0008,0.0006,0.398680,0.000249,0.0002,0.0006
514,0.470494,0.0006,0.0004,0.398280,0.000714,0.0004,0.0002
...,...,...,...,...,...,...,...
508,0.788158,0.0004,0.0002,0.391678,0.000644,0.0000,0.0020
75,0.112823,0.0010,0.0006,0.403681,0.000569,0.0004,0.0008
624,0.960192,0.0002,0.0002,0.394879,0.000917,0.0004,0.0002
309,0.214843,0.0008,0.0004,0.396479,0.000498,0.0002,0.0018


In [48]:
y_train = (y_train - np.min(y_train))/(np.max(y_train) - np.min(y_train))
y_train

453    0.586586
520    0.299335
126    0.309626
278    0.846401
514    0.416738
         ...   
508    0.695636
75     0.131685
624    0.909483
309    0.157970
167    0.748115
Name: House_Price, Length: 700, dtype: float64

In [49]:
x_test = (x_test - np.min(x_test))/(np.max(x_test) - np.min(x_test))
y_test = (y_test - np.min(y_test))/(np.max(y_test) - np.min(y_test))

### Call the linear regression model

In [50]:
model = Linear_Regression(lr = 0.005, epochs=5000)
model.fit(x_train, y_train)

weights and intercept

In [51]:
print('weights :')
print(model.w)
print("-------------------------------------")
print('intercept:')
print(model.b)

weights :
[[1.0029581 ]
 [0.75222822]
 [0.56211639]
 [0.00129803]
 [0.49913616]
 [0.03016327]
 [0.8787951 ]]
-------------------------------------
intercept:
-0.055176171460675265


### Train Score

In [52]:
y_pred = model.predict(x_train)
print("r2 score train :", r2_score(y_train, y_pred))
y_pred = y_pred.to_numpy().reshape(y_train.shape)
print(y_pred.shape)

r2 score train : 0.9834488490814541
(700,)


### Compare predicted values and true values in training set

In [53]:
dif = pd.DataFrame({'y_pred' : y_pred, 
                    'y_train' : y_train,
                    'dif' : y_train-y_pred})
dif

Unnamed: 0,y_pred,y_train,dif
453,0.572167,0.586586,0.014418
520,0.297821,0.299335,0.001514
126,0.308669,0.309626,0.000958
278,0.852590,0.846401,-0.006189
514,0.418447,0.416738,-0.001710
...,...,...,...
508,0.738314,0.695636,-0.042678
75,0.060593,0.131685,0.071092
624,0.909277,0.909483,0.000206
309,0.163480,0.157970,-0.005510


### Training Loss

In [54]:
train_loss = model.MSE(y_train, y_pred)
train_loss

0.0015517241607670952

### Test score

In [55]:
y_pred = model.predict(x_test)
print("r2 score test :", r2_score(y_test, y_pred))
y_pred = y_pred.to_numpy().reshape(y_test.shape)
print(y_pred.shape)

r2 score test : 0.9781437254330583
(300,)


### Compare predicted values and true values in test set

In [56]:
dif = pd.DataFrame({'y_pred' : y_pred, 
                    'y_test' : y_test,
                    'dif' : y_test-y_pred})
dif

Unnamed: 0,y_pred,y_test,dif
407,0.844733,0.876297,0.031564
182,0.396454,0.389164,-0.007290
301,0.242158,0.219334,-0.022824
378,0.759626,0.751250,-0.008376
468,0.585004,0.575181,-0.009823
...,...,...,...
945,0.717890,0.693382,-0.024508
949,0.740322,0.739464,-0.000859
239,0.737556,0.703110,-0.034446
236,0.739421,0.665790,-0.073631


### Test Loss

In [57]:
test_loss = model.MSE(y_test, y_pred)
test_loss

0.06084475446849316

### Use SKLearn to compare r2 score

In [59]:
linreg = LinearRegression()
linreg.fit(x_train, y_train)
y_pred_test = linreg.predict(x_test)
y_pred_train = linreg.predict(x_train)
print("r2 score train :", r2_score(y_train, y_pred_train))
print("-------------------------------------")
print("r2 score test :", r2_score(y_test, y_pred_test))
print("-------------------------------------")

r2 score train : 0.9984863367527426
-------------------------------------
r2 score test : 0.9972035550800299
-------------------------------------
