### Import Libraries

In [28]:
from linearregressionmodel import *
import pandas as pd
from sklearn.metrics import r2_score
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

read data 

In [29]:
data = pd.read_csv('house_price_regression_dataset.csv')
data

Unnamed: 0,Square_Footage,Num_Bedrooms,Num_Bathrooms,Year_Built,Lot_Size,Garage_Size,Neighborhood_Quality,House_Price
0,1360,2,1,1981,0.599637,0,5,2.623829e+05
1,4272,3,3,2016,4.753014,1,6,9.852609e+05
2,3592,1,2,2016,3.634823,0,9,7.779774e+05
3,966,1,2,1977,2.730667,1,8,2.296989e+05
4,4926,2,1,1993,4.699073,0,8,1.041741e+06
...,...,...,...,...,...,...,...,...
995,3261,4,1,1978,2.165110,2,10,7.014940e+05
996,3179,1,2,1999,2.977123,1,10,6.837232e+05
997,2606,4,2,1962,4.055067,0,2,5.720240e+05
998,4723,5,2,1950,1.930921,0,7,9.648653e+05


In [30]:
x = data.drop(columns=['House_Price'])
y = data['House_Price']
x, y

(     Square_Footage  Num_Bedrooms  Num_Bathrooms  Year_Built  Lot_Size  \
 0              1360             2              1        1981  0.599637   
 1              4272             3              3        2016  4.753014   
 2              3592             1              2        2016  3.634823   
 3               966             1              2        1977  2.730667   
 4              4926             2              1        1993  4.699073   
 ..              ...           ...            ...         ...       ...   
 995            3261             4              1        1978  2.165110   
 996            3179             1              2        1999  2.977123   
 997            2606             4              2        1962  4.055067   
 998            4723             5              2        1950  1.930921   
 999            3268             4              2        1983  3.108790   
 
      Garage_Size  Neighborhood_Quality  
 0              0                     5  
 1            

### Split the data to training set and test set

In [31]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)
x_train, y_train

(     Square_Footage  Num_Bedrooms  Num_Bathrooms  Year_Built  Lot_Size  \
 817             977             5              1        1956  1.795241   
 263            1639             2              2        2008  2.281827   
 365            2570             5              3        1962  4.522933   
 358            3559             2              1        2003  3.217329   
 367            3751             5              3        1973  3.782587   
 ..              ...           ...            ...         ...       ...   
 745            2417             4              1        2003  3.915891   
 432            4452             1              1        1975  3.431821   
 902             917             4              3        1953  0.561353   
 628            4542             3              1        1996  3.695516   
 247            3620             5              3        2011  4.892841   
 
      Garage_Size  Neighborhood_Quality  
 817            2                     1  
 263          

### Normalization

In [32]:
x_train = (x_train - np.min(x_train))/(np.max(x_train) - np.min(x_train))
x_train

Unnamed: 0,Square_Footage,Num_Bedrooms,Num_Bathrooms,Year_Built,Lot_Size,Garage_Size,Neighborhood_Quality
817,0.195439,0.0010,0.0002,0.391278,0.000359,0.0004,0.0002
263,0.327866,0.0004,0.0004,0.401680,0.000456,0.0002,0.0018
365,0.514103,0.0010,0.0006,0.392478,0.000905,0.0004,0.0020
358,0.711942,0.0004,0.0002,0.400680,0.000644,0.0004,0.0016
367,0.750350,0.0010,0.0006,0.394679,0.000757,0.0004,0.0002
...,...,...,...,...,...,...,...
745,0.483497,0.0008,0.0002,0.400680,0.000783,0.0004,0.0016
432,0.890578,0.0002,0.0002,0.395079,0.000687,0.0000,0.0006
902,0.183437,0.0008,0.0006,0.390678,0.000112,0.0000,0.0012
628,0.908582,0.0006,0.0002,0.399280,0.000739,0.0000,0.0002


In [33]:
y_train = (y_train - np.min(y_train))/(np.max(y_train) - np.min(y_train))
y_train

817    0.101110
263    0.282977
365    0.494666
358    0.648835
367    0.729843
         ...   
745    0.469202
432    0.805279
902    0.078752
628    0.869904
247    0.751795
Name: House_Price, Length: 700, dtype: float64

In [34]:
x_test = (x_test - np.min(x_test))/(np.max(x_test) - np.min(x_test))
y_test = (y_test - np.min(y_test))/(np.max(y_test) - np.min(y_test))

### Call the linear regression model

In [35]:
model = Linear_Regression(lr = 0.005, epochs=1000)
model.fit(x_train, y_train)

weights and intercept

In [36]:
print('weights :')
print(model.w)
print("-------------------------------------")
print('intercept:')
print(model.b)

weights :
[[1.00692146]
 [0.24956992]
 [0.37909691]
 [0.02741109]
 [0.53639199]
 [0.80462818]
 [0.99074453]]
-------------------------------------
intercept:
-0.06545104062664389


### Train Score

In [37]:
y_pred = model.predict(x_train)
print("r2 score train :", r2_score(y_train, y_pred))
y_pred = y_pred.to_numpy().reshape(y_train.shape)
print(y_pred.shape)

r2 score train : 0.9834010481018142
(700,)


### Compare predicted values and true values in training set

In [38]:
dif = pd.DataFrame({'y_pred' : y_pred, 
                    'y_train' : y_train,
                    'dif' : y_train-y_pred})
dif

Unnamed: 0,y_pred,y_train,dif
817,0.143104,0.101110,-0.041994
263,0.278135,0.282977,0.004841
365,0.466235,0.494666,0.028431
358,0.664830,0.648835,-0.015996
367,0.702314,0.729843,0.027529
...,...,...,...
745,0.434978,0.469202,0.034223
432,0.843209,0.805279,-0.037930
902,0.131641,0.078752,-0.052889
628,0.861184,0.869904,0.008719


### Training Loss

In [39]:
train_loss = model.MSE(y_train, y_pred)
train_loss

0.020502044634573474

### Test score

In [40]:
y_pred = model.predict(x_test)
print("r2 score test :", r2_score(y_test, y_pred))
y_pred = y_pred.to_numpy().reshape(y_test.shape)
print(y_pred.shape)

r2 score test : 0.9800722789305099
(300,)


### Compare predicted values and true values in test set

In [41]:
dif = pd.DataFrame({'y_pred' : y_pred, 
                    'y_test' : y_test,
                    'dif' : y_test-y_pred})
dif

Unnamed: 0,y_pred,y_test,dif
627,0.819690,0.840235,0.020545
96,0.149551,0.198546,0.048995
238,0.390494,0.391774,0.001280
309,0.164678,0.159908,-0.004770
210,0.339655,0.331755,-0.007900
...,...,...,...
175,0.180543,0.140650,-0.039893
597,0.660063,0.668477,0.008414
863,0.599494,0.632263,0.032769
891,0.316869,0.325800,0.008930


### Test Loss

In [42]:
test_loss = model.MSE(y_test, y_pred)
test_loss

0.0018861823484336497

### Use SKLearn to compare r2 score

In [43]:
linreg = LinearRegression()
linreg.fit(x_train, y_train)
y_pred_test = linreg.predict(x_test)
y_pred_train = linreg.predict(x_train)
print("r2 score train :", r2_score(y_train, y_pred_train))
print("-------------------------------------")
print("r2 score test :", r2_score(y_test, y_pred_test))
print("-------------------------------------")

r2 score train : 0.9986336524007168
-------------------------------------
r2 score test : 0.99802095245341
-------------------------------------
