In [38]:
import pandas as pd
import numpy as np

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

In [39]:
train_path = "/kaggle/input/regression-model-dataset/train (2).csv"

train_data = pd.read_csv(train_path)

In [40]:
test_path = "/kaggle/input/regression-model-dataset/test (1).csv"

test_data = pd.read_csv(test_path)

In [41]:
train_data.dtypes

OverallQual         int64
YearBuilt           int64
YearRemodAdd        int64
TotalBsmtSF         int64
1stFlrSF            int64
GrLivArea           int64
FullBath            int64
TotRmsAbvGrd        int64
GarageCars          int64
GarageArea          int64
SalePrice           int64
ExterQual_TA        int64
Foundation_PConc    int64
KitchenQual_TA      int64
dtype: object

In [42]:
train_data.head()

Unnamed: 0,OverallQual,YearBuilt,YearRemodAdd,TotalBsmtSF,1stFlrSF,GrLivArea,FullBath,TotRmsAbvGrd,GarageCars,GarageArea,SalePrice,ExterQual_TA,Foundation_PConc,KitchenQual_TA
0,6,1969,1969,663,663,1352,1,7,1,299,158000,1,0,1
1,6,1920,1950,1012,1012,1012,1,6,1,308,118400,1,0,1
2,5,1910,2006,1022,1022,1022,1,4,1,280,85000,1,0,1
3,5,1973,1973,1656,1656,1656,2,8,2,506,135000,1,0,1
4,6,1978,1978,918,918,1683,2,7,2,440,172500,1,0,1


In [43]:
train_X = train_data.drop("SalePrice", axis=1)
train_y = train_data.SalePrice  # train.loc[:, "SalePrice"]

In [44]:
model = LinearRegression()
model.fit(train_X, train_y)

In [45]:
test_X = test_data.drop("SalePrice", axis=1)
test_y = test_data.SalePrice

In [46]:
predictions = model.predict(test_X)

In [47]:
comparison = pd.DataFrame({"Actual Values": test_y, "Predictions": predictions})

In [48]:
comparison.head()

Unnamed: 0,Actual Values,Predictions
0,118500,83380.944694
1,154900,105974.149765
2,133000,139238.138343
3,115000,104982.049557
4,154500,140473.360146


In [49]:
comparison.tail()

Unnamed: 0,Actual Values,Predictions
324,132250,102816.796295
325,123000,121698.649065
326,316600,271745.844407
327,142000,131258.275591
328,250000,263005.372419


In [57]:
rmse = np.sqrt(mean_squared_error(test_y, predictions))

In [59]:
print("Root Mean Squared Error : ", rmse)

Root Mean Squared Error :  33186.38417236769


In [65]:
correlations = train_data.corr()
correlations

Unnamed: 0,OverallQual,YearBuilt,YearRemodAdd,TotalBsmtSF,1stFlrSF,GrLivArea,FullBath,TotRmsAbvGrd,GarageCars,GarageArea,SalePrice,ExterQual_TA,Foundation_PConc,KitchenQual_TA
OverallQual,1.0,0.572367,0.550407,0.557685,0.539527,0.62889,0.598265,0.482744,0.627897,0.579378,0.792263,-0.692146,0.593079,-0.579892
YearBuilt,0.572367,1.0,0.615451,0.418706,0.315715,0.205311,0.496001,0.122193,0.530869,0.466243,0.503317,-0.6086,0.675289,-0.478635
YearRemodAdd,0.550407,0.615451,1.0,0.305751,0.299912,0.300983,0.500358,0.189233,0.507051,0.459938,0.504414,-0.58621,0.608433,-0.621112
TotalBsmtSF,0.557685,0.418706,0.305751,1.0,0.912271,0.51743,0.370448,0.337671,0.476327,0.539858,0.612205,-0.414837,0.330111,-0.353424
1stFlrSF,0.539527,0.315715,0.299912,0.912271,1.0,0.589766,0.392271,0.416777,0.472616,0.531808,0.621057,-0.355415,0.262008,-0.315156
GrLivArea,0.62889,0.205311,0.300983,0.51743,0.589766,1.0,0.624707,0.826999,0.492914,0.4998,0.712054,-0.427637,0.34034,-0.384288
FullBath,0.598265,0.496001,0.500358,0.370448,0.392271,0.624707,1.0,0.550967,0.528268,0.465081,0.597505,-0.516471,0.519781,-0.474227
TotRmsAbvGrd,0.482744,0.122193,0.189233,0.337671,0.416777,0.826999,0.550967,1.0,0.426842,0.389448,0.573845,-0.307535,0.2559,-0.251362
GarageCars,0.627897,0.530869,0.507051,0.476327,0.472616,0.492914,0.528268,0.426842,1.0,0.845512,0.658355,-0.543945,0.517289,-0.465095
GarageArea,0.579378,0.466243,0.459938,0.539858,0.531808,0.4998,0.465081,0.389448,0.845512,1.0,0.621354,-0.511492,0.451725,-0.455758


In [68]:
sales_price_corr = correlations["SalePrice"]

In [69]:
sales_price_corr

OverallQual         0.792263
YearBuilt           0.503317
YearRemodAdd        0.504414
TotalBsmtSF         0.612205
1stFlrSF            0.621057
GrLivArea           0.712054
FullBath            0.597505
TotRmsAbvGrd        0.573845
GarageCars          0.658355
GarageArea          0.621354
SalePrice           1.000000
ExterQual_TA       -0.598202
Foundation_PConc    0.517222
KitchenQual_TA     -0.527176
Name: SalePrice, dtype: float64

In [67]:
sales_price_corr.sort_values(ascending=False).head(10)

SalePrice           1.000000
OverallQual         0.792263
GrLivArea           0.712054
GarageCars          0.658355
GarageArea          0.621354
1stFlrSF            0.621057
TotalBsmtSF         0.612205
FullBath            0.597505
TotRmsAbvGrd        0.573845
Foundation_PConc    0.517222
Name: SalePrice, dtype: float64