### House Price Prediction

##### Importing the necessary libraries

In [4]:
# importing the necessary libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn import linear_model, metrics

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso

from sklearn.model_selection import GridSearchCV

from sklearn.metrics import mean_squared_error, r2_score

import os

# hide warnings
# NOTE(review): filterwarnings('ignore') is called without a category or module
# restriction, so every warning raised for the rest of the session is silenced,
# including library FutureWarning/DeprecationWarning messages that would flag
# outdated API usage in later cells. Consider narrowing the filter.
import warnings
warnings.filterwarnings('ignore')

##### Data Understanding

In [5]:
# Reading the dataset
# "train.csv" is a relative path, so it is resolved against the kernel's working directory.
house_price = pd.read_csv("train.csv")
# Bare last expression: the (rows, columns) tuple becomes the cell output.
house_price.shape

(1460, 81)

In [6]:
# Taking a high-level look at the data set: head() shows the first five rows
house_price.head()

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,1,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,...,0,,,,0,2,2008,WD,Normal,208500
1,2,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,...,0,,,,0,5,2007,WD,Normal,181500
2,3,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,...,0,,,,0,9,2008,WD,Normal,223500
3,4,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,...,0,,,,0,2,2006,WD,Abnorml,140000
4,5,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,...,0,,,,0,12,2008,WD,Normal,250000


In [7]:
# Lift pandas' display limits so that no rows or columns are elided in cell outputs
pd.options.display.max_rows = None  # None = no row limit
pd.options.display.max_columns = None  # None = no column limit

In [8]:
# Understanding a bit more about the columns in the data set:
# info() lists each column's non-null count and dtype
house_price.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1460 entries, 0 to 1459
Data columns (total 81 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Id             1460 non-null   int64  
 1   MSSubClass     1460 non-null   int64  
 2   MSZoning       1460 non-null   object 
 3   LotFrontage    1201 non-null   float64
 4   LotArea        1460 non-null   int64  
 5   Street         1460 non-null   object 
 6   Alley          91 non-null     object 
 7   LotShape       1460 non-null   object 
 8   LandContour    1460 non-null   object 
 9   Utilities      1460 non-null   object 
 10  LotConfig      1460 non-null   object 
 11  LandSlope      1460 non-null   object 
 12  Neighborhood   1460 non-null   object 
 13  Condition1     1460 non-null   object 
 14  Condition2     1460 non-null   object 
 15  BldgType       1460 non-null   object 
 16  HouseStyle     1460 non-null   object 
 17  OverallQual    1460 non-null   int64  
 18  OverallC

In [9]:
# Bucket the column names by the dtype pandas assigned to each column
column_dtypes = house_price.dtypes
int_columns = [name for name, dtype in column_dtypes.items() if dtype == 'int64']
float_columns = [name for name, dtype in column_dtypes.items() if dtype == 'float64']
object_columns = [name for name, dtype in column_dtypes.items() if dtype == 'object']

In [10]:
# Number of int64, float64 and object columns, one count per line
print(len(int_columns), len(float_columns), len(object_columns), sep='\n')

35
3
43


In [11]:
# Re-display the first five rows; display.max_columns was set to None earlier,
# so every column is rendered this time.
house_price.head()

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,MasVnrArea,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,Heating,HeatingQC,CentralAir,Electrical,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,FireplaceQu,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,1,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,7,5,2003,2003,Gable,CompShg,VinylSd,VinylSd,BrkFace,196.0,Gd,TA,PConc,Gd,TA,No,GLQ,706,Unf,0,150,856,GasA,Ex,Y,SBrkr,856,854,0,1710,1,0,2,1,3,1,Gd,8,Typ,0,,Attchd,2003.0,RFn,2,548,TA,TA,Y,0,61,0,0,0,0,,,,0,2,2008,WD,Normal,208500
1,2,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,FR2,Gtl,Veenker,Feedr,Norm,1Fam,1Story,6,8,1976,1976,Gable,CompShg,MetalSd,MetalSd,,0.0,TA,TA,CBlock,Gd,TA,Gd,ALQ,978,Unf,0,284,1262,GasA,Ex,Y,SBrkr,1262,0,0,1262,0,1,2,0,3,1,TA,6,Typ,1,TA,Attchd,1976.0,RFn,2,460,TA,TA,Y,298,0,0,0,0,0,,,,0,5,2007,WD,Normal,181500
2,3,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,7,5,2001,2002,Gable,CompShg,VinylSd,VinylSd,BrkFace,162.0,Gd,TA,PConc,Gd,TA,Mn,GLQ,486,Unf,0,434,920,GasA,Ex,Y,SBrkr,920,866,0,1786,1,0,2,1,3,1,Gd,6,Typ,1,TA,Attchd,2001.0,RFn,2,608,TA,TA,Y,0,42,0,0,0,0,,,,0,9,2008,WD,Normal,223500
3,4,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,Corner,Gtl,Crawfor,Norm,Norm,1Fam,2Story,7,5,1915,1970,Gable,CompShg,Wd Sdng,Wd Shng,,0.0,TA,TA,BrkTil,TA,Gd,No,ALQ,216,Unf,0,540,756,GasA,Gd,Y,SBrkr,961,756,0,1717,1,0,1,0,3,1,Gd,7,Typ,1,Gd,Detchd,1998.0,Unf,3,642,TA,TA,Y,0,35,272,0,0,0,,,,0,2,2006,WD,Abnorml,140000
4,5,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,FR2,Gtl,NoRidge,Norm,Norm,1Fam,2Story,8,5,2000,2000,Gable,CompShg,VinylSd,VinylSd,BrkFace,350.0,Gd,TA,PConc,Gd,TA,Av,GLQ,655,Unf,0,490,1145,GasA,Ex,Y,SBrkr,1145,1053,0,2198,1,0,2,1,4,1,Gd,9,Typ,1,TA,Attchd,2000.0,RFn,3,836,TA,TA,Y,192,84,0,0,0,0,,,,0,12,2008,WD,Normal,250000


In [12]:
# List every column label in the frame
house_price.columns

Index(['Id', 'MSSubClass', 'MSZoning', 'LotFrontage', 'LotArea', 'Street',
       'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig',
       'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType',
       'HouseStyle', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd',
       'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType',
       'MasVnrArea', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual',
       'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinSF1',
       'BsmtFinType2', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', 'Heating',
       'HeatingQC', 'CentralAir', 'Electrical', '1stFlrSF', '2ndFlrSF',
       'LowQualFinSF', 'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath',
       'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'KitchenQual',
       'TotRmsAbvGrd', 'Functional', 'Fireplaces', 'FireplaceQu', 'GarageType',
       'GarageYrBlt', 'GarageFinish', 'GarageCars', 'GarageArea', 'GarageQual',
       'GarageCond', 'PavedDrive

##### Checking how many null values there are in each column

In [13]:
# Check whether any row consists entirely of NAs
all_na_mask = house_price.isna().all(axis=1)  # True where every column of the row is NA
rows_with_all_na = house_price.index[all_na_mask].tolist()
rows_with_all_na

[]

In [14]:
# Taking another look at the per-column non-null counts (and dtypes) before handling the missing values
house_price.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1460 entries, 0 to 1459
Data columns (total 81 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Id             1460 non-null   int64  
 1   MSSubClass     1460 non-null   int64  
 2   MSZoning       1460 non-null   object 
 3   LotFrontage    1201 non-null   float64
 4   LotArea        1460 non-null   int64  
 5   Street         1460 non-null   object 
 6   Alley          91 non-null     object 
 7   LotShape       1460 non-null   object 
 8   LandContour    1460 non-null   object 
 9   Utilities      1460 non-null   object 
 10  LotConfig      1460 non-null   object 
 11  LandSlope      1460 non-null   object 
 12  Neighborhood   1460 non-null   object 
 13  Condition1     1460 non-null   object 
 14  Condition2     1460 non-null   object 
 15  BldgType       1460 non-null   object 
 16  HouseStyle     1460 non-null   object 
 17  OverallQual    1460 non-null   int64  
 18  OverallC

In [15]:
# Keep only the labels of the columns that contain at least one null value
house_price.columns[house_price.isna().any()]

Index(['LotFrontage', 'Alley', 'MasVnrType', 'MasVnrArea', 'BsmtQual',
       'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2',
       'Electrical', 'FireplaceQu', 'GarageType', 'GarageYrBlt',
       'GarageFinish', 'GarageQual', 'GarageCond', 'PoolQC', 'Fence',
       'MiscFeature'],
      dtype='object')

In [16]:
# How many columns contain at least one null value
int(house_price.isna().any().sum())

19

In [17]:
# Number of object-dtype columns (list built from the frame's dtypes earlier)
len(object_columns)

43

##### Handling NAs in the columns of object data type

In [18]:
# 19 columns contain NAs: 16 are of object dtype and 3 are float.
# The object-typed ones are handled first; they are listed here grouped by feature family.
columns_to_replace_nan = (
    ['Alley', 'MasVnrType']
    + ['BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2']
    + ['Electrical', 'FireplaceQu']
    + ['GarageType', 'GarageFinish', 'GarageQual', 'GarageCond']
    + ['PoolQC', 'Fence', 'MiscFeature']
)

len(columns_to_replace_nan)

16

In [19]:
# Handling null values in the 16 columns of object data type
# Replacing all the null values in the 16 columns with the string 'Missing'.
#
# The filled columns are assigned back to the frame rather than calling
# fillna(..., inplace=True) on house_price[column]. That chained in-place form
# works on an intermediate Series: pandas 2.x deprecates it, and with
# Copy-on-Write enabled it leaves house_price unchanged.
house_price[columns_to_replace_nan] = house_price[columns_to_replace_nan].fillna('Missing')

In [20]:
# Double-check that the null values in the 16 object columns have been addressed:
# only columns that still contain nulls are listed
house_price.columns[house_price.isna().any()]

Index(['LotFrontage', 'MasVnrArea', 'GarageYrBlt'], dtype='object')

##### Handling NAs in the columns with float data type

In [21]:
# Handling NAs in the column "MasVnrArea"

# There were about 861 entries with a value of 0 for "MasVnrArea" and about 8 NA entries; converting all NA entries to 0.
# The filled Series is assigned back to the column: calling fillna(..., inplace=True)
# on house_price['MasVnrArea'] is a chained in-place update that pandas deprecates
# and that does not modify house_price when Copy-on-Write is enabled.
house_price['MasVnrArea'] = house_price['MasVnrArea'].fillna(0)

In [22]:
# Columns that still contain nulls after filling "MasVnrArea"
house_price.columns[house_price.isna().any()]

Index(['LotFrontage', 'GarageYrBlt'], dtype='object')

In [23]:
# "GarageYrBlt" is null for about 81 entries.
# For exactly those rows the value is taken from "YearBuilt"; every other row keeps its own garage year.
missing_garage_yrblt = house_price['GarageYrBlt'].isna()
house_price['GarageYrBlt'] = house_price['GarageYrBlt'].mask(
    missing_garage_yrblt, house_price['YearBuilt']
)

In [24]:
# Confirm that no null values are left in "GarageYrBlt" (expected count: 0)
house_price['GarageYrBlt'].isnull().sum()

0

In [25]:
# Columns that still contain nulls after filling "GarageYrBlt"
house_price.columns[house_price.isna().any()]

Index(['LotFrontage'], dtype='object')

In [26]:
# Summary statistics for 'LotFrontage'; describe() skips the missing entries,
# which is why count is lower than the number of rows
house_price['LotFrontage'].describe()

count    1201.000000
mean       70.049958
std        24.284752
min        21.000000
25%        59.000000
50%        69.000000
75%        80.000000
max       313.000000
Name: LotFrontage, dtype: float64

In [27]:
# For the column 'LotFrontage', there are about 259 missing values.
# Using the median value for those 259 missing values.
# The filled Series is assigned back to the column: fillna(..., inplace=True) on
# house_price['LotFrontage'] is a chained in-place update that pandas deprecates
# and that has no effect on house_price when Copy-on-Write is enabled.
# NOTE(review): the median is computed over all rows, i.e. before the train/test
# split performed later in the notebook — confirm this is intended.
lot_frontage_median = house_price['LotFrontage'].median()
house_price['LotFrontage'] = house_price['LotFrontage'].fillna(lot_frontage_median)

In [28]:
# Final check: an empty Index means no column contains nulls any more
house_price.columns[house_price.isna().any()]

Index([], dtype='object')

##### We see that all the null values in various columns have been handled. We are now good to proceed with the next steps.

##### Label Encoding for all columns of object data type

In [29]:
# Collect the names of the columns that are of object dtype at this point
object1_columns = [name for name, dtype in house_price.dtypes.items() if dtype == 'object']
object1_columns

['MSZoning',
 'Street',
 'Alley',
 'LotShape',
 'LandContour',
 'Utilities',
 'LotConfig',
 'LandSlope',
 'Neighborhood',
 'Condition1',
 'Condition2',
 'BldgType',
 'HouseStyle',
 'RoofStyle',
 'RoofMatl',
 'Exterior1st',
 'Exterior2nd',
 'MasVnrType',
 'ExterQual',
 'ExterCond',
 'Foundation',
 'BsmtQual',
 'BsmtCond',
 'BsmtExposure',
 'BsmtFinType1',
 'BsmtFinType2',
 'Heating',
 'HeatingQC',
 'CentralAir',
 'Electrical',
 'KitchenQual',
 'Functional',
 'FireplaceQu',
 'GarageType',
 'GarageFinish',
 'GarageQual',
 'GarageCond',
 'PavedDrive',
 'PoolQC',
 'Fence',
 'MiscFeature',
 'SaleType',
 'SaleCondition']

In [30]:
# Number of columns still stored as object dtype after the missing-value handling
len(object1_columns)

43

In [31]:
# Perform label encoding for all columns of object data type.
# The loop walks object1_columns, the list rebuilt after the null handling, so it
# encodes exactly the columns that are object-typed at this point rather than
# relying on the list captured right after loading the data.
# A fresh LabelEncoder is fitted per column and its integer codes overwrite the
# original string values.
# NOTE(review): only the last fitted encoder stays bound to label_encoder, so the
# per-column mappings cannot be re-applied to new data — confirm this is acceptable.
from sklearn.preprocessing import LabelEncoder

for column in object1_columns:
    label_encoder = LabelEncoder()
    house_price[column] = label_encoder.fit_transform(house_price[column])

In [32]:
# Inspect the frame after label encoding: the former string columns now hold integer codes
house_price.head()

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,MasVnrArea,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,Heating,HeatingQC,CentralAir,Electrical,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,FireplaceQu,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,1,60,3,65.0,8450,1,1,3,3,0,4,0,5,2,2,0,5,7,5,2003,2003,1,1,12,13,1,196.0,2,4,2,2,4,4,2,706,6,0,150,856,1,0,1,5,856,854,0,1710,1,0,2,1,3,1,2,8,6,0,3,1,2003.0,2,2,548,5,5,2,0,61,0,0,0,0,3,2,1,0,2,2008,8,4,208500
1,2,20,3,80.0,9600,1,1,3,3,0,2,0,24,1,2,0,2,6,8,1976,1976,1,1,8,8,2,0.0,3,4,1,2,4,1,0,978,6,0,284,1262,1,0,1,5,1262,0,0,1262,0,1,2,0,3,1,3,6,6,1,5,1,1976.0,2,2,460,5,5,2,298,0,0,0,0,0,3,2,1,0,5,2007,8,4,181500
2,3,60,3,68.0,11250,1,1,0,3,0,4,0,5,2,2,0,5,7,5,2001,2002,1,1,12,13,1,162.0,2,4,2,2,4,3,2,486,6,0,434,920,1,0,1,5,920,866,0,1786,1,0,2,1,3,1,2,6,6,1,5,1,2001.0,2,2,608,5,5,2,0,42,0,0,0,0,3,2,1,0,9,2008,8,4,223500
3,4,70,3,60.0,9550,1,1,0,3,0,0,0,6,2,2,0,5,7,5,1915,1970,1,1,13,15,2,0.0,3,4,0,4,1,4,0,216,6,0,540,756,1,2,1,5,961,756,0,1717,1,0,1,0,3,1,2,7,6,1,2,5,1998.0,3,3,642,5,5,2,0,35,272,0,0,0,3,2,1,0,2,2006,8,0,140000
4,5,60,3,84.0,14260,1,1,0,3,0,2,0,15,2,2,0,5,8,5,2000,2000,1,1,12,13,1,350.0,2,4,2,2,4,0,2,655,6,0,490,1145,1,0,1,5,1145,1053,0,2198,1,0,2,1,4,1,2,9,6,1,5,1,2000.0,2,3,836,5,5,2,192,84,0,0,0,0,3,2,1,0,12,2008,8,4,250000


##### Preprocessing before model creation

In [33]:
# Shape before dropping the identifier column
house_price.shape

(1460, 81)

In [34]:
# Remove the 'Id' column before modelling, then show the resulting shape
# NOTE(review): re-running this cell raises KeyError because 'Id' is already gone.
house_price = house_price.drop(columns='Id')
house_price.shape

(1460, 80)

In [35]:
# Pairwise correlation between all columns (Pearson, which is pandas' default method)
cor = house_price.corr(method='pearson')
cor

Unnamed: 0,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,MasVnrArea,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,Heating,HeatingQC,CentralAir,Electrical,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,FireplaceQu,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
MSSubClass,1.0,0.0359,-0.356718,-0.139781,-0.024969,0.09943,0.119289,-0.00294,-0.022844,0.07591,-0.025672,-0.005985,-0.024762,-0.042395,0.746063,0.397161,0.032628,-0.059316,0.02785,0.040581,-0.117817,-0.031336,-0.089159,-0.137229,-0.02985,0.023573,-0.017161,-0.003254,0.058126,-0.081702,-0.017905,-0.066799,0.006437,-0.069836,0.037025,-0.065649,-0.140759,-0.238518,0.048009,0.02076,-0.101774,0.050054,-0.251758,0.307886,0.046474,0.074853,0.003491,-0.002333,0.131608,0.177354,-0.023438,0.281721,0.010129,0.04038,0.003711,-0.045569,0.0502,0.133138,0.039567,-0.028806,-0.04011,-0.098672,-0.038571,-0.045895,-0.059925,-0.012579,-0.0061,-0.012037,-0.043825,-0.02603,0.008283,-0.005221,-0.02551,-0.046881,-0.007683,-0.013585,-0.021407,0.012464,-0.02494,-0.084284
MSZoning,0.0359,1.0,-0.105961,-0.034452,0.087654,-0.368086,0.061887,-0.017854,-0.001192,-0.009895,-0.022055,-0.249679,-0.027874,0.044606,0.00569,-0.105315,-0.160099,0.186951,-0.308908,-0.174728,-0.000301,0.005133,-0.008558,0.006963,-0.028279,-0.06283,0.200536,-0.096041,-0.235174,0.160778,-0.01145,0.029663,0.026347,-0.049128,-0.034004,0.028086,-0.046749,-0.087834,0.056866,0.134279,-0.049523,-0.07185,-0.055614,-0.051481,0.01152,-0.082615,-0.018526,0.007193,-0.19829,-0.133876,-0.016471,0.049434,0.128976,-0.0432,-0.095722,-0.011349,-0.002491,0.141142,-0.258899,0.117559,-0.157042,-0.18925,-0.162998,-0.102744,-0.100366,-0.009477,-0.154704,0.115509,0.000362,0.019089,-0.003128,0.002882,0.01271,-0.005553,0.009293,-0.031496,-0.020628,0.097437,0.009494,-0.166872
LotFrontage,-0.356718,-0.105961,1.0,0.304522,-0.037349,-0.065084,-0.138877,-0.07608,-0.001027,-0.177821,0.065603,0.084901,-0.009341,0.002599,-0.408013,0.030567,0.234812,-0.053281,0.116685,0.083348,0.153999,0.081277,0.075455,0.099515,-0.017693,0.178469,-0.18412,0.0483,0.098418,-0.140529,0.055032,-0.122454,-0.021146,0.214367,-0.016458,0.042463,0.124098,0.363472,-0.020325,-0.081081,0.06735,0.059966,0.413773,0.072388,0.037469,0.368007,0.090343,-0.006979,0.180534,0.047222,0.23684,-0.004905,-0.190232,0.320518,0.030114,0.233221,-0.019043,-0.253812,0.088433,-0.154697,0.269539,0.323511,0.09325,0.090704,0.088121,0.075542,0.137014,0.010287,0.061945,0.037655,0.180819,-0.191902,-0.023818,0.008677,-0.000255,0.010451,0.00638,-0.031412,0.058857,0.334771
LotArea,-0.139781,-0.034452,0.304522,1.0,-0.197131,-0.029676,-0.165315,-0.149083,0.010123,-0.121161,0.436868,0.044569,0.023846,0.022164,-0.205721,-0.03319,0.105806,-0.005636,0.014228,0.013788,0.077054,0.149837,0.004256,-0.00393,-0.008122,0.103321,-0.057275,0.01516,-0.011081,-0.061495,0.016715,-0.15116,-0.057081,0.214103,-0.056425,0.11117,-0.002618,0.260833,0.028158,0.004212,0.049755,0.04446,0.299475,0.050986,0.004779,0.263116,0.158155,0.048046,0.126031,0.014259,0.11969,-0.017784,-0.057018,0.190015,-0.022317,0.271364,0.005323,-0.150162,0.003669,-0.087669,0.154871,0.180403,0.053079,0.065741,0.015134,0.171698,0.084774,-0.01834,0.020423,0.04316,0.077672,-0.065167,-0.022092,0.106135,0.038068,0.001205,-0.014261,0.012292,0.034169,0.263843
Street,-0.024969,0.087654,-0.037349,-0.197131,1.0,-0.001587,-0.010224,0.115995,0.001682,0.01396,-0.17936,-0.011561,-0.071657,0.002039,-0.018243,0.023704,0.058823,0.042848,0.021137,0.065465,-0.019732,0.008081,0.002505,0.006166,0.010007,0.017108,0.049976,0.005874,0.035277,-0.03059,-0.020903,0.075439,-0.014746,-0.015643,0.058167,-0.038487,0.035229,0.005068,0.007904,-0.053995,0.069869,0.020759,0.00595,0.046983,0.007724,0.044121,-0.050524,0.015485,0.046471,0.027628,0.028865,0.013583,-0.025307,0.046828,-0.016444,-0.005348,-0.004348,-0.017098,0.013046,0.002339,-0.020025,-0.047794,0.003559,0.007713,0.024521,-0.01799,-0.005664,0.023082,0.007473,-0.03316,0.004413,-0.004067,0.000531,-0.160748,-0.022733,0.00369,-0.025043,0.014339,0.006064,0.041036
Alley,0.09943,-0.368086,-0.065084,-0.029676,-0.001587,1.0,-0.04192,0.023263,0.000647,-0.008167,0.015513,0.038922,0.073596,0.000784,0.161375,0.052204,0.116905,-0.09824,0.223182,0.130684,0.01214,0.003107,-0.008051,-0.023992,-0.014029,0.081722,-0.111324,0.06228,0.165573,-0.086908,0.043999,-0.021396,-0.044687,0.038292,0.025794,-0.015963,0.022355,0.056456,-0.043471,-0.064949,0.104749,0.079266,0.023936,0.004484,0.036846,0.024743,0.025543,-0.028541,0.095105,0.073401,-0.057889,-0.032155,-0.042799,-0.052932,0.018898,0.002202,0.001855,-0.060536,0.18216,-0.064758,0.065771,0.056943,0.123545,0.126092,0.09788,-0.015284,0.037247,-0.065148,0.002873,0.003621,0.001697,-0.001564,-0.02819,-0.054428,-0.013881,-0.011274,0.015176,0.001092,-0.000162,0.083121
LotShape,0.119289,0.061887,-0.138877,-0.165315,-0.010224,-0.04192,1.0,0.085434,-0.036101,0.221102,-0.099951,-0.038894,-0.115003,-0.043768,0.116262,-0.104026,-0.190497,0.013693,-0.226062,-0.153825,0.003182,-0.071174,-0.020463,-0.027951,0.007946,-0.106178,0.148818,-0.029497,-0.135124,0.176076,-0.033266,0.128489,0.086757,-0.134033,0.008225,-0.039738,-0.021666,-0.175782,0.075894,0.096248,-0.115256,-0.098822,-0.172613,-0.060205,0.020626,-0.175093,-0.076178,-0.026824,-0.15939,-0.111072,-0.050214,0.09252,0.122182,-0.11925,-0.029321,-0.194285,-0.062338,0.196949,-0.193256,0.189653,-0.183906,-0.160639,-0.118417,-0.096292,-0.113698,-0.166946,-0.075412,0.078213,-0.036459,-0.053054,-0.020051,0.018156,0.003692,-0.00809,-0.042061,-0.033455,0.036449,-0.000911,-0.038118,-0.25558
LandContour,-0.00294,-0.017854,-0.07608,-0.149083,0.115995,0.023263,0.085434,1.0,0.008238,-0.025527,-0.374267,0.019116,0.024801,-0.016185,0.051143,0.075234,0.028907,-0.045271,0.161622,0.093374,-0.004246,-0.020229,-0.011809,-0.034082,-0.076066,0.048274,-0.003613,0.009804,0.053478,-0.020063,0.018977,0.050187,-0.076248,-0.018097,-0.030834,0.008595,0.020694,0.005189,0.015746,-0.066276,0.10541,0.084315,-0.042103,-0.034245,-0.098352,-0.068523,0.009773,0.019061,0.050363,0.029727,-0.040851,-0.038959,0.030813,-0.053458,0.036113,-0.052239,0.051842,-0.109807,0.144358,-0.039882,0.045769,0.047626,0.041857,0.043414,0.140921,-0.002879,0.040676,-0.058742,-0.021404,0.003836,-0.013098,0.000731,-0.024112,0.011668,0.020912,-0.011599,0.020507,-0.025754,0.033809,0.015453
Utilities,-0.022844,-0.001192,-0.001027,0.010123,0.001682,0.000647,-0.036101,0.008238,1.0,-0.032589,-0.005909,0.046809,-0.00095,-0.000831,-0.010778,0.054283,-0.001881,0.009994,-0.011505,-0.03408,-0.012868,-0.003293,-0.029686,-0.0321,-0.032501,0.063452,0.017369,0.009535,-0.014377,0.026122,0.008519,0.017056,-0.013377,-0.0191,-0.012801,0.049913,-0.012639,-0.014233,-0.003221,0.00695,0.006907,-0.09059,0.012287,-0.020818,-0.003148,-0.008545,-0.021467,0.10338,-0.026862,-0.019939,0.004288,-0.005536,-0.010717,0.007769,0.006702,0.015721,-0.024398,-0.006579,-0.018425,0.00271,0.008161,0.006372,0.008263,0.007732,0.007586,-0.019692,0.028199,-0.009407,-0.003046,0.102365,-0.001798,0.001657,-0.000216,-0.004869,-0.002296,-0.051552,0.023353,-0.12677,-0.089701,-0.014314
LotConfig,0.07591,-0.009895,-0.177821,-0.121161,0.01396,-0.008167,0.221102,-0.025527,-0.032589,1.0,-0.007256,-0.036597,0.021457,0.033868,0.107229,-0.032945,-0.031086,-0.030788,0.013629,-0.005161,-0.010364,-0.068465,0.023316,0.005546,0.001826,-0.031639,-0.002503,0.034898,-0.011755,0.01477,0.030091,0.009063,0.014221,-0.023804,-0.004726,-0.010357,-0.011411,-0.040049,-2.4e-05,-0.010217,-0.003729,-0.026923,-0.062414,-0.051556,-0.004992,-0.089208,-0.012951,-0.009927,-0.007531,-0.025804,-0.056574,-0.0025,-0.010437,-0.049246,-0.021119,-0.061043,-0.031279,0.030678,-0.013668,0.015048,-0.066967,-0.064023,0.003261,0.007176,-0.045058,-0.035635,-0.054614,-0.070429,-0.030479,-0.004657,-0.046798,0.054786,0.046504,-0.015821,-0.018427,0.018902,-0.005992,0.014325,0.051579,-0.067396


In [36]:
# Checking the column dtypes again: the label-encoded columns are now integer-typed
# and no column reports missing values
house_price.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1460 entries, 0 to 1459
Data columns (total 80 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   MSSubClass     1460 non-null   int64  
 1   MSZoning       1460 non-null   int32  
 2   LotFrontage    1460 non-null   float64
 3   LotArea        1460 non-null   int64  
 4   Street         1460 non-null   int32  
 5   Alley          1460 non-null   int32  
 6   LotShape       1460 non-null   int32  
 7   LandContour    1460 non-null   int32  
 8   Utilities      1460 non-null   int32  
 9   LotConfig      1460 non-null   int32  
 10  LandSlope      1460 non-null   int32  
 11  Neighborhood   1460 non-null   int32  
 12  Condition1     1460 non-null   int32  
 13  Condition2     1460 non-null   int32  
 14  BldgType       1460 non-null   int32  
 15  HouseStyle     1460 non-null   int32  
 16  OverallQual    1460 non-null   int64  
 17  OverallCond    1460 non-null   int64  
 18  YearBuil

In [37]:
# Preview the fully numeric frame that goes into the train/test split
house_price.head()

Unnamed: 0,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,MasVnrArea,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,Heating,HeatingQC,CentralAir,Electrical,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,FireplaceQu,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,60,3,65.0,8450,1,1,3,3,0,4,0,5,2,2,0,5,7,5,2003,2003,1,1,12,13,1,196.0,2,4,2,2,4,4,2,706,6,0,150,856,1,0,1,5,856,854,0,1710,1,0,2,1,3,1,2,8,6,0,3,1,2003.0,2,2,548,5,5,2,0,61,0,0,0,0,3,2,1,0,2,2008,8,4,208500
1,20,3,80.0,9600,1,1,3,3,0,2,0,24,1,2,0,2,6,8,1976,1976,1,1,8,8,2,0.0,3,4,1,2,4,1,0,978,6,0,284,1262,1,0,1,5,1262,0,0,1262,0,1,2,0,3,1,3,6,6,1,5,1,1976.0,2,2,460,5,5,2,298,0,0,0,0,0,3,2,1,0,5,2007,8,4,181500
2,60,3,68.0,11250,1,1,0,3,0,4,0,5,2,2,0,5,7,5,2001,2002,1,1,12,13,1,162.0,2,4,2,2,4,3,2,486,6,0,434,920,1,0,1,5,920,866,0,1786,1,0,2,1,3,1,2,6,6,1,5,1,2001.0,2,2,608,5,5,2,0,42,0,0,0,0,3,2,1,0,9,2008,8,4,223500
3,70,3,60.0,9550,1,1,0,3,0,0,0,6,2,2,0,5,7,5,1915,1970,1,1,13,15,2,0.0,3,4,0,4,1,4,0,216,6,0,540,756,1,2,1,5,961,756,0,1717,1,0,1,0,3,1,2,7,6,1,2,5,1998.0,3,3,642,5,5,2,0,35,272,0,0,0,3,2,1,0,2,2006,8,0,140000
4,60,3,84.0,14260,1,1,0,3,0,2,0,15,2,2,0,5,8,5,2000,2000,1,1,12,13,1,350.0,2,4,2,2,4,0,2,655,6,0,490,1145,1,0,1,5,1145,1053,0,2198,1,0,2,1,4,1,2,9,6,1,5,1,2000.0,2,3,836,5,5,2,192,84,0,0,0,0,3,2,1,0,12,2008,8,4,250000


##### Performing the train and test split

In [38]:
# Confirm the row/column count before splitting
house_price.shape

(1460, 80)

In [39]:
# Split the rows 70/30 into train and test partitions;
# random_state pins the shuffle so the split is reproducible
from sklearn.model_selection import train_test_split
house_price_train, house_price_test = train_test_split(
    house_price,
    train_size=0.7,
    random_state=100,
)
print(house_price_train.shape, house_price_test.shape, sep='\n')

(1021, 80)
(439, 80)


In [40]:
# Peek at the training partition; the index still carries the original row labels
house_price_train.head()

Unnamed: 0,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,MasVnrArea,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,Heating,HeatingQC,CentralAir,Electrical,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,FireplaceQu,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
318,60,3,90.0,9900,1,1,3,2,0,4,1,15,2,2,0,5,7,5,1993,1993,1,1,6,6,1,256.0,2,4,2,2,4,1,2,987,6,0,360,1347,1,0,1,5,1372,1274,0,2646,1,0,2,1,4,1,2,9,6,1,5,1,1993.0,2,3,656,5,5,2,340,60,144,0,0,0,3,2,1,0,4,2009,8,4,260000
239,50,3,52.0,8741,1,1,3,3,0,4,0,7,2,2,0,0,6,4,1945,1950,1,1,12,13,2,0.0,3,4,1,4,0,4,3,94,6,0,641,735,1,4,1,0,798,689,0,1487,0,0,1,1,3,1,3,7,6,1,2,5,1949.0,3,1,220,5,5,2,0,140,0,0,0,0,3,3,1,0,4,2010,8,4,113000
986,50,4,59.0,5310,1,1,3,3,0,0,0,17,1,2,0,0,6,8,1910,2003,3,1,12,13,2,0.0,3,2,1,4,0,4,6,0,6,0,485,485,1,2,1,5,1001,634,0,1635,0,0,1,0,2,1,2,5,6,0,3,1,1950.0,3,1,255,1,5,2,394,0,0,0,0,0,3,2,1,0,6,2006,8,4,117000
1416,190,4,60.0,11340,1,1,3,3,0,4,0,17,2,2,1,5,4,6,1885,1950,1,1,12,0,2,0.0,3,4,2,4,4,4,6,0,6,0,777,777,1,2,1,5,1246,1044,0,2290,0,0,2,0,4,2,3,11,6,0,3,5,1971.0,3,2,560,5,5,0,0,0,114,0,0,0,3,2,1,0,4,2010,8,4,122500
390,50,3,50.0,8405,1,0,3,3,0,4,0,7,2,2,0,0,5,8,1900,1950,1,1,8,8,2,0.0,3,4,0,4,1,4,5,241,1,391,229,861,1,0,1,5,961,406,0,1367,1,0,1,0,4,1,3,7,6,0,3,5,1978.0,3,1,384,5,5,2,0,130,112,0,0,0,3,3,1,0,4,2008,8,4,119000


##### Scaling the data for train set

In [41]:
# Fit a min-max scaler on the training partition only, then rescale that partition
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler().fit(house_price_train)
# NOTE(review): the whole frame is passed in, so the target column 'SalePrice' is
# rescaled together with the predictors. The scaler returns a NumPy array, which
# means column names and the original index are not carried over — confirm both
# are intended.
house_price_train = scaler.transform(house_price_train)


In [42]:
# Wrap the scaled values back into a DataFrame so they can be inspected.
# Columns are positional integers at this point; the original names are
# restored in a following cell.
house_price_train = pd.DataFrame(house_price_train)
house_price_train.head()


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79
0,0.235294,0.75,0.236301,0.039403,1.0,0.5,1.0,0.666667,0.0,1.0,0.5,0.625,0.25,0.285714,0.0,0.714286,0.666667,0.5,0.876812,0.716667,0.2,0.142857,0.428571,0.4,0.333333,0.16,0.666667,1.0,0.4,0.5,1.0,0.25,0.333333,0.174876,1.0,0.0,0.15411,0.220458,0.2,0.0,1.0,1.0,0.231481,0.616949,0.0,0.424289,0.333333,0.0,0.666667,0.5,0.5,0.0,0.666667,0.545455,1.0,0.333333,1.0,0.166667,0.876812,0.666667,0.75,0.462623,1.0,1.0,1.0,0.396733,0.114723,0.26087,0.0,0.0,0.0,1.0,0.5,0.333333,0.0,0.272727,0.75,1.0,0.8,0.312595
1,0.176471,0.75,0.106164,0.033981,1.0,0.5,1.0,1.0,0.0,1.0,0.0,0.291667,0.25,0.285714,0.0,0.0,0.555556,0.375,0.528986,0.0,0.2,0.142857,0.857143,0.866667,0.666667,0.0,1.0,1.0,0.2,1.0,0.0,1.0,0.5,0.016655,1.0,0.0,0.274401,0.120295,0.2,1.0,1.0,0.0,0.098611,0.333656,0.0,0.201576,0.0,0.0,0.333333,0.5,0.375,0.0,1.0,0.363636,1.0,0.333333,0.4,0.833333,0.557971,1.0,0.25,0.155148,1.0,1.0,1.0,0.0,0.267686,0.0,0.0,0.0,0.0,1.0,0.75,0.333333,0.0,0.272727,1.0,1.0,0.8,0.108457
2,0.176471,1.0,0.130137,0.017931,1.0,0.5,1.0,1.0,0.0,0.0,0.0,0.708333,0.125,0.285714,0.0,0.0,0.555556,0.875,0.275362,0.883333,0.6,0.142857,0.857143,0.866667,0.666667,0.0,1.0,0.5,0.2,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.20762,0.079378,0.2,0.5,1.0,1.0,0.145602,0.307022,0.0,0.230015,0.0,0.0,0.333333,0.0,0.25,0.0,0.666667,0.181818,1.0,0.0,0.6,0.166667,0.565217,1.0,0.25,0.179831,0.2,1.0,1.0,0.459743,0.0,0.0,0.0,0.0,0.0,1.0,0.5,0.333333,0.0,0.454545,0.0,1.0,0.8,0.114012
3,1.0,1.0,0.133562,0.046139,1.0,0.5,1.0,1.0,0.0,1.0,0.0,0.708333,0.25,0.285714,0.25,0.714286,0.333333,0.625,0.094203,0.0,0.2,0.142857,0.857143,0.0,0.666667,0.0,1.0,1.0,0.4,1.0,1.0,1.0,1.0,0.0,1.0,0.0,0.33262,0.127169,0.2,0.5,1.0,1.0,0.202315,0.505569,0.0,0.35588,0.0,0.0,0.666667,0.0,0.5,0.5,1.0,0.727273,1.0,0.0,0.6,0.833333,0.717391,1.0,0.5,0.394922,1.0,1.0,0.0,0.0,0.0,0.206522,0.0,0.0,0.0,1.0,0.5,0.333333,0.0,0.272727,1.0,1.0,0.8,0.12165
4,0.176471,0.75,0.099315,0.032409,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.291667,0.25,0.285714,0.0,0.0,0.444444,0.875,0.202899,0.0,0.2,0.142857,0.571429,0.533333,0.666667,0.0,1.0,1.0,0.0,1.0,0.25,1.0,0.833333,0.0427,0.166667,0.265265,0.098031,0.140917,0.2,0.0,1.0,1.0,0.136343,0.19661,0.0,0.178517,0.333333,0.0,0.333333,0.0,0.5,0.0,1.0,0.363636,1.0,0.0,0.6,0.833333,0.768116,1.0,0.25,0.270804,1.0,1.0,1.0,0.0,0.248566,0.202899,0.0,0.0,0.0,1.0,0.75,0.333333,0.0,0.272727,0.5,1.0,0.8,0.116789


##### Creating the model

In [43]:
# Column labels of `house_price`; these are re-applied to the scaled train/test frames below
house_price.columns

Index(['MSSubClass', 'MSZoning', 'LotFrontage', 'LotArea', 'Street', 'Alley',
       'LotShape', 'LandContour', 'Utilities', 'LotConfig', 'LandSlope',
       'Neighborhood', 'Condition1', 'Condition2', 'BldgType', 'HouseStyle',
       'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd', 'RoofStyle',
       'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType', 'MasVnrArea',
       'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual', 'BsmtCond',
       'BsmtExposure', 'BsmtFinType1', 'BsmtFinSF1', 'BsmtFinType2',
       'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', 'Heating', 'HeatingQC',
       'CentralAir', 'Electrical', '1stFlrSF', '2ndFlrSF', 'LowQualFinSF',
       'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath', 'HalfBath',
       'BedroomAbvGr', 'KitchenAbvGr', 'KitchenQual', 'TotRmsAbvGrd',
       'Functional', 'Fireplaces', 'FireplaceQu', 'GarageType', 'GarageYrBlt',
       'GarageFinish', 'GarageCars', 'GarageArea', 'GarageQual', 'GarageCond',
       'PavedDrive', 'Wo

In [44]:
# The scaled training frame only carries positional column labels at this point
house_price_train.columns

RangeIndex(start=0, stop=80, step=1)

In [45]:
# Restore the original column names on the scaled training frame
# (assumes the scaled columns are still in the same order as house_price - TODO confirm)
house_price_train.columns = house_price.columns

In [46]:
# Check that the named columns are back on the scaled training frame
house_price_train.head()

Unnamed: 0,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,MasVnrArea,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,Heating,HeatingQC,CentralAir,Electrical,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,FireplaceQu,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,0.235294,0.75,0.236301,0.039403,1.0,0.5,1.0,0.666667,0.0,1.0,0.5,0.625,0.25,0.285714,0.0,0.714286,0.666667,0.5,0.876812,0.716667,0.2,0.142857,0.428571,0.4,0.333333,0.16,0.666667,1.0,0.4,0.5,1.0,0.25,0.333333,0.174876,1.0,0.0,0.15411,0.220458,0.2,0.0,1.0,1.0,0.231481,0.616949,0.0,0.424289,0.333333,0.0,0.666667,0.5,0.5,0.0,0.666667,0.545455,1.0,0.333333,1.0,0.166667,0.876812,0.666667,0.75,0.462623,1.0,1.0,1.0,0.396733,0.114723,0.26087,0.0,0.0,0.0,1.0,0.5,0.333333,0.0,0.272727,0.75,1.0,0.8,0.312595
1,0.176471,0.75,0.106164,0.033981,1.0,0.5,1.0,1.0,0.0,1.0,0.0,0.291667,0.25,0.285714,0.0,0.0,0.555556,0.375,0.528986,0.0,0.2,0.142857,0.857143,0.866667,0.666667,0.0,1.0,1.0,0.2,1.0,0.0,1.0,0.5,0.016655,1.0,0.0,0.274401,0.120295,0.2,1.0,1.0,0.0,0.098611,0.333656,0.0,0.201576,0.0,0.0,0.333333,0.5,0.375,0.0,1.0,0.363636,1.0,0.333333,0.4,0.833333,0.557971,1.0,0.25,0.155148,1.0,1.0,1.0,0.0,0.267686,0.0,0.0,0.0,0.0,1.0,0.75,0.333333,0.0,0.272727,1.0,1.0,0.8,0.108457
2,0.176471,1.0,0.130137,0.017931,1.0,0.5,1.0,1.0,0.0,0.0,0.0,0.708333,0.125,0.285714,0.0,0.0,0.555556,0.875,0.275362,0.883333,0.6,0.142857,0.857143,0.866667,0.666667,0.0,1.0,0.5,0.2,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.20762,0.079378,0.2,0.5,1.0,1.0,0.145602,0.307022,0.0,0.230015,0.0,0.0,0.333333,0.0,0.25,0.0,0.666667,0.181818,1.0,0.0,0.6,0.166667,0.565217,1.0,0.25,0.179831,0.2,1.0,1.0,0.459743,0.0,0.0,0.0,0.0,0.0,1.0,0.5,0.333333,0.0,0.454545,0.0,1.0,0.8,0.114012
3,1.0,1.0,0.133562,0.046139,1.0,0.5,1.0,1.0,0.0,1.0,0.0,0.708333,0.25,0.285714,0.25,0.714286,0.333333,0.625,0.094203,0.0,0.2,0.142857,0.857143,0.0,0.666667,0.0,1.0,1.0,0.4,1.0,1.0,1.0,1.0,0.0,1.0,0.0,0.33262,0.127169,0.2,0.5,1.0,1.0,0.202315,0.505569,0.0,0.35588,0.0,0.0,0.666667,0.0,0.5,0.5,1.0,0.727273,1.0,0.0,0.6,0.833333,0.717391,1.0,0.5,0.394922,1.0,1.0,0.0,0.0,0.0,0.206522,0.0,0.0,0.0,1.0,0.5,0.333333,0.0,0.272727,1.0,1.0,0.8,0.12165
4,0.176471,0.75,0.099315,0.032409,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.291667,0.25,0.285714,0.0,0.0,0.444444,0.875,0.202899,0.0,0.2,0.142857,0.571429,0.533333,0.666667,0.0,1.0,1.0,0.0,1.0,0.25,1.0,0.833333,0.0427,0.166667,0.265265,0.098031,0.140917,0.2,0.0,1.0,1.0,0.136343,0.19661,0.0,0.178517,0.333333,0.0,0.333333,0.0,0.5,0.0,1.0,0.363636,1.0,0.0,0.6,0.833333,0.768116,1.0,0.25,0.270804,1.0,1.0,1.0,0.0,0.248566,0.202899,0.0,0.0,0.0,1.0,0.75,0.333333,0.0,0.272727,0.5,1.0,0.8,0.116789


In [47]:
# Rows x columns of the scaled training frame (the SalePrice column is still included here)
house_price_train.shape

(1021, 80)

In [48]:
# Split the scaled training frame into features (X_train) and target (y_train).
# X_train is the same object as house_price_train, and pop() removes
# 'SalePrice' from it in place, so both names end up holding only the features.
X_train = house_price_train
y_train = X_train.pop('SalePrice')

print(y_train.shape)
print(X_train.shape)

(1021,)
(1021, 79)


In [49]:
# Coming up with the model
# Baseline: plain linear regression on every feature column of X_train,
# fitted against the scaled SalePrice target.
reg = LinearRegression()
reg.fit(X_train, y_train)


In [50]:
# Raw fitted coefficients; they are paired with X_train's column names in the next cell
reg.coef_

array([-2.42527351e-02, -8.50031495e-03, -4.24547741e-02,  9.55583137e-02,
        5.00428048e-02,  2.25409262e-03, -2.18418588e-03,  1.43026566e-02,
       -2.66039209e-02,  1.30524149e-03,  2.75048530e-02,  1.44119011e-02,
       -5.23659466e-03, -1.16632235e-01, -1.76866848e-02, -1.35946971e-02,
        1.41417536e-01,  5.84167659e-02,  3.86628516e-02,  7.02768434e-03,
        1.23584567e-02,  4.35048812e-02, -6.16027955e-03, -5.21529812e-03,
        4.97684309e-03,  7.52625996e-02, -3.41821376e-02,  1.01147578e-03,
        7.19538616e-03, -2.45062407e-02,  5.83754483e-03, -1.40959868e-02,
       -6.20239540e-03, -3.60747687e+11,  1.21688217e-02, -9.42136942e+10,
       -1.49310169e+11,  3.90533020e+11,  7.58425122e-03, -1.72118437e-03,
       -2.27157080e-03, -3.86879183e-03, -6.59848412e+11, -3.15413651e+11,
       -7.86624843e+10,  7.94872948e+11,  3.63622899e-02, -2.80415565e-03,
        5.82166326e-03,  2.69268469e-03, -2.34316341e-02, -3.27702252e-02,
       -3.38505310e-02,  

In [51]:
# Rank the linear-regression coefficients from largest to smallest.
# The sort is on the signed value, so strongly negative coefficients
# appear at the bottom of the table rather than next to the large positive ones.
column_names = X_train.columns
coefficients = reg.coef_

# One row per feature: its name and fitted coefficient, in descending order
result_df_model1_lr = (
    pd.DataFrame({'Column_Name': column_names, 'Coefficient': coefficients})
    .sort_values(by='Coefficient', ascending=False)
)

# Display the ranked table
result_df_model1_lr

Unnamed: 0,Column_Name,Coefficient
45,GrLivArea,794872900000.0
37,TotalBsmtSF,390533000000.0
16,OverallQual,0.1414175
3,LotArea,0.09555831
25,MasVnrArea,0.0752626
17,OverallCond,0.05841677
4,Street,0.0500428
60,GarageCars,0.04811188
21,RoofMatl,0.04350488
18,YearBuilt,0.03866285


In [52]:
# Predicting the values for the training set
# (in-sample predictions, in the same scaled units as y_train)
y_train_pred = reg.predict(X_train)
y_train_pred

array([0.365242  , 0.12903595, 0.17114449, ..., 0.36442757, 0.09407997,
       0.27964783])

In [53]:
# r2_score
# In-sample R2 of the linear model: training targets vs. training predictions
r2_score(y_train, y_train_pred)

0.8648903928564398

#### The goodness of fit looks decent for the train set

##### Predicting for test set

In [54]:
# After the pop() above, the training frame no longer contains the SalePrice column
house_price_train.head()

Unnamed: 0,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,MasVnrArea,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,Heating,HeatingQC,CentralAir,Electrical,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,FireplaceQu,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition
0,0.235294,0.75,0.236301,0.039403,1.0,0.5,1.0,0.666667,0.0,1.0,0.5,0.625,0.25,0.285714,0.0,0.714286,0.666667,0.5,0.876812,0.716667,0.2,0.142857,0.428571,0.4,0.333333,0.16,0.666667,1.0,0.4,0.5,1.0,0.25,0.333333,0.174876,1.0,0.0,0.15411,0.220458,0.2,0.0,1.0,1.0,0.231481,0.616949,0.0,0.424289,0.333333,0.0,0.666667,0.5,0.5,0.0,0.666667,0.545455,1.0,0.333333,1.0,0.166667,0.876812,0.666667,0.75,0.462623,1.0,1.0,1.0,0.396733,0.114723,0.26087,0.0,0.0,0.0,1.0,0.5,0.333333,0.0,0.272727,0.75,1.0,0.8
1,0.176471,0.75,0.106164,0.033981,1.0,0.5,1.0,1.0,0.0,1.0,0.0,0.291667,0.25,0.285714,0.0,0.0,0.555556,0.375,0.528986,0.0,0.2,0.142857,0.857143,0.866667,0.666667,0.0,1.0,1.0,0.2,1.0,0.0,1.0,0.5,0.016655,1.0,0.0,0.274401,0.120295,0.2,1.0,1.0,0.0,0.098611,0.333656,0.0,0.201576,0.0,0.0,0.333333,0.5,0.375,0.0,1.0,0.363636,1.0,0.333333,0.4,0.833333,0.557971,1.0,0.25,0.155148,1.0,1.0,1.0,0.0,0.267686,0.0,0.0,0.0,0.0,1.0,0.75,0.333333,0.0,0.272727,1.0,1.0,0.8
2,0.176471,1.0,0.130137,0.017931,1.0,0.5,1.0,1.0,0.0,0.0,0.0,0.708333,0.125,0.285714,0.0,0.0,0.555556,0.875,0.275362,0.883333,0.6,0.142857,0.857143,0.866667,0.666667,0.0,1.0,0.5,0.2,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.20762,0.079378,0.2,0.5,1.0,1.0,0.145602,0.307022,0.0,0.230015,0.0,0.0,0.333333,0.0,0.25,0.0,0.666667,0.181818,1.0,0.0,0.6,0.166667,0.565217,1.0,0.25,0.179831,0.2,1.0,1.0,0.459743,0.0,0.0,0.0,0.0,0.0,1.0,0.5,0.333333,0.0,0.454545,0.0,1.0,0.8
3,1.0,1.0,0.133562,0.046139,1.0,0.5,1.0,1.0,0.0,1.0,0.0,0.708333,0.25,0.285714,0.25,0.714286,0.333333,0.625,0.094203,0.0,0.2,0.142857,0.857143,0.0,0.666667,0.0,1.0,1.0,0.4,1.0,1.0,1.0,1.0,0.0,1.0,0.0,0.33262,0.127169,0.2,0.5,1.0,1.0,0.202315,0.505569,0.0,0.35588,0.0,0.0,0.666667,0.0,0.5,0.5,1.0,0.727273,1.0,0.0,0.6,0.833333,0.717391,1.0,0.5,0.394922,1.0,1.0,0.0,0.0,0.0,0.206522,0.0,0.0,0.0,1.0,0.5,0.333333,0.0,0.272727,1.0,1.0,0.8
4,0.176471,0.75,0.099315,0.032409,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.291667,0.25,0.285714,0.0,0.0,0.444444,0.875,0.202899,0.0,0.2,0.142857,0.571429,0.533333,0.666667,0.0,1.0,1.0,0.0,1.0,0.25,1.0,0.833333,0.0427,0.166667,0.265265,0.098031,0.140917,0.2,0.0,1.0,1.0,0.136343,0.19661,0.0,0.178517,0.333333,0.0,0.333333,0.0,0.5,0.0,1.0,0.363636,1.0,0.0,0.6,0.833333,0.768116,1.0,0.25,0.270804,1.0,1.0,1.0,0.0,0.248566,0.202899,0.0,0.0,0.0,1.0,0.75,0.333333,0.0,0.272727,0.5,1.0,0.8


In [55]:
# Taking a high level look at the test set data (not yet scaled at this point)
house_price_test.head()

Unnamed: 0,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,MasVnrArea,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,Heating,HeatingQC,CentralAir,Electrical,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,FireplaceQu,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
1436,20,3,60.0,9000,1,1,3,3,0,2,0,12,2,2,0,2,4,6,1971,1971,1,1,6,6,2,0.0,3,4,2,4,4,4,0,616,6,0,248,864,1,4,1,5,864,0,0,864,0,0,1,0,3,1,3,5,6,0,3,5,1974.0,3,2,528,5,5,2,0,0,0,0,0,0,3,1,1,0,5,2007,8,4,120500
57,60,3,89.0,11645,1,1,0,3,0,0,0,5,2,2,0,5,7,5,2004,2004,1,1,12,13,2,0.0,2,4,2,2,4,4,6,0,6,0,860,860,1,0,1,5,860,860,0,1720,0,0,2,1,3,1,2,7,6,0,3,1,2004.0,2,2,565,5,5,2,0,70,0,0,0,0,3,2,1,0,8,2006,8,4,196500
780,20,3,63.0,7875,1,1,3,3,0,4,0,8,2,2,0,2,7,5,1995,1996,1,1,6,6,1,38.0,3,4,2,2,1,4,6,0,6,0,1237,1237,1,2,1,5,1253,0,0,1253,0,0,2,0,3,1,3,6,6,1,5,1,1995.0,0,2,402,5,5,2,220,21,0,0,0,0,3,2,1,0,6,2007,8,4,176000
382,60,3,79.0,9245,1,1,0,3,0,4,0,5,2,2,0,5,7,5,2006,2006,1,1,12,13,2,0.0,2,4,2,2,4,0,6,0,6,0,939,939,1,0,1,5,939,858,0,1797,0,0,2,1,3,1,2,8,6,0,3,1,2006.0,2,2,639,5,5,2,144,53,0,0,0,0,3,2,1,0,4,2007,8,4,213500
1170,80,3,76.0,9880,1,1,3,3,0,4,0,11,2,2,0,7,6,6,1977,1977,1,1,9,10,2,0.0,3,4,1,4,4,0,0,522,6,0,574,1096,1,4,1,5,1118,0,0,1118,1,0,1,0,3,1,3,6,6,1,4,1,1977.0,0,1,358,5,5,2,203,0,0,0,0,576,2,0,1,0,7,2008,8,4,171000


In [56]:
# NOTE(review): this repeats the previous cell's preview of the test set; it adds no new information
house_price_test.head()

Unnamed: 0,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,MasVnrArea,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,Heating,HeatingQC,CentralAir,Electrical,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,FireplaceQu,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
1436,20,3,60.0,9000,1,1,3,3,0,2,0,12,2,2,0,2,4,6,1971,1971,1,1,6,6,2,0.0,3,4,2,4,4,4,0,616,6,0,248,864,1,4,1,5,864,0,0,864,0,0,1,0,3,1,3,5,6,0,3,5,1974.0,3,2,528,5,5,2,0,0,0,0,0,0,3,1,1,0,5,2007,8,4,120500
57,60,3,89.0,11645,1,1,0,3,0,0,0,5,2,2,0,5,7,5,2004,2004,1,1,12,13,2,0.0,2,4,2,2,4,4,6,0,6,0,860,860,1,0,1,5,860,860,0,1720,0,0,2,1,3,1,2,7,6,0,3,1,2004.0,2,2,565,5,5,2,0,70,0,0,0,0,3,2,1,0,8,2006,8,4,196500
780,20,3,63.0,7875,1,1,3,3,0,4,0,8,2,2,0,2,7,5,1995,1996,1,1,6,6,1,38.0,3,4,2,2,1,4,6,0,6,0,1237,1237,1,2,1,5,1253,0,0,1253,0,0,2,0,3,1,3,6,6,1,5,1,1995.0,0,2,402,5,5,2,220,21,0,0,0,0,3,2,1,0,6,2007,8,4,176000
382,60,3,79.0,9245,1,1,0,3,0,4,0,5,2,2,0,5,7,5,2006,2006,1,1,12,13,2,0.0,2,4,2,2,4,0,6,0,6,0,939,939,1,0,1,5,939,858,0,1797,0,0,2,1,3,1,2,8,6,0,3,1,2006.0,2,2,639,5,5,2,144,53,0,0,0,0,3,2,1,0,4,2007,8,4,213500
1170,80,3,76.0,9880,1,1,3,3,0,4,0,11,2,2,0,7,6,6,1977,1977,1,1,9,10,2,0.0,3,4,1,4,4,0,0,522,6,0,574,1096,1,4,1,5,1118,0,0,1118,1,0,1,0,3,1,3,6,6,1,4,1,1977.0,0,1,358,5,5,2,203,0,0,0,0,576,2,0,1,0,7,2008,8,4,171000


In [57]:
# Scaling the data for the test set
# Only transform() is called: the scaler fitted on the training split is reused,
# so the test set is not used to fit the scaler.
house_price_test = scaler.transform(house_price_test)


In [58]:
# Wrap the scaled test values in a DataFrame; columns are positional until renamed below
house_price_test = pd.DataFrame(house_price_test)
house_price_test.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79
0,0.0,0.75,0.133562,0.035192,1.0,0.5,1.0,1.0,0.0,0.5,0.0,0.5,0.25,0.285714,0.0,0.285714,0.333333,0.625,0.717391,0.35,0.2,0.142857,0.428571,0.4,0.666667,0.0,1.0,1.0,0.4,1.0,1.0,1.0,0.0,0.109142,1.0,0.0,0.106164,0.141408,0.2,1.0,1.0,1.0,0.113889,0.0,0.0,0.08186,0.0,0.0,0.333333,0.0,0.375,0.0,1.0,0.181818,1.0,0.0,0.6,0.833333,0.73913,1.0,0.5,0.372355,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.25,0.333333,0.0,0.363636,0.25,1.0,0.8,0.118872
1,0.235294,0.75,0.232877,0.047566,1.0,0.5,0.0,1.0,0.0,0.0,0.0,0.208333,0.25,0.285714,0.0,0.714286,0.666667,0.5,0.956522,0.9,0.2,0.142857,0.857143,0.866667,0.666667,0.0,0.666667,1.0,0.4,0.5,1.0,1.0,1.0,0.0,1.0,0.0,0.368151,0.140753,0.2,0.0,1.0,1.0,0.112963,0.416465,0.0,0.246349,0.0,0.0,0.666667,0.5,0.375,0.0,0.666667,0.363636,1.0,0.0,0.6,0.166667,0.956522,0.666667,0.5,0.398449,1.0,1.0,1.0,0.0,0.133843,0.0,0.0,0.0,0.0,1.0,0.5,0.333333,0.0,0.636364,0.0,1.0,0.8,0.224413
2,0.0,0.75,0.143836,0.02993,1.0,0.5,1.0,1.0,0.0,1.0,0.0,0.333333,0.25,0.285714,0.0,0.285714,0.666667,0.5,0.891304,0.766667,0.2,0.142857,0.428571,0.4,0.333333,0.02375,1.0,1.0,0.4,0.5,0.25,1.0,1.0,0.0,1.0,0.0,0.529538,0.202455,0.2,0.5,1.0,1.0,0.203935,0.0,0.0,0.15661,0.0,0.0,0.666667,0.0,0.375,0.0,1.0,0.272727,1.0,0.333333,1.0,0.166667,0.891304,0.0,0.5,0.283498,1.0,1.0,1.0,0.256709,0.040153,0.0,0.0,0.0,0.0,1.0,0.5,0.333333,0.0,0.454545,0.25,1.0,0.8,0.195945
3,0.235294,0.75,0.19863,0.036338,1.0,0.5,0.0,1.0,0.0,1.0,0.0,0.208333,0.25,0.285714,0.0,0.714286,0.666667,0.5,0.971014,0.933333,0.2,0.142857,0.857143,0.866667,0.666667,0.0,0.666667,1.0,0.4,0.5,1.0,0.0,1.0,0.0,1.0,0.0,0.401969,0.153682,0.2,0.0,1.0,1.0,0.13125,0.415496,0.0,0.261145,0.0,0.0,0.666667,0.5,0.375,0.0,0.666667,0.454545,1.0,0.0,0.6,0.166667,0.971014,0.666667,0.5,0.450635,1.0,1.0,1.0,0.168028,0.101338,0.0,0.0,0.0,0.0,1.0,0.5,0.333333,0.0,0.272727,0.25,1.0,0.8,0.248021
4,0.352941,0.75,0.188356,0.039309,1.0,0.5,1.0,1.0,0.0,1.0,0.0,0.458333,0.25,0.285714,0.0,1.0,0.555556,0.625,0.76087,0.45,0.2,0.142857,0.642857,0.666667,0.666667,0.0,1.0,1.0,0.2,1.0,1.0,0.0,0.0,0.092488,1.0,0.0,0.245719,0.179378,0.2,1.0,1.0,1.0,0.172685,0.0,0.0,0.130669,0.333333,0.0,0.333333,0.0,0.375,0.0,1.0,0.272727,1.0,0.333333,0.8,0.166667,0.76087,0.0,0.25,0.252468,1.0,1.0,1.0,0.236873,0.0,0.0,0.0,0.0,0.888889,0.666667,0.0,0.333333,0.0,0.545455,0.5,1.0,0.8,0.189002


In [59]:
# Restore the original column names on the scaled test frame
# (assumes the scaled columns are still in the same order as house_price - TODO confirm)
house_price_test.columns = house_price.columns

In [60]:
# Check that the named columns are back on the scaled test frame
house_price_test.head()

Unnamed: 0,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,MasVnrArea,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,Heating,HeatingQC,CentralAir,Electrical,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,FireplaceQu,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,0.0,0.75,0.133562,0.035192,1.0,0.5,1.0,1.0,0.0,0.5,0.0,0.5,0.25,0.285714,0.0,0.285714,0.333333,0.625,0.717391,0.35,0.2,0.142857,0.428571,0.4,0.666667,0.0,1.0,1.0,0.4,1.0,1.0,1.0,0.0,0.109142,1.0,0.0,0.106164,0.141408,0.2,1.0,1.0,1.0,0.113889,0.0,0.0,0.08186,0.0,0.0,0.333333,0.0,0.375,0.0,1.0,0.181818,1.0,0.0,0.6,0.833333,0.73913,1.0,0.5,0.372355,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.25,0.333333,0.0,0.363636,0.25,1.0,0.8,0.118872
1,0.235294,0.75,0.232877,0.047566,1.0,0.5,0.0,1.0,0.0,0.0,0.0,0.208333,0.25,0.285714,0.0,0.714286,0.666667,0.5,0.956522,0.9,0.2,0.142857,0.857143,0.866667,0.666667,0.0,0.666667,1.0,0.4,0.5,1.0,1.0,1.0,0.0,1.0,0.0,0.368151,0.140753,0.2,0.0,1.0,1.0,0.112963,0.416465,0.0,0.246349,0.0,0.0,0.666667,0.5,0.375,0.0,0.666667,0.363636,1.0,0.0,0.6,0.166667,0.956522,0.666667,0.5,0.398449,1.0,1.0,1.0,0.0,0.133843,0.0,0.0,0.0,0.0,1.0,0.5,0.333333,0.0,0.636364,0.0,1.0,0.8,0.224413
2,0.0,0.75,0.143836,0.02993,1.0,0.5,1.0,1.0,0.0,1.0,0.0,0.333333,0.25,0.285714,0.0,0.285714,0.666667,0.5,0.891304,0.766667,0.2,0.142857,0.428571,0.4,0.333333,0.02375,1.0,1.0,0.4,0.5,0.25,1.0,1.0,0.0,1.0,0.0,0.529538,0.202455,0.2,0.5,1.0,1.0,0.203935,0.0,0.0,0.15661,0.0,0.0,0.666667,0.0,0.375,0.0,1.0,0.272727,1.0,0.333333,1.0,0.166667,0.891304,0.0,0.5,0.283498,1.0,1.0,1.0,0.256709,0.040153,0.0,0.0,0.0,0.0,1.0,0.5,0.333333,0.0,0.454545,0.25,1.0,0.8,0.195945
3,0.235294,0.75,0.19863,0.036338,1.0,0.5,0.0,1.0,0.0,1.0,0.0,0.208333,0.25,0.285714,0.0,0.714286,0.666667,0.5,0.971014,0.933333,0.2,0.142857,0.857143,0.866667,0.666667,0.0,0.666667,1.0,0.4,0.5,1.0,0.0,1.0,0.0,1.0,0.0,0.401969,0.153682,0.2,0.0,1.0,1.0,0.13125,0.415496,0.0,0.261145,0.0,0.0,0.666667,0.5,0.375,0.0,0.666667,0.454545,1.0,0.0,0.6,0.166667,0.971014,0.666667,0.5,0.450635,1.0,1.0,1.0,0.168028,0.101338,0.0,0.0,0.0,0.0,1.0,0.5,0.333333,0.0,0.272727,0.25,1.0,0.8,0.248021
4,0.352941,0.75,0.188356,0.039309,1.0,0.5,1.0,1.0,0.0,1.0,0.0,0.458333,0.25,0.285714,0.0,1.0,0.555556,0.625,0.76087,0.45,0.2,0.142857,0.642857,0.666667,0.666667,0.0,1.0,1.0,0.2,1.0,1.0,0.0,0.0,0.092488,1.0,0.0,0.245719,0.179378,0.2,1.0,1.0,1.0,0.172685,0.0,0.0,0.130669,0.333333,0.0,0.333333,0.0,0.375,0.0,1.0,0.272727,1.0,0.333333,0.8,0.166667,0.76087,0.0,0.25,0.252468,1.0,1.0,1.0,0.236873,0.0,0.0,0.0,0.0,0.888889,0.666667,0.0,0.333333,0.0,0.545455,0.5,1.0,0.8,0.189002


In [61]:
# Split the scaled test frame into features (X_test) and target (y_test)
print(house_price_test.shape)

# X_test is the same object as house_price_test; pop() strips 'SalePrice' from it in place
X_test = house_price_test
y_test = X_test.pop('SalePrice')

print(X_test.shape, y_test.shape, sep='\n')

(439, 80)
(439, 79)
(439,)


In [62]:
# Preview of the test features (SalePrice has been removed)
X_test.head()

Unnamed: 0,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,MasVnrArea,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,Heating,HeatingQC,CentralAir,Electrical,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,FireplaceQu,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition
0,0.0,0.75,0.133562,0.035192,1.0,0.5,1.0,1.0,0.0,0.5,0.0,0.5,0.25,0.285714,0.0,0.285714,0.333333,0.625,0.717391,0.35,0.2,0.142857,0.428571,0.4,0.666667,0.0,1.0,1.0,0.4,1.0,1.0,1.0,0.0,0.109142,1.0,0.0,0.106164,0.141408,0.2,1.0,1.0,1.0,0.113889,0.0,0.0,0.08186,0.0,0.0,0.333333,0.0,0.375,0.0,1.0,0.181818,1.0,0.0,0.6,0.833333,0.73913,1.0,0.5,0.372355,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.25,0.333333,0.0,0.363636,0.25,1.0,0.8
1,0.235294,0.75,0.232877,0.047566,1.0,0.5,0.0,1.0,0.0,0.0,0.0,0.208333,0.25,0.285714,0.0,0.714286,0.666667,0.5,0.956522,0.9,0.2,0.142857,0.857143,0.866667,0.666667,0.0,0.666667,1.0,0.4,0.5,1.0,1.0,1.0,0.0,1.0,0.0,0.368151,0.140753,0.2,0.0,1.0,1.0,0.112963,0.416465,0.0,0.246349,0.0,0.0,0.666667,0.5,0.375,0.0,0.666667,0.363636,1.0,0.0,0.6,0.166667,0.956522,0.666667,0.5,0.398449,1.0,1.0,1.0,0.0,0.133843,0.0,0.0,0.0,0.0,1.0,0.5,0.333333,0.0,0.636364,0.0,1.0,0.8
2,0.0,0.75,0.143836,0.02993,1.0,0.5,1.0,1.0,0.0,1.0,0.0,0.333333,0.25,0.285714,0.0,0.285714,0.666667,0.5,0.891304,0.766667,0.2,0.142857,0.428571,0.4,0.333333,0.02375,1.0,1.0,0.4,0.5,0.25,1.0,1.0,0.0,1.0,0.0,0.529538,0.202455,0.2,0.5,1.0,1.0,0.203935,0.0,0.0,0.15661,0.0,0.0,0.666667,0.0,0.375,0.0,1.0,0.272727,1.0,0.333333,1.0,0.166667,0.891304,0.0,0.5,0.283498,1.0,1.0,1.0,0.256709,0.040153,0.0,0.0,0.0,0.0,1.0,0.5,0.333333,0.0,0.454545,0.25,1.0,0.8
3,0.235294,0.75,0.19863,0.036338,1.0,0.5,0.0,1.0,0.0,1.0,0.0,0.208333,0.25,0.285714,0.0,0.714286,0.666667,0.5,0.971014,0.933333,0.2,0.142857,0.857143,0.866667,0.666667,0.0,0.666667,1.0,0.4,0.5,1.0,0.0,1.0,0.0,1.0,0.0,0.401969,0.153682,0.2,0.0,1.0,1.0,0.13125,0.415496,0.0,0.261145,0.0,0.0,0.666667,0.5,0.375,0.0,0.666667,0.454545,1.0,0.0,0.6,0.166667,0.971014,0.666667,0.5,0.450635,1.0,1.0,1.0,0.168028,0.101338,0.0,0.0,0.0,0.0,1.0,0.5,0.333333,0.0,0.272727,0.25,1.0,0.8
4,0.352941,0.75,0.188356,0.039309,1.0,0.5,1.0,1.0,0.0,1.0,0.0,0.458333,0.25,0.285714,0.0,1.0,0.555556,0.625,0.76087,0.45,0.2,0.142857,0.642857,0.666667,0.666667,0.0,1.0,1.0,0.2,1.0,1.0,0.0,0.0,0.092488,1.0,0.0,0.245719,0.179378,0.2,1.0,1.0,1.0,0.172685,0.0,0.0,0.130669,0.333333,0.0,0.333333,0.0,0.375,0.0,1.0,0.272727,1.0,0.333333,0.8,0.166667,0.76087,0.0,0.25,0.252468,1.0,1.0,1.0,0.236873,0.0,0.0,0.0,0.0,0.888889,0.666667,0.0,0.333333,0.0,0.545455,0.5,1.0,0.8


In [63]:
# Preview of the test target, in scaled units
y_test.head()

0    0.118872
1    0.224413
2    0.195945
3    0.248021
4    0.189002
Name: SalePrice, dtype: float64

In [64]:
# Looking at the train and test set predictions and associated metrics
# (r2_score and mean_squared_error are already imported in the first cell,
# so they are not imported again here)

y_pred_train = reg.predict(X_train)
y_pred_test = reg.predict(X_test)

# `metric` collects, in this order:
#   R2 train, R2 test, RSS train, RSS test, RMSE train, RMSE test
metric = []

# R2 on the training and test sets
r2_train_lr = r2_score(y_train, y_pred_train)
print(r2_train_lr)
metric.append(r2_train_lr)

r2_test_lr = r2_score(y_test, y_pred_test)
print(r2_test_lr)
metric.append(r2_test_lr)

# Residual sum of squares on the training and test sets
rss1_lr = np.sum(np.square(y_train - y_pred_train))
print(rss1_lr)
metric.append(rss1_lr)

rss2_lr = np.sum(np.square(y_test - y_pred_test))
print(rss2_lr)
metric.append(rss2_lr)

# Note: the value printed is the MSE, but the value stored in `metric`
# is its square root (RMSE)
mse_train_lr = mean_squared_error(y_train, y_pred_train)
print(mse_train_lr)
metric.append(mse_train_lr**0.5)

mse_test_lr = mean_squared_error(y_test, y_pred_test)
print(mse_test_lr)
metric.append(mse_test_lr**0.5)

0.8648903928564398
0.6857195045647211
1.662829207104592
1.7126228051948897
0.0016286280187116475
0.0039011909002161496


##### The R² score looks good for the training set (~0.86). However, it drops to ~0.69 on the test set, so there is room to improve how well the model generalises.

#### Ridge Regression

In [65]:
# Preview of the training features that the Ridge model will be fitted on
X_train.head()

Unnamed: 0,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,MasVnrArea,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,Heating,HeatingQC,CentralAir,Electrical,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,FireplaceQu,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition
0,0.235294,0.75,0.236301,0.039403,1.0,0.5,1.0,0.666667,0.0,1.0,0.5,0.625,0.25,0.285714,0.0,0.714286,0.666667,0.5,0.876812,0.716667,0.2,0.142857,0.428571,0.4,0.333333,0.16,0.666667,1.0,0.4,0.5,1.0,0.25,0.333333,0.174876,1.0,0.0,0.15411,0.220458,0.2,0.0,1.0,1.0,0.231481,0.616949,0.0,0.424289,0.333333,0.0,0.666667,0.5,0.5,0.0,0.666667,0.545455,1.0,0.333333,1.0,0.166667,0.876812,0.666667,0.75,0.462623,1.0,1.0,1.0,0.396733,0.114723,0.26087,0.0,0.0,0.0,1.0,0.5,0.333333,0.0,0.272727,0.75,1.0,0.8
1,0.176471,0.75,0.106164,0.033981,1.0,0.5,1.0,1.0,0.0,1.0,0.0,0.291667,0.25,0.285714,0.0,0.0,0.555556,0.375,0.528986,0.0,0.2,0.142857,0.857143,0.866667,0.666667,0.0,1.0,1.0,0.2,1.0,0.0,1.0,0.5,0.016655,1.0,0.0,0.274401,0.120295,0.2,1.0,1.0,0.0,0.098611,0.333656,0.0,0.201576,0.0,0.0,0.333333,0.5,0.375,0.0,1.0,0.363636,1.0,0.333333,0.4,0.833333,0.557971,1.0,0.25,0.155148,1.0,1.0,1.0,0.0,0.267686,0.0,0.0,0.0,0.0,1.0,0.75,0.333333,0.0,0.272727,1.0,1.0,0.8
2,0.176471,1.0,0.130137,0.017931,1.0,0.5,1.0,1.0,0.0,0.0,0.0,0.708333,0.125,0.285714,0.0,0.0,0.555556,0.875,0.275362,0.883333,0.6,0.142857,0.857143,0.866667,0.666667,0.0,1.0,0.5,0.2,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.20762,0.079378,0.2,0.5,1.0,1.0,0.145602,0.307022,0.0,0.230015,0.0,0.0,0.333333,0.0,0.25,0.0,0.666667,0.181818,1.0,0.0,0.6,0.166667,0.565217,1.0,0.25,0.179831,0.2,1.0,1.0,0.459743,0.0,0.0,0.0,0.0,0.0,1.0,0.5,0.333333,0.0,0.454545,0.0,1.0,0.8
3,1.0,1.0,0.133562,0.046139,1.0,0.5,1.0,1.0,0.0,1.0,0.0,0.708333,0.25,0.285714,0.25,0.714286,0.333333,0.625,0.094203,0.0,0.2,0.142857,0.857143,0.0,0.666667,0.0,1.0,1.0,0.4,1.0,1.0,1.0,1.0,0.0,1.0,0.0,0.33262,0.127169,0.2,0.5,1.0,1.0,0.202315,0.505569,0.0,0.35588,0.0,0.0,0.666667,0.0,0.5,0.5,1.0,0.727273,1.0,0.0,0.6,0.833333,0.717391,1.0,0.5,0.394922,1.0,1.0,0.0,0.0,0.0,0.206522,0.0,0.0,0.0,1.0,0.5,0.333333,0.0,0.272727,1.0,1.0,0.8
4,0.176471,0.75,0.099315,0.032409,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.291667,0.25,0.285714,0.0,0.0,0.444444,0.875,0.202899,0.0,0.2,0.142857,0.571429,0.533333,0.666667,0.0,1.0,1.0,0.0,1.0,0.25,1.0,0.833333,0.0427,0.166667,0.265265,0.098031,0.140917,0.2,0.0,1.0,1.0,0.136343,0.19661,0.0,0.178517,0.333333,0.0,0.333333,0.0,0.5,0.0,1.0,0.363636,1.0,0.0,0.6,0.833333,0.768116,1.0,0.25,0.270804,1.0,1.0,1.0,0.0,0.248566,0.202899,0.0,0.0,0.0,1.0,0.75,0.333333,0.0,0.272727,0.5,1.0,0.8


In [66]:
# Candidate values of alpha (the regularisation strength) to tune.
# Too high an alpha leads to underfitting; too low an alpha leads to overfitting.
# (Ridge, Lasso and GridSearchCV are already imported in the first cell,
# so they are not imported again here.)
params = {'alpha': [0.0001, 0.001, 0.01, 0.05, 0.1,
                    0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 2.0, 3.0,
                    4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 20, 50, 100, 500, 1000]}

ridge = Ridge()

# Cross-validated grid search: each alpha is scored by (negated) mean
# absolute error over `folds` folds of the training data
folds = 5
model_cv = GridSearchCV(estimator=ridge,
                        param_grid=params,
                        scoring='neg_mean_absolute_error',
                        cv=folds,
                        return_train_score=True,
                        verbose=1)
model_cv.fit(X_train, y_train)

Fitting 5 folds for each of 28 candidates, totalling 140 fits


In [67]:
# Printing the best hyperparameter alpha found by the grid search;
# this is the value carried into the final Ridge fit below.
print(model_cv.best_params_)

{'alpha': 0.01}


In [68]:
# Refit Ridge on the training set with the alpha selected by the grid search
# (0.01 on this data). Reading it from `model_cv.best_params_` instead of
# hand-copying the printed value keeps the model in sync with the search result.
alpha = model_cv.best_params_['alpha']
ridge = Ridge(alpha=alpha)

ridge.fit(X_train, y_train)
# Print the penalised (shrunken) coefficients, one per column of X_train
print(ridge.coef_)

[-2.48207220e-02 -8.97209081e-03 -3.90930294e-02  8.96610320e-02
  4.58137419e-02  3.92657837e-03 -2.42222017e-03  1.49903821e-02
 -5.05278589e-02  1.46610701e-03  2.52032866e-02  1.60138159e-02
 -1.76972598e-03 -1.15462246e-01 -1.68912604e-02 -1.93607096e-02
  1.38767499e-01  6.14089640e-02  3.68667644e-02  5.22057670e-03
  6.87953395e-03  5.14734725e-02 -4.12175287e-03 -6.91739676e-03
  6.36877417e-03  7.62361433e-02 -3.21730598e-02  6.65272752e-03
  9.50684038e-03 -2.63391997e-02  5.04448155e-03 -1.62218076e-02
 -6.80861860e-03  6.47363079e-02  8.96067082e-03  3.01704909e-02
  8.58941713e-03  7.03613591e-02  3.82868442e-03 -2.41342920e-03
 -2.28008584e-03 -1.27507720e-03  1.46275522e-01  7.45431356e-02
 -3.38969665e-02  1.47652747e-01  3.59250344e-02 -4.00121120e-03
  1.10462284e-02  4.85864667e-03 -1.92263921e-02 -3.30044576e-02
 -3.51534661e-02  3.02398206e-02  2.75584974e-02  2.51687424e-02
 -1.58652979e-02  1.48031923e-03 -4.77025613e-05 -9.00176127e-03
  4.64322228e-02  2.48432

In [69]:
# Pair every feature of X_train with its fitted Ridge coefficient
column_names = X_train.columns
coefficients = ridge.coef_

# Build the (feature, coefficient) table and order it from the largest to the
# smallest signed coefficient; strongly negative coefficients end up at the bottom.
result_df_model2_ridge = (
    pd.DataFrame({'Column_Name': column_names, 'Coefficient': coefficients})
    .sort_values(by='Coefficient', ascending=False)
)

# Display the sorted table
result_df_model2_ridge

Unnamed: 0,Column_Name,Coefficient
45,GrLivArea,0.147653
42,1stFlrSF,0.146276
16,OverallQual,0.138767
3,LotArea,0.089661
25,MasVnrArea,0.076236
43,2ndFlrSF,0.074543
37,TotalBsmtSF,0.070361
33,BsmtFinSF1,0.064736
17,OverallCond,0.061409
21,RoofMatl,0.051473


In [70]:
# Evaluate the Ridge model on both splits: R2, RSS and MSE.
# The raw MSE is what gets printed, while its square root (RMSE) is what is
# stored in metric2 for the comparison table.
# (The variable names keep their `_lr` suffix even though this is the Ridge model.)
y_pred_train = ridge.predict(X_train)
y_pred_test = ridge.predict(X_test)

# Coefficient of determination
r2_train_lr = r2_score(y_train, y_pred_train)
r2_test_lr = r2_score(y_test, y_pred_test)

# Residual sum of squares
rss1_lr = np.sum(np.square(y_train - y_pred_train))
rss2_lr = np.sum(np.square(y_test - y_pred_test))

# Mean squared error
mse_train_lr = mean_squared_error(y_train, y_pred_train)
mse_test_lr = mean_squared_error(y_test, y_pred_test)

# Print one value per line, in the same order as the entries of metric2
for value in (r2_train_lr, r2_test_lr, rss1_lr, rss2_lr, mse_train_lr, mse_test_lr):
    print(value)

metric2 = [r2_train_lr, r2_test_lr, rss1_lr, rss2_lr,
           mse_train_lr**0.5, mse_test_lr**0.5]

0.8655498160535974
0.7015019931652653
1.6547135136668127
1.6266185819210768
0.0016206792494288078
0.003705281507792886


##### The r2-score looks good for the training set (~87%). For the test set, the r2-score is about 70% (which is an improvement compared to the ~69% that we achieved without regularization in our earlier model). However, the test set prediction accuracy can still be improved.

#### Lasso

In [71]:
# Grid-search the Lasso alpha with the same candidate list (`params`) and fold
# count (`folds`) that were used for Ridge. This rebinds `model_cv`, replacing
# the Ridge search object.
lasso = Lasso()

model_cv = GridSearchCV(
    lasso,
    params,
    scoring='neg_mean_absolute_error',
    cv=folds,
    return_train_score=True,
    verbose=1,
)

model_cv.fit(X_train, y_train)

Fitting 5 folds for each of 28 candidates, totalling 140 fits


In [72]:
# Show the alpha that achieved the best cross-validated score in the Lasso grid search
print(model_cv.best_params_)

{'alpha': 0.0001}


In [73]:
# Refit Lasso on the training set with the alpha selected by the grid search
# (0.0001 on this data). Reading it from `model_cv.best_params_` instead of
# hand-copying the printed value keeps the model in sync with the search result.
alpha = model_cv.best_params_['alpha']

lasso = Lasso(alpha=alpha)

lasso.fit(X_train, y_train)

In [74]:
# Penalised Lasso coefficients (one per column of X_train)
lasso.coef_

array([-0.02887071, -0.00231458, -0.        ,  0.01862417,  0.0084311 ,
        0.        , -0.00376731,  0.02054965, -0.        ,  0.0006162 ,
        0.03220081,  0.01509336,  0.        , -0.0138052 , -0.01299953,
       -0.01919955,  0.16144229,  0.03739311,  0.02372021,  0.00809398,
        0.00731412,  0.04863179, -0.        , -0.01168689,  0.00284854,
        0.06210873, -0.02781061,  0.00136415,  0.00110122, -0.03094325,
        0.00686943, -0.01753958, -0.01260129,  0.        ,  0.00078693,
        0.        ,  0.        ,  0.04347493,  0.        , -0.00318704,
        0.00226398,  0.        ,  0.        ,  0.0060509 , -0.01855446,
        0.31433743,  0.04258816,  0.        ,  0.01367784,  0.00172941,
       -0.        , -0.01964585, -0.03755254,  0.01034539,  0.02549806,
        0.0268716 , -0.01524282, -0.00176033,  0.        , -0.00978754,
        0.0522493 ,  0.        ,  0.        , -0.        ,  0.00109419,
        0.01712122, -0.        , -0.        ,  0.        ,  0.00

In [75]:
# Pair every feature of X_train with its fitted Lasso coefficient
column_names = X_train.columns
coefficients = lasso.coef_

# Build the (feature, coefficient) table and order it from the largest to the
# smallest signed coefficient; strongly negative coefficients end up at the bottom.
result_df_model3_lasso = (
    pd.DataFrame({'Column_Name': column_names, 'Coefficient': coefficients})
    .sort_values(by='Coefficient', ascending=False)
)

# Display the sorted table
result_df_model3_lasso

Unnamed: 0,Column_Name,Coefficient
45,GrLivArea,0.314337
16,OverallQual,0.161442
25,MasVnrArea,0.062109
60,GarageCars,0.052249
21,RoofMatl,0.048632
37,TotalBsmtSF,0.043475
46,BsmtFullBath,0.042588
17,OverallCond,0.037393
10,LandSlope,0.032201
55,Fireplaces,0.026872


In [76]:
# Evaluate the Lasso model on both splits: R2, RSS and MSE.
# The raw MSE is what gets printed, while its square root (RMSE) is what is
# stored in metric3 for the comparison table.
# (The variable names keep their `_lr` suffix even though this is the Lasso model.)
y_pred_train = lasso.predict(X_train)
y_pred_test = lasso.predict(X_test)

# Coefficient of determination
r2_train_lr = r2_score(y_train, y_pred_train)
r2_test_lr = r2_score(y_test, y_pred_test)

# Residual sum of squares
rss1_lr = np.sum(np.square(y_train - y_pred_train))
rss2_lr = np.sum(np.square(y_test - y_pred_test))

# Mean squared error
mse_train_lr = mean_squared_error(y_train, y_pred_train)
mse_test_lr = mean_squared_error(y_test, y_pred_test)

# Print one value per line, in the same order as the entries of metric3
for value in (r2_train_lr, r2_test_lr, rss1_lr, rss2_lr, mse_train_lr, mse_test_lr):
    print(value)

metric3 = [r2_train_lr, r2_test_lr, rss1_lr, rss2_lr,
           mse_train_lr**0.5, mse_test_lr**0.5]

0.8483264066181498
0.8297516839536911
1.8666865099671575
0.9277417874854409
0.0018282923701931023
0.0021133070329964485


##### Training Set: The r2-score which we get from Lasso Regression for the training set is ~85%, which is about 2 percentage points lower than the score that we achieved through Ridge Regression (~87%).

##### Test Set: For the test set, the r2-score is about 83% (which is a great improvement compared to the 70% that we achieved with Ridge regularization in our earlier model).

##### Comparing the metrics obtained through all the three models

In [77]:
# Side-by-side comparison of the three models.
# metric2 and metric3 hold, in order: R2 (train), R2 (test), RSS (train),
# RSS (test) and the square roots of the train/test MSE, i.e. the RMSE.
# The last two rows are therefore labelled RMSE rather than MSE.
# NOTE(review): `metric` is built in an earlier cell; its displayed values are
# consistent with sqrt(RSS / n), i.e. RMSE as well. Confirm against that cell.

lr_table = {'Metric': ['R2 Score (Train)', 'R2 Score (Test)', 'RSS (Train)', 'RSS (Test)',
                       'RMSE (Train)', 'RMSE (Test)'],
            'Linear Regression': metric
            }

lr_metric = pd.DataFrame(lr_table, columns=['Metric', 'Linear Regression'])

# One named Series per regularised model so that concat yields one column each
rg_metric = pd.Series(metric2, name='Ridge Regression')
ls_metric = pd.Series(metric3, name='Lasso Regression')

final_metric = pd.concat([lr_metric, rg_metric, ls_metric], axis=1)

final_metric

Unnamed: 0,Metric,Linear Regression,Ridge Regression,Lasso Regression
0,R2 Score (Train),0.86489,0.86555,0.848326
1,R2 Score (Test),0.68572,0.701502,0.829752
2,RSS (Train),1.662829,1.654714,1.866687
3,RSS (Test),1.712623,1.626619,0.927742
4,MSE (Train),0.040356,0.040258,0.042759
5,MSE (Test),0.06246,0.060871,0.045971


##### Conclusion: Lasso regression is the best model to use in this case, as it gives a good training-set r2-score (~85%) and a very high test-set r2-score (~83%). Its test-set score is clearly better than that of the other two models.

##### The following code is mainly for answering the subjective questions that are part of this assignment.

###### Q1: Part B - What will be the changes in the model if you choose to double the value of alpha for both ridge and lasso? 

###### Ridge Regression


In [78]:
# The optimal alpha found for Ridge Regression earlier in the notebook is 0.01.
# Double it (0.01 * 2 = 0.02) and refit the Ridge model.
# NOTE: this rebinds `ridge`, so from here on `ridge` is the alpha = 0.02 model;
# the alpha = 0.01 model is no longer reachable under that name.

alpha = 0.02
ridge = Ridge(alpha=alpha)

ridge.fit(X_train, y_train)


In [79]:
# Coefficients of the Ridge model refit with alpha = 0.02
print(ridge.coef_)

[-2.53100222e-02 -8.95370471e-03 -3.97418757e-02  8.84159291e-02
  4.52845855e-02  3.81318456e-03 -2.50924064e-03  1.55340605e-02
 -5.04633838e-02  1.39479528e-03  2.57227274e-02  1.61410243e-02
 -1.55096306e-03 -1.13525313e-01 -1.67209327e-02 -1.95253856e-02
  1.39754241e-01  6.05985182e-02  3.66172501e-02  5.19943684e-03
  6.97482831e-03  5.18594457e-02 -3.65772438e-03 -7.37534780e-03
  6.24069332e-03  7.56674648e-02 -3.20292186e-02  6.40307356e-03
  9.65681477e-03 -2.64623625e-02  5.27476161e-03 -1.63952829e-02
 -7.09753132e-03  6.20522791e-02  9.00506434e-03  2.96313823e-02
  8.69935916e-03  6.77940137e-02  3.48445986e-03 -2.38395903e-03
 -2.09673005e-03 -1.19671049e-03  1.45504307e-01  7.48606795e-02
 -3.22960277e-02  1.47296975e-01  3.66715569e-02 -3.23633329e-03
  1.16782069e-02  4.50211452e-03 -1.88188333e-02 -3.29006896e-02
 -3.53612452e-02  3.01008469e-02  2.76255456e-02  2.50673120e-02
 -1.58004045e-02  1.38876670e-03 -7.21325806e-04 -9.05156835e-03
  4.75715743e-02  1.69655

In [80]:
# Pair every feature of X_train with its coefficient in the alpha = 0.02 Ridge model
column_names = X_train.columns
coefficients = ridge.coef_

# Build the (feature, coefficient) table, ordered from the largest to the
# smallest signed coefficient
result_df_model2_ridge_q1 = (
    pd.DataFrame({'Column_Name': column_names, 'Coefficient': coefficients})
    .sort_values(by='Coefficient', ascending=False)
)

# Show the ten largest coefficients
result_df_model2_ridge_q1.head(10)

Unnamed: 0,Column_Name,Coefficient
45,GrLivArea,0.147297
42,1stFlrSF,0.145504
16,OverallQual,0.139754
3,LotArea,0.088416
25,MasVnrArea,0.075667
43,2ndFlrSF,0.074861
37,TotalBsmtSF,0.067794
33,BsmtFinSF1,0.062052
17,OverallCond,0.060599
21,RoofMatl,0.051859


In [81]:
# Ten largest coefficients of the alpha = 0.01 Ridge model, for comparison with the alpha = 0.02 table
result_df_model2_ridge.head(10)

Unnamed: 0,Column_Name,Coefficient
45,GrLivArea,0.147653
42,1stFlrSF,0.146276
16,OverallQual,0.138767
3,LotArea,0.089661
25,MasVnrArea,0.076236
43,2ndFlrSF,0.074543
37,TotalBsmtSF,0.070361
33,BsmtFinSF1,0.064736
17,OverallCond,0.061409
21,RoofMatl,0.051473


In [82]:
# R2 of the doubled-alpha Ridge model on the train and test splits
y_pred_train_ridge_q1 = ridge.predict(X_train)
y_pred_test_ridge_q1 = ridge.predict(X_test)

r2_train_ridge_q1 = r2_score(y_train, y_pred_train_ridge_q1)
r2_test_ridge_q1 = r2_score(y_test, y_pred_test_ridge_q1)

# Train score on the first line, test score on the second
print(r2_train_ridge_q1, r2_test_ridge_q1, sep='\n')


0.865277758455576
0.7194496122818024


###### Lasso Regression

In [83]:
# The optimal alpha found for Lasso Regression earlier in the notebook is 0.0001.
# Double it (0.0001 * 2 = 0.0002) and fit a separate Lasso model, `lasso_q1`,
# so that the original `lasso` model stays available for comparison.

alpha = 0.0002
lasso_q1 = Lasso(alpha=alpha)
lasso_q1.fit(X_train, y_train)

In [84]:
# Coefficients of the Lasso model fit with alpha = 0.0002
lasso_q1.coef_

array([-3.26664250e-02, -0.00000000e+00, -0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00, -4.29449522e-03,  2.02363233e-02,
       -0.00000000e+00,  0.00000000e+00,  2.98589636e-02,  1.39095041e-02,
        0.00000000e+00, -0.00000000e+00, -1.23470210e-02, -1.52175282e-02,
        1.72687433e-01,  2.30234600e-02,  1.51935359e-02,  9.67483158e-03,
        5.54698443e-03,  3.59378288e-02, -0.00000000e+00, -1.10060229e-02,
        0.00000000e+00,  5.44450810e-02, -2.75738377e-02,  0.00000000e+00,
        0.00000000e+00, -3.24019210e-02,  7.72710107e-03, -1.75063915e-02,
       -1.40636959e-02,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  8.09992342e-03, -0.00000000e+00, -3.63347446e-03,
        5.14063155e-03,  6.54739211e-04,  0.00000000e+00,  2.34214288e-03,
       -0.00000000e+00,  3.20010114e-01,  4.03134147e-02,  0.00000000e+00,
        1.14926117e-02,  0.00000000e+00,  0.00000000e+00, -6.21462866e-03,
       -3.89872818e-02,  

In [85]:
# Pair every feature of X_train with its coefficient in the alpha = 0.0002 Lasso model
column_names = X_train.columns
coefficients = lasso_q1.coef_

# Build the (feature, coefficient) table, ordered from the largest to the
# smallest signed coefficient
result_df_model3_lasso_q1 = (
    pd.DataFrame({'Column_Name': column_names, 'Coefficient': coefficients})
    .sort_values(by='Coefficient', ascending=False)
)

# Show the twenty largest coefficients
result_df_model3_lasso_q1.head(20)

Unnamed: 0,Column_Name,Coefficient
45,GrLivArea,0.32001
16,OverallQual,0.172687
60,GarageCars,0.054642
25,MasVnrArea,0.054445
46,BsmtFullBath,0.040313
21,RoofMatl,0.035938
10,LandSlope,0.029859
55,Fireplaces,0.028317
17,OverallCond,0.023023
54,Functional,0.022957


In [86]:
# Ten largest coefficients of the alpha = 0.0001 Lasso model, for comparison with the alpha = 0.0002 table
result_df_model3_lasso.head(10)

Unnamed: 0,Column_Name,Coefficient
45,GrLivArea,0.314337
16,OverallQual,0.161442
25,MasVnrArea,0.062109
60,GarageCars,0.052249
21,RoofMatl,0.048632
37,TotalBsmtSF,0.043475
46,BsmtFullBath,0.042588
17,OverallCond,0.037393
10,LandSlope,0.032201
55,Fireplaces,0.026872


In [87]:
# R2 of the doubled-alpha Lasso model (lasso_q1) on the train and test splits
y_pred_train_lasso_q1 = lasso_q1.predict(X_train)
y_pred_test_lasso_q1 = lasso_q1.predict(X_test)

r2_train_lasso_q1 = r2_score(y_train, y_pred_train_lasso_q1)
r2_test_lasso_q1 = r2_score(y_test, y_pred_test_lasso_q1)

# Train score on the first line, test score on the second
print(r2_train_lasso_q1, r2_test_lasso_q1, sep='\n')

0.8349699729472013
0.8369226950332541


###### Q3. After building the model, you realized that the five most important predictor variables in the lasso model are not available in the incoming data. You will now have to create another model excluding the five most important predictor variables. Which are the five most important predictor variables now?

In [88]:
# The Lasso model (alpha = 0.0001) is already built; list its ten largest
# coefficients. The first five rows are the predictors removed for Q3.
result_df_model3_lasso.head(10)

Unnamed: 0,Column_Name,Coefficient
45,GrLivArea,0.314337
16,OverallQual,0.161442
25,MasVnrArea,0.062109
60,GarageCars,0.052249
21,RoofMatl,0.048632
37,TotalBsmtSF,0.043475
46,BsmtFullBath,0.042588
17,OverallCond,0.037393
10,LandSlope,0.032201
55,Fireplaces,0.026872


In [89]:
# Peek at the first rows of the training frame.
# NOTE(review): the rows displayed are identical to X_train.head() in the next cell;
# confirm whether house_price_train still differs from X_train at this point.
house_price_train.head()

Unnamed: 0,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,MasVnrArea,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,Heating,HeatingQC,CentralAir,Electrical,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,FireplaceQu,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition
0,0.235294,0.75,0.236301,0.039403,1.0,0.5,1.0,0.666667,0.0,1.0,0.5,0.625,0.25,0.285714,0.0,0.714286,0.666667,0.5,0.876812,0.716667,0.2,0.142857,0.428571,0.4,0.333333,0.16,0.666667,1.0,0.4,0.5,1.0,0.25,0.333333,0.174876,1.0,0.0,0.15411,0.220458,0.2,0.0,1.0,1.0,0.231481,0.616949,0.0,0.424289,0.333333,0.0,0.666667,0.5,0.5,0.0,0.666667,0.545455,1.0,0.333333,1.0,0.166667,0.876812,0.666667,0.75,0.462623,1.0,1.0,1.0,0.396733,0.114723,0.26087,0.0,0.0,0.0,1.0,0.5,0.333333,0.0,0.272727,0.75,1.0,0.8
1,0.176471,0.75,0.106164,0.033981,1.0,0.5,1.0,1.0,0.0,1.0,0.0,0.291667,0.25,0.285714,0.0,0.0,0.555556,0.375,0.528986,0.0,0.2,0.142857,0.857143,0.866667,0.666667,0.0,1.0,1.0,0.2,1.0,0.0,1.0,0.5,0.016655,1.0,0.0,0.274401,0.120295,0.2,1.0,1.0,0.0,0.098611,0.333656,0.0,0.201576,0.0,0.0,0.333333,0.5,0.375,0.0,1.0,0.363636,1.0,0.333333,0.4,0.833333,0.557971,1.0,0.25,0.155148,1.0,1.0,1.0,0.0,0.267686,0.0,0.0,0.0,0.0,1.0,0.75,0.333333,0.0,0.272727,1.0,1.0,0.8
2,0.176471,1.0,0.130137,0.017931,1.0,0.5,1.0,1.0,0.0,0.0,0.0,0.708333,0.125,0.285714,0.0,0.0,0.555556,0.875,0.275362,0.883333,0.6,0.142857,0.857143,0.866667,0.666667,0.0,1.0,0.5,0.2,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.20762,0.079378,0.2,0.5,1.0,1.0,0.145602,0.307022,0.0,0.230015,0.0,0.0,0.333333,0.0,0.25,0.0,0.666667,0.181818,1.0,0.0,0.6,0.166667,0.565217,1.0,0.25,0.179831,0.2,1.0,1.0,0.459743,0.0,0.0,0.0,0.0,0.0,1.0,0.5,0.333333,0.0,0.454545,0.0,1.0,0.8
3,1.0,1.0,0.133562,0.046139,1.0,0.5,1.0,1.0,0.0,1.0,0.0,0.708333,0.25,0.285714,0.25,0.714286,0.333333,0.625,0.094203,0.0,0.2,0.142857,0.857143,0.0,0.666667,0.0,1.0,1.0,0.4,1.0,1.0,1.0,1.0,0.0,1.0,0.0,0.33262,0.127169,0.2,0.5,1.0,1.0,0.202315,0.505569,0.0,0.35588,0.0,0.0,0.666667,0.0,0.5,0.5,1.0,0.727273,1.0,0.0,0.6,0.833333,0.717391,1.0,0.5,0.394922,1.0,1.0,0.0,0.0,0.0,0.206522,0.0,0.0,0.0,1.0,0.5,0.333333,0.0,0.272727,1.0,1.0,0.8
4,0.176471,0.75,0.099315,0.032409,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.291667,0.25,0.285714,0.0,0.0,0.444444,0.875,0.202899,0.0,0.2,0.142857,0.571429,0.533333,0.666667,0.0,1.0,1.0,0.0,1.0,0.25,1.0,0.833333,0.0427,0.166667,0.265265,0.098031,0.140917,0.2,0.0,1.0,1.0,0.136343,0.19661,0.0,0.178517,0.333333,0.0,0.333333,0.0,0.5,0.0,1.0,0.363636,1.0,0.0,0.6,0.833333,0.768116,1.0,0.25,0.270804,1.0,1.0,1.0,0.0,0.248566,0.202899,0.0,0.0,0.0,1.0,0.75,0.333333,0.0,0.272727,0.5,1.0,0.8


In [90]:
# First rows of the training feature matrix used by all the models above
X_train.head()

Unnamed: 0,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,MasVnrArea,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,Heating,HeatingQC,CentralAir,Electrical,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,FireplaceQu,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition
0,0.235294,0.75,0.236301,0.039403,1.0,0.5,1.0,0.666667,0.0,1.0,0.5,0.625,0.25,0.285714,0.0,0.714286,0.666667,0.5,0.876812,0.716667,0.2,0.142857,0.428571,0.4,0.333333,0.16,0.666667,1.0,0.4,0.5,1.0,0.25,0.333333,0.174876,1.0,0.0,0.15411,0.220458,0.2,0.0,1.0,1.0,0.231481,0.616949,0.0,0.424289,0.333333,0.0,0.666667,0.5,0.5,0.0,0.666667,0.545455,1.0,0.333333,1.0,0.166667,0.876812,0.666667,0.75,0.462623,1.0,1.0,1.0,0.396733,0.114723,0.26087,0.0,0.0,0.0,1.0,0.5,0.333333,0.0,0.272727,0.75,1.0,0.8
1,0.176471,0.75,0.106164,0.033981,1.0,0.5,1.0,1.0,0.0,1.0,0.0,0.291667,0.25,0.285714,0.0,0.0,0.555556,0.375,0.528986,0.0,0.2,0.142857,0.857143,0.866667,0.666667,0.0,1.0,1.0,0.2,1.0,0.0,1.0,0.5,0.016655,1.0,0.0,0.274401,0.120295,0.2,1.0,1.0,0.0,0.098611,0.333656,0.0,0.201576,0.0,0.0,0.333333,0.5,0.375,0.0,1.0,0.363636,1.0,0.333333,0.4,0.833333,0.557971,1.0,0.25,0.155148,1.0,1.0,1.0,0.0,0.267686,0.0,0.0,0.0,0.0,1.0,0.75,0.333333,0.0,0.272727,1.0,1.0,0.8
2,0.176471,1.0,0.130137,0.017931,1.0,0.5,1.0,1.0,0.0,0.0,0.0,0.708333,0.125,0.285714,0.0,0.0,0.555556,0.875,0.275362,0.883333,0.6,0.142857,0.857143,0.866667,0.666667,0.0,1.0,0.5,0.2,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.20762,0.079378,0.2,0.5,1.0,1.0,0.145602,0.307022,0.0,0.230015,0.0,0.0,0.333333,0.0,0.25,0.0,0.666667,0.181818,1.0,0.0,0.6,0.166667,0.565217,1.0,0.25,0.179831,0.2,1.0,1.0,0.459743,0.0,0.0,0.0,0.0,0.0,1.0,0.5,0.333333,0.0,0.454545,0.0,1.0,0.8
3,1.0,1.0,0.133562,0.046139,1.0,0.5,1.0,1.0,0.0,1.0,0.0,0.708333,0.25,0.285714,0.25,0.714286,0.333333,0.625,0.094203,0.0,0.2,0.142857,0.857143,0.0,0.666667,0.0,1.0,1.0,0.4,1.0,1.0,1.0,1.0,0.0,1.0,0.0,0.33262,0.127169,0.2,0.5,1.0,1.0,0.202315,0.505569,0.0,0.35588,0.0,0.0,0.666667,0.0,0.5,0.5,1.0,0.727273,1.0,0.0,0.6,0.833333,0.717391,1.0,0.5,0.394922,1.0,1.0,0.0,0.0,0.0,0.206522,0.0,0.0,0.0,1.0,0.5,0.333333,0.0,0.272727,1.0,1.0,0.8
4,0.176471,0.75,0.099315,0.032409,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.291667,0.25,0.285714,0.0,0.0,0.444444,0.875,0.202899,0.0,0.2,0.142857,0.571429,0.533333,0.666667,0.0,1.0,1.0,0.0,1.0,0.25,1.0,0.833333,0.0427,0.166667,0.265265,0.098031,0.140917,0.2,0.0,1.0,1.0,0.136343,0.19661,0.0,0.178517,0.333333,0.0,0.333333,0.0,0.5,0.0,1.0,0.363636,1.0,0.0,0.6,0.833333,0.768116,1.0,0.25,0.270804,1.0,1.0,1.0,0.0,0.248566,0.202899,0.0,0.0,0.0,1.0,0.75,0.333333,0.0,0.272727,0.5,1.0,0.8


In [91]:
# Sanity check: (rows, features) of the training feature matrix
X_train.shape

(1021, 79)

In [92]:
# Sanity check: the training target has one value per training row
y_train.shape

(1021,)

In [93]:
# Sanity check: (rows, features) of the test feature matrix
X_test.shape

(439, 79)

In [94]:
# Sanity check: the test target has one value per test row
y_test.shape

(439,)

In [95]:
# Work on independent (deep) copies so the original splits stay untouched
# while features are removed for the Q3 experiment.
X_train_l, y_train_l = X_train.copy(), y_train.copy()
X_test_l, y_test_l = X_test.copy(), y_test.copy()

In [96]:
# Confirm that the copies have the same shapes as the original splits
for split in (X_train_l, y_train_l, X_test_l, y_test_l):
    print(split.shape)

(1021, 79)
(1021,)
(439, 79)
(439,)


In [97]:
# The five predictors with the largest Lasso coefficients, which Q3 assumes
# are not available in the incoming data
columns_to_drop = ['GrLivArea', 'OverallQual', 'MasVnrArea', 'GarageCars', 'RoofMatl']

# Derive the reduced training frame from X_train rather than dropping in place:
# an in-place drop raises KeyError when the cell is executed a second time,
# whereas this form can be re-run safely and always gives the same result.
X_train_l = X_train.drop(columns=columns_to_drop)

# Shapes before and after dropping the columns
print(X_train.shape)
print(X_train_l.shape)

(1021, 79)
(1021, 74)


In [98]:
# Remove the same columns from the test features. Deriving X_test_l from X_test
# (instead of an in-place drop) keeps the cell re-runnable: an in-place drop
# raises KeyError on a second execution.
X_test_l = X_test.drop(columns=columns_to_drop)

# Shapes before and after dropping the columns
print(X_test.shape)
print(X_test_l.shape)

(439, 79)
(439, 74)


In [99]:
# Fit an unregularised linear regression on the reduced feature set.
# NOTE(review): `reg_q3` is not referenced again in the cells that follow here;
# confirm whether it is still needed.
reg_q3 = LinearRegression()
reg_q3.fit(X_train_l, y_train_l)

In [100]:
# Grid-search the Lasso alpha again, this time on the reduced feature set,
# reusing the candidate list (`params`) and fold count (`folds`) defined for
# the Ridge search. This rebinds `model_cv`.
lasso_q3 = Lasso()

model_cv = GridSearchCV(
    lasso_q3,
    params,
    scoring='neg_mean_absolute_error',
    cv=folds,
    return_train_score=True,
    verbose=1,
)

model_cv.fit(X_train_l, y_train_l)

Fitting 5 folds for each of 28 candidates, totalling 140 fits


In [101]:
# Show the alpha that achieved the best cross-validated score on the reduced feature set
print(model_cv.best_params_)

{'alpha': 0.0001}


In [102]:
# Refit Lasso on the reduced training set with the alpha selected by the grid
# search (0.0001 on this data). Reading it from `model_cv.best_params_` instead
# of hand-copying the printed value keeps the model in sync with the search result.
alpha = model_cv.best_params_['alpha']
lasso_q3 = Lasso(alpha=alpha)
lasso_q3.fit(X_train_l, y_train_l)

In [111]:
# Pair every remaining feature (columns of X_train_l, i.e. without the five
# dropped predictors) with its coefficient in the refit Lasso model
column_names = X_train_l.columns
coefficients = lasso_q3.coef_

# Build the (feature, coefficient) table, ordered from the largest to the
# smallest signed coefficient
result_df_model3_lasso_q3 = (
    pd.DataFrame({'Column_Name': column_names, 'Coefficient': coefficients})
    .sort_values(by='Coefficient', ascending=False)
)

# Display the sorted table
result_df_model3_lasso_q3

Unnamed: 0,Column_Name,Coefficient
39,1stFlrSF,0.308429
40,2ndFlrSF,0.170193
34,TotalBsmtSF,0.116894
56,GarageArea,0.066253
16,OverallCond,0.04765
17,YearBuilt,0.045924
51,Fireplaces,0.043811
50,Functional,0.039285
42,BsmtFullBath,0.035184
10,LandSlope,0.029573


In [112]:
# The five largest coefficients of the reduced-feature Lasso model (the answer to Q3)
result_df_model3_lasso_q3.head()

Unnamed: 0,Column_Name,Coefficient
39,1stFlrSF,0.308429
40,2ndFlrSF,0.170193
34,TotalBsmtSF,0.116894
56,GarageArea,0.066253
16,OverallCond,0.04765
