<h1 align="center"> <i>House Pricing Prediction</i> </h1>

## Importing required libraries

In [1]:
import numpy as np
import pandas as pd
import pydot
import math
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import export_graphviz
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [2]:
# Load the training and test sets from CSV files in the working directory.
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')
# Preview the first rows of the training frame.
train.head()

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,1,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,...,0,,,,0,2,2008,WD,Normal,208500
1,2,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,...,0,,,,0,5,2007,WD,Normal,181500
2,3,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,...,0,,,,0,9,2008,WD,Normal,223500
3,4,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,...,0,,,,0,2,2006,WD,Abnorml,140000
4,5,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,...,0,,,,0,12,2008,WD,Normal,250000


In [3]:
# Preview the first rows of the test frame for comparison with train.
test.head()

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition
0,1461,20,RH,80.0,11622,Pave,,Reg,Lvl,AllPub,...,120,0,,MnPrv,,0,6,2010,WD,Normal
1,1462,20,RL,81.0,14267,Pave,,IR1,Lvl,AllPub,...,0,0,,,Gar2,12500,6,2010,WD,Normal
2,1463,60,RL,74.0,13830,Pave,,IR1,Lvl,AllPub,...,0,0,,MnPrv,,0,3,2010,WD,Normal
3,1464,60,RL,78.0,9978,Pave,,IR1,Lvl,AllPub,...,0,0,,,,0,6,2010,WD,Normal
4,1465,120,RL,43.0,5005,Pave,,IR1,HLS,AllPub,...,144,0,,,,0,1,2010,WD,Normal


In [4]:
# Show each column's dtype and non-null count to spot columns with missing values.
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1460 entries, 0 to 1459
Data columns (total 81 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Id             1460 non-null   int64  
 1   MSSubClass     1460 non-null   int64  
 2   MSZoning       1460 non-null   object 
 3   LotFrontage    1201 non-null   float64
 4   LotArea        1460 non-null   int64  
 5   Street         1460 non-null   object 
 6   Alley          91 non-null     object 
 7   LotShape       1460 non-null   object 
 8   LandContour    1460 non-null   object 
 9   Utilities      1460 non-null   object 
 10  LotConfig      1460 non-null   object 
 11  LandSlope      1460 non-null   object 
 12  Neighborhood   1460 non-null   object 
 13  Condition1     1460 non-null   object 
 14  Condition2     1460 non-null   object 
 15  BldgType       1460 non-null   object 
 16  HouseStyle     1460 non-null   object 
 17  OverallQual    1460 non-null   int64  
 18  OverallC

## Handling Null values

In [5]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
train.describe()

Unnamed: 0,Id,MSSubClass,LotFrontage,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,...,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold,SalePrice
count,1460.0,1460.0,1201.0,1460.0,1460.0,1460.0,1460.0,1460.0,1452.0,1460.0,...,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0
mean,730.5,56.89726,70.049958,10516.828082,6.099315,5.575342,1971.267808,1984.865753,103.685262,443.639726,...,94.244521,46.660274,21.95411,3.409589,15.060959,2.758904,43.489041,6.321918,2007.815753,180921.19589
std,421.610009,42.300571,24.284752,9981.264932,1.382997,1.112799,30.202904,20.645407,181.066207,456.098091,...,125.338794,66.256028,61.119149,29.317331,55.757415,40.177307,496.123024,2.703626,1.328095,79442.502883
min,1.0,20.0,21.0,1300.0,1.0,1.0,1872.0,1950.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2006.0,34900.0
25%,365.75,20.0,59.0,7553.5,5.0,5.0,1954.0,1967.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,2007.0,129975.0
50%,730.5,50.0,69.0,9478.5,6.0,5.0,1973.0,1994.0,0.0,383.5,...,0.0,25.0,0.0,0.0,0.0,0.0,0.0,6.0,2008.0,163000.0
75%,1095.25,70.0,80.0,11601.5,7.0,6.0,2000.0,2004.0,166.0,712.25,...,168.0,68.0,0.0,0.0,0.0,0.0,0.0,8.0,2009.0,214000.0
max,1460.0,190.0,313.0,215245.0,10.0,9.0,2010.0,2010.0,1600.0,5644.0,...,857.0,547.0,552.0,508.0,480.0,738.0,15500.0,12.0,2010.0,755000.0


In [6]:
# Partition each frame's columns by dtype: "object" columns are handled as
# categorical, every other dtype as numeric.
train_is_object = train.dtypes == "object"
test_is_object = test.dtypes == "object"

train_num_cols = train.columns[~train_is_object]
train_cat_cols = train.columns[train_is_object]
test_num_cols = test.columns[~test_is_object]
test_cat_cols = test.columns[test_is_object]

In [7]:
# Count missing values in each numeric column of the training set.
train[train_num_cols].isnull().sum()

Id                 0
MSSubClass         0
LotFrontage      259
LotArea            0
OverallQual        0
OverallCond        0
YearBuilt          0
YearRemodAdd       0
MasVnrArea         8
BsmtFinSF1         0
BsmtFinSF2         0
BsmtUnfSF          0
TotalBsmtSF        0
1stFlrSF           0
2ndFlrSF           0
LowQualFinSF       0
GrLivArea          0
BsmtFullBath       0
BsmtHalfBath       0
FullBath           0
HalfBath           0
BedroomAbvGr       0
KitchenAbvGr       0
TotRmsAbvGrd       0
Fireplaces         0
GarageYrBlt       81
GarageCars         0
GarageArea         0
WoodDeckSF         0
OpenPorchSF        0
EnclosedPorch      0
3SsnPorch          0
ScreenPorch        0
PoolArea           0
MiscVal            0
MoSold             0
YrSold             0
SalePrice          0
dtype: int64

In [8]:
# Count missing values in each numeric column of the test set.
test[test_num_cols].isnull().sum()

Id                 0
MSSubClass         0
LotFrontage      227
LotArea            0
OverallQual        0
OverallCond        0
YearBuilt          0
YearRemodAdd       0
MasVnrArea        15
BsmtFinSF1         1
BsmtFinSF2         1
BsmtUnfSF          1
TotalBsmtSF        1
1stFlrSF           0
2ndFlrSF           0
LowQualFinSF       0
GrLivArea          0
BsmtFullBath       2
BsmtHalfBath       2
FullBath           0
HalfBath           0
BedroomAbvGr       0
KitchenAbvGr       0
TotRmsAbvGrd       0
Fireplaces         0
GarageYrBlt       78
GarageCars         1
GarageArea         1
WoodDeckSF         0
OpenPorchSF        0
EnclosedPorch      0
3SsnPorch          0
ScreenPorch        0
PoolArea           0
MiscVal            0
MoSold             0
YrSold             0
dtype: int64

In [9]:
# Impute LotFrontage with the training mean. The same training-derived value is
# applied to the test set so both frames are transformed identically.
# The result is assigned back instead of calling fillna(inplace=True) on a
# column selection: that form mutates an intermediate object and is not
# guaranteed to update the frame (pandas Copy-on-Write).
lot_frontage_fill = train["LotFrontage"].mean()
train["LotFrontage"] = train["LotFrontage"].fillna(lot_frontage_fill)
test["LotFrontage"] = test["LotFrontage"].fillna(lot_frontage_fill)

In [10]:
# Impute GarageYrBlt with the training median, applied to both frames.
# Assigned back rather than using fillna(inplace=True) on a column selection,
# which is not guaranteed to modify the parent frame.
garage_year_fill = train["GarageYrBlt"].median()
train["GarageYrBlt"] = train["GarageYrBlt"].fillna(garage_year_fill)
test["GarageYrBlt"] = test["GarageYrBlt"].fillna(garage_year_fill)

In [11]:
# Impute MasVnrArea with the training mean, applied to both frames.
# Assigned back rather than using fillna(inplace=True) on a column selection,
# which is not guaranteed to modify the parent frame.
mas_vnr_area_fill = train["MasVnrArea"].mean()
train["MasVnrArea"] = train["MasVnrArea"].fillna(mas_vnr_area_fill)
test["MasVnrArea"] = test["MasVnrArea"].fillna(mas_vnr_area_fill)

In [12]:
# These numeric columns have missing values only in the test set (see the null
# counts above). Fill them with the corresponding training mean so the test
# set is imputed from training statistics, and assign the result back instead
# of using fillna(inplace=True) on a column selection.
test_only_missing_cols = [
    "BsmtFinSF1", "BsmtFinSF2", "BsmtUnfSF", "TotalBsmtSF",
    "BsmtFullBath", "BsmtHalfBath", "GarageCars", "GarageArea",
]
for col in test_only_missing_cols:
    test[col] = test[col].fillna(train[col].mean())

In [13]:
# Count missing values in each categorical (object-dtype) training column.
train[train_cat_cols].isnull().sum()

MSZoning            0
Street              0
Alley            1369
LotShape            0
LandContour         0
Utilities           0
LotConfig           0
LandSlope           0
Neighborhood        0
Condition1          0
Condition2          0
BldgType            0
HouseStyle          0
RoofStyle           0
RoofMatl            0
Exterior1st         0
Exterior2nd         0
MasVnrType          8
ExterQual           0
ExterCond           0
Foundation          0
BsmtQual           37
BsmtCond           37
BsmtExposure       38
BsmtFinType1       37
BsmtFinType2       38
Heating             0
HeatingQC           0
CentralAir          0
Electrical          1
KitchenQual         0
Functional          0
FireplaceQu       690
GarageType         81
GarageFinish       81
GarageQual         81
GarageCond         81
PavedDrive          0
PoolQC           1453
Fence            1179
MiscFeature      1406
SaleType            0
SaleCondition       0
dtype: int64

In [14]:
# Count missing values in each categorical (object-dtype) test column.
test[test_cat_cols].isnull().sum()

MSZoning            4
Street              0
Alley            1352
LotShape            0
LandContour         0
Utilities           2
LotConfig           0
LandSlope           0
Neighborhood        0
Condition1          0
Condition2          0
BldgType            0
HouseStyle          0
RoofStyle           0
RoofMatl            0
Exterior1st         1
Exterior2nd         1
MasVnrType         16
ExterQual           0
ExterCond           0
Foundation          0
BsmtQual           44
BsmtCond           45
BsmtExposure       44
BsmtFinType1       42
BsmtFinType2       42
Heating             0
HeatingQC           0
CentralAir          0
Electrical          0
KitchenQual         1
Functional          2
FireplaceQu       730
GarageType         76
GarageFinish       78
GarageQual         78
GarageCond         78
PavedDrive          0
PoolQC           1456
Fence            1169
MiscFeature      1408
SaleType            1
SaleCondition       0
dtype: int64

###### Many categorical variables have missing values; let's drop the categorical variables with more than 80% of their values missing

In [15]:
# Fraction of rows that are missing in each categorical training column.
train[train_cat_cols].isnull().sum()/len(train)

MSZoning         0.000000
Street           0.000000
Alley            0.937671
LotShape         0.000000
LandContour      0.000000
Utilities        0.000000
LotConfig        0.000000
LandSlope        0.000000
Neighborhood     0.000000
Condition1       0.000000
Condition2       0.000000
BldgType         0.000000
HouseStyle       0.000000
RoofStyle        0.000000
RoofMatl         0.000000
Exterior1st      0.000000
Exterior2nd      0.000000
MasVnrType       0.005479
ExterQual        0.000000
ExterCond        0.000000
Foundation       0.000000
BsmtQual         0.025342
BsmtCond         0.025342
BsmtExposure     0.026027
BsmtFinType1     0.025342
BsmtFinType2     0.026027
Heating          0.000000
HeatingQC        0.000000
CentralAir       0.000000
Electrical       0.000685
KitchenQual      0.000000
Functional       0.000000
FireplaceQu      0.472603
GarageType       0.055479
GarageFinish     0.055479
GarageQual       0.055479
GarageCond       0.055479
PavedDrive       0.000000
PoolQC      

In [16]:
# Remove the four mostly-empty categorical columns from both frames.
train = train.drop(columns=["Alley", "PoolQC", "Fence", "MiscFeature"])
test = test.drop(columns=["Alley", "PoolQC", "Fence", "MiscFeature"])

In [17]:
# Keep the categorical column indexes in sync with the frames after the drop.
train_cat_cols = train_cat_cols.drop(
    labels=["Alley", "PoolQC", "Fence", "MiscFeature"]
)
test_cat_cols = test_cat_cols.drop(
    labels=["Alley", "PoolQC", "Fence", "MiscFeature"]
)

In [18]:
# Re-check missing counts for the remaining categorical training columns.
train[train_cat_cols].isnull().sum()

MSZoning           0
Street             0
LotShape           0
LandContour        0
Utilities          0
LotConfig          0
LandSlope          0
Neighborhood       0
Condition1         0
Condition2         0
BldgType           0
HouseStyle         0
RoofStyle          0
RoofMatl           0
Exterior1st        0
Exterior2nd        0
MasVnrType         8
ExterQual          0
ExterCond          0
Foundation         0
BsmtQual          37
BsmtCond          37
BsmtExposure      38
BsmtFinType1      37
BsmtFinType2      38
Heating            0
HeatingQC          0
CentralAir         0
Electrical         1
KitchenQual        0
Functional         0
FireplaceQu      690
GarageType        81
GarageFinish      81
GarageQual        81
GarageCond        81
PavedDrive         0
SaleType           0
SaleCondition      0
dtype: int64

In [19]:
# Re-check missing counts for the remaining categorical test columns.
test[test_cat_cols].isnull().sum()

MSZoning           4
Street             0
LotShape           0
LandContour        0
Utilities          2
LotConfig          0
LandSlope          0
Neighborhood       0
Condition1         0
Condition2         0
BldgType           0
HouseStyle         0
RoofStyle          0
RoofMatl           0
Exterior1st        1
Exterior2nd        1
MasVnrType        16
ExterQual          0
ExterCond          0
Foundation         0
BsmtQual          44
BsmtCond          45
BsmtExposure      44
BsmtFinType1      42
BsmtFinType2      42
Heating            0
HeatingQC          0
CentralAir         0
Electrical         0
KitchenQual        1
Functional         2
FireplaceQu      730
GarageType        76
GarageFinish      78
GarageQual        78
GarageCond        78
PavedDrive         0
SaleType           1
SaleCondition      0
dtype: int64

In [20]:
# Category frequencies of MasVnrType in train; the most frequent value is the fill value used in the next cell.
train["MasVnrType"].value_counts()

None       864
BrkFace    445
Stone      128
BrkCmn      15
Name: MasVnrType, dtype: int64

In [21]:
# Fill missing MasVnrType with the most frequent training category ("None").
# Assigned back rather than using fillna(inplace=True) on a column selection,
# which is not guaranteed to modify the parent frame (pandas Copy-on-Write).
train["MasVnrType"] = train["MasVnrType"].fillna("None")
test["MasVnrType"] = test["MasVnrType"].fillna("None")

In [22]:
# Category frequencies of BsmtQual in train; the most frequent value is the fill value used in the next cell.
train["BsmtQual"].value_counts()

TA    649
Gd    618
Ex    121
Fa     35
Name: BsmtQual, dtype: int64

In [23]:
# Fill missing BsmtQual with the most frequent training category ("TA").
# Assigned back rather than using fillna(inplace=True) on a column selection,
# which is not guaranteed to modify the parent frame (pandas Copy-on-Write).
train["BsmtQual"] = train["BsmtQual"].fillna("TA")
test["BsmtQual"] = test["BsmtQual"].fillna("TA")

In [24]:
# Category frequencies of BsmtCond in train; the most frequent value is the fill value used in the next cell.
train["BsmtCond"].value_counts()

TA    1311
Gd      65
Fa      45
Po       2
Name: BsmtCond, dtype: int64

In [25]:
# Fill missing BsmtCond with the most frequent training category ("TA").
# Assigned back rather than using fillna(inplace=True) on a column selection,
# which is not guaranteed to modify the parent frame (pandas Copy-on-Write).
train["BsmtCond"] = train["BsmtCond"].fillna("TA")
test["BsmtCond"] = test["BsmtCond"].fillna("TA")

In [26]:
# Category frequencies of BsmtExposure in train; the most frequent value is the fill value used in the next cell.
train["BsmtExposure"].value_counts()

No    953
Av    221
Gd    134
Mn    114
Name: BsmtExposure, dtype: int64

In [27]:
# Fill missing BsmtExposure with the most frequent training category ("No").
# Assigned back rather than using fillna(inplace=True) on a column selection,
# which is not guaranteed to modify the parent frame (pandas Copy-on-Write).
train["BsmtExposure"] = train["BsmtExposure"].fillna("No")
test["BsmtExposure"] = test["BsmtExposure"].fillna("No")

In [28]:
# Category frequencies of BsmtFinType1 in train; the most frequent value is the fill value used in the next cell.
train["BsmtFinType1"].value_counts()

Unf    430
GLQ    418
ALQ    220
BLQ    148
Rec    133
LwQ     74
Name: BsmtFinType1, dtype: int64

In [29]:
# Fill missing BsmtFinType1 with the most frequent training category ("Unf").
# Assigned back rather than using fillna(inplace=True) on a column selection,
# which is not guaranteed to modify the parent frame (pandas Copy-on-Write).
train["BsmtFinType1"] = train["BsmtFinType1"].fillna("Unf")
test["BsmtFinType1"] = test["BsmtFinType1"].fillna("Unf")

In [30]:
# Category frequencies of BsmtFinType2 in train; the most frequent value is the fill value used in the next cell.
train["BsmtFinType2"].value_counts()

Unf    1256
Rec      54
LwQ      46
BLQ      33
ALQ      19
GLQ      14
Name: BsmtFinType2, dtype: int64

In [31]:
# Fill missing BsmtFinType2 with the most frequent training category ("Unf").
# Assigned back rather than using fillna(inplace=True) on a column selection,
# which is not guaranteed to modify the parent frame (pandas Copy-on-Write).
train["BsmtFinType2"] = train["BsmtFinType2"].fillna("Unf")
test["BsmtFinType2"] = test["BsmtFinType2"].fillna("Unf")

In [32]:
# Category frequencies of FireplaceQu in train; the most frequent value is the fill value used in the next cell.
train["FireplaceQu"].value_counts()

Gd    380
TA    313
Fa     33
Ex     24
Po     20
Name: FireplaceQu, dtype: int64

In [33]:
# Fill missing FireplaceQu with the most frequent training category ("Gd").
# Assigned back rather than using fillna(inplace=True) on a column selection,
# which is not guaranteed to modify the parent frame (pandas Copy-on-Write).
# NOTE(review): roughly 47% of training rows are missing this value; if those
# are houses without a fireplace, a dedicated "no fireplace" category would be
# more faithful than the mode -- confirm against the data description.
train["FireplaceQu"] = train["FireplaceQu"].fillna("Gd")
test["FireplaceQu"] = test["FireplaceQu"].fillna("Gd")

In [34]:
# Category frequencies of GarageType in train; the most frequent value is the fill value used in the next cell.
train["GarageType"].value_counts()

Attchd     870
Detchd     387
BuiltIn     88
Basment     19
CarPort      9
2Types       6
Name: GarageType, dtype: int64

In [35]:
# Fill missing GarageType with the most frequent training category ("Attchd").
# Assigned back rather than using fillna(inplace=True) on a column selection,
# which is not guaranteed to modify the parent frame (pandas Copy-on-Write).
train["GarageType"] = train["GarageType"].fillna("Attchd")
test["GarageType"] = test["GarageType"].fillna("Attchd")

In [36]:
# Category frequencies of GarageFinish in train; the most frequent value is the fill value used in the next cell.
train["GarageFinish"].value_counts()

Unf    605
RFn    422
Fin    352
Name: GarageFinish, dtype: int64

In [37]:
# Fill missing GarageFinish with the most frequent training category ("Unf").
# Assigned back rather than using fillna(inplace=True) on a column selection,
# which is not guaranteed to modify the parent frame (pandas Copy-on-Write).
train["GarageFinish"] = train["GarageFinish"].fillna("Unf")
test["GarageFinish"] = test["GarageFinish"].fillna("Unf")

In [38]:
# Category frequencies of GarageQual in train; the most frequent value is the fill value used in the next cell.
train["GarageQual"].value_counts()

TA    1311
Fa      48
Gd      14
Po       3
Ex       3
Name: GarageQual, dtype: int64

In [39]:
# Fill missing GarageQual with the most frequent training category ("TA").
# Assigned back rather than using fillna(inplace=True) on a column selection,
# which is not guaranteed to modify the parent frame (pandas Copy-on-Write).
train["GarageQual"] = train["GarageQual"].fillna("TA")
test["GarageQual"] = test["GarageQual"].fillna("TA")

In [40]:
# Category frequencies of GarageCond in train; the most frequent value is the fill value used in the next cell.
train["GarageCond"].value_counts()

TA    1326
Fa      35
Gd       9
Po       7
Ex       2
Name: GarageCond, dtype: int64

In [41]:
# Fill missing GarageCond with the most frequent training category ("TA").
# Assigned back rather than using fillna(inplace=True) on a column selection,
# which is not guaranteed to modify the parent frame (pandas Copy-on-Write).
train["GarageCond"] = train["GarageCond"].fillna("TA")
test["GarageCond"] = test["GarageCond"].fillna("TA")

In [42]:
# Category frequencies of Electrical in train; the most frequent value is the fill value used in the next cell.
train["Electrical"].value_counts()

SBrkr    1334
FuseA      94
FuseF      27
FuseP       3
Mix         1
Name: Electrical, dtype: int64

In [43]:
# Electrical is missing only in train (one row); fill it with the most
# frequent category ("SBrkr"). Assigned back rather than using
# fillna(inplace=True) on a column selection, which is not guaranteed to
# modify the parent frame (pandas Copy-on-Write).
train["Electrical"] = train["Electrical"].fillna("SBrkr")

In [44]:
# Category frequencies of MSZoning in train; the most frequent value is used to fill the test set in the next cell.
train["MSZoning"].value_counts()

RL         1151
RM          218
FV           65
RH           16
C (all)      10
Name: MSZoning, dtype: int64

In [45]:
# MSZoning is missing only in test; fill with the most frequent training
# category ("RL"). Assigned back rather than using fillna(inplace=True) on a
# column selection, which is not guaranteed to modify the parent frame.
test["MSZoning"] = test["MSZoning"].fillna("RL")

In [46]:
# Category frequencies of Utilities in train; the most frequent value is used to fill the test set in the next cell.
train["Utilities"].value_counts()

AllPub    1459
NoSeWa       1
Name: Utilities, dtype: int64

In [47]:
# Utilities is missing only in test; fill with the most frequent training
# category ("AllPub"). Assigned back rather than using fillna(inplace=True) on
# a column selection, which is not guaranteed to modify the parent frame.
test["Utilities"] = test["Utilities"].fillna("AllPub")

In [48]:
# Category frequencies of Exterior1st in train; the most frequent value is used to fill the test set in the next cell.
train["Exterior1st"].value_counts()

VinylSd    515
HdBoard    222
MetalSd    220
Wd Sdng    206
Plywood    108
CemntBd     61
BrkFace     50
WdShing     26
Stucco      25
AsbShng     20
Stone        2
BrkComm      2
AsphShn      1
CBlock       1
ImStucc      1
Name: Exterior1st, dtype: int64

In [49]:
# Exterior1st / Exterior2nd are missing only in test; fill both with the most
# frequent training Exterior1st category ("VinylSd"). Assigned back rather
# than using fillna(inplace=True) on a column selection, which is not
# guaranteed to modify the parent frame (pandas Copy-on-Write).
# NOTE(review): Exterior2nd's own distribution is not inspected here; it
# reuses the Exterior1st mode -- confirm that is intended.
test["Exterior1st"] = test["Exterior1st"].fillna("VinylSd")
test["Exterior2nd"] = test["Exterior2nd"].fillna("VinylSd")

In [50]:
# Category frequencies of KitchenQual in train; the most frequent value is used to fill the test set in the next cell.
train["KitchenQual"].value_counts()

TA    735
Gd    586
Ex    100
Fa     39
Name: KitchenQual, dtype: int64

In [51]:
# KitchenQual is missing only in test; fill with the most frequent training
# category ("TA"). Assigned back rather than using fillna(inplace=True) on a
# column selection, which is not guaranteed to modify the parent frame.
test["KitchenQual"] = test["KitchenQual"].fillna("TA")

In [52]:
# Category frequencies of Functional in train; the most frequent value is used to fill the test set in the next cell.
train["Functional"].value_counts()

Typ     1360
Min2      34
Min1      31
Mod       15
Maj1      14
Maj2       5
Sev        1
Name: Functional, dtype: int64

In [53]:
# Functional is missing only in test; fill with the most frequent training
# category ("Typ"). Assigned back rather than using fillna(inplace=True) on a
# column selection, which is not guaranteed to modify the parent frame.
test["Functional"] = test["Functional"].fillna("Typ")

In [54]:
# Category frequencies of SaleType in train; the most frequent value is used to fill the test set in the next cell.
train["SaleType"].value_counts()

WD       1267
New       122
COD        43
ConLD       9
ConLI       5
ConLw       5
CWD         4
Oth         3
Con         2
Name: SaleType, dtype: int64

In [55]:
# SaleType is missing only in test; fill with the most frequent training
# category ("WD"). Assigned back rather than using fillna(inplace=True) on a
# column selection, which is not guaranteed to modify the parent frame.
test["SaleType"] = test["SaleType"].fillna("WD")

# One hot encoding

In [56]:
# One-hot encode the selected categorical training columns. drop_first=True
# omits the first level of every encoded column.
train = pd.get_dummies(
    data=train,
    columns=[
        "MSZoning", "Street", "LotShape", "LandContour",
        "LandSlope", "LotConfig", "Neighborhood", "Condition1",
        "BldgType", "RoofStyle", "MasVnrType", "ExterQual",
        "ExterCond", "Foundation", "BsmtQual", "BsmtCond",
        "BsmtExposure", "BsmtFinType1", "HeatingQC", "CentralAir",
        "KitchenQual", "FireplaceQu", "GarageType", "GarageFinish",
        "GarageCond", "PavedDrive", "SaleType", "SaleCondition",
    ],
    drop_first=True,
)

In [57]:
# Discard the identifier and the categorical columns that were not one-hot
# encoded in the previous cell.
train = train.drop(
    columns=[
        "Id", "Exterior2nd", "Condition2", "BsmtFinType2", "Utilities",
        "HouseStyle", "RoofMatl", "Exterior1st", "Heating", "Electrical",
        "Functional", "GarageQual",
    ]
)

In [58]:
# One-hot encode the test set with the same column list and drop_first
# setting that was used for the training set.
test = pd.get_dummies(
    data=test,
    columns=[
        "MSZoning", "Street", "LotShape", "LandContour",
        "LandSlope", "LotConfig", "Neighborhood", "Condition1",
        "BldgType", "RoofStyle", "MasVnrType", "ExterQual",
        "ExterCond", "Foundation", "BsmtQual", "BsmtCond",
        "BsmtExposure", "BsmtFinType1", "HeatingQC", "CentralAir",
        "KitchenQual", "FireplaceQu", "GarageType", "GarageFinish",
        "GarageCond", "PavedDrive", "SaleType", "SaleCondition",
    ],
    drop_first=True,
)

In [59]:
# Discard the identifier and the categorical columns that were not one-hot
# encoded, mirroring the columns dropped from the training set.
test = test.drop(
    columns=[
        "Id", "Exterior2nd", "Condition2", "BsmtFinType2", "Utilities",
        "HouseStyle", "RoofMatl", "Exterior1st", "Heating", "Electrical",
        "Functional", "GarageQual",
    ]
)

## Feature Selection

In [60]:
# Target is SalePrice; every other remaining column is a feature.
y = train["SalePrice"]
X = train.drop(columns=["SalePrice"])

## Splitting train into train and test

In [61]:
# Hold out 20% of the labelled rows for evaluation; the fixed seed keeps the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)

## Regression models

### Simple linear regression model

In [62]:
# Single-feature baseline: fit on only the first column of the feature matrix
# (kept two-dimensional by slicing rather than indexing).
first_feature_train = X_train.iloc[:, :1]
simple_reg = LinearRegression()
simple_reg = simple_reg.fit(first_feature_train, y_train)

In [63]:
# Predict on the held-out rows using the same single (first) feature column.
first_feature_test = X_test.iloc[:, :1]
sy_predictions = simple_reg.predict(first_feature_test)

### Multiple linear regression model

In [64]:
# Ordinary least squares on the full feature matrix.
multi_reg = LinearRegression()
multi_reg = multi_reg.fit(X_train, y_train)

In [65]:
# Held-out predictions from the all-features linear model.
my_predictions = multi_reg.predict(
    X_test
)

### Polynomial Regression Model

In [66]:
# Expand the training features with PolynomialFeatures (default settings),
# then fit a linear model on the expanded matrix.
poly = PolynomialFeatures()
poly.fit(X_train)
X_poly = poly.transform(X_train)

poly_reg = LinearRegression()
poly_reg = poly_reg.fit(X_poly, y_train)

In [67]:
# Apply the expansion that was fitted on the training data. Evaluation data
# must only be transformed, never used to re-fit the transformer.
py_predictions = poly_reg.predict(poly.transform(X_test))

### DecisionTree Regressor

In [68]:
# Seed the tree so that repeated runs give the same model and RMSE; the value
# matches the random_state used for the train/test split.
dt_reg = DecisionTreeRegressor(random_state=1)
dt_reg.fit(X_train, y_train)

DecisionTreeRegressor()

In [69]:
# Held-out predictions from the decision tree.
dy_predictions = dt_reg.predict(X_test)

### Random Forest

In [70]:
# Seed the forest so that the fitted trees, feature importances and RMSE are
# reproducible across runs; the value matches the train/test split seed.
rf_reg = RandomForestRegressor(random_state=1)
rf_reg.fit(X_train, y_train)

RandomForestRegressor()

In [71]:
# Held-out predictions from the random forest trained on all features.
ry_predictions = rf_reg.predict(X_test)

## RMSE for Five models

In [72]:
# Collect each model's held-out predictions under its display label, then
# build a one-column RMSE table in the same order.
# mean_squared_error is symmetric in its two arguments, so passing the true
# values first (the documented order) yields the same numbers.
predictions_by_model = {
    "Simple Linear regression": sy_predictions,
    "Multiple Linear regression": my_predictions,
    "Polynomial regression": py_predictions,
    "Decision Tree": dy_predictions,
    "Random forest": ry_predictions,
}
rmse_values = [
    math.sqrt(mean_squared_error(y_test, predicted))
    for predicted in predictions_by_model.values()
]
rmse = pd.DataFrame({'RMSE': rmse_values}, index=list(predictions_by_model))
rmse.head()

Unnamed: 0,RMSE
Simple Linear regression,84751.086864
Multiple Linear regression,34827.239921
Polynomial regression,261755.349635
Decision Tree,34542.408737
Random forest,27433.849073


We can observe that the random forest gives the lowest RMSE of the five models.

## Specific to Random Forest

### Saving Tree Picture

In [73]:
# Pick one fitted tree from the forest, write it to a Graphviz .dot file, and
# load that file back as a pydot graph (exactly one graph is expected).
tree = rf_reg.estimators_[5]
export_graphviz(
    tree,
    out_file='tree.dot',
    feature_names=X_train.columns,
    rounded=True,
    precision=1,
)
[graph] = pydot.graph_from_dot_file('tree.dot')

In [74]:
# Render the loaded graph to a PNG image in the working directory.
graph.write_png('tree.png')

### Best estimators

In [75]:
# Pair every feature name with its importance from the fitted forest (rounded
# to two decimals) and order the pairs from most to least important.
importances = list(rf_reg.feature_importances_)
feature_importances = sorted(
    ((feature, round(importance, 2)) for feature, importance in zip(X_train.columns, importances)),
    key=lambda pair: pair[1],
    reverse=True,
)

# A plain loop replaces the list comprehension that was built only for its
# print side effects (and then discarded).
for pair in feature_importances:
    print('Variable: {:20} Importance: {}'.format(*pair))

Variable: OverallQual          Importance: 0.54
Variable: GrLivArea            Importance: 0.1
Variable: TotalBsmtSF          Importance: 0.04
Variable: GarageCars           Importance: 0.04
Variable: BsmtFinSF1           Importance: 0.03
Variable: 1stFlrSF             Importance: 0.03
Variable: LotArea              Importance: 0.02
Variable: GarageArea           Importance: 0.02
Variable: LotFrontage          Importance: 0.01
Variable: YearBuilt            Importance: 0.01
Variable: YearRemodAdd         Importance: 0.01
Variable: MasVnrArea           Importance: 0.01
Variable: BsmtUnfSF            Importance: 0.01
Variable: 2ndFlrSF             Importance: 0.01
Variable: FullBath             Importance: 0.01
Variable: TotRmsAbvGrd         Importance: 0.01
Variable: GarageYrBlt          Importance: 0.01
Variable: WoodDeckSF           Importance: 0.01
Variable: OpenPorchSF          Importance: 0.01
Variable: MoSold               Importance: 0.01
Variable: GarageFinish_Unf     Importance

### Random Forest with best estimators

In [76]:
# Refit a forest on only the two highest-importance features. The feature list
# is defined once so the train and test subsets cannot drift apart, and the
# forest is seeded (same value as the train/test split) for reproducibility.
important_features = ["OverallQual", "GrLivArea"]
train_important = X_train[important_features]
test_important = X_test[important_features]

rf_most_important = RandomForestRegressor(random_state=1)
rf_most_important.fit(train_important, y_train)

RandomForestRegressor()

In [77]:
# Held-out predictions from the forest trained on the two selected features.
rf_predictions = rf_most_important.predict(test_important)

### Comparing Random forest and Random forest with best estimators

In [78]:
# Compare held-out RMSE of the all-features forest with the two-feature one.
# mean_squared_error is symmetric in its arguments, so the documented
# (true, predicted) order gives the same values.
rmse_all_features = math.sqrt(mean_squared_error(y_test, ry_predictions))
rmse_top_features = math.sqrt(mean_squared_error(y_test, rf_predictions))

rmse = pd.DataFrame(
    {'RMSE': [rmse_all_features, rmse_top_features]},
    index=["Random forest", "Random forest with best estimators"],
)
rmse.head()

Unnamed: 0,RMSE
Random forest,27433.849073
Random forest with best estimators,35367.242757


We can observe that the random forest trained on all features gives better results than the random forest with best estimators.

## Saving test predictions to submission.csv file

In [79]:
# The test frame was one-hot encoded independently of the training frame, so
# align it to the exact columns (and order) the forest was trained on. A dummy
# column absent from test means none of its rows had that level, hence 0.
# When the columns already match, this is a no-op.
test_features = test.reindex(columns=X_train.columns, fill_value=0)

# Predict with the all-features forest and write the result to disk.
# NOTE(review): the Id column was dropped from test earlier, so the file holds
# only the positional row index and a 'Prediction' column -- confirm that this
# is the format the consumer of submission.csv expects.
df = pd.DataFrame(rf_reg.predict(test_features), columns=['Prediction'])
df.to_csv('submission.csv')