## Problem Statement

#### What features drive the price of houses in Ames, Iowa and where are they located?
TL;DR
Linear Regression, LASSO and Ridge models will be developed to see which features have the most significant effect on the price of a house.


We will compare the linear regression and LASSO models fitted on our data to determine which one fared better. A better model is one with fewer features (with high coefficients) and ideally a high R-squared. R-squared can be perceived as the proportion of the variance in the dependent variable that is predictable from the independent variables.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LinearRegression, Lasso, LassoCV, RidgeCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score

from scipy.stats import skew

In [None]:
pd.set_option('display.width', 500)
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 120)

In [None]:
train = pd.read_csv('train.csv')

In [None]:
test = pd.read_csv('test.csv')

In [None]:
# Stack the train rows on top of the test rows and renumber the combined index from 0.
train_test = pd.concat([train, test], sort=False).reset_index(drop=True)

In [None]:
train_test.to_csv('./train_test.csv')

In [None]:
# Summary statistics of the combined frame, transposed so that each column is shown as a row.
train_test.describe().T
# Columns noted for follow-up (presumably those with missing values - confirm against the output):
# Lot Frontage, Mas Vnr Area, BsmtFin SF 1, BsmtFin SF 2,
# Bsmt Unf SF, Total Bsmt SF, Bsmt Full Bath, Bsmt Half Bath, Garage Yr Blt, Garage Cars, Garage Area, SalePrice

#### Correcting Lot Frontage

In [None]:
train_test[train_test['Lot Frontage'].isnull()].head()

In [None]:
# Replace NaN values with the median value of Lot Frontage.
# The result is assigned back to the column: calling fillna(inplace=True) on a selected
# column is chained assignment and does not update the frame under pandas copy-on-write.
train_test['Lot Frontage'] = train_test['Lot Frontage'].fillna(train_test['Lot Frontage'].median())

In [None]:
train_test[train_test['Lot Frontage'].isnull()] #double check for NaN values

##### Correcting Bsmt Features

In [None]:
train_test[train_test['Bsmt Cond'].isnull()]

In [None]:
# Basement categorical columns: NaN is replaced by the 'NA' placeholder.
# Assigned back to the frame (instead of fillna(inplace=True) on each selected column)
# so the fill also takes effect under pandas copy-on-write.
bsmt_cat_cols = ['Bsmt Cond', 'Bsmt Qual', 'Bsmt Exposure', 'BsmtFin Type 1', 'BsmtFin Type 2']
train_test[bsmt_cat_cols] = train_test[bsmt_cat_cols].fillna('NA')

In [None]:
train_test.loc[[1327], ['Bsmt Cond']] #1327 do not have a basement as Bsmt Cond is NaN

In [None]:
train_test[train_test['Bsmt Unf SF'].isnull()].tail() 

In [None]:
train_test[train_test['BsmtFin SF 1'].isnull()].tail() 

In [None]:
train_test[train_test['BsmtFin SF 2'].isnull()].tail() # index 1327 has null values in Bsmt variables

In [None]:
train_test[train_test['Bsmt Half Bath'].isnull()].tail() 

In [None]:
train_test[train_test['Bsmt Full Bath'].isnull()].tail()

In [None]:
train_test[train_test['Total Bsmt SF'].isnull()].tail()

In [None]:
train_test.loc[[616], ['Bsmt Cond']]

In [None]:
# As 1327 do not have a basement, NaN values in the basement area columns are filled with 0.
# The bath-count and total-area columns are treated the same way: their NaN values stand for 0
# (0 values were mistakenly filled with NaN; typo).
# Assigned back to the frame so the fill also takes effect under pandas copy-on-write.
bsmt_num_cols = ['Bsmt Unf SF', 'BsmtFin SF 1', 'BsmtFin SF 2',
                 'Bsmt Half Bath', 'Bsmt Full Bath', 'Total Bsmt SF']
train_test[bsmt_num_cols] = train_test[bsmt_num_cols].fillna(0)

In [None]:
# Rows whose Bsmt Cond holds the 'NA' placeholder used when the NaN values were filled
# (the previous filter looked for 'None', which is never written to this column).
train_test[train_test['Bsmt Cond'] == 'NA']

In [None]:
train_test[train_test['Bsmt Qual'] == 'NA']

In [None]:
train_test[train_test['Bsmt Exposure'] == 'NA']

In [None]:
train_test[train_test['BsmtFin Type 1'] == 'NA']

In [None]:
train_test[train_test['BsmtFin Type 2'] == 'NA']

In [None]:
# Boolean masks: True where the given basement column holds the 'NA' placeholder.
mask1 = train_test['Bsmt Cond'] == 'NA'
mask2 = train_test['Bsmt Qual'] == 'NA'
mask3 = train_test['Bsmt Exposure'] == 'NA'
mask4 = train_test['BsmtFin Type 1'] == 'NA'
mask5 = train_test['BsmtFin Type 2'] == 'NA'
# XOR keeps the rows where exactly one of Bsmt Exposure / Bsmt Cond is 'NA',
# i.e. rows where the two columns disagree about the placeholder.
train_test[mask3^mask1][['Bsmt Cond', 'Bsmt Exposure']]

In [None]:
train_test[mask4^mask1][['Bsmt Cond', 'BsmtFin Type 1']]

In [None]:
train_test[mask5^mask1][['Bsmt Cond', 'BsmtFin Type 2']]

In [None]:
# Change the 'NA' Bsmt Exposure values of these rows to 'No'.
# Only Bsmt Exposure is rewritten: 'No' is not one of the levels used for BsmtFin Type 1/2
# in the ordinal encoding later in this notebook, so writing it there would leave
# unencodable strings in those columns.
# NOTE(review): confirm the three indices against the output of the XOR checks above.
train_test.loc[[1456, 1547, 1997], 'Bsmt Exposure'] = 'No'

In [None]:
train_test.loc[[1456, 1547, 1997], ['Bsmt Exposure', 'BsmtFin Type 1', 'BsmtFin Type 2']]

###### Summary for Bsmt:
1. Fill all NaN values with 'NA' 
2. Fill index 1327 and 616 with zero values as they do not have a basement
3. Rectify values that have been classified as 'NA' when actual value is 'No'

##### Correcting Fireplace Qu

In [None]:
train_test[['Fireplaces', 'Fireplace Qu']]

In [None]:
# Missing fireplace quality becomes the 'NA' placeholder; assigned back to the column so the
# fill also takes effect under pandas copy-on-write.
train_test['Fireplace Qu'] = train_test['Fireplace Qu'].fillna('NA')

In [None]:
# Double check with Fireplaces to ensure data is accurate: list rows whose quality is 'NA'
# although the Fireplaces count is non-zero.
mask1 = train_test['Fireplace Qu'] == 'NA'
mask2 = train_test['Fireplaces'] != 0
# Combine the two masks (the previous `mask1&2` AND-ed the mask with the integer 2).
train_test[mask1 & mask2][['Fireplaces', 'Fireplace Qu']]

##### Correcting Alley

In [None]:
# Missing Alley values become the 'NA' placeholder; assigned back to the column so the
# fill also takes effect under pandas copy-on-write.
train_test['Alley'] = train_test['Alley'].fillna('NA')

##### Correcting Garage Features

In [None]:
train_test[['Garage Finish', 'Garage Qual', 'Garage Cond']]

In [None]:
# Garage categorical columns: NaN is replaced by the 'NA' placeholder.
# Assigned back to the frame so the fill also takes effect under pandas copy-on-write.
garage_cat_cols = ['Garage Finish', 'Garage Qual', 'Garage Cond', 'Garage Type']
train_test[garage_cat_cols] = train_test[garage_cat_cols].fillna('NA')

In [None]:
train_test[train_test['Garage Yr Blt'].isnull()]

In [None]:
# Fill NaN values with the mode. The fill value is the number 2005 rather than the string
# '2005', so the year column stays numeric instead of mixing numbers and strings.
train_test['Garage Yr Blt'] = train_test['Garage Yr Blt'].fillna(2005)

In [None]:
train_test[train_test['Garage Cars'].isnull()]

In [None]:
train_test.loc[[1712], ['Garage Finish']]

In [None]:
# As 1712 have no garage, garage cars and garage area will take on 0 value.
# Assigned back to the frame so the fill also takes effect under pandas copy-on-write.
garage_num_cols = ['Garage Cars', 'Garage Area']
train_test[garage_num_cols] = train_test[garage_num_cols].fillna(0)

In [None]:
train_test.loc[[1699], ['Garage Yr Blt']]

In [None]:
train_test.loc[1699, 'Garage Yr Blt'] = 2007

In [None]:
train_test[['Garage Finish', 'Garage Qual', 'Garage Cond', 'Garage Cars', 'Garage Area', 'Garage Yr Blt', 'Garage Type']].isnull().sum()

#### Correcting Electrical feature

In [None]:
train_test.Electrical.mode()

In [None]:
# Put the mode ('SBrkr') into the missing value; assigned back to the column so the fill
# also takes effect under pandas copy-on-write.
train_test['Electrical'] = train_test['Electrical'].fillna('SBrkr')

#### Correcting Pool QC

In [None]:
train_test['Pool QC']

In [None]:
# Missing Pool QC values become the 'NA' placeholder; assigned back to the column so the
# fill also takes effect under pandas copy-on-write.
train_test['Pool QC'] = train_test['Pool QC'].fillna('NA')

In [None]:
mask1 = train_test['Pool QC'] == 'NA'
mask2 = train_test['Pool Area'] != 0
train_test[mask1&mask2][['Pool QC', 'Pool Area']]

In [None]:
train_test['Fence']

In [None]:
# Missing Fence values become the 'NA' placeholder; assigned back to the column so the
# fill also takes effect under pandas copy-on-write.
train_test['Fence'] = train_test['Fence'].fillna('NA')

#### Correcting Misc Feature

In [None]:
train_test['Misc Feature']

In [None]:
# Missing Misc Feature values become the 'NA' placeholder; assigned back to the column so
# the fill also takes effect under pandas copy-on-write.
train_test['Misc Feature'] = train_test['Misc Feature'].fillna('NA')

In [None]:
mask1 = train_test['Misc Feature'] == 'NA'
mask2 = train_test['Misc Val'] != 0
train_test[mask1&mask2][['Misc Feature', 'Misc Val']]

#### Correcting Mas Vnr Values

In [None]:
train_test[train_test['Mas Vnr Type'].isnull()]

In [None]:
train_test[train_test['Mas Vnr Area'].isnull()]

In [None]:
# Missing masonry veneer entries: type becomes 'None' and area becomes 0.
# Assigned back to the columns so the fill also takes effect under pandas copy-on-write.
train_test['Mas Vnr Type'] = train_test['Mas Vnr Type'].fillna('None')
train_test['Mas Vnr Area'] = train_test['Mas Vnr Area'].fillna(0)

In [None]:
mask1= train_test['Mas Vnr Area']>1
mask2=train_test['Mas Vnr Type']=="None"
train_test[mask1&mask2][['Mas Vnr Area', 'Mas Vnr Type']]
#need to change Mas Vnr Type

In [None]:
train_test.loc[[810, 1832, 2434, 2914], 'Mas Vnr Type'] = 'BrkFace'
#BrkFace is used instead of the mode ('None') as Mas Vnr Area is not 0

In [None]:
train_test.loc[[810, 1832, 2434, 2914],'Mas Vnr Type']  

In [None]:
mask3= train_test['Mas Vnr Area']==0.0
mask4=train_test['Mas Vnr Type']!="None"
train_test[mask3&mask4][['Mas Vnr Area', 'Mas Vnr Type']]

In [None]:
train_test.loc[[669, 1136,1187], 'Mas Vnr Area'] = train_test['Mas Vnr Area'].mean() 

In [None]:
train_test.loc[[669, 1136,1187],'Mas Vnr Area']
#Mas Vnr Area takes on the mean value

In [None]:
mask5= train_test['Mas Vnr Area']!=0
mask6=train_test['Mas Vnr Type']=="None"
train_test[mask5&mask6][['Mas Vnr Area', 'Mas Vnr Type']]

In [None]:
train_test.loc[[765, 1148, 1684], 'Mas Vnr Area'] = 0
#Mas Vnr Type = None. Hence, Mas Vnr Area should be 0.

##### Looking at SalePrice

In [None]:
train_test.SalePrice.isnull().sum()
#879 NaN values represents the saleprice in test set

In [None]:
#train_test.SalePrice.fillna(0, inplace=True)
#test set sale prices are set at 0 while train set sale prices are > 0

In [None]:
#train_test.SalePrice.value_counts()
#879 input from test set are set as 0

In [None]:
train_test.describe().T

#We have accounted for all null/missing values
#Note: NaN values of saleprice are of the test set

In [None]:
# Normalise column labels: lower-case them and replace spaces with underscores.
train_test.columns = train_test.columns.str.lower().str.replace(' ', '_')

In [None]:
# Store ms_subclass as strings rather than numbers.
train_test['ms_subclass'] = train_test['ms_subclass'].astype('str')

In [None]:
# NOTE(review): this binds a second name to the same DataFrame object; no copy is made, so
# in-place changes made through train_test_cleaned are also visible through train_test.
train_test_cleaned = train_test
# Column dtypes and non-null counts of the cleaned frame.
train_test_cleaned.info()

In [None]:
train_test_cleaned.to_csv('./train_test_cleaned.csv')

# EDA 

#### Feature Selection (low variance)

In [None]:
# The 30 numeric columns with the smallest variance.
# numeric_only=True restricts the computation to numeric columns; without it recent pandas
# versions raise on the string-valued columns still present in the frame.
train_test_cleaned.var(numeric_only=True).sort_values(ascending=False).tail(30)

In [None]:
# Features with variance < 1 will be dropped.
low_variance_cols = [
    'kitchen_abvgr',
    'bsmt_half_bath',
    'half_bath',
    'bsmt_full_bath',
    'full_bath',
    'bedroom_abvgr',
]
train_test_cleaned.drop(columns=low_variance_cols, inplace=True)


#### Converting ordinal features to numerical

In [None]:
# Encode lot_shape as integer codes: each level maps to its position in the list below.
lot_shape_levels = ["IR3", "IR2", "IR1", "Reg"]
cleanup_nums = {"lot_shape": {level: code for code, level in enumerate(lot_shape_levels)}}
print(train_test_cleaned["lot_shape"].head())   # before encoding
train_test_cleaned.replace(cleanup_nums, inplace=True)
print(train_test_cleaned["lot_shape"].head())   # after encoding

In [None]:
# Encode utilities as integer codes: each level maps to its position in the list below.
utilities_levels = ["ELO", "NoSeWa", "NoSewr", "AllPub"]
cleanup_nums = {"utilities": {level: code for code, level in enumerate(utilities_levels)}}
print(train_test_cleaned["utilities"].head())   # before encoding
train_test_cleaned.replace(cleanup_nums, inplace=True)
print(train_test_cleaned["utilities"].head())   # after encoding

In [None]:
# Encode land_slope as integer codes: each level maps to its position in the list below.
land_slope_levels = ["Gtl", "Mod", "Sev"]
cleanup_nums = {"land_slope": {level: code for code, level in enumerate(land_slope_levels)}}
print(train_test_cleaned["land_slope"].head())   # before encoding
train_test_cleaned.replace(cleanup_nums, inplace=True)
print(train_test_cleaned["land_slope"].head())   # after encoding

In [None]:
# Encode exter_qual as integer codes: each level maps to its position in the list below.
exter_qual_levels = ["Po", "Fa", "TA", "Gd", "Ex"]
cleanup_nums = {"exter_qual": {level: code for code, level in enumerate(exter_qual_levels)}}
print(train_test_cleaned["exter_qual"].head())   # before encoding
train_test_cleaned.replace(cleanup_nums, inplace=True)
print(train_test_cleaned["exter_qual"].head())   # after encoding

In [None]:
# Encode exter_cond as integer codes: each level maps to its position in the list below.
exter_cond_levels = ["Po", "Fa", "TA", "Gd", "Ex"]
cleanup_nums = {"exter_cond": {level: code for code, level in enumerate(exter_cond_levels)}}
print(train_test_cleaned["exter_cond"].head())   # before encoding
train_test_cleaned.replace(cleanup_nums, inplace=True)
print(train_test_cleaned["exter_cond"].head())   # after encoding

In [None]:
# Encode bsmt_qual as integer codes: each level maps to its position in the list below
# (the 'NA' placeholder maps to 0).
bsmt_qual_levels = ["NA", "Po", "Fa", "TA", "Gd", "Ex"]
cleanup_nums = {"bsmt_qual": {level: code for code, level in enumerate(bsmt_qual_levels)}}
print(train_test_cleaned["bsmt_qual"].head())   # before encoding
train_test_cleaned.replace(cleanup_nums, inplace=True)
print(train_test_cleaned["bsmt_qual"].head())   # after encoding

In [None]:
# Encode bsmt_cond as integer codes: each level maps to its position in the list below
# (the 'NA' placeholder maps to 0).
bsmt_cond_levels = ["NA", "Po", "Fa", "TA", "Gd", "Ex"]
cleanup_nums = {"bsmt_cond": {level: code for code, level in enumerate(bsmt_cond_levels)}}
print(train_test_cleaned["bsmt_cond"].head())   # before encoding
train_test_cleaned.replace(cleanup_nums, inplace=True)
print(train_test_cleaned["bsmt_cond"].head())   # after encoding

In [None]:
# Encode bsmt_exposure as integer codes: each level maps to its position in the list below
# (the 'NA' placeholder maps to 0).
bsmt_exposure_levels = ["NA", "No", "Mn", "Av", "Gd"]
cleanup_nums = {"bsmt_exposure": {level: code for code, level in enumerate(bsmt_exposure_levels)}}
print(train_test_cleaned["bsmt_exposure"].head())   # before encoding
train_test_cleaned.replace(cleanup_nums, inplace=True)
print(train_test_cleaned["bsmt_exposure"].head())   # after encoding

In [None]:
# Encode bsmtfin_type_1 as integer codes: each level maps to its position in the list below
# (the 'NA' placeholder maps to 0).
bsmtfin_type_1_levels = ["NA", "Unf", "LwQ", "Rec", "BLQ", "ALQ", "GLQ"]
cleanup_nums = {"bsmtfin_type_1": {level: code for code, level in enumerate(bsmtfin_type_1_levels)}}
print(train_test_cleaned["bsmtfin_type_1"].head())   # before encoding
train_test_cleaned.replace(cleanup_nums, inplace=True)
print(train_test_cleaned["bsmtfin_type_1"].head())   # after encoding

In [None]:
# Encode bsmtfin_type_2 as integer codes: each level maps to its position in the list below
# (the 'NA' placeholder maps to 0).
bsmtfin_type_2_levels = ["NA", "Unf", "LwQ", "Rec", "BLQ", "ALQ", "GLQ"]
cleanup_nums = {"bsmtfin_type_2": {level: code for code, level in enumerate(bsmtfin_type_2_levels)}}
print(train_test_cleaned["bsmtfin_type_2"].head())   # before encoding
train_test_cleaned.replace(cleanup_nums, inplace=True)
print(train_test_cleaned["bsmtfin_type_2"].head())   # after encoding

In [None]:
# Encode heating_qc as integer codes: each level maps to its position in the list below.
heating_qc_levels = ["Po", "Fa", "TA", "Gd", "Ex"]
cleanup_nums = {"heating_qc": {level: code for code, level in enumerate(heating_qc_levels)}}
print(train_test_cleaned["heating_qc"].head())   # before encoding
train_test_cleaned.replace(cleanup_nums, inplace=True)
print(train_test_cleaned["heating_qc"].head())   # after encoding

In [None]:
# Encode electrical as integer codes: each level maps to its position in the list below.
electrical_levels = ["Mix", "FuseP", "FuseF", "FuseA", "SBrkr"]
cleanup_nums = {"electrical": {level: code for code, level in enumerate(electrical_levels)}}
print(train_test_cleaned["electrical"].head())   # before encoding
train_test_cleaned.replace(cleanup_nums, inplace=True)
print(train_test_cleaned["electrical"].head())   # after encoding

In [None]:
# Encode kitchen_qual as integer codes: each level maps to its position in the list below.
kitchen_qual_levels = ["Po", "Fa", "TA", "Gd", "Ex"]
cleanup_nums = {"kitchen_qual": {level: code for code, level in enumerate(kitchen_qual_levels)}}
print(train_test_cleaned["kitchen_qual"].head())   # before encoding
train_test_cleaned.replace(cleanup_nums, inplace=True)
print(train_test_cleaned["kitchen_qual"].head())   # after encoding

In [None]:
# Encode functional as integer codes: each level maps to its position in the list below.
functional_levels = ["Sal", "Sev", "Maj2", "Maj1", "Mod", "Min2", "Min1", "Typ"]
cleanup_nums = {"functional": {level: code for code, level in enumerate(functional_levels)}}
print(train_test_cleaned["functional"].head())   # before encoding
train_test_cleaned.replace(cleanup_nums, inplace=True)
print(train_test_cleaned["functional"].head())   # after encoding

In [None]:
# Encode fireplace_qu as integer codes: each level maps to its position in the list below
# (the 'NA' placeholder maps to 0).
fireplace_qu_levels = ["NA", "Po", "Fa", "TA", "Gd", "Ex"]
cleanup_nums = {"fireplace_qu": {level: code for code, level in enumerate(fireplace_qu_levels)}}
print(train_test_cleaned["fireplace_qu"].head())   # before encoding
train_test_cleaned.replace(cleanup_nums, inplace=True)
print(train_test_cleaned["fireplace_qu"].head())   # after encoding

In [None]:
# Encode garage_finish as integer codes: each level maps to its position in the list below
# (the 'NA' placeholder maps to 0).
garage_finish_levels = ["NA", "Unf", "RFn", "Fin"]
cleanup_nums = {"garage_finish": {level: code for code, level in enumerate(garage_finish_levels)}}
print(train_test_cleaned["garage_finish"].head())   # before encoding
train_test_cleaned.replace(cleanup_nums, inplace=True)
print(train_test_cleaned["garage_finish"].head())   # after encoding

In [None]:
# Encode garage_qual as integer codes: each level maps to its position in the list below
# (the 'NA' placeholder maps to 0).
garage_qual_levels = ["NA", "Po", "Fa", "TA", "Gd", "Ex"]
cleanup_nums = {"garage_qual": {level: code for code, level in enumerate(garage_qual_levels)}}
print(train_test_cleaned["garage_qual"].head())   # before encoding
train_test_cleaned.replace(cleanup_nums, inplace=True)
print(train_test_cleaned["garage_qual"].head())   # after encoding

In [None]:
# Encode garage_cond as integer codes: each level maps to its position in the list below
# (the 'NA' placeholder maps to 0).
garage_cond_levels = ["NA", "Po", "Fa", "TA", "Gd", "Ex"]
cleanup_nums = {"garage_cond": {level: code for code, level in enumerate(garage_cond_levels)}}
print(train_test_cleaned["garage_cond"].head())   # before encoding
train_test_cleaned.replace(cleanup_nums, inplace=True)
print(train_test_cleaned["garage_cond"].head())   # after encoding

In [None]:
# Encode paved_drive as integer codes: each level maps to its position in the list below.
paved_drive_levels = ["N", "P", "Y"]
cleanup_nums = {"paved_drive": {level: code for code, level in enumerate(paved_drive_levels)}}
print(train_test_cleaned["paved_drive"].head())   # before encoding
train_test_cleaned.replace(cleanup_nums, inplace=True)
print(train_test_cleaned["paved_drive"].head())   # after encoding

In [None]:
# Encode pool_qc as integer codes: each level maps to its position in the list below
# (the 'NA' placeholder maps to 0).
pool_qc_levels = ["NA", "Fa", "TA", "Gd", "Ex"]
cleanup_nums = {"pool_qc": {level: code for code, level in enumerate(pool_qc_levels)}}
print(train_test_cleaned["pool_qc"].head())   # before encoding
train_test_cleaned.replace(cleanup_nums, inplace=True)
print(train_test_cleaned["pool_qc"].head())   # after encoding

In [None]:
# Encode fence as integer codes: each level maps to its position in the list below
# (the 'NA' placeholder maps to 0).
fence_levels = ["NA", "MnWw", "GdWo", "MnPrv", "GdPrv"]
cleanup_nums = {"fence": {level: code for code, level in enumerate(fence_levels)}}
print(train_test_cleaned["fence"].head())   # before encoding
train_test_cleaned.replace(cleanup_nums, inplace=True)
print(train_test_cleaned["fence"].head())   # after encoding

#### Accounting for outliers

In [None]:
# Build an all-float view of the numeric columns: select the int and float columns
# separately, cast the int ones to float, then join them side by side (int-derived first).
train_test_cleaned_int = train_test_cleaned.select_dtypes(include=int)
train_test_cleaned_flt = train_test_cleaned.select_dtypes(include=float)
train_test_cleaned_flt1 = train_test_cleaned_int.astype(float)
train_test_cleaned_num = pd.concat(
    [train_test_cleaned_flt1, train_test_cleaned_flt],
    axis=1,
    sort=False,
)

In [None]:
train_test_cleaned_num.hist(figsize=(20, 20));

In [None]:
train_test_cleaned_num.describe().T

In [None]:
def outlier(x,y):
    """Show the spread of a feature and its relationship with the log-transformed target.

    Prints the length of ``x`` together with the skewness of its non-null values, then
    draws two panels side by side: a vertical box plot of ``x`` and a regression plot of
    ``x`` against ``log1p(y)``.

    Parameters
    ----------
    x : feature values; must provide ``dropna()`` (assumed to be a pandas Series, as in
        the calls in this notebook).
    y : target values; transformed with ``np.log1p`` before plotting.

    Returns
    -------
    None. The figure is drawn on the current matplotlib backend.
    """
    # Skewness is computed on the non-null values only; the printed length still counts NaNs.
    skew_value = skew(x.dropna())
    log_y = np.log1p(y)
    print('sample length: %s  and skew: %s'%(len(x),skew_value))
    fig, axs = plt.subplots(1, 2, figsize=(8, 3))
    # Data vectors are passed by keyword: recent seaborn releases no longer accept them
    # positionally. Passing the feature as ``y`` keeps the box plot vertical.
    sns.boxplot(y=x, ax=axs[0])
    sns.regplot(x=x, y=log_y, ax=axs[1])
    plt.tight_layout()

In [None]:
outlier(train_test_cleaned.lot_frontage, train_test_cleaned.saleprice)

In [None]:
outlier(train_test_cleaned.overall_qual, train_test_cleaned.saleprice)

In [None]:
outlier(train_test_cleaned.overall_cond, train_test_cleaned.saleprice)

In [None]:
outlier(train_test_cleaned['1st_flr_sf'], train_test_cleaned.saleprice)

In [None]:
outlier(train_test_cleaned['2nd_flr_sf'], train_test_cleaned.saleprice)

In [None]:
outlier(train_test_cleaned.low_qual_fin_sf, train_test_cleaned.saleprice)

In [None]:
outlier(train_test_cleaned.gr_liv_area, train_test_cleaned.saleprice)

In [None]:
outlier(train_test_cleaned.wood_deck_sf, train_test_cleaned.saleprice)

In [None]:
outlier(train_test_cleaned.enclosed_porch, train_test_cleaned.saleprice)

In [None]:
outlier(train_test_cleaned.open_porch_sf, train_test_cleaned.saleprice)

In [None]:
outlier(train_test_cleaned['3ssn_porch'], train_test_cleaned.saleprice)

In [None]:
outlier(train_test_cleaned.screen_porch, train_test_cleaned.saleprice)

In [None]:
outlier(train_test_cleaned.exter_qual, train_test_cleaned.saleprice)

In [None]:
outlier(train_test_cleaned.exter_cond, train_test_cleaned.saleprice)

In [None]:
outlier(train_test_cleaned.heating_qc, train_test_cleaned.saleprice)

In [None]:
outlier(train_test_cleaned.electrical, train_test_cleaned.saleprice)

In [None]:
outlier(train_test_cleaned.mas_vnr_area, train_test_cleaned.saleprice)

In [None]:
outlier(train_test_cleaned.bsmtfin_sf_1, train_test_cleaned.saleprice)

In [None]:
outlier(train_test_cleaned.bsmtfin_sf_2, train_test_cleaned.saleprice)

In [None]:
outlier(train_test_cleaned.bsmt_unf_sf, train_test_cleaned.saleprice)

In [None]:
outlier(train_test_cleaned.total_bsmt_sf, train_test_cleaned.saleprice)

In [None]:
outlier(train_test_cleaned.year_built, train_test_cleaned.saleprice)

In [None]:
outlier(train_test_cleaned.yr_sold, train_test_cleaned.saleprice)

In [None]:
outlier(train_test_cleaned.garage_area, train_test_cleaned.saleprice)

In [None]:
outlier(train_test_cleaned['year_remod/add'], train_test_cleaned.saleprice)

In [None]:
# Rebuild the all-float numeric view of train_test_cleaned:
# int columns are selected and cast to float, float columns are selected as they are,
# and both groups are concatenated column-wise (int-derived columns first).
train_test_cleaned_int = train_test_cleaned.select_dtypes(include = int)
train_test_cleaned_flt1 = train_test_cleaned_int.astype(float)
train_test_cleaned_flt = train_test_cleaned.select_dtypes(include = float)
train_test_cleaned_num = pd.concat([train_test_cleaned_flt1, train_test_cleaned_flt], sort=False, axis=1)

#### Checking for multicollinearity

In [None]:
# Correlation matrix of the numeric columns, computed once and reused for the mask and the plot.
# Restricting to numeric dtypes keeps corr() from failing on the remaining string columns
# under recent pandas versions.
corr = train_test_cleaned.select_dtypes(include=[np.number]).corr()

# Hide the upper triangle (diagonal included) so each pair of features is shown only once.
mask = np.zeros_like(corr, dtype=bool)
mask[np.triu_indices_from(mask)] = True

plt.figure(figsize=(150,30))
sns.heatmap(corr, mask=mask, annot=True, square=True, cmap='Blues');

#### Human feature selection 

The following features are dropped for one of these reasons:
    - they would introduce multicollinearity into our model
    - they have little to no correlation with our target
    - they do not add significant value to our model

##### Multicollinearity

In [None]:
# Columns removed to limit multicollinearity, with the author's reason for each group.
multicollinear_cols = [
    # total_bsmt_sf can explain saleprice better
    '1st_flr_sf',
    '2nd_flr_sf',
    # gr_liv_area can explain saleprice better
    'totrms_abvgrd',
    # overall_qual can explain saleprice better
    'exter_qual',
    # garage_area shows the total square feet of a particular garage
    'garage_cars',
]
train_test_cleaned.drop(columns=multicollinear_cols, inplace=True)

##### Little to no correlation

In [None]:
# Columns removed for having little to no correlation with the target.
low_correlation_cols = [
    'pid',
    'utilities',
    'land_slope',
    'functional',
    'overall_cond',
    'exter_cond',
    'low_qual_fin_sf',
    'misc_val',
    'mo_sold',
    'fence',
]
train_test_cleaned.drop(columns=low_correlation_cols, inplace=True)

##### Not significant to model

In [None]:
# Columns removed as not significant to the model, with the author's reason for each group.
not_significant_cols = [
    # total_bsmt_sf includes these three variables
    'bsmtfin_sf_1',
    'bsmtfin_sf_2',
    'bsmt_unf_sf',
    # since misc_val does not add value to our target, there is no point keeping misc_feature
    'misc_feature',
    'street',
    'alley',
    'land_contour',
    'lot_config',
    # looking for >=2 conditions may affect saleprice more
    'condition_1',
    'exterior_1st',
    # can be easily replaced once bought over
    'mas_vnr_type',
    'foundation',
    'roof_matl',
    # garage_cond and garage_qual can be a better gauge to show how the overall condition of the garage is
    'garage_yr_blt',
]
train_test_cleaned.drop(columns=not_significant_cols, inplace=True)