In [None]:
# To upload our datasets from our working directory we need to mount our drive contents to the colab environment. 
# For the code to do so you can search “mount” in code snippets or use the code given below. 
# Our entire drive contents are now mounted on colab at the location “/gdrive”.

from google.colab import drive
drive.mount('/gdrive')
%cd /gdrive
!pip install vecstack

from vecstack import stacking
import pandas as pd
import numpy as np


from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import LinearSVR
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import SGDRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix,classification_report
from sklearn import metrics #Import scikit-learn metrics module for accuracy calculation


from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer
# Notebook-wide pandas display settings.
for _option, _value in (
    ('display.max_columns', None),  # never truncate the list of columns
    ('display.max_rows', None),     # never truncate the list of rows
    ('display.width', 1000),        # width of the output window, in characters
):
    pd.set_option(_option, _value)

import warnings
# Suppress every warning for the remainder of the session.
warnings.filterwarnings(action="ignore")




Mounted at /gdrive
/gdrive
Collecting vecstack
  Downloading https://files.pythonhosted.org/packages/d0/a1/b9a1e9e9e5a12078da1ab9788c7885e4c745358f7e57d5f94d9db6a4e898/vecstack-0.4.0.tar.gz
Building wheels for collected packages: vecstack
  Building wheel for vecstack (setup.py) ... [?25l[?25hdone
  Created wheel for vecstack: filename=vecstack-0.4.0-cp36-none-any.whl size=19880 sha256=272f8b7c7bb2841aeff4341a2ecc6cc255a4d0a348c469723a29225da7edd9de
  Stored in directory: /root/.cache/pip/wheels/5f/bb/4e/f6488433d53bc0684673d6845e5bf11a25240577c8151c140e
Successfully built vecstack
Installing collected packages: vecstack
Successfully installed vecstack-0.4.0


In [None]:


# Locations of the train and test CSV files on the mounted Drive.
trainfile = r'/gdrive/My Drive/ASU_MSBA/CIS_508/Team Assignment 1/train.csv'
testfile = r'/gdrive/My Drive/ASU_MSBA/CIS_508/Team Assignment 1/test.csv'

# Read each CSV into its own DataFrame.
trainData, testData = (pd.read_csv(_csv_path) for _csv_path in (trainfile, testfile))

# Report (rows, columns): train first, then test.
for _frame in (trainData, testData):
    print(_frame.shape)


(1460, 81)
(1459, 80)


In [None]:
# Separate the target from the predictors BEFORE missing-value substitution and
# one-hot encoding, so that those steps only ever see feature columns.
Target_Train_Cols = trainData["SalePrice"]  # target column as a Series
trainData = trainData.drop(columns="SalePrice")  # training data without the target

# Report (rows, columns): train first, then test.
for _frame in (trainData, testData):
    print(_frame.shape)

(1460, 80)
(1459, 80)


In [None]:
# Count the missing values in the target column.
Target_Train_Cols.isna().sum()

0

In [None]:
# DROP COLUMNS WITH LOTS OF MISSING VALUES
# (rows with lots of missing values could be dropped as well)

# Stack train (key 0) on top of test (key 1) so that exactly the SAME columns
# are dropped from both; the keys allow the two parts to be separated again.
combined_Data = pd.concat(objs=[trainData, testData], keys=[0, 1])

# Number of missing values in every column of the combined data.
combined_Data.isna().sum()



In [None]:
# Threshold for keeping a column: the minimum number of non-missing values it
# must contain, set to 60% of the rows in the combined data.
n_rows = len(combined_Data)
percent = int(0.6 * n_rows)
print(percent)

# Drop every column that has fewer than `percent` non-NaN entries.
td1 = combined_Data.dropna(axis='columns', thresh=percent)
print(td1.shape)

1751
(2919, 75)


In [None]:
# Inspect which of the remaining columns still contain missing values.
td1.isna().sum()

In [None]:
# Split the filtered frame back into its train (key 0) and test (key 1) parts.
train_data, test_data = (td1.xs(_part_key) for _part_key in (0, 1))

In [None]:


# Split the training columns by content: a column that holds at least one
# string value is treated as categorical, every other column as numeric.
_columns = list(train_data.columns)
_is_text = [train_data[_col].map(type).eq(str).any() for _col in _columns]
categorical_columns = [_col for _col, _text in zip(_columns, _is_text) if _text]
numeric_columns = [_col for _col, _text in zip(_columns, _is_text) if not _text]

# Numeric columns: replace NaN with the column mean.
imp1 = SimpleImputer(missing_values=np.nan, strategy='mean')
data_numeric = pd.DataFrame(
    imp1.fit_transform(train_data[numeric_columns]),
    columns=numeric_columns,
)

# Categorical columns: replace NaN with the most frequent value of the column.
imp2 = SimpleImputer(missing_values=np.nan, strategy='most_frequent')
data_categorical = pd.DataFrame(
    imp2.fit_transform(train_data[categorical_columns]),
    columns=categorical_columns,
)

# Imputed training features: numeric columns first, then categorical ones.
Xtrain = pd.concat([data_numeric, data_categorical], axis=1)

In [None]:

# Impute the test data with the preprocessing learned from the TRAINING data.
#
# `numeric_columns`, `categorical_columns`, `imp1` (mean) and `imp2`
# (most frequent) come from the training imputation cell. They are reused
# as-is, without re-deriving the column split or re-fitting on the test set, so
# that:
#   * train and test are split into numeric/categorical columns identically
#     and end up with the same column order, and
#   * missing test values are filled with statistics computed on the training
#     data, i.e. the same transformation the model is trained on.

# Numeric columns: fill NaN with the training-set column means.
data_numeric = pd.DataFrame(
    imp1.transform(test_data[numeric_columns]),
    columns=numeric_columns,
)

# Categorical columns: fill NaN with the training-set most frequent values.
data_categorical = pd.DataFrame(
    imp2.transform(test_data[categorical_columns]),
    columns=categorical_columns,
)

# Imputed test features: numeric columns first, then categorical ones.
Xtest = pd.concat([data_numeric, data_categorical], axis=1)

In [None]:
# ONE-HOT ENCODING OF CATEGORICAL VARIABLES
# List the columns of Xtrain whose dtype is neither float nor int; these are
# the non-numeric (categorical) features.
cat_cols = Xtrain.select_dtypes(exclude=['float', 'int']).columns
print(cat_cols.shape, cat_cols, sep='\n')

# Categorical columns that are encoded as numbers are not picked up above; their
# names have to be entered explicitly in a list and concatenated with this one.
# Disabled alternative: one-hot encoding on a common basis with pd.get_dummies
# (noted by the author as taking about 30 minutes):

###---combined_Data = pd.get_dummies(td1,train_cat_cols)
###---combined_Data.head(10)


(38,)
Index(['MSZoning', 'Street', 'LotShape', 'LandContour', 'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2', 'Heating', 'HeatingQC', 'CentralAir', 'Electrical', 'KitchenQual', 'Functional', 'GarageType', 'GarageFinish', 'GarageQual', 'GarageCond', 'PavedDrive', 'SaleType', 'SaleCondition'], dtype='object')


In [None]:
# Names of the categorical features that will be one-hot encoded.
categoricalFeatures = [
    "MSZoning", "Street", "LotShape", "LandContour", "Utilities", "LotConfig",
    "LandSlope", "Neighborhood", "Condition1", "Condition2", "BldgType",
    "HouseStyle", "RoofStyle", "RoofMatl", "Exterior1st", "Exterior2nd",
    "MasVnrType", "ExterQual", "ExterCond", "Foundation", "BsmtQual",
    "BsmtCond", "BsmtExposure", "BsmtFinType1", "BsmtFinType2", "Heating",
    "HeatingQC", "CentralAir", "Electrical", "KitchenQual", "Functional",
    "GarageType", "GarageFinish", "GarageQual", "GarageCond", "PavedDrive",
    "SaleType", "SaleCondition",
]


In [None]:
# One-hot encode the categorical variables of the TRAINING data (fit + transform).
# Categories that were not seen during fitting are ignored at transform time.
# NOTE(review): `sparse=` and `get_feature_names()` belong to the older
# scikit-learn API and were renamed in later releases; confirm against the
# installed version before upgrading.
ohe = OneHotEncoder(sparse=False, handle_unknown='ignore')
_encoded_train = ohe.fit_transform(Xtrain[categoricalFeatures])
Xcat = pd.DataFrame(_encoded_train, index=Xtrain.index, columns=ohe.get_feature_names())

# Swap the original categorical columns for their one-hot encoded counterparts.
Xtrain = pd.concat([Xtrain.drop(columns=categoricalFeatures), Xcat], axis=1)
Xtrain.sample(5)

Unnamed: 0,Id,MSSubClass,LotFrontage,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,TotRmsAbvGrd,Fireplaces,GarageYrBlt,GarageCars,GarageArea,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold,x0_C (all),x0_FV,x0_RH,x0_RL,x0_RM,x1_Grvl,x1_Pave,x2_IR1,x2_IR2,x2_IR3,x2_Reg,x3_Bnk,x3_HLS,x3_Low,x3_Lvl,x4_AllPub,x4_NoSeWa,x5_Corner,x5_CulDSac,x5_FR2,x5_FR3,x5_Inside,x6_Gtl,x6_Mod,x6_Sev,x7_Blmngtn,x7_Blueste,x7_BrDale,x7_BrkSide,x7_ClearCr,x7_CollgCr,x7_Crawfor,x7_Edwards,x7_Gilbert,x7_IDOTRR,x7_MeadowV,x7_Mitchel,x7_NAmes,x7_NPkVill,x7_NWAmes,x7_NoRidge,x7_NridgHt,x7_OldTown,x7_SWISU,x7_Sawyer,x7_SawyerW,x7_Somerst,x7_StoneBr,x7_Timber,x7_Veenker,x8_Artery,x8_Feedr,x8_Norm,x8_PosA,x8_PosN,x8_RRAe,x8_RRAn,x8_RRNe,x8_RRNn,x9_Artery,x9_Feedr,x9_Norm,x9_PosA,x9_PosN,x9_RRAe,x9_RRAn,x9_RRNn,x10_1Fam,x10_2fmCon,x10_Duplex,x10_Twnhs,x10_TwnhsE,x11_1.5Fin,x11_1.5Unf,x11_1Story,x11_2.5Fin,x11_2.5Unf,x11_2Story,x11_SFoyer,x11_SLvl,x12_Flat,x12_Gable,x12_Gambrel,x12_Hip,x12_Mansard,x12_Shed,x13_ClyTile,x13_CompShg,x13_Membran,x13_Metal,x13_Roll,x13_Tar&Grv,x13_WdShake,x13_WdShngl,x14_AsbShng,x14_AsphShn,x14_BrkComm,x14_BrkFace,x14_CBlock,x14_CemntBd,x14_HdBoard,x14_ImStucc,x14_MetalSd,x14_Plywood,x14_Stone,x14_Stucco,x14_VinylSd,x14_Wd Sdng,x14_WdShing,x15_AsbShng,x15_AsphShn,x15_Brk Cmn,x15_BrkFace,x15_CBlock,x15_CmentBd,x15_HdBoard,x15_ImStucc,x15_MetalSd,x15_Other,x15_Plywood,x15_Stone,x15_Stucco,x15_VinylSd,x15_Wd Sdng,x15_Wd 
Shng,x16_BrkCmn,x16_BrkFace,x16_None,x16_Stone,x17_Ex,x17_Fa,x17_Gd,x17_TA,x18_Ex,x18_Fa,x18_Gd,x18_Po,x18_TA,x19_BrkTil,x19_CBlock,x19_PConc,x19_Slab,x19_Stone,x19_Wood,x20_Ex,x20_Fa,x20_Gd,x20_TA,x21_Fa,x21_Gd,x21_Po,x21_TA,x22_Av,x22_Gd,x22_Mn,x22_No,x23_ALQ,x23_BLQ,x23_GLQ,x23_LwQ,x23_Rec,x23_Unf,x24_ALQ,x24_BLQ,x24_GLQ,x24_LwQ,x24_Rec,x24_Unf,x25_Floor,x25_GasA,x25_GasW,x25_Grav,x25_OthW,x25_Wall,x26_Ex,x26_Fa,x26_Gd,x26_Po,x26_TA,x27_N,x27_Y,x28_FuseA,x28_FuseF,x28_FuseP,x28_Mix,x28_SBrkr,x29_Ex,x29_Fa,x29_Gd,x29_TA,x30_Maj1,x30_Maj2,x30_Min1,x30_Min2,x30_Mod,x30_Sev,x30_Typ,x31_2Types,x31_Attchd,x31_Basment,x31_BuiltIn,x31_CarPort,x31_Detchd,x32_Fin,x32_RFn,x32_Unf,x33_Ex,x33_Fa,x33_Gd,x33_Po,x33_TA,x34_Ex,x34_Fa,x34_Gd,x34_Po,x34_TA,x35_N,x35_P,x35_Y,x36_COD,x36_CWD,x36_Con,x36_ConLD,x36_ConLI,x36_ConLw,x36_New,x36_Oth,x36_WD,x37_Abnorml,x37_AdjLand,x37_Alloca,x37_Family,x37_Normal,x37_Partial
1353,1354.0,50.0,56.0,14720.0,8.0,5.0,1995.0,1996.0,579.0,816.0,0.0,1217.0,2033.0,2053.0,1185.0,0.0,3238.0,1.0,0.0,2.0,1.0,4.0,1.0,9.0,1.0,1996.0,3.0,666.0,283.0,86.0,0.0,0.0,0.0,0.0,0.0,3.0,2010.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
1406,1407.0,85.0,70.0,8445.0,5.0,7.0,1972.0,2007.0,0.0,656.0,0.0,112.0,768.0,768.0,0.0,0.0,768.0,1.0,0.0,1.0,0.0,2.0,1.0,5.0,0.0,1988.0,2.0,396.0,58.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,2009.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
1408,1409.0,70.0,60.0,7740.0,4.0,7.0,1910.0,1950.0,0.0,0.0,0.0,622.0,622.0,741.0,622.0,0.0,1363.0,0.0,0.0,1.0,0.0,3.0,1.0,6.0,0.0,1966.0,2.0,528.0,0.0,0.0,0.0,0.0,168.0,0.0,0.0,6.0,2010.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
1429,1430.0,20.0,70.049958,12546.0,6.0,7.0,1981.0,1981.0,310.0,678.0,0.0,762.0,1440.0,1440.0,0.0,0.0,1440.0,0.0,0.0,2.0,0.0,3.0,1.0,7.0,1.0,1981.0,2.0,467.0,0.0,0.0,99.0,0.0,0.0,0.0,0.0,4.0,2007.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
1034,1035.0,30.0,50.0,6305.0,5.0,7.0,1938.0,1950.0,0.0,0.0,0.0,920.0,920.0,954.0,0.0,0.0,954.0,0.0,0.0,1.0,0.0,2.0,1.0,5.0,1.0,1938.0,1.0,240.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,2007.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0


In [None]:
# One-hot encode the categorical variables of the TEST data (transform only),
# using the encoder that was fitted on the training data.
_encoded_test = ohe.transform(Xtest[categoricalFeatures])
Xcat = pd.DataFrame(_encoded_test, index=Xtest.index, columns=ohe.get_feature_names())

# Swap the original categorical columns for their one-hot encoded counterparts.
Xtest = pd.concat([Xtest.drop(columns=categoricalFeatures), Xcat], axis=1)
Xtest.sample(5)

Unnamed: 0,Id,MSSubClass,LotFrontage,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,TotRmsAbvGrd,Fireplaces,GarageYrBlt,GarageCars,GarageArea,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold,x0_C (all),x0_FV,x0_RH,x0_RL,x0_RM,x1_Grvl,x1_Pave,x2_IR1,x2_IR2,x2_IR3,x2_Reg,x3_Bnk,x3_HLS,x3_Low,x3_Lvl,x4_AllPub,x4_NoSeWa,x5_Corner,x5_CulDSac,x5_FR2,x5_FR3,x5_Inside,x6_Gtl,x6_Mod,x6_Sev,x7_Blmngtn,x7_Blueste,x7_BrDale,x7_BrkSide,x7_ClearCr,x7_CollgCr,x7_Crawfor,x7_Edwards,x7_Gilbert,x7_IDOTRR,x7_MeadowV,x7_Mitchel,x7_NAmes,x7_NPkVill,x7_NWAmes,x7_NoRidge,x7_NridgHt,x7_OldTown,x7_SWISU,x7_Sawyer,x7_SawyerW,x7_Somerst,x7_StoneBr,x7_Timber,x7_Veenker,x8_Artery,x8_Feedr,x8_Norm,x8_PosA,x8_PosN,x8_RRAe,x8_RRAn,x8_RRNe,x8_RRNn,x9_Artery,x9_Feedr,x9_Norm,x9_PosA,x9_PosN,x9_RRAe,x9_RRAn,x9_RRNn,x10_1Fam,x10_2fmCon,x10_Duplex,x10_Twnhs,x10_TwnhsE,x11_1.5Fin,x11_1.5Unf,x11_1Story,x11_2.5Fin,x11_2.5Unf,x11_2Story,x11_SFoyer,x11_SLvl,x12_Flat,x12_Gable,x12_Gambrel,x12_Hip,x12_Mansard,x12_Shed,x13_ClyTile,x13_CompShg,x13_Membran,x13_Metal,x13_Roll,x13_Tar&Grv,x13_WdShake,x13_WdShngl,x14_AsbShng,x14_AsphShn,x14_BrkComm,x14_BrkFace,x14_CBlock,x14_CemntBd,x14_HdBoard,x14_ImStucc,x14_MetalSd,x14_Plywood,x14_Stone,x14_Stucco,x14_VinylSd,x14_Wd Sdng,x14_WdShing,x15_AsbShng,x15_AsphShn,x15_Brk Cmn,x15_BrkFace,x15_CBlock,x15_CmentBd,x15_HdBoard,x15_ImStucc,x15_MetalSd,x15_Other,x15_Plywood,x15_Stone,x15_Stucco,x15_VinylSd,x15_Wd Sdng,x15_Wd 
Shng,x16_BrkCmn,x16_BrkFace,x16_None,x16_Stone,x17_Ex,x17_Fa,x17_Gd,x17_TA,x18_Ex,x18_Fa,x18_Gd,x18_Po,x18_TA,x19_BrkTil,x19_CBlock,x19_PConc,x19_Slab,x19_Stone,x19_Wood,x20_Ex,x20_Fa,x20_Gd,x20_TA,x21_Fa,x21_Gd,x21_Po,x21_TA,x22_Av,x22_Gd,x22_Mn,x22_No,x23_ALQ,x23_BLQ,x23_GLQ,x23_LwQ,x23_Rec,x23_Unf,x24_ALQ,x24_BLQ,x24_GLQ,x24_LwQ,x24_Rec,x24_Unf,x25_Floor,x25_GasA,x25_GasW,x25_Grav,x25_OthW,x25_Wall,x26_Ex,x26_Fa,x26_Gd,x26_Po,x26_TA,x27_N,x27_Y,x28_FuseA,x28_FuseF,x28_FuseP,x28_Mix,x28_SBrkr,x29_Ex,x29_Fa,x29_Gd,x29_TA,x30_Maj1,x30_Maj2,x30_Min1,x30_Min2,x30_Mod,x30_Sev,x30_Typ,x31_2Types,x31_Attchd,x31_Basment,x31_BuiltIn,x31_CarPort,x31_Detchd,x32_Fin,x32_RFn,x32_Unf,x33_Ex,x33_Fa,x33_Gd,x33_Po,x33_TA,x34_Ex,x34_Fa,x34_Gd,x34_Po,x34_TA,x35_N,x35_P,x35_Y,x36_COD,x36_CWD,x36_Con,x36_ConLD,x36_ConLI,x36_ConLw,x36_New,x36_Oth,x36_WD,x37_Abnorml,x37_AdjLand,x37_Alloca,x37_Family,x37_Normal,x37_Partial
1367,2828.0,60.0,88.0,12128.0,6.0,4.0,1989.0,1989.0,232.0,549.0,0.0,319.0,868.0,1313.0,1246.0,0.0,2559.0,0.0,0.0,2.0,1.0,4.0,1.0,9.0,1.0,1989.0,2.0,506.0,0.0,245.0,0.0,0.0,168.0,0.0,0.0,11.0,2006.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0
993,2454.0,80.0,76.0,11800.0,4.0,7.0,1949.0,2002.0,0.0,0.0,0.0,0.0,0.0,1382.0,0.0,0.0,1382.0,0.0,0.0,2.0,0.0,1.0,1.0,6.0,1.0,1957.0,1.0,384.0,0.0,40.0,0.0,0.0,0.0,0.0,0.0,5.0,2007.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
1113,2574.0,20.0,70.0,18044.0,8.0,5.0,1986.0,1986.0,0.0,0.0,0.0,279.0,279.0,2726.0,0.0,0.0,2726.0,0.0,0.0,2.0,1.0,2.0,1.0,6.0,1.0,1986.0,2.0,691.0,216.0,64.0,169.0,0.0,0.0,228.0,0.0,8.0,2007.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
656,2117.0,50.0,50.0,6000.0,6.0,7.0,1937.0,2000.0,0.0,201.0,162.0,462.0,825.0,825.0,672.0,0.0,1497.0,0.0,0.0,2.0,0.0,3.0,1.0,5.0,0.0,2004.0,1.0,672.0,272.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,2008.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
885,2346.0,20.0,72.0,8640.0,8.0,5.0,2007.0,2007.0,0.0,24.0,0.0,1314.0,1338.0,1338.0,0.0,0.0,1338.0,0.0,0.0,2.0,0.0,3.0,1.0,6.0,0.0,2007.0,2.0,598.0,0.0,141.0,0.0,0.0,0.0,0.0,0.0,11.0,2007.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [None]:
#The below function returns a list of categorical features which are not numeric. 
cat_cols = Xtrain.select_dtypes(exclude=['float','int']).columns #selecting the categorical columns
print(cat_cols.shape)
print(cat_cols)


(0,)
Index([], dtype='object')


In [None]:
# Print a structural summary of the test features (index range, number of
# columns, dtype counts, memory usage) to confirm every column is numeric.
Xtest.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1459 entries, 0 to 1458
Columns: 271 entries, Id to x37_Partial
dtypes: float64(271)
memory usage: 3.0 MB


In [None]:
# Preview the first five rows of the encoded training features.
Xtrain.head(n=5)

Unnamed: 0,Id,MSSubClass,LotFrontage,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,TotRmsAbvGrd,Fireplaces,GarageYrBlt,GarageCars,GarageArea,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold,x0_C (all),x0_FV,x0_RH,x0_RL,x0_RM,x1_Grvl,x1_Pave,x2_IR1,x2_IR2,x2_IR3,x2_Reg,x3_Bnk,x3_HLS,x3_Low,x3_Lvl,x4_AllPub,x4_NoSeWa,x5_Corner,x5_CulDSac,x5_FR2,x5_FR3,x5_Inside,x6_Gtl,x6_Mod,x6_Sev,x7_Blmngtn,x7_Blueste,x7_BrDale,x7_BrkSide,x7_ClearCr,x7_CollgCr,x7_Crawfor,x7_Edwards,x7_Gilbert,x7_IDOTRR,x7_MeadowV,x7_Mitchel,x7_NAmes,x7_NPkVill,x7_NWAmes,x7_NoRidge,x7_NridgHt,x7_OldTown,x7_SWISU,x7_Sawyer,x7_SawyerW,x7_Somerst,x7_StoneBr,x7_Timber,x7_Veenker,x8_Artery,x8_Feedr,x8_Norm,x8_PosA,x8_PosN,x8_RRAe,x8_RRAn,x8_RRNe,x8_RRNn,x9_Artery,x9_Feedr,x9_Norm,x9_PosA,x9_PosN,x9_RRAe,x9_RRAn,x9_RRNn,x10_1Fam,x10_2fmCon,x10_Duplex,x10_Twnhs,x10_TwnhsE,x11_1.5Fin,x11_1.5Unf,x11_1Story,x11_2.5Fin,x11_2.5Unf,x11_2Story,x11_SFoyer,x11_SLvl,x12_Flat,x12_Gable,x12_Gambrel,x12_Hip,x12_Mansard,x12_Shed,x13_ClyTile,x13_CompShg,x13_Membran,x13_Metal,x13_Roll,x13_Tar&Grv,x13_WdShake,x13_WdShngl,x14_AsbShng,x14_AsphShn,x14_BrkComm,x14_BrkFace,x14_CBlock,x14_CemntBd,x14_HdBoard,x14_ImStucc,x14_MetalSd,x14_Plywood,x14_Stone,x14_Stucco,x14_VinylSd,x14_Wd Sdng,x14_WdShing,x15_AsbShng,x15_AsphShn,x15_Brk Cmn,x15_BrkFace,x15_CBlock,x15_CmentBd,x15_HdBoard,x15_ImStucc,x15_MetalSd,x15_Other,x15_Plywood,x15_Stone,x15_Stucco,x15_VinylSd,x15_Wd Sdng,x15_Wd 
Shng,x16_BrkCmn,x16_BrkFace,x16_None,x16_Stone,x17_Ex,x17_Fa,x17_Gd,x17_TA,x18_Ex,x18_Fa,x18_Gd,x18_Po,x18_TA,x19_BrkTil,x19_CBlock,x19_PConc,x19_Slab,x19_Stone,x19_Wood,x20_Ex,x20_Fa,x20_Gd,x20_TA,x21_Fa,x21_Gd,x21_Po,x21_TA,x22_Av,x22_Gd,x22_Mn,x22_No,x23_ALQ,x23_BLQ,x23_GLQ,x23_LwQ,x23_Rec,x23_Unf,x24_ALQ,x24_BLQ,x24_GLQ,x24_LwQ,x24_Rec,x24_Unf,x25_Floor,x25_GasA,x25_GasW,x25_Grav,x25_OthW,x25_Wall,x26_Ex,x26_Fa,x26_Gd,x26_Po,x26_TA,x27_N,x27_Y,x28_FuseA,x28_FuseF,x28_FuseP,x28_Mix,x28_SBrkr,x29_Ex,x29_Fa,x29_Gd,x29_TA,x30_Maj1,x30_Maj2,x30_Min1,x30_Min2,x30_Mod,x30_Sev,x30_Typ,x31_2Types,x31_Attchd,x31_Basment,x31_BuiltIn,x31_CarPort,x31_Detchd,x32_Fin,x32_RFn,x32_Unf,x33_Ex,x33_Fa,x33_Gd,x33_Po,x33_TA,x34_Ex,x34_Fa,x34_Gd,x34_Po,x34_TA,x35_N,x35_P,x35_Y,x36_COD,x36_CWD,x36_Con,x36_ConLD,x36_ConLI,x36_ConLw,x36_New,x36_Oth,x36_WD,x37_Abnorml,x37_AdjLand,x37_Alloca,x37_Family,x37_Normal,x37_Partial
0,1.0,60.0,65.0,8450.0,7.0,5.0,2003.0,2003.0,196.0,706.0,0.0,150.0,856.0,856.0,854.0,0.0,1710.0,1.0,0.0,2.0,1.0,3.0,1.0,8.0,0.0,2003.0,2.0,548.0,0.0,61.0,0.0,0.0,0.0,0.0,0.0,2.0,2008.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
1,2.0,20.0,80.0,9600.0,6.0,8.0,1976.0,1976.0,0.0,978.0,0.0,284.0,1262.0,1262.0,0.0,0.0,1262.0,0.0,1.0,2.0,0.0,3.0,1.0,6.0,1.0,1976.0,2.0,460.0,298.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,2007.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
2,3.0,60.0,68.0,11250.0,7.0,5.0,2001.0,2002.0,162.0,486.0,0.0,434.0,920.0,920.0,866.0,0.0,1786.0,1.0,0.0,2.0,1.0,3.0,1.0,6.0,1.0,2001.0,2.0,608.0,0.0,42.0,0.0,0.0,0.0,0.0,0.0,9.0,2008.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
3,4.0,70.0,60.0,9550.0,7.0,5.0,1915.0,1970.0,0.0,216.0,0.0,540.0,756.0,961.0,756.0,0.0,1717.0,1.0,0.0,1.0,0.0,3.0,1.0,7.0,1.0,1998.0,3.0,642.0,0.0,35.0,272.0,0.0,0.0,0.0,0.0,2.0,2006.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0
4,5.0,60.0,84.0,14260.0,8.0,5.0,2000.0,2000.0,350.0,655.0,0.0,490.0,1145.0,1145.0,1053.0,0.0,2198.0,1.0,0.0,2.0,1.0,4.0,1.0,9.0,1.0,2000.0,3.0,836.0,192.0,84.0,0.0,0.0,0.0,0.0,0.0,12.0,2008.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0


In [None]:
# Print (rows, columns) for the training frame, then the test frame, so the
# feature counts of the two can be compared at a glance.
for feature_frame in (Xtrain, Xtest):
    print(feature_frame.shape)

(1460, 271)
(1459, 271)


In [None]:
# Build the frames that get exported: the test frame is exported as-is
# (TestData is just another name for Xtest, no copy is made), while the
# training frame gets the target column(s) appended on the right.
TestData = Xtest
TrainData = pd.concat((Xtrain, Target_Train_Cols), axis='columns')

In [None]:
# Persist the preprocessed train/test frames to Google Drive.
# NOTE: DataFrame.to_csv returns None when it is given a path, so the two
# names assigned here only ever hold None.
export_csv = TrainData.to_csv(
    path_or_buf=r'/gdrive/My Drive/ASU_MSBA/CIS_508/Team Assignment 1/Preprocess_Train.csv'
)
exporttest_csv = TestData.to_csv(
    path_or_buf=r'/gdrive/My Drive/ASU_MSBA/CIS_508/Team Assignment 1/Preprocess_Test.csv'
)

In [None]:
# The modelling cells below use two spellings for the same objects
# (X_train/Xtrain, X_test/Xtest, y_train/Ytrain). Bind all of them here;
# these are plain aliases, not copies.
X_train, X_test = Xtrain, Xtest
y_train = Ytrain = Target_Train_Cols

In [None]:
# Show the first five training rows again, right before modelling starts.
Xtrain.head(n=5)


Unnamed: 0,Id,MSSubClass,LotFrontage,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,TotRmsAbvGrd,Fireplaces,GarageYrBlt,GarageCars,GarageArea,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold,x0_C (all),x0_FV,x0_RH,x0_RL,x0_RM,x1_Grvl,x1_Pave,x2_IR1,x2_IR2,x2_IR3,x2_Reg,x3_Bnk,x3_HLS,x3_Low,x3_Lvl,x4_AllPub,x4_NoSeWa,x5_Corner,x5_CulDSac,x5_FR2,x5_FR3,x5_Inside,x6_Gtl,x6_Mod,x6_Sev,x7_Blmngtn,x7_Blueste,x7_BrDale,x7_BrkSide,x7_ClearCr,x7_CollgCr,x7_Crawfor,x7_Edwards,x7_Gilbert,x7_IDOTRR,x7_MeadowV,x7_Mitchel,x7_NAmes,x7_NPkVill,x7_NWAmes,x7_NoRidge,x7_NridgHt,x7_OldTown,x7_SWISU,x7_Sawyer,x7_SawyerW,x7_Somerst,x7_StoneBr,x7_Timber,x7_Veenker,x8_Artery,x8_Feedr,x8_Norm,x8_PosA,x8_PosN,x8_RRAe,x8_RRAn,x8_RRNe,x8_RRNn,x9_Artery,x9_Feedr,x9_Norm,x9_PosA,x9_PosN,x9_RRAe,x9_RRAn,x9_RRNn,x10_1Fam,x10_2fmCon,x10_Duplex,x10_Twnhs,x10_TwnhsE,x11_1.5Fin,x11_1.5Unf,x11_1Story,x11_2.5Fin,x11_2.5Unf,x11_2Story,x11_SFoyer,x11_SLvl,x12_Flat,x12_Gable,x12_Gambrel,x12_Hip,x12_Mansard,x12_Shed,x13_ClyTile,x13_CompShg,x13_Membran,x13_Metal,x13_Roll,x13_Tar&Grv,x13_WdShake,x13_WdShngl,x14_AsbShng,x14_AsphShn,x14_BrkComm,x14_BrkFace,x14_CBlock,x14_CemntBd,x14_HdBoard,x14_ImStucc,x14_MetalSd,x14_Plywood,x14_Stone,x14_Stucco,x14_VinylSd,x14_Wd Sdng,x14_WdShing,x15_AsbShng,x15_AsphShn,x15_Brk Cmn,x15_BrkFace,x15_CBlock,x15_CmentBd,x15_HdBoard,x15_ImStucc,x15_MetalSd,x15_Other,x15_Plywood,x15_Stone,x15_Stucco,x15_VinylSd,x15_Wd Sdng,x15_Wd 
Shng,x16_BrkCmn,x16_BrkFace,x16_None,x16_Stone,x17_Ex,x17_Fa,x17_Gd,x17_TA,x18_Ex,x18_Fa,x18_Gd,x18_Po,x18_TA,x19_BrkTil,x19_CBlock,x19_PConc,x19_Slab,x19_Stone,x19_Wood,x20_Ex,x20_Fa,x20_Gd,x20_TA,x21_Fa,x21_Gd,x21_Po,x21_TA,x22_Av,x22_Gd,x22_Mn,x22_No,x23_ALQ,x23_BLQ,x23_GLQ,x23_LwQ,x23_Rec,x23_Unf,x24_ALQ,x24_BLQ,x24_GLQ,x24_LwQ,x24_Rec,x24_Unf,x25_Floor,x25_GasA,x25_GasW,x25_Grav,x25_OthW,x25_Wall,x26_Ex,x26_Fa,x26_Gd,x26_Po,x26_TA,x27_N,x27_Y,x28_FuseA,x28_FuseF,x28_FuseP,x28_Mix,x28_SBrkr,x29_Ex,x29_Fa,x29_Gd,x29_TA,x30_Maj1,x30_Maj2,x30_Min1,x30_Min2,x30_Mod,x30_Sev,x30_Typ,x31_2Types,x31_Attchd,x31_Basment,x31_BuiltIn,x31_CarPort,x31_Detchd,x32_Fin,x32_RFn,x32_Unf,x33_Ex,x33_Fa,x33_Gd,x33_Po,x33_TA,x34_Ex,x34_Fa,x34_Gd,x34_Po,x34_TA,x35_N,x35_P,x35_Y,x36_COD,x36_CWD,x36_Con,x36_ConLD,x36_ConLI,x36_ConLw,x36_New,x36_Oth,x36_WD,x37_Abnorml,x37_AdjLand,x37_Alloca,x37_Family,x37_Normal,x37_Partial
0,1.0,60.0,65.0,8450.0,7.0,5.0,2003.0,2003.0,196.0,706.0,0.0,150.0,856.0,856.0,854.0,0.0,1710.0,1.0,0.0,2.0,1.0,3.0,1.0,8.0,0.0,2003.0,2.0,548.0,0.0,61.0,0.0,0.0,0.0,0.0,0.0,2.0,2008.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
1,2.0,20.0,80.0,9600.0,6.0,8.0,1976.0,1976.0,0.0,978.0,0.0,284.0,1262.0,1262.0,0.0,0.0,1262.0,0.0,1.0,2.0,0.0,3.0,1.0,6.0,1.0,1976.0,2.0,460.0,298.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,2007.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
2,3.0,60.0,68.0,11250.0,7.0,5.0,2001.0,2002.0,162.0,486.0,0.0,434.0,920.0,920.0,866.0,0.0,1786.0,1.0,0.0,2.0,1.0,3.0,1.0,6.0,1.0,2001.0,2.0,608.0,0.0,42.0,0.0,0.0,0.0,0.0,0.0,9.0,2008.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
3,4.0,70.0,60.0,9550.0,7.0,5.0,1915.0,1970.0,0.0,216.0,0.0,540.0,756.0,961.0,756.0,0.0,1717.0,1.0,0.0,1.0,0.0,3.0,1.0,7.0,1.0,1998.0,3.0,642.0,0.0,35.0,272.0,0.0,0.0,0.0,0.0,2.0,2006.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0
4,5.0,60.0,84.0,14260.0,8.0,5.0,2000.0,2000.0,350.0,655.0,0.0,490.0,1145.0,1145.0,1053.0,0.0,2198.0,1.0,0.0,2.0,1.0,4.0,1.0,9.0,1.0,2000.0,3.0,836.0,192.0,84.0,0.0,0.0,0.0,0.0,0.0,12.0,2008.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0


In [None]:
# ---- Decision tree regressor: untuned baseline ----
# Fit a DecisionTreeRegressor with library-default settings on the training
# data, then predict for both the training rows and the test rows.
dt = DecisionTreeRegressor().fit(X_train, y_train)
rgr_predict_Train = dt.predict(X_train)
rgr_predict_Test = dt.predict(X_test)

In [None]:

# Training-set error of the default decision tree.
# mean_squared_error returns the MSE, so take the square root to make the
# printed value match its "RMSE" label.
train_rmse = np.sqrt(mean_squared_error(y_train, rgr_predict_Train))
print("RMSE (training) for Decision Tree:{0:10f}".format(train_rmse))

# 5-fold cross-validation of the same estimator. No `scoring` argument is
# given, so the estimator's own default score is used (R^2 for scikit-learn
# regressors), not an error metric.
scores = cross_val_score(dt, X_train, y_train, cv=5)
print("Mean_score=", scores.mean())

RMSE (training) for Decision Tree:  0.000000
Mean_score= 0.7018089607000488


In [None]:
# Write the default decision tree's test predictions to Drive as a
# single-column ('TARGET') CSV. to_csv returns None when given a path, so
# export_csv ends up as None.
df_DT = pd.DataFrame({'TARGET': rgr_predict_Test})
export_csv = df_DT.to_csv(
    path_or_buf=r'/gdrive/My Drive/ASU_MSBA/CIS_508/Team Assignment 1/DT_dfl_Test.csv'
)

In [None]:
# ---- Decision tree regressor: hyperparameter tuning, randomized search ----
# Samples 25 candidate settings from the grid below, scores each one with
# 5-fold cross-validation, and keeps the best setting in rand_parm.
# `parameters` is left in the namespace on purpose: the grid-search cell that
# follows reuses it.

import time
start_time = time.time()

print("RandomizedSearchCV-Decision tree")
parameters = dict(
    max_depth=range(11, 25, 2),
    min_samples_leaf=range(10, 50, 10),
    criterion=['mse', 'friedman_mse', 'mae'],
)
dt_random = RandomizedSearchCV(dt, param_distributions=parameters, n_iter=25, cv=5)
dt_random.fit(Xtrain, Ytrain)
rand_parm = dt_random.best_params_
print(rand_parm)


# Wall-clock time spent in this cell.
print("--- %s seconds ---" % (time.time() - start_time))

RandomizedSearchCV-Decision tree
{'min_samples_leaf': 20, 'max_depth': 15, 'criterion': 'mse'}
--- 16.299522876739502 seconds ---


In [None]:
# ---- Decision tree regressor: hyperparameter tuning, exhaustive grid search ----
# Evaluates every combination in `parameters`. This cell does not define the
# grid itself: it uses whatever `parameters` currently holds, so it must be run
# directly after the decision-tree randomized-search cell.

import time
start_time = time.time()

print("GridSearchCV-Decision tree")
# cv is given explicitly so this search uses the same 5-fold protocol as the
# randomized searches and cross_val_score calls in this notebook, rather than
# a default that depends on the installed scikit-learn version.
dt_grid = GridSearchCV(dt, parameters, cv=5)
dt_grid.fit(Xtrain, Ytrain)
grid_parm1 = dt_grid.best_params_
print(grid_parm1)


# Wall-clock time spent in this cell.
print("--- %s seconds ---" % (time.time() - start_time))

In [None]:
# Rebuild the decision tree twice, once with the best randomized-search
# settings and once with the best grid-search settings, then fit each on the
# full training set and predict the test set.
# NOTE: rand_parm is overwritten by later tuning cells; run this cell before
# them so it still holds the decision-tree settings.
dtRand, dtGrid = (
    DecisionTreeRegressor(**best_settings)
    for best_settings in (rand_parm, grid_parm1)
)

dtRand_predict = dtRand.fit(Xtrain, Ytrain).predict(Xtest)
dtGrid_predict = dtGrid.fit(Xtrain, Ytrain).predict(Xtest)



In [None]:
# 5-fold cross-validation of the grid-search-tuned decision tree; the
# individual fold scores are printed rather than their mean.
scores = cross_val_score(estimator=dtGrid, X=X_train, y=y_train, cv=5)
print("scores=", scores)

scores= [0.79805678 0.79159572 0.77157205 0.77676235 0.72730272]


In [None]:
# Write the grid-search-tuned decision tree's test predictions to Drive as a
# single-column ('TARGET') CSV.
HT_DT = pd.DataFrame({'TARGET': dtGrid_predict})
export_csv = HT_DT.to_csv(
    path_or_buf=r'/gdrive/My Drive/ASU_MSBA/CIS_508/Team Assignment 1/DT_hptl_Test.csv'
)

In [None]:
# ---- Random forest regressor: untuned baseline ----
# Fit a RandomForestRegressor with library-default settings, then predict for
# both the training rows and the test rows.
rfr = RandomForestRegressor().fit(X_train, y_train)
rfr_predict_Train = rfr.predict(X_train)
rfr_predict_Test = rfr.predict(X_test)



In [None]:
# Write the default random forest's test predictions to Drive as a
# single-column ('TARGET') CSV.
df_RF = pd.DataFrame({'TARGET': rfr_predict_Test})
export_csv = df_RF.to_csv(
    path_or_buf=r'/gdrive/My Drive/ASU_MSBA/CIS_508/Team Assignment 1/RF_dfl_Test.csv'
)

In [None]:
# Cross-validation score: 5-fold CV of the default random forest, reporting
# the mean of the fold scores.
scores = cross_val_score(estimator=rfr, X=X_train, y=y_train, cv=5)
print("Mean_score=", scores.mean())

Mean_score= 0.8531057887851246


In [None]:
# ---- Random forest regressor: hyperparameter tuning, randomized search ----
# Samples 5 candidate settings from the grid below, scores each with 5-fold
# cross-validation, and stores the best setting in rand_parm (replacing the
# value left there by the previous tuning cell).

import time
start_time = time.time()

print("RandomizedSearchCV-RF")
parameters = dict(
    max_depth=range(10, 40, 4),
    criterion=['mse', 'mae'],
)
rfr_random = RandomizedSearchCV(rfr, param_distributions=parameters, n_iter=5, cv=5)
rfr_random.fit(Xtrain, Ytrain)
rand_parm = rfr_random.best_params_
print(rand_parm)



# Wall-clock time spent in this cell.
print("--- %s seconds ---" % (time.time() - start_time))

RandomizedSearchCV-RF
{'max_depth': 14, 'criterion': 'mae'}
--- 507.01677417755127 seconds ---


In [None]:
# Rebuild the random forest with the best settings found by the randomized
# search, fit it on the full training set, and predict the test set.
# rand_parm must still hold the random-forest result when this cell runs.
rfrRand = RandomForestRegressor(**rand_parm).fit(Xtrain, Ytrain)
rfrRand_predict = rfrRand.predict(Xtest)

# 5-fold cross-validation of the tuned forest (mean of the fold scores).
scores = cross_val_score(estimator=rfrRand, X=X_train, y=y_train, cv=5)
print("Mean_score=", scores.mean())

Mean_score= 0.8617599539717877


In [None]:
# Write the tuned random forest's test predictions to Drive as a
# single-column ('TARGET') CSV.
df_rfr = pd.DataFrame({'TARGET': rfrRand_predict})
export_csv = df_rfr.to_csv(
    path_or_buf=r'/gdrive/My Drive/ASU_MSBA/CIS_508/Team Assignment 1/RF_HPT_Test.csv'
)

In [None]:
# ---- Multilayer perceptron regressor: untuned baseline ----
# Fit an MLPRegressor with library-default settings and predict the test set.
# NOTE(review): the features are passed in unscaled; confirm that is intended.
mlp = MLPRegressor().fit(X_train, y_train)
mlp_predict = mlp.predict(X_test)

# 5-fold cross-validation (mean of the fold scores).
scores = cross_val_score(estimator=mlp, X=X_train, y=y_train, cv=5)
print("Mean_score=", scores.mean())

# Write the test predictions to Drive as a single-column ('TARGET') CSV.
df_MLP = pd.DataFrame({'TARGET': mlp_predict})
export_csv = df_MLP.to_csv(
    path_or_buf=r'/gdrive/My Drive/ASU_MSBA/CIS_508/Team Assignment 1/MLP_dfl_Test.csv'
)

Mean_score= 0.628989697830497


In [None]:
# ---- Multilayer perceptron: hyperparameter tuning, randomized search ----
# Samples 10 candidate settings from the grid below, scores each with 5-fold
# cross-validation, and stores the best setting in rand_parm (replacing the
# value left there by the previous tuning cell).

import time
start_time = time.time()

print("RandomizedSearchCV-MLP")
parameters = dict(
    activation=['identity', 'logistic', 'tanh', 'relu'],
    solver=['lbfgs', 'sgd', 'adam'],
    learning_rate=['constant', 'invscaling', 'adaptive'],
)
mlp_random = RandomizedSearchCV(mlp, param_distributions=parameters, n_iter=10, cv=5)
mlp_random.fit(X_train, y_train)
rand_parm = mlp_random.best_params_
print(rand_parm)



# Wall-clock time spent in this cell.
print("--- %s seconds ---" % (time.time() - start_time))

RandomizedSearchCV-MLP
{'solver': 'lbfgs', 'learning_rate': 'invscaling', 'activation': 'relu'}
--- 130.8493847846985 seconds ---


In [None]:
# Rebuild the multilayer perceptron with the best settings found by the
# randomized search, fit it on the full training set, and predict the test set.
# rand_parm must still hold the MLP result when this cell runs.
mlpRand = MLPRegressor(**rand_parm).fit(Xtrain, Ytrain)
mlpRand_predict = mlpRand.predict(Xtest)

# 5-fold cross-validation of the tuned network (mean of the fold scores).
scores = cross_val_score(estimator=mlpRand, X=X_train, y=y_train, cv=5)
print("Mean_score=", scores.mean())


# Write the test predictions to Drive as a single-column ('TARGET') CSV.
hpt_MLP = pd.DataFrame({'TARGET': mlpRand_predict})
export_csv = hpt_MLP.to_csv(
    path_or_buf=r'/gdrive/My Drive/ASU_MSBA/CIS_508/Team Assignment 1/MLP_hpt_Test.csv'
)

Mean_score= 0.6936826128709581


In [None]:
# ---- Linear support vector regressor: untuned baseline ----
# Fit a LinearSVR with library-default settings and predict the test set.
# NOTE(review): the features are passed in unscaled; confirm that is intended.
svr = LinearSVR().fit(X_train, y_train)
svr_predict = svr.predict(X_test)

# 5-fold cross-validation (mean of the fold scores).
scores = cross_val_score(estimator=svr, X=X_train, y=y_train, cv=5)
print("Mean_score=", scores.mean())

# Write the test predictions to Drive as a single-column ('TARGET') CSV.
df_svr = pd.DataFrame({'TARGET': svr_predict})
export_csv = df_svr.to_csv(
    path_or_buf=r'/gdrive/My Drive/ASU_MSBA/CIS_508/Team Assignment 1/svr_dfl_Test.csv'
)

Mean_score= 0.02289839613839382


In [None]:
# ---- Linear SVR: hyperparameter tuning, randomized search ----
# Scores candidate settings from the grid below with 5-fold cross-validation
# and stores the best setting in rand_parm (replacing the value left there by
# the previous tuning cell).

import time
start_time = time.time()

print("RandomizedSearchCV-SVM")
parameters={'loss' : ['epsilon_insensitive', 'squared_epsilon_insensitive'],'max_iter':range(500,2000,500)}
# The grid only holds 2 losses x 3 max_iter values = 6 combinations. Asking
# RandomizedSearchCV for more iterations than a list-only grid contains makes
# scikit-learn warn (and older releases may raise), so cap the request at
# the grid size.
n_candidates = len(parameters['loss']) * len(parameters['max_iter'])
svr_random = RandomizedSearchCV(svr, parameters, n_iter=min(10, n_candidates), cv=5)
svr_random.fit(X_train, y_train)
rand_parm=svr_random.best_params_
print(rand_parm)



# Wall-clock time spent in this cell.
print("--- %s seconds ---" % (time.time() - start_time))

RandomizedSearchCV-SVM
{'max_iter': 1000, 'loss': 'squared_epsilon_insensitive'}
--- 8.361206531524658 seconds ---


In [None]:
# Rebuild the linear SVR with the best settings found by the randomized
# search, fit it on the full training set, and predict the test set.
# rand_parm must still hold the SVR result when this cell runs.
svrRand = LinearSVR(**rand_parm).fit(Xtrain, Ytrain)
svrRand_predict = svrRand.predict(Xtest)

# 5-fold cross-validation of the tuned model (mean of the fold scores).
scores = cross_val_score(estimator=svrRand, X=X_train, y=y_train, cv=5)
print("Mean_score=", scores.mean())


# Write the test predictions to Drive as a single-column ('TARGET') CSV.
hpt_svr = pd.DataFrame({'TARGET': svrRand_predict})
export_csv = hpt_svr.to_csv(
    path_or_buf=r'/gdrive/My Drive/ASU_MSBA/CIS_508/Team Assignment 1/svr_hpt_Test.csv'
)

Mean_score= 0.236649716909688


In [None]:
# ---- Stacking: build level-1 features from four base regressors ----
print("___________________________________________________________________________________________\nEnsemble Methods Predictions using MLPRegressor,LinearSVR, RandomForest and Decision Tree Classifier\n")

# Base (level-0) learners, all with library-default settings.
models = [
    LinearSVR(),
    RandomForestRegressor(),
    DecisionTreeRegressor(),
    MLPRegressor(),
]

# vecstack runs each base model through 4-fold cross-validation and returns
# one prediction column per model for the training rows (S_Train) and for the
# test rows (S_Test). verbose=2 prints the per-fold scores.
# NOTE(review): exact semantics of mode='oof_pred_bag' are defined by
# vecstack -- confirm against its documentation.
S_Train, S_Test = stacking(
    models, X_train, y_train, X_test,
    regression=True,
    mode='oof_pred_bag',
    needs_proba=False,
    save_dir=None,
    n_folds=4,
    verbose=2,
)


___________________________________________________________________________________________
Ensemble Methods Predictions using GradientBoosting, RandomForest and Decision Tree Classifier

task:         [regression]
metric:       [mean_absolute_error]
mode:         [oof_pred_bag]
n_models:     [4]

model  0:     [LinearSVR]
    fold  0:  [25562.17563838]
    fold  1:  [38043.77704626]
    fold  2:  [24121.19367341]
    fold  3:  [29377.06488245]
    ----
    MEAN:     [29276.05281012] + [5414.07074404]
    FULL:     [29276.05281012]

model  1:     [RandomForestRegressor]
    fold  0:  [17153.85421918]
    fold  1:  [18345.15572603]
    fold  2:  [16309.42739726]
    fold  3:  [18479.07282192]
    ----
    MEAN:     [17571.87754110] + [892.95664679]
    FULL:     [17571.87754110]

model  2:     [DecisionTreeRegressor]
    fold  0:  [27318.04931507]
    fold  1:  [27022.92054795]
    fold  2:  [23946.71232877]
    fold  3:  [28267.92602740]
    ----
    MEAN:     [26638.90205479] + [1620.

In [None]:
# ---- Stacking: gradient boosting meta-model ----
# Fit a default GradientBoostingRegressor on the stacked training features
# produced by the base models, then predict from the stacked train and test
# features.
model = GradientBoostingRegressor().fit(S_Train, y_train)
y_pred_train, y_pred_test = model.predict(S_Train), model.predict(S_Test)


In [None]:
# Eyeball the stacked model's test predictions: put them in a one-column
# frame and display the first five rows.
pred_st = pd.DataFrame(data=model.predict(S_Test), columns=["Prediction"])

pred_st.head(n=5)

Unnamed: 0,Prediction
0,125155.872272
1,157412.701698
2,175889.339619
3,175889.339619
4,195379.498337


In [None]:
## Cross-validation score of the stacked model
# The meta-model is trained on the stacked features (S_Train), so it has to be
# cross-validated on those same features. Scoring it on the raw X_train would
# measure a plain gradient-boosting model instead of the stacked ensemble, and
# would not be comparable with the tuned stacked model, which is scored on
# S_Train.
scores = cross_val_score(model, S_Train, y_train, cv=5)
print("Mean_score=", scores.mean())

# Save the stacked model's test predictions to Drive as a single-column
# ('TARGET') CSV.
df_stm = pd.DataFrame()
df_stm['TARGET'] = y_pred_test
export_csv = df_stm.to_csv(r'/gdrive/My Drive/ASU_MSBA/CIS_508/Team Assignment 1/stm_dfl_Test.csv')

Mean_score= 0.8851829312914214


In [None]:
# ---- Stacked model: hyperparameter tuning of the meta-model, randomized search ----
# Scores candidate settings for the gradient-boosting meta-model on the
# stacked features with 5-fold cross-validation and stores the best setting in
# rand_parm (replacing the value left there by the previous tuning cell).

import time
start_time = time.time()

print("RandomizedSearchCV-Stacked Model")
parameters={ 'n_estimators':[5,10,20, 30, 50],'learning_rate':[0.01,.1] }
# The grid only holds 5 x 2 = 10 combinations. Asking RandomizedSearchCV for
# more iterations than a list-only grid contains makes scikit-learn warn (and
# older releases may raise), so cap the request at the grid size.
n_candidates = len(parameters['n_estimators']) * len(parameters['learning_rate'])
stm_random = RandomizedSearchCV(model, parameters, n_iter=min(15, n_candidates), cv=5)
stm_random.fit(S_Train, y_train)
rand_parm=stm_random.best_params_
print(rand_parm)



# Wall-clock time spent in this cell.
print("--- %s seconds ---" % (time.time() - start_time))

RandomizedSearchCV-Stacked Model
{'n_estimators': 30, 'learning_rate': 0.1}
--- 1.7247254848480225 seconds ---


In [None]:
# Rebuild the gradient-boosting meta-model with the best settings found by the
# randomized search, fit it on the stacked training features, and predict from
# the stacked test features.
# rand_parm must still hold the stacked-model result when this cell runs.
stmRand = GradientBoostingRegressor(**rand_parm).fit(S_Train, y_train)
stmRand_predict = stmRand.predict(S_Test)

# 5-fold cross-validation on the stacked features (mean of the fold scores).
scores = cross_val_score(estimator=stmRand, X=S_Train, y=y_train, cv=5)
print("Mean_score=", scores.mean())


# Write the test predictions to Drive as a single-column ('TARGET') CSV.
hpt_stm = pd.DataFrame({'TARGET': stmRand_predict})
export_csv = hpt_stm.to_csv(
    path_or_buf=r'/gdrive/My Drive/ASU_MSBA/CIS_508/Team Assignment 1/stm_hpt_Test.csv'
)

Mean_score= 0.8464313023558754
