## House Price Predictions
https://www.kaggle.com/competitions/home-data-for-ml-course/overview


In [158]:
import pandas as pd
#settings :
pd.set_option('display.max_rows', 1000)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
pd.set_option('display.max_colwidth', 1000)

import numpy as np
import matplotlib.pyplot as plt # library of vizualization
import seaborn as sns # library of vizualization
import missingno as msno
from scipy.stats import zscore

data1 = pd.read_csv("train.csv")
data2 = pd.read_csv("test.csv")
data2['SalePrice'] = 0
data = pd.concat([data1,data2], axis=0)

In [159]:
data1.dtypes

Id                 int64
MSSubClass         int64
MSZoning          object
LotFrontage      float64
LotArea            int64
Street            object
Alley             object
LotShape          object
LandContour       object
Utilities         object
LotConfig         object
LandSlope         object
Neighborhood      object
Condition1        object
Condition2        object
BldgType          object
HouseStyle        object
OverallQual        int64
OverallCond        int64
YearBuilt          int64
YearRemodAdd       int64
RoofStyle         object
RoofMatl          object
Exterior1st       object
Exterior2nd       object
MasVnrType        object
MasVnrArea       float64
ExterQual         object
ExterCond         object
Foundation        object
BsmtQual          object
BsmtCond          object
BsmtExposure      object
BsmtFinType1      object
BsmtFinSF1         int64
BsmtFinType2      object
BsmtFinSF2         int64
BsmtUnfSF          int64
TotalBsmtSF        int64
Heating           object


In [160]:
data2.dtypes

Id                 int64
MSSubClass         int64
MSZoning          object
LotFrontage      float64
LotArea            int64
Street            object
Alley             object
LotShape          object
LandContour       object
Utilities         object
LotConfig         object
LandSlope         object
Neighborhood      object
Condition1        object
Condition2        object
BldgType          object
HouseStyle        object
OverallQual        int64
OverallCond        int64
YearBuilt          int64
YearRemodAdd       int64
RoofStyle         object
RoofMatl          object
Exterior1st       object
Exterior2nd       object
MasVnrType        object
MasVnrArea       float64
ExterQual         object
ExterCond         object
Foundation        object
BsmtQual          object
BsmtCond          object
BsmtExposure      object
BsmtFinType1      object
BsmtFinSF1       float64
BsmtFinType2      object
BsmtFinSF2       float64
BsmtUnfSF        float64
TotalBsmtSF      float64
Heating           object


In [161]:
# We only care about the object type here, so let's just work on those...
column_types = data.dtypes
object_columns = column_types[column_types == 'object'].index
object_data = data[object_columns]

object_data.fillna("N/A", inplace=True)
print(object_data)

     MSZoning Street Alley LotShape LandContour Utilities LotConfig LandSlope Neighborhood Condition1 Condition2 BldgType HouseStyle RoofStyle RoofMatl Exterior1st Exterior2nd MasVnrType ExterQual ExterCond Foundation BsmtQual BsmtCond BsmtExposure BsmtFinType1 BsmtFinType2 Heating HeatingQC CentralAir Electrical KitchenQual Functional FireplaceQu GarageType GarageFinish GarageQual GarageCond PavedDrive PoolQC  Fence MiscFeature SaleType SaleCondition
0          RL   Pave   N/A      Reg         Lvl    AllPub    Inside       Gtl      CollgCr       Norm       Norm     1Fam     2Story     Gable  CompShg     VinylSd     VinylSd    BrkFace        Gd        TA      PConc       Gd       TA           No          GLQ          Unf    GasA        Ex          Y      SBrkr          Gd        Typ         N/A     Attchd          RFn         TA         TA          Y    N/A    N/A         N/A       WD        Normal
1          RL   Pave   N/A      Reg         Lvl    AllPub       FR2       Gtl      Veenk

# Work through the columns

We will use the column descriptions provided in the data_description.txt file that came with the dataset to help make sense of the columns. Let's look at the first one that we identify as nominal to see how we would deal with it...

### MSZoning: Identifies the general zoning classification of the sale.	
Has identifiers that can not be placed in a ordinal format, thus the order we assign does not matter here. I will make a simple function that I can use on all nominal columns.

In [162]:
object_data['MSZoning'].unique()


array(['RL', 'RM', 'C (all)', 'FV', 'RH', 'N/A'], dtype=object)

In [163]:
## Function to count the unique values in a column of given name convert them to identifier numbers. 
from sklearn.preprocessing import OrdinalEncoder

def nominalize(column, data=object_data):
    # Disable the SettingWithCopyWarning, since I TOTALLY know what I am doing and will never make a mistake because that is for n00bs
    pd.options.mode.chained_assignment = None
    # Get unique values
    unique_values = data[column].unique()
    # Define encoder variable
    oenc = OrdinalEncoder(categories=[unique_values], dtype=int)
    # Replace column values with identifier numbers
    data[column] = oenc.fit_transform(data[[column]]) + 1   
    # Rename the column so that it's easier to understand
    new_name = column + "_Nominal_ID"
    data.rename(columns={column: new_name}, inplace=True)

In [164]:
nominalize('MSZoning')
# check that it worked on this column:
object_data['MSZoning_Nominal_ID'].unique()


array([1, 2, 3, 4, 5, 6])

In [165]:
object_data

Unnamed: 0,MSZoning_Nominal_ID,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinType2,Heating,HeatingQC,CentralAir,Electrical,KitchenQual,Functional,FireplaceQu,GarageType,GarageFinish,GarageQual,GarageCond,PavedDrive,PoolQC,Fence,MiscFeature,SaleType,SaleCondition
0,1,Pave,,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,Gable,CompShg,VinylSd,VinylSd,BrkFace,Gd,TA,PConc,Gd,TA,No,GLQ,Unf,GasA,Ex,Y,SBrkr,Gd,Typ,,Attchd,RFn,TA,TA,Y,,,,WD,Normal
1,1,Pave,,Reg,Lvl,AllPub,FR2,Gtl,Veenker,Feedr,Norm,1Fam,1Story,Gable,CompShg,MetalSd,MetalSd,,TA,TA,CBlock,Gd,TA,Gd,ALQ,Unf,GasA,Ex,Y,SBrkr,TA,Typ,TA,Attchd,RFn,TA,TA,Y,,,,WD,Normal
2,1,Pave,,IR1,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,Gable,CompShg,VinylSd,VinylSd,BrkFace,Gd,TA,PConc,Gd,TA,Mn,GLQ,Unf,GasA,Ex,Y,SBrkr,Gd,Typ,TA,Attchd,RFn,TA,TA,Y,,,,WD,Normal
3,1,Pave,,IR1,Lvl,AllPub,Corner,Gtl,Crawfor,Norm,Norm,1Fam,2Story,Gable,CompShg,Wd Sdng,Wd Shng,,TA,TA,BrkTil,TA,Gd,No,ALQ,Unf,GasA,Gd,Y,SBrkr,Gd,Typ,Gd,Detchd,Unf,TA,TA,Y,,,,WD,Abnorml
4,1,Pave,,IR1,Lvl,AllPub,FR2,Gtl,NoRidge,Norm,Norm,1Fam,2Story,Gable,CompShg,VinylSd,VinylSd,BrkFace,Gd,TA,PConc,Gd,TA,Av,GLQ,Unf,GasA,Ex,Y,SBrkr,Gd,Typ,TA,Attchd,RFn,TA,TA,Y,,,,WD,Normal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1454,2,Pave,,Reg,Lvl,AllPub,Inside,Gtl,MeadowV,Norm,Norm,Twnhs,2Story,Gable,CompShg,CemntBd,CmentBd,,TA,TA,CBlock,TA,TA,No,Unf,Unf,GasA,Gd,Y,SBrkr,TA,Typ,,,,,,Y,,,,WD,Normal
1455,2,Pave,,Reg,Lvl,AllPub,Inside,Gtl,MeadowV,Norm,Norm,TwnhsE,2Story,Gable,CompShg,CemntBd,CmentBd,,TA,TA,CBlock,TA,TA,No,Rec,Unf,GasA,TA,Y,SBrkr,TA,Typ,,CarPort,Unf,TA,TA,Y,,,,WD,Abnorml
1456,1,Pave,,Reg,Lvl,AllPub,Inside,Gtl,Mitchel,Norm,Norm,1Fam,1Story,Gable,CompShg,VinylSd,VinylSd,,TA,TA,CBlock,TA,TA,No,ALQ,Unf,GasA,Ex,Y,SBrkr,TA,Typ,TA,Detchd,Unf,TA,TA,Y,,,,WD,Abnorml
1457,1,Pave,,Reg,Lvl,AllPub,Inside,Gtl,Mitchel,Norm,Norm,1Fam,SFoyer,Gable,CompShg,HdBoard,Wd Shng,,TA,TA,PConc,Gd,TA,Av,GLQ,Unf,GasA,TA,Y,SBrkr,TA,Typ,,,,,,Y,,MnPrv,Shed,WD,Normal


## Other Nominal Values
So let's just go through the list of columns in the text file to figure out which values are nominal and replace all of them with identifiers.

### List of Nominals and quick descriptions:
Street: Type of road access to property

Alley: Type of alley access to property

LandContour: Flatness of the property   ## This kinda could be ordinal as well?

LotConfig: Lot configuration

Neighborhood: Physical locations within Ames city limits

Condition1: Proximity to various conditions

Condition2: Proximity to various conditions (if more than one is present)

BldgType: Type of dwelling

HouseStyle: Style of dwelling

RoofStyle: Type of roof

RoofMatl: Roof material

Exterior1st: Exterior covering on house

Exterior2nd: Exterior covering on house (if more than one material)

MasVnrType: Masonry veneer type

Foundation: Type of foundation

Heating: Type of heating

Electrical: Electrical system

GarageType: Garage location

MiscFeature: Miscellaneous feature not covered in other categories

SaleType: Type of sale

SaleCondition: Condition of sale



CentralAir: Central air conditioning  ## Technically like a nominal value, but this is more like a binary, so I will handle it separately


In [166]:
## So let's just iterate through all of them using the function
nominal_categories = [
    'Street', 'Alley', 'LandContour', 'LotConfig', 'Neighborhood',
    'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'RoofStyle',
    'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType', 'Foundation',
    'Heating', 'Electrical', 'GarageType', 'MiscFeature', 'SaleType',
    'SaleCondition'
]

for i in nominal_categories:
    nominalize(i)

object_data

Unnamed: 0,MSZoning_Nominal_ID,Street_Nominal_ID,Alley_Nominal_ID,LotShape,LandContour_Nominal_ID,Utilities,LotConfig_Nominal_ID,LandSlope,Neighborhood_Nominal_ID,Condition1_Nominal_ID,Condition2_Nominal_ID,BldgType_Nominal_ID,HouseStyle_Nominal_ID,RoofStyle_Nominal_ID,RoofMatl_Nominal_ID,Exterior1st_Nominal_ID,Exterior2nd_Nominal_ID,MasVnrType_Nominal_ID,ExterQual,ExterCond,Foundation_Nominal_ID,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinType2,Heating_Nominal_ID,HeatingQC,CentralAir,Electrical_Nominal_ID,KitchenQual,Functional,FireplaceQu,GarageType_Nominal_ID,GarageFinish,GarageQual,GarageCond,PavedDrive,PoolQC,Fence,MiscFeature_Nominal_ID,SaleType_Nominal_ID,SaleCondition_Nominal_ID
0,1,1,1,Reg,1,AllPub,1,Gtl,1,1,1,1,1,1,1,1,1,1,Gd,TA,1,Gd,TA,No,GLQ,Unf,1,Ex,Y,1,Gd,Typ,,1,RFn,TA,TA,Y,,,1,1,1
1,1,1,1,Reg,1,AllPub,2,Gtl,2,2,1,1,2,1,1,2,2,2,TA,TA,2,Gd,TA,Gd,ALQ,Unf,1,Ex,Y,1,TA,Typ,TA,1,RFn,TA,TA,Y,,,1,1,1
2,1,1,1,IR1,1,AllPub,1,Gtl,1,1,1,1,1,1,1,1,1,1,Gd,TA,1,Gd,TA,Mn,GLQ,Unf,1,Ex,Y,1,Gd,Typ,TA,1,RFn,TA,TA,Y,,,1,1,1
3,1,1,1,IR1,1,AllPub,3,Gtl,3,1,1,1,1,1,1,3,3,2,TA,TA,3,TA,Gd,No,ALQ,Unf,1,Gd,Y,1,Gd,Typ,Gd,2,Unf,TA,TA,Y,,,1,1,2
4,1,1,1,IR1,1,AllPub,2,Gtl,4,1,1,1,1,1,1,1,1,1,Gd,TA,1,Gd,TA,Av,GLQ,Unf,1,Ex,Y,1,Gd,Typ,TA,1,RFn,TA,TA,Y,,,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1454,2,1,1,Reg,1,AllPub,1,Gtl,15,1,1,5,1,1,1,7,7,2,TA,TA,2,TA,TA,No,Unf,Unf,1,Gd,Y,1,TA,Typ,,5,,,,Y,,,1,1,1
1455,2,1,1,Reg,1,AllPub,1,Gtl,15,1,1,4,1,1,1,7,7,2,TA,TA,2,TA,TA,No,Rec,Unf,1,TA,Y,1,TA,Typ,,4,Unf,TA,TA,Y,,,1,1,2
1456,1,1,1,Reg,1,AllPub,1,Gtl,5,1,1,1,2,1,1,1,1,2,TA,TA,2,TA,TA,No,ALQ,Unf,1,Ex,Y,1,TA,Typ,TA,2,Unf,TA,TA,Y,,,1,1,2
1457,1,1,1,Reg,1,AllPub,1,Gtl,5,1,1,1,5,1,1,4,3,2,TA,TA,1,Gd,TA,Av,GLQ,Unf,1,TA,Y,1,TA,Typ,,5,,,,Y,,MnPrv,2,1,1


## Ordinal Values.

So now we have a lot of categories where the order matters. This will take a lot of manual work to work through, but the text file makes it easier since we can see the meaning of the data that exist.

In [167]:
#Define function to use the column and category list to do the work for us

def ordinalize(column, rating_order, data=object_data):
    #define encoder
    oenc = OrdinalEncoder(categories=[rating_order], dtype=int)
    data[column] = oenc.fit_transform(data[[column]]) + 1
    # Rename the column so that it's easier to understand
    new_name = column + "_Ordinal_ID"
    data.rename(columns={column: new_name}, inplace=True)

# Separate function for categories with N/A, as I feel like 0 is a more descriptive value there.
def ordinalize_with_na(column, rating_order, data=object_data):
    #define encoder
    oenc = OrdinalEncoder(categories=[rating_order], dtype=int)
    data[column] = oenc.fit_transform(data[[column]])
    # Rename the column so that it's easier to understand
    new_name = column + "_Ordinal_ID"
    data.rename(columns={column: new_name}, inplace=True)


In [168]:

# Test our function
LotShape = ['IR3', 'IR2', 'IR1', 'Reg']
ordinalize('LotShape', LotShape)

object_data

Unnamed: 0,MSZoning_Nominal_ID,Street_Nominal_ID,Alley_Nominal_ID,LotShape_Ordinal_ID,LandContour_Nominal_ID,Utilities,LotConfig_Nominal_ID,LandSlope,Neighborhood_Nominal_ID,Condition1_Nominal_ID,Condition2_Nominal_ID,BldgType_Nominal_ID,HouseStyle_Nominal_ID,RoofStyle_Nominal_ID,RoofMatl_Nominal_ID,Exterior1st_Nominal_ID,Exterior2nd_Nominal_ID,MasVnrType_Nominal_ID,ExterQual,ExterCond,Foundation_Nominal_ID,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinType2,Heating_Nominal_ID,HeatingQC,CentralAir,Electrical_Nominal_ID,KitchenQual,Functional,FireplaceQu,GarageType_Nominal_ID,GarageFinish,GarageQual,GarageCond,PavedDrive,PoolQC,Fence,MiscFeature_Nominal_ID,SaleType_Nominal_ID,SaleCondition_Nominal_ID
0,1,1,1,4,1,AllPub,1,Gtl,1,1,1,1,1,1,1,1,1,1,Gd,TA,1,Gd,TA,No,GLQ,Unf,1,Ex,Y,1,Gd,Typ,,1,RFn,TA,TA,Y,,,1,1,1
1,1,1,1,4,1,AllPub,2,Gtl,2,2,1,1,2,1,1,2,2,2,TA,TA,2,Gd,TA,Gd,ALQ,Unf,1,Ex,Y,1,TA,Typ,TA,1,RFn,TA,TA,Y,,,1,1,1
2,1,1,1,3,1,AllPub,1,Gtl,1,1,1,1,1,1,1,1,1,1,Gd,TA,1,Gd,TA,Mn,GLQ,Unf,1,Ex,Y,1,Gd,Typ,TA,1,RFn,TA,TA,Y,,,1,1,1
3,1,1,1,3,1,AllPub,3,Gtl,3,1,1,1,1,1,1,3,3,2,TA,TA,3,TA,Gd,No,ALQ,Unf,1,Gd,Y,1,Gd,Typ,Gd,2,Unf,TA,TA,Y,,,1,1,2
4,1,1,1,3,1,AllPub,2,Gtl,4,1,1,1,1,1,1,1,1,1,Gd,TA,1,Gd,TA,Av,GLQ,Unf,1,Ex,Y,1,Gd,Typ,TA,1,RFn,TA,TA,Y,,,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1454,2,1,1,4,1,AllPub,1,Gtl,15,1,1,5,1,1,1,7,7,2,TA,TA,2,TA,TA,No,Unf,Unf,1,Gd,Y,1,TA,Typ,,5,,,,Y,,,1,1,1
1455,2,1,1,4,1,AllPub,1,Gtl,15,1,1,4,1,1,1,7,7,2,TA,TA,2,TA,TA,No,Rec,Unf,1,TA,Y,1,TA,Typ,,4,Unf,TA,TA,Y,,,1,1,2
1456,1,1,1,4,1,AllPub,1,Gtl,5,1,1,1,2,1,1,1,1,2,TA,TA,2,TA,TA,No,ALQ,Unf,1,Ex,Y,1,TA,Typ,TA,2,Unf,TA,TA,Y,,,1,1,2
1457,1,1,1,4,1,AllPub,1,Gtl,5,1,1,1,5,1,1,4,3,2,TA,TA,1,Gd,TA,Av,GLQ,Unf,1,TA,Y,1,TA,Typ,,5,,,,Y,,MnPrv,2,1,1


In [169]:
# Seems to work so let's do the same with the rest, remembering to use the separate function where applicable.

Utilities = ['N/A','ELO', 'NoSeWa', 'NoSewr', 'AllPub']
ordinalize('Utilities', Utilities)

LandSlope = ['Sev', 'Mod', 'Gtl']
ordinalize('LandSlope', LandSlope)

ExterQual = ['Po', 'Fa', 'TA', 'Gd', 'Ex']
ordinalize('ExterQual', ExterQual)

ExterCond = ['Po', 'Fa', 'TA', 'Gd', 'Ex']
ordinalize('ExterCond', ExterCond)

BsmtQual = ['N/A', 'Po', 'Fa', 'TA', 'Gd', 'Ex']
ordinalize_with_na('BsmtQual', BsmtQual)

BsmtCond = ['N/A', 'Po', 'Fa', 'TA', 'Gd', 'Ex']
ordinalize_with_na('BsmtCond', BsmtCond)

BsmtExposure = ['N/A', 'No', 'Mn', 'Av', 'Gd']
ordinalize_with_na('BsmtExposure', BsmtExposure)

BsmtFinType1 = ['N/A', 'Unf', 'LwQ', 'Rec', 'BLQ', 'ALQ', 'GLQ']
ordinalize_with_na('BsmtFinType1', BsmtFinType1)

BsmtFinType2 = ['N/A', 'Unf', 'LwQ', 'Rec', 'BLQ', 'ALQ', 'GLQ']
ordinalize_with_na('BsmtFinType2', BsmtFinType2)

HeatingQC = ['Po', 'Fa', 'TA', 'Gd', 'Ex']
ordinalize('HeatingQC', HeatingQC)

KitchenQual = ['N/A','Po', 'Fa', 'TA', 'Gd', 'Ex']
ordinalize('KitchenQual', KitchenQual)

Functional = ['N/A','Sal', 'Sev', 'Maj2', 'Maj1', 'Mod', 'Min2', 'Min1', 'Typ']
ordinalize('Functional', Functional)

FireplaceQu = ['N/A', 'Po', 'Fa', 'TA', 'Gd', 'Ex']
ordinalize_with_na('FireplaceQu', FireplaceQu)

GarageFinish = ['N/A', 'Unf', 'RFn', 'Fin']
ordinalize_with_na('GarageFinish', GarageFinish)

GarageQual = ['N/A', 'Po', 'Fa', 'TA', 'Gd', 'Ex']
ordinalize_with_na('GarageQual', GarageQual)

GarageCond = ['N/A', 'Po', 'Fa', 'TA', 'Gd', 'Ex']
ordinalize_with_na('GarageCond', GarageCond)

PavedDrive = ['N', 'P', 'Y']
ordinalize('PavedDrive', PavedDrive)

PoolQC = ['N/A', 'Fa', 'TA', 'Gd', 'Ex']
ordinalize_with_na('PoolQC', PoolQC)

Fence = ['N/A', 'MnWw', 'GdWo', 'MnPrv', 'GdPrv']
ordinalize_with_na('Fence', Fence)

In [170]:
CentralAir = ['N/A', 'N', 'Y']
ordinalize_with_na('CentralAir', CentralAir)

In [171]:
object_data

Unnamed: 0,MSZoning_Nominal_ID,Street_Nominal_ID,Alley_Nominal_ID,LotShape_Ordinal_ID,LandContour_Nominal_ID,Utilities_Ordinal_ID,LotConfig_Nominal_ID,LandSlope_Ordinal_ID,Neighborhood_Nominal_ID,Condition1_Nominal_ID,Condition2_Nominal_ID,BldgType_Nominal_ID,HouseStyle_Nominal_ID,RoofStyle_Nominal_ID,RoofMatl_Nominal_ID,Exterior1st_Nominal_ID,Exterior2nd_Nominal_ID,MasVnrType_Nominal_ID,ExterQual_Ordinal_ID,ExterCond_Ordinal_ID,Foundation_Nominal_ID,BsmtQual_Ordinal_ID,BsmtCond_Ordinal_ID,BsmtExposure_Ordinal_ID,BsmtFinType1_Ordinal_ID,BsmtFinType2_Ordinal_ID,Heating_Nominal_ID,HeatingQC_Ordinal_ID,CentralAir_Ordinal_ID,Electrical_Nominal_ID,KitchenQual_Ordinal_ID,Functional_Ordinal_ID,FireplaceQu_Ordinal_ID,GarageType_Nominal_ID,GarageFinish_Ordinal_ID,GarageQual_Ordinal_ID,GarageCond_Ordinal_ID,PavedDrive_Ordinal_ID,PoolQC_Ordinal_ID,Fence_Ordinal_ID,MiscFeature_Nominal_ID,SaleType_Nominal_ID,SaleCondition_Nominal_ID
0,1,1,1,4,1,5,1,3,1,1,1,1,1,1,1,1,1,1,4,3,1,4,3,1,6,1,1,5,2,1,5,9,0,1,2,3,3,3,0,0,1,1,1
1,1,1,1,4,1,5,2,3,2,2,1,1,2,1,1,2,2,2,3,3,2,4,3,4,5,1,1,5,2,1,4,9,3,1,2,3,3,3,0,0,1,1,1
2,1,1,1,3,1,5,1,3,1,1,1,1,1,1,1,1,1,1,4,3,1,4,3,2,6,1,1,5,2,1,5,9,3,1,2,3,3,3,0,0,1,1,1
3,1,1,1,3,1,5,3,3,3,1,1,1,1,1,1,3,3,2,3,3,3,3,4,1,5,1,1,4,2,1,5,9,4,2,1,3,3,3,0,0,1,1,2
4,1,1,1,3,1,5,2,3,4,1,1,1,1,1,1,1,1,1,4,3,1,4,3,3,6,1,1,5,2,1,5,9,3,1,2,3,3,3,0,0,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1454,2,1,1,4,1,5,1,3,15,1,1,5,1,1,1,7,7,2,3,3,2,3,3,1,1,1,1,4,2,1,4,9,0,5,0,0,0,3,0,0,1,1,1
1455,2,1,1,4,1,5,1,3,15,1,1,4,1,1,1,7,7,2,3,3,2,3,3,1,3,1,1,3,2,1,4,9,0,4,1,3,3,3,0,0,1,1,2
1456,1,1,1,4,1,5,1,3,5,1,1,1,2,1,1,1,1,2,3,3,2,3,3,1,5,1,1,5,2,1,4,9,3,2,1,3,3,3,0,0,1,1,2
1457,1,1,1,4,1,5,1,3,5,1,1,1,5,1,1,4,3,2,3,3,1,4,3,3,6,1,1,3,2,1,4,9,0,5,0,0,0,3,0,3,2,1,1


In [172]:
data = data.select_dtypes(exclude='object')
data = pd.concat([data, object_data], axis=1)


data.head()

Unnamed: 0,Id,MSSubClass,LotFrontage,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,TotRmsAbvGrd,Fireplaces,GarageYrBlt,GarageCars,GarageArea,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold,SalePrice,MSZoning_Nominal_ID,Street_Nominal_ID,Alley_Nominal_ID,LotShape_Ordinal_ID,LandContour_Nominal_ID,Utilities_Ordinal_ID,LotConfig_Nominal_ID,LandSlope_Ordinal_ID,Neighborhood_Nominal_ID,Condition1_Nominal_ID,Condition2_Nominal_ID,BldgType_Nominal_ID,HouseStyle_Nominal_ID,RoofStyle_Nominal_ID,RoofMatl_Nominal_ID,Exterior1st_Nominal_ID,Exterior2nd_Nominal_ID,MasVnrType_Nominal_ID,ExterQual_Ordinal_ID,ExterCond_Ordinal_ID,Foundation_Nominal_ID,BsmtQual_Ordinal_ID,BsmtCond_Ordinal_ID,BsmtExposure_Ordinal_ID,BsmtFinType1_Ordinal_ID,BsmtFinType2_Ordinal_ID,Heating_Nominal_ID,HeatingQC_Ordinal_ID,CentralAir_Ordinal_ID,Electrical_Nominal_ID,KitchenQual_Ordinal_ID,Functional_Ordinal_ID,FireplaceQu_Ordinal_ID,GarageType_Nominal_ID,GarageFinish_Ordinal_ID,GarageQual_Ordinal_ID,GarageCond_Ordinal_ID,PavedDrive_Ordinal_ID,PoolQC_Ordinal_ID,Fence_Ordinal_ID,MiscFeature_Nominal_ID,SaleType_Nominal_ID,SaleCondition_Nominal_ID
0,1,60,65.0,8450,7,5,2003,2003,196.0,706.0,0.0,150.0,856.0,856,854,0,1710,1.0,0.0,2,1,3,1,8,0,2003.0,2.0,548.0,0,61,0,0,0,0,0,2,2008,208500,1,1,1,4,1,5,1,3,1,1,1,1,1,1,1,1,1,1,4,3,1,4,3,1,6,1,1,5,2,1,5,9,0,1,2,3,3,3,0,0,1,1,1
1,2,20,80.0,9600,6,8,1976,1976,0.0,978.0,0.0,284.0,1262.0,1262,0,0,1262,0.0,1.0,2,0,3,1,6,1,1976.0,2.0,460.0,298,0,0,0,0,0,0,5,2007,181500,1,1,1,4,1,5,2,3,2,2,1,1,2,1,1,2,2,2,3,3,2,4,3,4,5,1,1,5,2,1,4,9,3,1,2,3,3,3,0,0,1,1,1
2,3,60,68.0,11250,7,5,2001,2002,162.0,486.0,0.0,434.0,920.0,920,866,0,1786,1.0,0.0,2,1,3,1,6,1,2001.0,2.0,608.0,0,42,0,0,0,0,0,9,2008,223500,1,1,1,3,1,5,1,3,1,1,1,1,1,1,1,1,1,1,4,3,1,4,3,2,6,1,1,5,2,1,5,9,3,1,2,3,3,3,0,0,1,1,1
3,4,70,60.0,9550,7,5,1915,1970,0.0,216.0,0.0,540.0,756.0,961,756,0,1717,1.0,0.0,1,0,3,1,7,1,1998.0,3.0,642.0,0,35,272,0,0,0,0,2,2006,140000,1,1,1,3,1,5,3,3,3,1,1,1,1,1,1,3,3,2,3,3,3,3,4,1,5,1,1,4,2,1,5,9,4,2,1,3,3,3,0,0,1,1,2
4,5,60,84.0,14260,8,5,2000,2000,350.0,655.0,0.0,490.0,1145.0,1145,1053,0,2198,1.0,0.0,2,1,4,1,9,1,2000.0,3.0,836.0,192,84,0,0,0,0,0,12,2008,250000,1,1,1,3,1,5,2,3,4,1,1,1,1,1,1,1,1,1,4,3,1,4,3,3,6,1,1,5,2,1,5,9,3,1,2,3,3,3,0,0,1,1,1


In [173]:
data.shape

(2919, 81)

In [179]:
train = data.iloc[:1460, :]
train.shape

column_names = data.columns.tolist()

train



Unnamed: 0,Id,MSSubClass,LotFrontage,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,TotRmsAbvGrd,Fireplaces,GarageYrBlt,GarageCars,GarageArea,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold,SalePrice,MSZoning_Nominal_ID,Street_Nominal_ID,Alley_Nominal_ID,LotShape_Ordinal_ID,LandContour_Nominal_ID,Utilities_Ordinal_ID,LotConfig_Nominal_ID,LandSlope_Ordinal_ID,Neighborhood_Nominal_ID,Condition1_Nominal_ID,Condition2_Nominal_ID,BldgType_Nominal_ID,HouseStyle_Nominal_ID,RoofStyle_Nominal_ID,RoofMatl_Nominal_ID,Exterior1st_Nominal_ID,Exterior2nd_Nominal_ID,MasVnrType_Nominal_ID,ExterQual_Ordinal_ID,ExterCond_Ordinal_ID,Foundation_Nominal_ID,BsmtQual_Ordinal_ID,BsmtCond_Ordinal_ID,BsmtExposure_Ordinal_ID,BsmtFinType1_Ordinal_ID,BsmtFinType2_Ordinal_ID,Heating_Nominal_ID,HeatingQC_Ordinal_ID,CentralAir_Ordinal_ID,Electrical_Nominal_ID,KitchenQual_Ordinal_ID,Functional_Ordinal_ID,FireplaceQu_Ordinal_ID,GarageType_Nominal_ID,GarageFinish_Ordinal_ID,GarageQual_Ordinal_ID,GarageCond_Ordinal_ID,PavedDrive_Ordinal_ID,PoolQC_Ordinal_ID,Fence_Ordinal_ID,MiscFeature_Nominal_ID,SaleType_Nominal_ID,SaleCondition_Nominal_ID
0,1,60,65.0,8450,7,5,2003,2003,196.0,706.0,0.0,150.0,856.0,856,854,0,1710,1.0,0.0,2,1,3,1,8,0,2003.0,2.0,548.0,0,61,0,0,0,0,0,2,2008,208500,1,1,1,4,1,5,1,3,1,1,1,1,1,1,1,1,1,1,4,3,1,4,3,1,6,1,1,5,2,1,5,9,0,1,2,3,3,3,0,0,1,1,1
1,2,20,80.0,9600,6,8,1976,1976,0.0,978.0,0.0,284.0,1262.0,1262,0,0,1262,0.0,1.0,2,0,3,1,6,1,1976.0,2.0,460.0,298,0,0,0,0,0,0,5,2007,181500,1,1,1,4,1,5,2,3,2,2,1,1,2,1,1,2,2,2,3,3,2,4,3,4,5,1,1,5,2,1,4,9,3,1,2,3,3,3,0,0,1,1,1
2,3,60,68.0,11250,7,5,2001,2002,162.0,486.0,0.0,434.0,920.0,920,866,0,1786,1.0,0.0,2,1,3,1,6,1,2001.0,2.0,608.0,0,42,0,0,0,0,0,9,2008,223500,1,1,1,3,1,5,1,3,1,1,1,1,1,1,1,1,1,1,4,3,1,4,3,2,6,1,1,5,2,1,5,9,3,1,2,3,3,3,0,0,1,1,1
3,4,70,60.0,9550,7,5,1915,1970,0.0,216.0,0.0,540.0,756.0,961,756,0,1717,1.0,0.0,1,0,3,1,7,1,1998.0,3.0,642.0,0,35,272,0,0,0,0,2,2006,140000,1,1,1,3,1,5,3,3,3,1,1,1,1,1,1,3,3,2,3,3,3,3,4,1,5,1,1,4,2,1,5,9,4,2,1,3,3,3,0,0,1,1,2
4,5,60,84.0,14260,8,5,2000,2000,350.0,655.0,0.0,490.0,1145.0,1145,1053,0,2198,1.0,0.0,2,1,4,1,9,1,2000.0,3.0,836.0,192,84,0,0,0,0,0,12,2008,250000,1,1,1,3,1,5,2,3,4,1,1,1,1,1,1,1,1,1,4,3,1,4,3,3,6,1,1,5,2,1,5,9,3,1,2,3,3,3,0,0,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1455,1456,60,62.0,7917,6,5,1999,2000,0.0,0.0,0.0,953.0,953.0,953,694,0,1647,0.0,0.0,2,1,3,1,7,1,1999.0,2.0,460.0,0,40,0,0,0,0,0,8,2007,175000,1,1,1,4,1,5,1,3,18,1,1,1,1,1,1,1,1,2,3,3,1,4,3,1,1,1,1,5,2,1,4,9,3,1,2,3,3,3,0,0,1,1,1
1456,1457,20,85.0,13175,6,6,1978,1988,119.0,790.0,163.0,589.0,1542.0,2073,0,0,2073,1.0,0.0,2,0,3,1,7,2,1978.0,2.0,500.0,349,0,0,0,0,0,0,2,2010,210000,1,1,1,4,1,5,1,3,7,1,1,1,2,1,1,8,5,3,3,3,2,4,3,1,5,3,1,3,2,1,4,8,3,1,1,3,3,3,0,3,1,1,1
1457,1458,70,66.0,9042,7,9,1941,2006,0.0,275.0,0.0,877.0,1152.0,1188,1152,0,2340,0.0,0.0,2,0,4,1,9,2,1941.0,1.0,252.0,0,60,0,0,0,0,2500,5,2010,266500,1,1,1,4,1,5,1,3,3,1,1,1,1,1,1,7,7,2,5,4,6,3,4,1,6,1,1,5,2,1,5,9,4,1,2,3,3,3,0,4,2,1,1
1458,1459,20,68.0,9717,5,6,1950,1996,0.0,49.0,1029.0,0.0,1078.0,1078,0,0,1078,1.0,0.0,1,0,2,1,5,0,1950.0,1.0,240.0,366,0,112,0,0,0,0,4,2010,142125,1,1,1,4,1,5,1,3,12,1,1,1,2,2,1,2,2,2,3,3,2,3,3,2,6,3,1,4,2,3,5,9,0,1,1,3,3,3,0,0,1,1,1


In [175]:
test = pd.DataFrame(columns=data.columns)
test = pd.concat([test, data.iloc[1460:, :]], axis=0)

  test = pd.concat([test, data.iloc[1460:, :]], axis=0)


In [180]:
test

Unnamed: 0,Id,MSSubClass,LotFrontage,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,TotRmsAbvGrd,Fireplaces,GarageYrBlt,GarageCars,GarageArea,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold,SalePrice,MSZoning_Nominal_ID,Street_Nominal_ID,Alley_Nominal_ID,LotShape_Ordinal_ID,LandContour_Nominal_ID,Utilities_Ordinal_ID,LotConfig_Nominal_ID,LandSlope_Ordinal_ID,Neighborhood_Nominal_ID,Condition1_Nominal_ID,Condition2_Nominal_ID,BldgType_Nominal_ID,HouseStyle_Nominal_ID,RoofStyle_Nominal_ID,RoofMatl_Nominal_ID,Exterior1st_Nominal_ID,Exterior2nd_Nominal_ID,MasVnrType_Nominal_ID,ExterQual_Ordinal_ID,ExterCond_Ordinal_ID,Foundation_Nominal_ID,BsmtQual_Ordinal_ID,BsmtCond_Ordinal_ID,BsmtExposure_Ordinal_ID,BsmtFinType1_Ordinal_ID,BsmtFinType2_Ordinal_ID,Heating_Nominal_ID,HeatingQC_Ordinal_ID,CentralAir_Ordinal_ID,Electrical_Nominal_ID,KitchenQual_Ordinal_ID,Functional_Ordinal_ID,FireplaceQu_Ordinal_ID,GarageType_Nominal_ID,GarageFinish_Ordinal_ID,GarageQual_Ordinal_ID,GarageCond_Ordinal_ID,PavedDrive_Ordinal_ID,PoolQC_Ordinal_ID,Fence_Ordinal_ID,MiscFeature_Nominal_ID,SaleType_Nominal_ID,SaleCondition_Nominal_ID
0,1461,20,80.0,11622,5,6,1961,1961,0.0,468.0,144.0,270.0,882.0,896,0,0,896,0.0,0.0,1,0,2,1,5,0,1961.0,1.0,730.0,140,0,0,0,120,0,0,6,2010,0,5,1,1,4,1,5,1,3,12,2,1,1,2,1,1,1,1,2,3,3,2,3,3,1,3,2,1,3,2,1,4,9,0,1,1,3,3,3,0,3,1,1,1
1,1462,20,81.0,14267,6,6,1958,1958,108.0,923.0,0.0,406.0,1329.0,1329,0,0,1329,0.0,0.0,1,1,3,1,6,0,1958.0,1.0,312.0,393,36,0,0,0,0,12500,6,2010,0,1,1,1,3,1,5,3,3,12,1,1,1,2,2,1,3,6,1,3,3,2,3,3,1,5,1,1,3,2,1,5,9,0,1,1,3,3,3,0,0,3,1,1
2,1463,60,74.0,13830,5,5,1997,1998,0.0,791.0,0.0,137.0,928.0,928,701,0,1629,0.0,0.0,2,1,3,1,6,1,1997.0,2.0,482.0,212,34,0,0,0,0,0,3,2010,0,1,1,1,3,1,5,1,3,18,1,1,1,1,1,1,1,1,2,3,3,1,4,3,1,6,1,1,4,2,1,4,9,3,1,3,3,3,3,0,3,1,1,1
3,1464,60,78.0,9978,6,6,1998,1998,20.0,602.0,0.0,324.0,926.0,926,678,0,1604,0.0,0.0,2,1,3,1,7,1,1998.0,2.0,470.0,360,36,0,0,0,0,0,6,2010,0,1,1,1,3,1,5,1,3,18,1,1,1,1,1,1,1,1,1,3,3,1,3,3,1,6,1,1,5,2,1,5,9,4,1,3,3,3,3,0,0,1,1,1
4,1465,120,43.0,5005,8,5,1992,1992,0.0,263.0,0.0,1017.0,1280.0,1280,0,0,1280,0.0,0.0,2,0,2,1,5,0,1992.0,2.0,506.0,0,82,0,0,144,0,0,1,2010,0,1,1,1,3,4,5,1,3,19,1,1,4,2,1,1,4,4,2,4,3,1,4,3,1,5,1,1,5,2,1,5,9,0,1,2,3,3,3,0,0,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1454,2915,160,21.0,1936,4,7,1970,1970,0.0,0.0,0.0,546.0,546.0,546,546,0,1092,0.0,0.0,1,1,3,1,5,0,,0.0,0.0,0,0,0,0,0,0,0,6,2006,0,2,1,1,4,1,5,1,3,15,1,1,5,1,1,1,7,7,2,3,3,2,3,3,1,1,1,1,4,2,1,4,9,0,5,0,0,0,3,0,0,1,1,1
1455,2916,160,21.0,1894,4,5,1970,1970,0.0,252.0,0.0,294.0,546.0,546,546,0,1092,0.0,0.0,1,1,3,1,6,0,1970.0,1.0,286.0,0,24,0,0,0,0,0,4,2006,0,2,1,1,4,1,5,1,3,15,1,1,4,1,1,1,7,7,2,3,3,2,3,3,1,3,1,1,3,2,1,4,9,0,4,1,3,3,3,0,0,1,1,2
1456,2917,20,160.0,20000,5,7,1960,1996,0.0,1224.0,0.0,0.0,1224.0,1224,0,0,1224,1.0,0.0,1,0,4,1,7,1,1960.0,2.0,576.0,474,0,0,0,0,0,0,9,2006,0,1,1,1,4,1,5,1,3,5,1,1,1,2,1,1,1,1,2,3,3,2,3,3,1,5,1,1,5,2,1,4,9,3,2,1,3,3,3,0,0,1,1,2
1457,2918,85,62.0,10441,5,5,1992,1992,0.0,337.0,0.0,575.0,912.0,970,0,0,970,0.0,1.0,1,0,3,1,6,0,,0.0,0.0,80,32,0,0,0,0,700,7,2006,0,1,1,1,4,1,5,1,3,5,1,1,1,5,1,1,4,3,2,3,3,1,4,3,3,6,1,1,3,2,1,4,9,0,5,0,0,0,3,0,3,2,1,1


In [178]:
train

Unnamed: 0,Id,MSSubClass,LotFrontage,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,TotRmsAbvGrd,Fireplaces,GarageYrBlt,GarageCars,GarageArea,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold,SalePrice,MSZoning_Nominal_ID,Street_Nominal_ID,Alley_Nominal_ID,LotShape_Ordinal_ID,LandContour_Nominal_ID,Utilities_Ordinal_ID,LotConfig_Nominal_ID,LandSlope_Ordinal_ID,Neighborhood_Nominal_ID,Condition1_Nominal_ID,Condition2_Nominal_ID,BldgType_Nominal_ID,HouseStyle_Nominal_ID,RoofStyle_Nominal_ID,RoofMatl_Nominal_ID,Exterior1st_Nominal_ID,Exterior2nd_Nominal_ID,MasVnrType_Nominal_ID,ExterQual_Ordinal_ID,ExterCond_Ordinal_ID,Foundation_Nominal_ID,BsmtQual_Ordinal_ID,BsmtCond_Ordinal_ID,BsmtExposure_Ordinal_ID,BsmtFinType1_Ordinal_ID,BsmtFinType2_Ordinal_ID,Heating_Nominal_ID,HeatingQC_Ordinal_ID,CentralAir_Ordinal_ID,Electrical_Nominal_ID,KitchenQual_Ordinal_ID,Functional_Ordinal_ID,FireplaceQu_Ordinal_ID,GarageType_Nominal_ID,GarageFinish_Ordinal_ID,GarageQual_Ordinal_ID,GarageCond_Ordinal_ID,PavedDrive_Ordinal_ID,PoolQC_Ordinal_ID,Fence_Ordinal_ID,MiscFeature_Nominal_ID,SaleType_Nominal_ID,SaleCondition_Nominal_ID
0,1,60,65.0,8450,7,5,2003,2003,196.0,706.0,0.0,150.0,856.0,856,854,0,1710,1.0,0.0,2,1,3,1,8,0,2003.0,2.0,548.0,0,61,0,0,0,0,0,2,2008,208500,1,1,1,4,1,5,1,3,1,1,1,1,1,1,1,1,1,1,4,3,1,4,3,1,6,1,1,5,2,1,5,9,0,1,2,3,3,3,0,0,1,1,1
1,2,20,80.0,9600,6,8,1976,1976,0.0,978.0,0.0,284.0,1262.0,1262,0,0,1262,0.0,1.0,2,0,3,1,6,1,1976.0,2.0,460.0,298,0,0,0,0,0,0,5,2007,181500,1,1,1,4,1,5,2,3,2,2,1,1,2,1,1,2,2,2,3,3,2,4,3,4,5,1,1,5,2,1,4,9,3,1,2,3,3,3,0,0,1,1,1
2,3,60,68.0,11250,7,5,2001,2002,162.0,486.0,0.0,434.0,920.0,920,866,0,1786,1.0,0.0,2,1,3,1,6,1,2001.0,2.0,608.0,0,42,0,0,0,0,0,9,2008,223500,1,1,1,3,1,5,1,3,1,1,1,1,1,1,1,1,1,1,4,3,1,4,3,2,6,1,1,5,2,1,5,9,3,1,2,3,3,3,0,0,1,1,1
3,4,70,60.0,9550,7,5,1915,1970,0.0,216.0,0.0,540.0,756.0,961,756,0,1717,1.0,0.0,1,0,3,1,7,1,1998.0,3.0,642.0,0,35,272,0,0,0,0,2,2006,140000,1,1,1,3,1,5,3,3,3,1,1,1,1,1,1,3,3,2,3,3,3,3,4,1,5,1,1,4,2,1,5,9,4,2,1,3,3,3,0,0,1,1,2
4,5,60,84.0,14260,8,5,2000,2000,350.0,655.0,0.0,490.0,1145.0,1145,1053,0,2198,1.0,0.0,2,1,4,1,9,1,2000.0,3.0,836.0,192,84,0,0,0,0,0,12,2008,250000,1,1,1,3,1,5,2,3,4,1,1,1,1,1,1,1,1,1,4,3,1,4,3,3,6,1,1,5,2,1,5,9,3,1,2,3,3,3,0,0,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1455,1456,60,62.0,7917,6,5,1999,2000,0.0,0.0,0.0,953.0,953.0,953,694,0,1647,0.0,0.0,2,1,3,1,7,1,1999.0,2.0,460.0,0,40,0,0,0,0,0,8,2007,175000,1,1,1,4,1,5,1,3,18,1,1,1,1,1,1,1,1,2,3,3,1,4,3,1,1,1,1,5,2,1,4,9,3,1,2,3,3,3,0,0,1,1,1
1456,1457,20,85.0,13175,6,6,1978,1988,119.0,790.0,163.0,589.0,1542.0,2073,0,0,2073,1.0,0.0,2,0,3,1,7,2,1978.0,2.0,500.0,349,0,0,0,0,0,0,2,2010,210000,1,1,1,4,1,5,1,3,7,1,1,1,2,1,1,8,5,3,3,3,2,4,3,1,5,3,1,3,2,1,4,8,3,1,1,3,3,3,0,3,1,1,1
1457,1458,70,66.0,9042,7,9,1941,2006,0.0,275.0,0.0,877.0,1152.0,1188,1152,0,2340,0.0,0.0,2,0,4,1,9,2,1941.0,1.0,252.0,0,60,0,0,0,0,2500,5,2010,266500,1,1,1,4,1,5,1,3,3,1,1,1,1,1,1,7,7,2,5,4,6,3,4,1,6,1,1,5,2,1,5,9,4,1,2,3,3,3,0,4,2,1,1
1458,1459,20,68.0,9717,5,6,1950,1996,0.0,49.0,1029.0,0.0,1078.0,1078,0,0,1078,1.0,0.0,1,0,2,1,5,0,1950.0,1.0,240.0,366,0,112,0,0,0,0,4,2010,142125,1,1,1,4,1,5,1,3,12,1,1,1,2,2,1,2,2,2,3,3,2,3,3,2,6,3,1,4,2,3,5,9,0,1,1,3,3,3,0,0,1,1,1


In [186]:
## MODEL STUFF

from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.model_selection import train_test_split

X_train = train.drop('SalePrice', axis=1)
y_train = train['SalePrice']

X_test = test.drop('SalePrice', axis=1)


model = HistGradientBoostingRegressor()
model.fit(X_train, y_train)


y_pred = model.predict(X_test)

test_ids = X_test['Id']
predictions_df = pd.DataFrame({'ID': test_ids, 'SalePrice': y_pred})

print(predictions_df.head())

predictions_df.to_csv('predictions.csv', index=False)


     ID      SalePrice
0  1461  125733.859670
1  1462  156831.128955
2  1463  183233.306745
3  1464  192426.934356
4  1465  188379.822131


![Entry Screenshot](houseprices.png)

4620 out of 98586 entries. Top 4,67% entry!