In [2]:
#==================================================
# Import libraries & set seed
#==================================================
import os

import pandas as pd
import numpy as np
import matplotlib
%matplotlib inline
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
import seaborn as sns
import time

# machine learning
from sklearn.linear_model import LinearRegression

# Other sklearn funcitonality
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import roc_auc_score # classificatoin problem 
from sklearn.metrics import roc_curve # classificatoin problem
from sklearn.metrics import confusion_matrix # classificatoin problem

from sklearn.metrics import r2_score # Regression problem

from sklearn.metrics import f1_score
from sklearn.decomposition import PCA
from sklearn.model_selection import cross_val_score, GridSearchCV, learning_curve


pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 100)
np.random.seed(42)

## Set metrics

In [3]:
SCORING_METRIC = 'neg_mean_squared_error' # Update
N_JOBS = -1

## Read and investigate data

In [4]:
#Loading the relevant data
base_path = os.path.dirname(os.getcwd())
file_path = os.path.join(base_path, "data")
file_name = "train.csv"

data_destination_train = os.path.join(file_path, file_name) # Full file path + name
raw_data=pd.read_csv(data_destination_train,keep_default_na=False,na_values='')

print(str(raw_data.shape))
raw_data.head()

(1460, 81)


Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,MasVnrArea,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,Heating,HeatingQC,CentralAir,Electrical,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,FireplaceQu,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,1,60,RL,65,8450,Pave,,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,7,5,2003,2003,Gable,CompShg,VinylSd,VinylSd,BrkFace,196,Gd,TA,PConc,Gd,TA,No,GLQ,706,Unf,0,150,856,GasA,Ex,Y,SBrkr,856,854,0,1710,1,0,2,1,3,1,Gd,8,Typ,0,,Attchd,2003,RFn,2,548,TA,TA,Y,0,61,0,0,0,0,,,,0,2,2008,WD,Normal,208500
1,2,20,RL,80,9600,Pave,,Reg,Lvl,AllPub,FR2,Gtl,Veenker,Feedr,Norm,1Fam,1Story,6,8,1976,1976,Gable,CompShg,MetalSd,MetalSd,,0,TA,TA,CBlock,Gd,TA,Gd,ALQ,978,Unf,0,284,1262,GasA,Ex,Y,SBrkr,1262,0,0,1262,0,1,2,0,3,1,TA,6,Typ,1,TA,Attchd,1976,RFn,2,460,TA,TA,Y,298,0,0,0,0,0,,,,0,5,2007,WD,Normal,181500
2,3,60,RL,68,11250,Pave,,IR1,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,7,5,2001,2002,Gable,CompShg,VinylSd,VinylSd,BrkFace,162,Gd,TA,PConc,Gd,TA,Mn,GLQ,486,Unf,0,434,920,GasA,Ex,Y,SBrkr,920,866,0,1786,1,0,2,1,3,1,Gd,6,Typ,1,TA,Attchd,2001,RFn,2,608,TA,TA,Y,0,42,0,0,0,0,,,,0,9,2008,WD,Normal,223500
3,4,70,RL,60,9550,Pave,,IR1,Lvl,AllPub,Corner,Gtl,Crawfor,Norm,Norm,1Fam,2Story,7,5,1915,1970,Gable,CompShg,Wd Sdng,Wd Shng,,0,TA,TA,BrkTil,TA,Gd,No,ALQ,216,Unf,0,540,756,GasA,Gd,Y,SBrkr,961,756,0,1717,1,0,1,0,3,1,Gd,7,Typ,1,Gd,Detchd,1998,Unf,3,642,TA,TA,Y,0,35,272,0,0,0,,,,0,2,2006,WD,Abnorml,140000
4,5,60,RL,84,14260,Pave,,IR1,Lvl,AllPub,FR2,Gtl,NoRidge,Norm,Norm,1Fam,2Story,8,5,2000,2000,Gable,CompShg,VinylSd,VinylSd,BrkFace,350,Gd,TA,PConc,Gd,TA,Av,GLQ,655,Unf,0,490,1145,GasA,Ex,Y,SBrkr,1145,1053,0,2198,1,0,2,1,4,1,Gd,9,Typ,1,TA,Attchd,2000,RFn,3,836,TA,TA,Y,192,84,0,0,0,0,,,,0,12,2008,WD,Normal,250000


In [6]:
# Load test dataset - we will perform the same transformations we are making to the training dataset in parallel
# in order be able to run the model on it at the end
file_name = "test.csv"
data_destination_test = os.path.join(file_path, file_name) # Full file path + name
raw_data_test=pd.read_csv(data_destination_test,keep_default_na=False,na_values='')

print(str(raw_data_test.shape))
raw_data_test.head()

(1459, 80)


Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,MasVnrArea,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,Heating,HeatingQC,CentralAir,Electrical,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,FireplaceQu,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition
0,1461,20,RH,80,11622,Pave,,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Feedr,Norm,1Fam,1Story,5,6,1961,1961,Gable,CompShg,VinylSd,VinylSd,,0,TA,TA,CBlock,TA,TA,No,Rec,468,LwQ,144,270,882,GasA,TA,Y,SBrkr,896,0,0,896,0,0,1,0,2,1,TA,5,Typ,0,,Attchd,1961,Unf,1,730,TA,TA,Y,140,0,0,0,120,0,,MnPrv,,0,6,2010,WD,Normal
1,1462,20,RL,81,14267,Pave,,IR1,Lvl,AllPub,Corner,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,6,1958,1958,Hip,CompShg,Wd Sdng,Wd Sdng,BrkFace,108,TA,TA,CBlock,TA,TA,No,ALQ,923,Unf,0,406,1329,GasA,TA,Y,SBrkr,1329,0,0,1329,0,0,1,1,3,1,Gd,6,Typ,0,,Attchd,1958,Unf,1,312,TA,TA,Y,393,36,0,0,0,0,,,Gar2,12500,6,2010,WD,Normal
2,1463,60,RL,74,13830,Pave,,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,5,5,1997,1998,Gable,CompShg,VinylSd,VinylSd,,0,TA,TA,PConc,Gd,TA,No,GLQ,791,Unf,0,137,928,GasA,Gd,Y,SBrkr,928,701,0,1629,0,0,2,1,3,1,TA,6,Typ,1,TA,Attchd,1997,Fin,2,482,TA,TA,Y,212,34,0,0,0,0,,MnPrv,,0,3,2010,WD,Normal
3,1464,60,RL,78,9978,Pave,,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,6,6,1998,1998,Gable,CompShg,VinylSd,VinylSd,BrkFace,20,TA,TA,PConc,TA,TA,No,GLQ,602,Unf,0,324,926,GasA,Ex,Y,SBrkr,926,678,0,1604,0,0,2,1,3,1,Gd,7,Typ,1,Gd,Attchd,1998,Fin,2,470,TA,TA,Y,360,36,0,0,0,0,,,,0,6,2010,WD,Normal
4,1465,120,RL,43,5005,Pave,,IR1,HLS,AllPub,Inside,Gtl,StoneBr,Norm,Norm,TwnhsE,1Story,8,5,1992,1992,Gable,CompShg,HdBoard,HdBoard,,0,Gd,TA,PConc,Gd,TA,No,ALQ,263,Unf,0,1017,1280,GasA,Ex,Y,SBrkr,1280,0,0,1280,0,0,2,0,2,1,Gd,5,Typ,0,,Attchd,1992,RFn,2,506,TA,TA,Y,0,82,0,0,144,0,,,,0,1,2010,WD,Normal


In [7]:
# We see that the data contains many variables which have missing data-points
raw_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1460 entries, 0 to 1459
Data columns (total 81 columns):
Id               1460 non-null int64
MSSubClass       1460 non-null int64
MSZoning         1460 non-null object
LotFrontage      1460 non-null object
LotArea          1460 non-null int64
Street           1460 non-null object
Alley            1460 non-null object
LotShape         1460 non-null object
LandContour      1460 non-null object
Utilities        1460 non-null object
LotConfig        1460 non-null object
LandSlope        1460 non-null object
Neighborhood     1460 non-null object
Condition1       1460 non-null object
Condition2       1460 non-null object
BldgType         1460 non-null object
HouseStyle       1460 non-null object
OverallQual      1460 non-null int64
OverallCond      1460 non-null int64
YearBuilt        1460 non-null int64
YearRemodAdd     1460 non-null int64
RoofStyle        1460 non-null object
RoofMatl         1460 non-null object
Exterior1st      1460 non-

## Data transformation function

In [8]:
# Create the function that cleane the data

# RefId will not be used in the training but shall be used to identify the data
data_identifier_name = "Id" #An unique ID are not a good predictor
dependent_variable = "SalePrice"

# functionality to drop/keep variables
drop_variables = True # A indicator if variable shall be dropped or kept in the data_cleaning
columns_to_drop = ['Alley', 'PoolQC', 'YrSold' ]
independent_columns_to_keep = [] # excluding id variable and dependent variable

# missing values inference
default_values_for_all_missing_continous_variables = True
missing_variables_to_fix = []


# functionality to create missing variables based on the average ratio between 2 variables
scale_functoinality_disabled = True
prices_to_scale = [
                    'MMRAcquisitionAuctionAveragePrice', 
                    'MMRAcquisitionAuctionCleanPrice', 
                    'MMRAcquisitionRetailAveragePrice', 
                    'MMRAcquisitonRetailCleanPrice', 
                    'MMRCurrentAuctionAveragePrice', 
                    'MMRCurrentAuctionCleanPrice', 
                    'MMRCurrentRetailAveragePrice', 
                    'MMRCurrentRetailCleanPrice'
                    ]

price_to_scale_with = 'VehBCost'

def drop_variables(data_to_transform, columns_to_drop = [], independent_columns_to_keep = []):
    '''
    Drop the non needed columns
    '''
    if columns_to_drop:
        return data_to_transform.drop(columns_to_drop, axis = 1)
    else:
        independent_columns_to_keep.append(data_identifier_name)
        independent_columns_to_keep.append(dependent_variable)

def fix_missing_continous_variables(data_to_transform, fix_all = True, variables_to_drop = []):
    '''
    Fix missing continous variables by assigning the median value
    '''
    if fix_all:
        variables_to_drop = data_to_transform.columns[data_to_transform.dtypes != object]
        
    for col in variables_to_drop:
        data_to_transform[col].fillna(data_to_transform[col].median())
        
    return data_to_transform
    
    
def calculate_scaled_prices(data_to_transform, price_to_scale_with, prices_to_scale):
    average_price_to_scale = data_to_transform[prices_to_scale].mean()
    average_price_to_scale_with = data_to_transform[price_to_scale_with].mean()
    
    scale_factors = average_price_to_scale / average_price_to_scale_with
    
    for price_name in prices_to_scale:
        data_to_transform.loc[data_to_transform[price_name].isnull(), price_name] = data_to_transform[
            price_to_scale_with] * scale_factors[price_name]
        
    return data_to_transform


def calculate_price_ratios(data_to_transform, price_to_scale_with, prices_to_scale):
    '''
    Function that calculates the ratio between the different prices and the saleprice
    '''
    for price_variable in prices_to_scale:
        data_to_transform[price_variable + "_ratio"] = data_to_transform[
            price_variable]/data_to_transform[price_to_scale_with]
    
    return data_to_transform



def group_and_fix_missing_values_categorical_var(data_to_transform):
    '''
    Simple function to fill out missing data for the categorical funtions and to 
    group variables with less than XX entries together to create a less wide data 
    set when dummy variables are created
    '''
    categorical_columns = data_to_transform.columns[data_to_transform.dtypes == object]
    
    aggrigation_cut_off = 10
    
    for variable in categorical_columns:
        # replace missing values with OTHER
        data_to_transform.loc[data_to_transform[variable].isnull(), variable] = "OTHER"
        
        # group the variables together and replace the non frequent variables with OTHER
        variables_to_not_transform = (raw_data[variable].value_counts()>aggrigation_cut_off).index
        data_to_transform.loc[data_to_transform[variable].isin(variables_to_not_transform) == False, variable] = "OTHER"
     
    return data_to_transform


# TODO: fix create dummies funciton
def create_dummy_variables(data_to_transform, categorical_variables_to_keep, continous_variables_to_keep):
    
    # We now create dummy variables for the categorical variables
    data_continous = data_to_transform[continous_variables_to_keep]
    data_categorical = data_to_transform[categorical_variables_to_keep]

    #looping over the categorical variables and make them to "category" types
    for column in data_categorical.columns:
        data_categorical[column] = data_categorical[column].astype("category")

    # create dummy variables
    car_dummies = pd.get_dummies(data_categorical, drop_first=True)

    # create final data set
    merged_data = car_dummies.merge(data_continous, left_index=True, right_index=True, how='inner')
    
    return merged_data


def data_cleaning_transformations(train_data, test_data):
    
    id_train = train_data[data_identifier_name]
    id_test = test_data[data_identifier_name]
    
    full_data_for_scaling = pd.concat([train_data, test_data]).reset_index(drop=True)
    
    # Drop unnecicary data
    full_data_for_scaling = drop_variables(full_data_for_scaling, 
                                           columns_to_drop = columns_to_drop, 
                                           independent_columns_to_keep = independent_columns_to_keep)
    
    # Calculate the scaled prices
    if scale_functoinality_disabled == False:
        full_data_for_scaling = calculate_scaled_prices(full_data_for_scaling, price_to_scale_with, prices_to_scale)
        full_data_for_scaling = calculate_price_ratios(full_data_for_scaling, price_to_scale_with, prices_to_scale)
        
    # Fixing the missing values
    full_data_for_scaling = fix_missing_continous_variables(full_data_for_scaling, 
                                                            fix_all = default_values_for_all_missing_continous_variables, 
                                                            variables_to_drop = missing_variables_to_fix)
    full_data_for_scaling = group_and_fix_missing_values_categorical_var(full_data_for_scaling)
    
    # now create the dummy variables and drop the variables that is not needed 
    continous_variables_to_keep = full_data_for_scaling.columns[full_data_for_scaling.dtypes != object]
    categorical_variables_to_keep = full_data_for_scaling.columns[full_data_for_scaling.dtypes == object] 
    
    full_data_for_scaling = create_dummy_variables(full_data_for_scaling,
                                                   categorical_variables_to_keep,
                                                   continous_variables_to_keep)
    
    # split the data back to train and test data
    train_data = full_data_for_scaling.loc[full_data_for_scaling[data_identifier_name].isin(id_train)].reset_index(drop=True)
    test_data = full_data_for_scaling.loc[full_data_for_scaling[data_identifier_name].isin(id_test)].reset_index(drop=True)
    
    return train_data, test_data
    

In [10]:
train_data, test_data = data_cleaning_transformations(raw_data, raw_data_test)

print(train_data.shape)
print(test_data.shape)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


(1460, 3525)
(1459, 3525)


## Train the model

### Create training split of data

In [8]:
# Get the dependent variable data
dv_data = train_data[dependent_variable]

# Create the independent variable dataframe that has all columns except for loan_approval_status
iv_data = train_data.drop(dependent_variable, axis=1)

# Create the test and train data sets
X_train, X_test, y_train, y_test = train_test_split(iv_data, dv_data, test_size=0.25)
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(1095, 264) (1095,)
(365, 264) (365,)


### First training iteration of model

In [12]:
# Test multiple funcitons with GridSearchCV
# This method needs X_train and Y_train to be 

# https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html

models_to_train = [LinearRegression()]

model_names = ['LinearRegression'
              ]

best_models = []
best_model_R2 = []

parameters = {
    'LinearRegression':{
        'normalize':[False]
             }
    }

# Find out the best model
for i in range(0,len(model_names)):
    
    
    act_model = models_to_train[i]
    act_model_name = model_names[i]
    print("Start training " + act_model_name)
    
    # Get the start time
    start_time = time.time()
    
    act_params = parameters[act_model_name]
    
    act_model = GridSearchCV(act_model, act_params, cv=5, scoring=SCORING_METRIC, n_jobs = N_JOBS)
    act_model.fit(X_train, y_train)

    act_best_model = act_model.best_estimator_
    best_models.append(act_best_model)
    best_model_R2.append(r2_score(y_train, act_best_model.predict_proba(X_train)[:,1])) #predict_proba [:,1]
    
    # get the end time
    time_taken = time.time() - start_time
    print("Stop training " + act_model_name + ". Time taken {} s".format(time_taken))

# displaying the results
result_df = pd.DataFrame(best_model_R2).T
result_df.columns = model_names
result_df = result_df[result_df.iloc[-1].sort_values(ascending=True).index]

result_df.T.plot(kind='bar', title = "Different models " + SCORING_METRIC, legend = False )
result_df.head()

Start training LinearRegression


JoblibValueError: JoblibValueError
___________________________________________________________________________
Multiprocessing exception:
...........................................................................
C:\Users\otto.ryden\AppData\Local\Continuum\Anaconda3\lib\runpy.py in _run_module_as_main(mod_name='ipykernel_launcher', alter_argv=1)
    188         sys.exit(msg)
    189     main_globals = sys.modules["__main__"].__dict__
    190     if alter_argv:
    191         sys.argv[0] = mod_spec.origin
    192     return _run_code(code, main_globals, None,
--> 193                      "__main__", mod_spec)
        mod_spec = ModuleSpec(name='ipykernel_launcher', loader=<_f...nda3\\lib\\site-packages\\ipykernel_launcher.py')
    194 
    195 def run_module(mod_name, init_globals=None,
    196                run_name=None, alter_sys=False):
    197     """Execute a module's code without importing it

...........................................................................
C:\Users\otto.ryden\AppData\Local\Continuum\Anaconda3\lib\runpy.py in _run_code(code=<code object <module> at 0x000001D52F74E4B0, fil...lib\site-packages\ipykernel_launcher.py", line 5>, run_globals={'__annotations__': {}, '__builtins__': <module 'builtins' (built-in)>, '__cached__': r'C:\Users\otto.ryden\AppData\Local\Continuum\Anac...ges\__pycache__\ipykernel_launcher.cpython-36.pyc', '__doc__': 'Entry point for launching an IPython kernel.\n\nTh...orts until\nafter removing the cwd from sys.path.\n', '__file__': r'C:\Users\otto.ryden\AppData\Local\Continuum\Anaconda3\lib\site-packages\ipykernel_launcher.py', '__loader__': <_frozen_importlib_external.SourceFileLoader object>, '__name__': '__main__', '__package__': '', '__spec__': ModuleSpec(name='ipykernel_launcher', loader=<_f...nda3\\lib\\site-packages\\ipykernel_launcher.py'), 'app': <module 'ipykernel.kernelapp' from 'C:\\Users\\o...a3\\lib\\site-packages\\ipykernel\\kernelapp.py'>, ...}, init_globals=None, mod_name='__main__', mod_spec=ModuleSpec(name='ipykernel_launcher', loader=<_f...nda3\\lib\\site-packages\\ipykernel_launcher.py'), pkg_name='', script_name=None)
     80                        __cached__ = cached,
     81                        __doc__ = None,
     82                        __loader__ = loader,
     83                        __package__ = pkg_name,
     84                        __spec__ = mod_spec)
---> 85     exec(code, run_globals)
        code = <code object <module> at 0x000001D52F74E4B0, fil...lib\site-packages\ipykernel_launcher.py", line 5>
        run_globals = {'__annotations__': {}, '__builtins__': <module 'builtins' (built-in)>, '__cached__': r'C:\Users\otto.ryden\AppData\Local\Continuum\Anac...ges\__pycache__\ipykernel_launcher.cpython-36.pyc', '__doc__': 'Entry point for launching an IPython kernel.\n\nTh...orts until\nafter removing the cwd from sys.path.\n', '__file__': r'C:\Users\otto.ryden\AppData\Local\Continuum\Anaconda3\lib\site-packages\ipykernel_launcher.py', '__loader__': <_frozen_importlib_external.SourceFileLoader object>, '__name__': '__main__', '__package__': '', '__spec__': ModuleSpec(name='ipykernel_launcher', loader=<_f...nda3\\lib\\site-packages\\ipykernel_launcher.py'), 'app': <module 'ipykernel.kernelapp' from 'C:\\Users\\o...a3\\lib\\site-packages\\ipykernel\\kernelapp.py'>, ...}
     86     return run_globals
     87 
     88 def _run_module_code(code, init_globals=None,
     89                     mod_name=None, mod_spec=None,

...........................................................................
C:\Users\otto.ryden\AppData\Local\Continuum\Anaconda3\lib\site-packages\ipykernel_launcher.py in <module>()
     11     # This is added back by InteractiveShellApp.init_path()
     12     if sys.path[0] == '':
     13         del sys.path[0]
     14 
     15     from ipykernel import kernelapp as app
---> 16     app.launch_new_instance()
     17 
     18 
     19 
     20 

...........................................................................
C:\Users\otto.ryden\AppData\Local\Continuum\Anaconda3\lib\site-packages\traitlets\config\application.py in launch_instance(cls=<class 'ipykernel.kernelapp.IPKernelApp'>, argv=None, **kwargs={})
    653 
    654         If a global instance already exists, this reinitializes and starts it
    655         """
    656         app = cls.instance(**kwargs)
    657         app.initialize(argv)
--> 658         app.start()
        app.start = <bound method IPKernelApp.start of <ipykernel.kernelapp.IPKernelApp object>>
    659 
    660 #-----------------------------------------------------------------------------
    661 # utility functions, for convenience
    662 #-----------------------------------------------------------------------------

...........................................................................
C:\Users\otto.ryden\AppData\Local\Continuum\Anaconda3\lib\site-packages\ipykernel\kernelapp.py in start(self=<ipykernel.kernelapp.IPKernelApp object>)
    472             return self.subapp.start()
    473         if self.poller is not None:
    474             self.poller.start()
    475         self.kernel.start()
    476         try:
--> 477             ioloop.IOLoop.instance().start()
    478         except KeyboardInterrupt:
    479             pass
    480 
    481 launch_new_instance = IPKernelApp.launch_instance

...........................................................................
C:\Users\otto.ryden\AppData\Local\Continuum\Anaconda3\lib\site-packages\zmq\eventloop\ioloop.py in start(self=<zmq.eventloop.ioloop.ZMQIOLoop object>)
    172             )
    173         return loop
    174     
    175     def start(self):
    176         try:
--> 177             super(ZMQIOLoop, self).start()
        self.start = <bound method ZMQIOLoop.start of <zmq.eventloop.ioloop.ZMQIOLoop object>>
    178         except ZMQError as e:
    179             if e.errno == ETERM:
    180                 # quietly return on ETERM
    181                 pass

...........................................................................
C:\Users\otto.ryden\AppData\Local\Continuum\Anaconda3\lib\site-packages\tornado\ioloop.py in start(self=<zmq.eventloop.ioloop.ZMQIOLoop object>)
    883                 self._events.update(event_pairs)
    884                 while self._events:
    885                     fd, events = self._events.popitem()
    886                     try:
    887                         fd_obj, handler_func = self._handlers[fd]
--> 888                         handler_func(fd_obj, events)
        handler_func = <function wrap.<locals>.null_wrapper>
        fd_obj = <zmq.sugar.socket.Socket object>
        events = 1
    889                     except (OSError, IOError) as e:
    890                         if errno_from_exception(e) == errno.EPIPE:
    891                             # Happens when the client closes the connection
    892                             pass

...........................................................................
C:\Users\otto.ryden\AppData\Local\Continuum\Anaconda3\lib\site-packages\tornado\stack_context.py in null_wrapper(*args=(<zmq.sugar.socket.Socket object>, 1), **kwargs={})
    272         # Fast path when there are no active contexts.
    273         def null_wrapper(*args, **kwargs):
    274             try:
    275                 current_state = _state.contexts
    276                 _state.contexts = cap_contexts[0]
--> 277                 return fn(*args, **kwargs)
        args = (<zmq.sugar.socket.Socket object>, 1)
        kwargs = {}
    278             finally:
    279                 _state.contexts = current_state
    280         null_wrapper._wrapped = True
    281         return null_wrapper

...........................................................................
C:\Users\otto.ryden\AppData\Local\Continuum\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py in _handle_events(self=<zmq.eventloop.zmqstream.ZMQStream object>, fd=<zmq.sugar.socket.Socket object>, events=1)
    435             # dispatch events:
    436             if events & IOLoop.ERROR:
    437                 gen_log.error("got POLLERR event on ZMQStream, which doesn't make sense")
    438                 return
    439             if events & IOLoop.READ:
--> 440                 self._handle_recv()
        self._handle_recv = <bound method ZMQStream._handle_recv of <zmq.eventloop.zmqstream.ZMQStream object>>
    441                 if not self.socket:
    442                     return
    443             if events & IOLoop.WRITE:
    444                 self._handle_send()

...........................................................................
C:\Users\otto.ryden\AppData\Local\Continuum\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py in _handle_recv(self=<zmq.eventloop.zmqstream.ZMQStream object>)
    467                 gen_log.error("RECV Error: %s"%zmq.strerror(e.errno))
    468         else:
    469             if self._recv_callback:
    470                 callback = self._recv_callback
    471                 # self._recv_callback = None
--> 472                 self._run_callback(callback, msg)
        self._run_callback = <bound method ZMQStream._run_callback of <zmq.eventloop.zmqstream.ZMQStream object>>
        callback = <function wrap.<locals>.null_wrapper>
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    473                 
    474         # self.update_state()
    475         
    476 

...........................................................................
C:\Users\otto.ryden\AppData\Local\Continuum\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py in _run_callback(self=<zmq.eventloop.zmqstream.ZMQStream object>, callback=<function wrap.<locals>.null_wrapper>, *args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    409         close our socket."""
    410         try:
    411             # Use a NullContext to ensure that all StackContexts are run
    412             # inside our blanket exception handler rather than outside.
    413             with stack_context.NullContext():
--> 414                 callback(*args, **kwargs)
        callback = <function wrap.<locals>.null_wrapper>
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    415         except:
    416             gen_log.error("Uncaught exception, closing connection.",
    417                           exc_info=True)
    418             # Close the socket on an uncaught exception from a user callback

...........................................................................
C:\Users\otto.ryden\AppData\Local\Continuum\Anaconda3\lib\site-packages\tornado\stack_context.py in null_wrapper(*args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    272         # Fast path when there are no active contexts.
    273         def null_wrapper(*args, **kwargs):
    274             try:
    275                 current_state = _state.contexts
    276                 _state.contexts = cap_contexts[0]
--> 277                 return fn(*args, **kwargs)
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    278             finally:
    279                 _state.contexts = current_state
    280         null_wrapper._wrapped = True
    281         return null_wrapper

...........................................................................
C:\Users\otto.ryden\AppData\Local\Continuum\Anaconda3\lib\site-packages\ipykernel\kernelbase.py in dispatcher(msg=[<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>])
    278         if self.control_stream:
    279             self.control_stream.on_recv(self.dispatch_control, copy=False)
    280 
    281         def make_dispatcher(stream):
    282             def dispatcher(msg):
--> 283                 return self.dispatch_shell(stream, msg)
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    284             return dispatcher
    285 
    286         for s in self.shell_streams:
    287             s.on_recv(make_dispatcher(s), copy=False)

...........................................................................
C:\Users\otto.ryden\AppData\Local\Continuum\Anaconda3\lib\site-packages\ipykernel\kernelbase.py in dispatch_shell(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, msg={'buffers': [], 'content': {'allow_stdin': True, 'code': '# Test multiple funcitons with GridSearchCV\n# Th...SCORING_METRIC, legend = False )\nresult_df.head()', 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2019, 2, 11, 9, 29, 42, 696198, tzinfo=datetime.timezone.utc), 'msg_id': '0394747D168E4AAE97178A93A3DA0323', 'msg_type': 'execute_request', 'session': 'C945D821970240FB96941544A5F29FB3', 'username': 'username', 'version': '5.0'}, 'metadata': {}, 'msg_id': '0394747D168E4AAE97178A93A3DA0323', 'msg_type': 'execute_request', 'parent_header': {}})
    230             self.log.warn("Unknown message type: %r", msg_type)
    231         else:
    232             self.log.debug("%s: %s", msg_type, msg)
    233             self.pre_handler_hook()
    234             try:
--> 235                 handler(stream, idents, msg)
        handler = <bound method Kernel.execute_request of <ipykernel.ipkernel.IPythonKernel object>>
        stream = <zmq.eventloop.zmqstream.ZMQStream object>
        idents = [b'C945D821970240FB96941544A5F29FB3']
        msg = {'buffers': [], 'content': {'allow_stdin': True, 'code': '# Test multiple funcitons with GridSearchCV\n# Th...SCORING_METRIC, legend = False )\nresult_df.head()', 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2019, 2, 11, 9, 29, 42, 696198, tzinfo=datetime.timezone.utc), 'msg_id': '0394747D168E4AAE97178A93A3DA0323', 'msg_type': 'execute_request', 'session': 'C945D821970240FB96941544A5F29FB3', 'username': 'username', 'version': '5.0'}, 'metadata': {}, 'msg_id': '0394747D168E4AAE97178A93A3DA0323', 'msg_type': 'execute_request', 'parent_header': {}}
    236             except Exception:
    237                 self.log.error("Exception in message handler:", exc_info=True)
    238             finally:
    239                 self.post_handler_hook()

...........................................................................
C:\Users\otto.ryden\AppData\Local\Continuum\Anaconda3\lib\site-packages\ipykernel\kernelbase.py in execute_request(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, ident=[b'C945D821970240FB96941544A5F29FB3'], parent={'buffers': [], 'content': {'allow_stdin': True, 'code': '# Test multiple funcitons with GridSearchCV\n# Th...SCORING_METRIC, legend = False )\nresult_df.head()', 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2019, 2, 11, 9, 29, 42, 696198, tzinfo=datetime.timezone.utc), 'msg_id': '0394747D168E4AAE97178A93A3DA0323', 'msg_type': 'execute_request', 'session': 'C945D821970240FB96941544A5F29FB3', 'username': 'username', 'version': '5.0'}, 'metadata': {}, 'msg_id': '0394747D168E4AAE97178A93A3DA0323', 'msg_type': 'execute_request', 'parent_header': {}})
    394         if not silent:
    395             self.execution_count += 1
    396             self._publish_execute_input(code, parent, self.execution_count)
    397 
    398         reply_content = self.do_execute(code, silent, store_history,
--> 399                                         user_expressions, allow_stdin)
        user_expressions = {}
        allow_stdin = True
    400 
    401         # Flush output before sending the reply.
    402         sys.stdout.flush()
    403         sys.stderr.flush()

...........................................................................
C:\Users\otto.ryden\AppData\Local\Continuum\Anaconda3\lib\site-packages\ipykernel\ipkernel.py in do_execute(self=<ipykernel.ipkernel.IPythonKernel object>, code='# Test multiple funcitons with GridSearchCV\n# Th...SCORING_METRIC, legend = False )\nresult_df.head()', silent=False, store_history=True, user_expressions={}, allow_stdin=True)
    191 
    192         self._forward_input(allow_stdin)
    193 
    194         reply_content = {}
    195         try:
--> 196             res = shell.run_cell(code, store_history=store_history, silent=silent)
        res = undefined
        shell.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = '# Test multiple funcitons with GridSearchCV\n# Th...SCORING_METRIC, legend = False )\nresult_df.head()'
        store_history = True
        silent = False
    197         finally:
    198             self._restore_input()
    199 
    200         if res.error_before_exec is not None:

...........................................................................
C:\Users\otto.ryden\AppData\Local\Continuum\Anaconda3\lib\site-packages\ipykernel\zmqshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, *args=('# Test multiple funcitons with GridSearchCV\n# Th...SCORING_METRIC, legend = False )\nresult_df.head()',), **kwargs={'silent': False, 'store_history': True})
    528             )
    529         self.payload_manager.write_payload(payload)
    530 
    531     def run_cell(self, *args, **kwargs):
    532         self._last_traceback = None
--> 533         return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
        self.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        args = ('# Test multiple funcitons with GridSearchCV\n# Th...SCORING_METRIC, legend = False )\nresult_df.head()',)
        kwargs = {'silent': False, 'store_history': True}
    534 
    535     def _showtraceback(self, etype, evalue, stb):
    536         # try to preserve ordering of tracebacks and print statements
    537         sys.stdout.flush()

...........................................................................
C:\Users\otto.ryden\AppData\Local\Continuum\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, raw_cell='# Test multiple funcitons with GridSearchCV\n# Th...SCORING_METRIC, legend = False )\nresult_df.head()', store_history=True, silent=False, shell_futures=True)
   2712                 self.displayhook.exec_result = result
   2713 
   2714                 # Execute the user code
   2715                 interactivity = "none" if silent else self.ast_node_interactivity
   2716                 has_raised = self.run_ast_nodes(code_ast.body, cell_name,
-> 2717                    interactivity=interactivity, compiler=compiler, result=result)
        interactivity = 'last_expr'
        compiler = <IPython.core.compilerop.CachingCompiler object>
   2718                 
   2719                 self.last_execution_succeeded = not has_raised
   2720 
   2721                 # Reset this so later displayed values do not modify the

...........................................................................
C:\Users\otto.ryden\AppData\Local\Continuum\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py in run_ast_nodes(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, nodelist=[<_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.For object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Expr object>, <_ast.Expr object>], cell_name='<ipython-input-12-b662c619cf2c>', interactivity='last', compiler=<IPython.core.compilerop.CachingCompiler object>, result=<ExecutionResult object at 1d5370972b0, executio..._before_exec=None error_in_exec=None result=None>)
   2816 
   2817         try:
   2818             for i, node in enumerate(to_run_exec):
   2819                 mod = ast.Module([node])
   2820                 code = compiler(mod, cell_name, "exec")
-> 2821                 if self.run_code(code, result):
        self.run_code = <bound method InteractiveShell.run_code of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = <code object <module> at 0x000001D537080C00, file "<ipython-input-12-b662c619cf2c>", line 21>
        result = <ExecutionResult object at 1d5370972b0, executio..._before_exec=None error_in_exec=None result=None>
   2822                     return True
   2823 
   2824             for i, node in enumerate(to_run_interactive):
   2825                 mod = ast.Interactive([node])

...........................................................................
C:\Users\otto.ryden\AppData\Local\Continuum\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py in run_code(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, code_obj=<code object <module> at 0x000001D537080C00, file "<ipython-input-12-b662c619cf2c>", line 21>, result=<ExecutionResult object at 1d5370972b0, executio..._before_exec=None error_in_exec=None result=None>)
   2876         outflag = 1  # happens in more places, so it's easier as default
   2877         try:
   2878             try:
   2879                 self.hooks.pre_run_code_hook()
   2880                 #rprint('Running code', repr(code_obj)) # dbg
-> 2881                 exec(code_obj, self.user_global_ns, self.user_ns)
        code_obj = <code object <module> at 0x000001D537080C00, file "<ipython-input-12-b662c619cf2c>", line 21>
        self.user_global_ns = {'FuncFormatter': <class 'matplotlib.ticker.FuncFormatter'>, 'GridSearchCV': <class 'sklearn.model_selection._search.GridSearchCV'>, 'In': ['', "#===============================================...ption('display.max_rows', 100)\nnp.random.seed(42)", "SCORING_METRIC = 'neg_mean_squared_error' # Update\nN_JOBS = -1", '#Loading the relevant data\nraw_data = pd.read_cs....csv")\nprint(str(raw_data.shape))\nraw_data.head()', '# Loading in the test data that needs to be pred...nt(str(raw_data_test.shape))\nraw_data_test.head()', '# We see that the data contains many variables which have missing data-points\nraw_data.info()', '# Create the function that cleane the data\n\n# Re...=True)\n    \n    return train_data, test_data\n    ', '# Test the data cleaning funciton\nraw_data = pd....)\n\nprint(train_data.shape)\nprint(test_data.shape)', '# Get the dependent variable data\ndv_data = trai... y_train.shape)\nprint(X_test.shape, y_test.shape)', '# Test multiple funcitons with GridSearchCV\n# Th...SCORING_METRIC, legend = False )\nresult_df.head()', '# Create the function that cleane the data\n\n# Re...=True)\n    \n    return train_data, test_data\n    ', '# Test multiple funcitons with GridSearchCV\n# Th...SCORING_METRIC, legend = False )\nresult_df.head()', '# Test multiple funcitons with GridSearchCV\n# Th...SCORING_METRIC, legend = False )\nresult_df.head()'], 'LinearRegression': <class 'sklearn.linear_model.base.LinearRegression'>, 'N_JOBS': -1, 'Out': {3:    Id  MSSubClass MSZoning  LotFrontage  LotArea...   140000  
4       WD        Normal     250000  , 4:      Id  MSSubClass MSZoning  LotFrontage  LotAr...    WD        Normal  
4       WD        Normal  }, 'PCA': <class 'sklearn.decomposition.pca.PCA'>, 'SCORING_METRIC': 'neg_mean_squared_error', 'X_test':       BldgType_2fmCon  BldgType_Duplex  BldgType...         1963    2008  

[365 rows x 264 columns], 'X_train':       BldgType_2fmCon  BldgType_Duplex  BldgType...        2007    2009  

[1095 rows x 264 columns], ...}
        self.user_ns = {'FuncFormatter': <class 'matplotlib.ticker.FuncFormatter'>, 'GridSearchCV': <class 'sklearn.model_selection._search.GridSearchCV'>, 'In': ['', "#===============================================...ption('display.max_rows', 100)\nnp.random.seed(42)", "SCORING_METRIC = 'neg_mean_squared_error' # Update\nN_JOBS = -1", '#Loading the relevant data\nraw_data = pd.read_cs....csv")\nprint(str(raw_data.shape))\nraw_data.head()', '# Loading in the test data that needs to be pred...nt(str(raw_data_test.shape))\nraw_data_test.head()', '# We see that the data contains many variables which have missing data-points\nraw_data.info()', '# Create the function that cleane the data\n\n# Re...=True)\n    \n    return train_data, test_data\n    ', '# Test the data cleaning funciton\nraw_data = pd....)\n\nprint(train_data.shape)\nprint(test_data.shape)', '# Get the dependent variable data\ndv_data = trai... y_train.shape)\nprint(X_test.shape, y_test.shape)', '# Test multiple funcitons with GridSearchCV\n# Th...SCORING_METRIC, legend = False )\nresult_df.head()', '# Create the function that cleane the data\n\n# Re...=True)\n    \n    return train_data, test_data\n    ', '# Test multiple funcitons with GridSearchCV\n# Th...SCORING_METRIC, legend = False )\nresult_df.head()', '# Test multiple funcitons with GridSearchCV\n# Th...SCORING_METRIC, legend = False )\nresult_df.head()'], 'LinearRegression': <class 'sklearn.linear_model.base.LinearRegression'>, 'N_JOBS': -1, 'Out': {3:    Id  MSSubClass MSZoning  LotFrontage  LotArea...   140000  
4       WD        Normal     250000  , 4:      Id  MSSubClass MSZoning  LotFrontage  LotAr...    WD        Normal  
4       WD        Normal  }, 'PCA': <class 'sklearn.decomposition.pca.PCA'>, 'SCORING_METRIC': 'neg_mean_squared_error', 'X_test':       BldgType_2fmCon  BldgType_Duplex  BldgType...         1963    2008  

[365 rows x 264 columns], 'X_train':       BldgType_2fmCon  BldgType_Duplex  BldgType...        2007    2009  

[1095 rows x 264 columns], ...}
   2882             finally:
   2883                 # Reset our crash handler in place
   2884                 sys.excepthook = old_excepthook
   2885         except SystemExit as e:

...........................................................................
C:\Users\otto.ryden\workspace\kaggle\House Prices\<ipython-input-12-b662c619cf2c> in <module>()
     29     start_time = time.time()
     30     
     31     act_params = parameters[act_model_name]
     32     
     33     act_model = GridSearchCV(act_model, act_params, cv=5, scoring=SCORING_METRIC, n_jobs = N_JOBS)
---> 34     act_model.fit(X_train, y_train)
     35 
     36     act_best_model = act_model.best_estimator_
     37     best_models.append(act_best_model)
     38     best_model_R2.append(r2_score(y_train, act_best_model.predict_proba(X_train)[:,1])) #predict_proba [:,1]

...........................................................................
C:\Users\otto.ryden\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\model_selection\_search.py in fit(self=GridSearchCV(cv=5, error_score='raise',
       e...     scoring='neg_mean_squared_error', verbose=0), X=      BldgType_2fmCon  BldgType_Duplex  BldgType...        2007    2009  

[1095 rows x 264 columns], y=1023    191000.0
810     181000.0
1384    105000...0.0
Name: SalePrice, Length: 1095, dtype: float64, groups=None)
    940 
    941         groups : array-like, with shape (n_samples,), optional
    942             Group labels for the samples used while splitting the dataset into
    943             train/test set.
    944         """
--> 945         return self._fit(X, y, groups, ParameterGrid(self.param_grid))
        self._fit = <bound method BaseSearchCV._fit of GridSearchCV(...    scoring='neg_mean_squared_error', verbose=0)>
        X =       BldgType_2fmCon  BldgType_Duplex  BldgType...        2007    2009  

[1095 rows x 264 columns]
        y = 1023    191000.0
810     181000.0
1384    105000...0.0
Name: SalePrice, Length: 1095, dtype: float64
        groups = None
        self.param_grid = {'normalize': [False]}
    946 
    947 
    948 class RandomizedSearchCV(BaseSearchCV):
    949     """Randomized search on hyper parameters.

...........................................................................
C:\Users\otto.ryden\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\model_selection\_search.py in _fit(self=GridSearchCV(cv=5, error_score='raise',
       e...     scoring='neg_mean_squared_error', verbose=0), X=      BldgType_2fmCon  BldgType_Duplex  BldgType...        2007    2009  

[1095 rows x 264 columns], y=1023    191000.0
810     181000.0
1384    105000...0.0
Name: SalePrice, Length: 1095, dtype: float64, groups=None, parameter_iterable=<sklearn.model_selection._search.ParameterGrid object>)
    559                                   fit_params=self.fit_params,
    560                                   return_train_score=self.return_train_score,
    561                                   return_n_test_samples=True,
    562                                   return_times=True, return_parameters=True,
    563                                   error_score=self.error_score)
--> 564           for parameters in parameter_iterable
        parameters = undefined
        parameter_iterable = <sklearn.model_selection._search.ParameterGrid object>
    565           for train, test in cv_iter)
    566 
    567         # if one choose to see train score, "out" will contain train score info
    568         if self.return_train_score:

...........................................................................
C:\Users\otto.ryden\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self=Parallel(n_jobs=-1), iterable=<generator object BaseSearchCV._fit.<locals>.<genexpr>>)
    763             if pre_dispatch == "all" or n_jobs == 1:
    764                 # The iterable was consumed all at once by the above for loop.
    765                 # No need to wait for async callbacks to trigger to
    766                 # consumption.
    767                 self._iterating = False
--> 768             self.retrieve()
        self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=-1)>
    769             # Make sure that we get a last message telling us we are done
    770             elapsed_time = time.time() - self._start_time
    771             self._print('Done %3i out of %3i | elapsed: %s finished',
    772                         (len(self._output), len(self._output),

---------------------------------------------------------------------------
Sub-process traceback:
---------------------------------------------------------------------------
ValueError                                         Mon Feb 11 10:29:48 2019
PID: 23328Python 3.6.1: C:\Users\otto.ryden\AppData\Local\Continuum\Anaconda3\python.exe
...........................................................................
C:\Users\otto.ryden\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self=<sklearn.externals.joblib.parallel.BatchedCalls object>)
    126     def __init__(self, iterator_slice):
    127         self.items = list(iterator_slice)
    128         self._size = len(self.items)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        self.items = [(<function _fit_and_score>, (LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False),       BldgType_2fmCon  BldgType_Duplex  BldgType...        2007    2009  

[1095 rows x 264 columns], 1023    191000.0
810     181000.0
1384    105000...0.0
Name: SalePrice, Length: 1095, dtype: float64, make_scorer(mean_squared_error, greater_is_better=False), array([ 219,  220,  221,  222,  223,  224,  225,...       1088, 1089, 1090, 1091, 1092, 1093, 1094]), array([  0,   1,   2,   3,   4,   5,   6,   7,  ...09, 210, 211, 212, 213, 214, 215, 216, 217, 218]), 0, {'normalize': False}), {'error_score': 'raise', 'fit_params': {}, 'return_n_test_samples': True, 'return_parameters': True, 'return_times': True, 'return_train_score': True})]
    132 
    133     def __len__(self):
    134         return self._size
    135 

...........................................................................
C:\Users\otto.ryden\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in <listcomp>(.0=<list_iterator object>)
    126     def __init__(self, iterator_slice):
    127         self.items = list(iterator_slice)
    128         self._size = len(self.items)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        func = <function _fit_and_score>
        args = (LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False),       BldgType_2fmCon  BldgType_Duplex  BldgType...        2007    2009  

[1095 rows x 264 columns], 1023    191000.0
810     181000.0
1384    105000...0.0
Name: SalePrice, Length: 1095, dtype: float64, make_scorer(mean_squared_error, greater_is_better=False), array([ 219,  220,  221,  222,  223,  224,  225,...       1088, 1089, 1090, 1091, 1092, 1093, 1094]), array([  0,   1,   2,   3,   4,   5,   6,   7,  ...09, 210, 211, 212, 213, 214, 215, 216, 217, 218]), 0, {'normalize': False})
        kwargs = {'error_score': 'raise', 'fit_params': {}, 'return_n_test_samples': True, 'return_parameters': True, 'return_times': True, 'return_train_score': True}
    132 
    133     def __len__(self):
    134         return self._size
    135 

...........................................................................
C:\Users\otto.ryden\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in _fit_and_score(estimator=LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False), X=      BldgType_2fmCon  BldgType_Duplex  BldgType...        2007    2009  

[1095 rows x 264 columns], y=1023    191000.0
810     181000.0
1384    105000...0.0
Name: SalePrice, Length: 1095, dtype: float64, scorer=make_scorer(mean_squared_error, greater_is_better=False), train=array([ 219,  220,  221,  222,  223,  224,  225,...       1088, 1089, 1090, 1091, 1092, 1093, 1094]), test=array([  0,   1,   2,   3,   4,   5,   6,   7,  ...09, 210, 211, 212, 213, 214, 215, 216, 217, 218]), verbose=0, parameters={'normalize': False}, fit_params={}, return_train_score=True, return_parameters=True, return_n_test_samples=True, return_times=True, error_score='raise')
    233 
    234     try:
    235         if y_train is None:
    236             estimator.fit(X_train, **fit_params)
    237         else:
--> 238             estimator.fit(X_train, y_train, **fit_params)
        estimator.fit = <bound method LinearRegression.fit of LinearRegr..., fit_intercept=True, n_jobs=1, normalize=False)>
        X_train =       BldgType_2fmCon  BldgType_Duplex  BldgType...         2007    2009  

[876 rows x 264 columns]
        y_train = 314     178000.0
442     162900.0
319     187500...00.0
Name: SalePrice, Length: 876, dtype: float64
        fit_params = {}
    239 
    240     except Exception as e:
    241         # Note fit time as time until error
    242         fit_time = time.time() - start_time

...........................................................................
C:\Users\otto.ryden\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\linear_model\base.py in fit(self=LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False), X=      BldgType_2fmCon  BldgType_Duplex  BldgType...         2007    2009  

[876 rows x 264 columns], y=314     178000.0
442     162900.0
319     187500...00.0
Name: SalePrice, Length: 876, dtype: float64, sample_weight=None)
    507         self : returns an instance of self.
    508         """
    509 
    510         n_jobs_ = self.n_jobs
    511         X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'],
--> 512                          y_numeric=True, multi_output=True)
    513 
    514         if sample_weight is not None and np.atleast_1d(sample_weight).ndim > 1:
    515             raise ValueError("Sample weights must be 1D array or scalar")
    516 

...........................................................................
C:\Users\otto.ryden\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py in check_X_y(X=      BldgType_2fmCon  BldgType_Duplex  BldgType...         2007    2009  

[876 rows x 264 columns], y=314     178000.0
442     162900.0
319     187500...00.0
Name: SalePrice, Length: 876, dtype: float64, accept_sparse=['csr', 'csc', 'coo'], dtype='numeric', order=None, copy=False, force_all_finite=True, ensure_2d=True, allow_nd=False, multi_output=True, ensure_min_samples=1, ensure_min_features=1, y_numeric=True, warn_on_dtype=False, estimator=None)
    516     y_converted : object
    517         The converted and validated y.
    518     """
    519     X = check_array(X, accept_sparse, dtype, order, copy, force_all_finite,
    520                     ensure_2d, allow_nd, ensure_min_samples,
--> 521                     ensure_min_features, warn_on_dtype, estimator)
        ensure_min_features = 1
        warn_on_dtype = False
        estimator = None
    522     if multi_output:
    523         y = check_array(y, 'csr', force_all_finite=True, ensure_2d=False,
    524                         dtype=None)
    525     else:

...........................................................................
C:\Users\otto.ryden\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py in check_array(array=array([[   0.,    0.,    0., ..., 1925., 1990., ...[   0.,    0.,    0., ..., 2007., 2007., 2009.]]), accept_sparse=['csr', 'csc', 'coo'], dtype=None, order=None, copy=False, force_all_finite=True, ensure_2d=True, allow_nd=False, ensure_min_samples=1, ensure_min_features=1, warn_on_dtype=False, estimator=None)
    402             array = array.astype(np.float64)
    403         if not allow_nd and array.ndim >= 3:
    404             raise ValueError("Found array with dim %d. %s expected <= 2."
    405                              % (array.ndim, estimator_name))
    406         if force_all_finite:
--> 407             _assert_all_finite(array)
        array = array([[   0.,    0.,    0., ..., 1925., 1990., ...[   0.,    0.,    0., ..., 2007., 2007., 2009.]])
    408 
    409     shape_repr = _shape_repr(array.shape)
    410     if ensure_min_samples > 0:
    411         n_samples = _num_samples(array)

...........................................................................
C:\Users\otto.ryden\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py in _assert_all_finite(X=array([[   0.,    0.,    0., ..., 1925., 1990., ...[   0.,    0.,    0., ..., 2007., 2007., 2009.]]))
     53     # everything is finite; fall back to O(n) space np.isfinite to prevent
     54     # false positives from overflow in sum method.
     55     if (X.dtype.char in np.typecodes['AllFloat'] and not np.isfinite(X.sum())
     56             and not np.isfinite(X).all()):
     57         raise ValueError("Input contains NaN, infinity"
---> 58                          " or a value too large for %r." % X.dtype)
        X.dtype = dtype('float64')
     59 
     60 
     61 def assert_all_finite(X):
     62     """Throw a ValueError if X contains NaN or infinity.

ValueError: Input contains NaN, infinity or a value too large for dtype('float64').
___________________________________________________________________________

In [13]:
X_train.describe()

Unnamed: 0,BldgType_2fmCon,BldgType_Duplex,BldgType_Twnhs,BldgType_TwnhsE,BsmtCond_Gd,BsmtCond_OTHER,BsmtCond_Po,BsmtCond_TA,BsmtExposure_Gd,BsmtExposure_Mn,BsmtExposure_No,BsmtExposure_OTHER,BsmtFinType1_BLQ,BsmtFinType1_GLQ,BsmtFinType1_LwQ,BsmtFinType1_OTHER,BsmtFinType1_Rec,BsmtFinType1_Unf,BsmtFinType2_BLQ,BsmtFinType2_GLQ,BsmtFinType2_LwQ,BsmtFinType2_OTHER,BsmtFinType2_Rec,BsmtFinType2_Unf,BsmtQual_Fa,BsmtQual_Gd,BsmtQual_OTHER,BsmtQual_TA,CentralAir_Y,Condition1_Feedr,Condition1_Norm,Condition1_PosA,Condition1_PosN,Condition1_RRAe,Condition1_RRAn,Condition1_RRNe,Condition1_RRNn,Condition2_Feedr,Condition2_Norm,Condition2_PosA,Condition2_PosN,Condition2_RRAe,Condition2_RRAn,Condition2_RRNn,Electrical_FuseF,Electrical_FuseP,Electrical_Mix,Electrical_OTHER,Electrical_SBrkr,ExterCond_Fa,ExterCond_Gd,ExterCond_Po,ExterCond_TA,ExterQual_Fa,ExterQual_Gd,ExterQual_TA,Exterior1st_AsphShn,Exterior1st_BrkComm,Exterior1st_BrkFace,Exterior1st_CBlock,Exterior1st_CemntBd,Exterior1st_HdBoard,Exterior1st_ImStucc,Exterior1st_MetalSd,Exterior1st_OTHER,Exterior1st_Plywood,Exterior1st_Stone,Exterior1st_Stucco,Exterior1st_VinylSd,Exterior1st_Wd Sdng,Exterior1st_WdShing,Exterior2nd_AsphShn,Exterior2nd_Brk Cmn,Exterior2nd_BrkFace,Exterior2nd_CBlock,Exterior2nd_CmentBd,Exterior2nd_HdBoard,Exterior2nd_ImStucc,Exterior2nd_MetalSd,Exterior2nd_OTHER,Exterior2nd_Other,Exterior2nd_Plywood,Exterior2nd_Stone,Exterior2nd_Stucco,Exterior2nd_VinylSd,Exterior2nd_Wd Sdng,Exterior2nd_Wd Shng,Fence_GdWo,Fence_MnPrv,Fence_MnWw,Fence_OTHER,FireplaceQu_Fa,FireplaceQu_Gd,FireplaceQu_OTHER,FireplaceQu_Po,FireplaceQu_TA,Foundation_CBlock,Foundation_PConc,Foundation_Slab,Foundation_Stone,Foundation_Wood,Functional_Maj2,Functional_Min1,Functional_Min2,Functional_Mod,Functional_OTHER,Functional_Sev,Functional_Typ,GarageCond_Fa,GarageCond_Gd,GarageCond_OTHER,GarageCond_Po,GarageCond_TA,GarageFinish_OTHER,GarageFinish_RFn,GarageFinish_Unf,GarageQual_Fa,GarageQual_Gd,GarageQual_OTHER,GarageQual_Po,GarageQual_TA,GarageType_Attchd,GarageType_Basment,GarageType_BuiltIn,GarageType_CarPort,GarageType_Detchd,GarageType_OTHER,Heating_GasA,Heating_GasW,Heating_Grav,Heating_OthW,Heating_Wall,HeatingQC_Fa,HeatingQC_Gd,HeatingQC_Po,HeatingQC_TA,HouseStyle_1.5Unf,HouseStyle_1Story,HouseStyle_2.5Fin,HouseStyle_2.5Unf,HouseStyle_2Story,HouseStyle_SFoyer,HouseStyle_SLvl,KitchenQual_Fa,KitchenQual_Gd,KitchenQual_OTHER,KitchenQual_TA,LandContour_HLS,LandContour_Low,LandContour_Lvl,LandSlope_Mod,LandSlope_Sev,LotConfig_CulDSac,LotConfig_FR2,LotConfig_FR3,LotConfig_Inside,LotShape_IR2,LotShape_IR3,LotShape_Reg,MSZoning_FV,MSZoning_OTHER,MSZoning_RH,MSZoning_RL,MSZoning_RM,MasVnrType_BrkFace,MasVnrType_None,MasVnrType_OTHER,MasVnrType_Stone,MiscFeature_OTHER,MiscFeature_Othr,MiscFeature_Shed,MiscFeature_TenC,Neighborhood_Blueste,Neighborhood_BrDale,Neighborhood_BrkSide,Neighborhood_ClearCr,Neighborhood_CollgCr,Neighborhood_Crawfor,Neighborhood_Edwards,Neighborhood_Gilbert,Neighborhood_IDOTRR,Neighborhood_MeadowV,Neighborhood_Mitchel,Neighborhood_NAmes,Neighborhood_NPkVill,Neighborhood_NWAmes,Neighborhood_NoRidge,Neighborhood_NridgHt,Neighborhood_OldTown,Neighborhood_SWISU,Neighborhood_Sawyer,Neighborhood_SawyerW,Neighborhood_Somerst,Neighborhood_StoneBr,Neighborhood_Timber,Neighborhood_Veenker,PavedDrive_P,PavedDrive_Y,RoofMatl_CompShg,RoofMatl_Membran,RoofMatl_Metal,RoofMatl_Roll,RoofMatl_Tar&Grv,RoofMatl_WdShake,RoofMatl_WdShngl,RoofStyle_Gable,RoofStyle_Gambrel,RoofStyle_Hip,RoofStyle_Mansard,RoofStyle_Shed,SaleCondition_AdjLand,SaleCondition_Alloca,SaleCondition_Family,SaleCondition_Normal,SaleCondition_Partial,SaleType_CWD,SaleType_Con,SaleType_ConLD,SaleType_ConLI,SaleType_ConLw,SaleType_New,SaleType_OTHER,SaleType_Oth,SaleType_WD,Street_Pave,Utilities_NoSeWa,Utilities_OTHER,1stFlrSF,2ndFlrSF,3SsnPorch,BedroomAbvGr,BsmtFinSF1,BsmtFinSF2,BsmtFullBath,BsmtHalfBath,BsmtUnfSF,EnclosedPorch,Fireplaces,FullBath,GarageArea,GarageCars,GarageYrBlt,GrLivArea,HalfBath,Id,KitchenAbvGr,LotArea,LotFrontage,LowQualFinSF,MSSubClass,MasVnrArea,MiscVal,MoSold,OpenPorchSF,OverallCond,OverallQual,PoolArea,ScreenPorch,TotRmsAbvGrd,TotalBsmtSF,WoodDeckSF,YearBuilt,YearRemodAdd,YrSold
count,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1037.0,1095.0,1095.0,1095.0,1095.0,1095.0,895.0,1095.0,1095.0,1091.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0
mean,0.024658,0.03379,0.029224,0.076712,0.047489,0.024658,0.000913,0.892237,0.089498,0.080365,0.656621,0.024658,0.102283,0.285845,0.052055,0.024658,0.092237,0.287671,0.018265,0.009132,0.030137,0.024658,0.038356,0.863927,0.025571,0.421005,0.024658,0.442922,0.928767,0.053881,0.863927,0.005479,0.012785,0.009132,0.015525,0.000913,0.004566,0.00274,0.991781,0.0,0.001826,0.000913,0.000913,0.0,0.021005,0.00274,0.0,0.000913,0.915068,0.021918,0.09863,0.000913,0.876712,0.010046,0.331507,0.622831,0.000913,0.001826,0.034703,0.000913,0.038356,0.152511,0.000913,0.146119,0.0,0.068493,0.000913,0.017352,0.359817,0.145205,0.017352,0.00274,0.005479,0.013699,0.000913,0.037443,0.140639,0.005479,0.139726,0.0,0.000913,0.094064,0.00274,0.019178,0.351598,0.140639,0.029224,0.039269,0.108676,0.008219,0.800913,0.023744,0.261187,0.46758,0.014612,0.215525,0.427397,0.449315,0.017352,0.004566,0.00274,0.003653,0.024658,0.025571,0.011872,0.0,0.000913,0.925114,0.024658,0.008219,0.052968,0.003653,0.908676,0.052968,0.293151,0.410046,0.031963,0.010959,0.052968,0.000913,0.900457,0.594521,0.013699,0.063014,0.006393,0.263927,0.052968,0.977169,0.013699,0.003653,0.001826,0.00274,0.035616,0.165297,0.000913,0.291324,0.010046,0.494064,0.006393,0.009132,0.309589,0.021005,0.045662,0.029224,0.401826,0.0,0.502283,0.029224,0.023744,0.905936,0.045662,0.008219,0.073059,0.03379,0.00274,0.699543,0.031963,0.007306,0.621918,0.042922,0.0,0.012785,0.790868,0.149772,0.315068,0.579909,0.003653,0.090411,0.960731,0.000913,0.03653,0.000913,0.000913,0.011872,0.040183,0.017352,0.102283,0.035616,0.075799,0.053881,0.022831,0.009132,0.034703,0.152511,0.006393,0.054795,0.028311,0.052968,0.079452,0.016438,0.046575,0.03653,0.056621,0.018265,0.024658,0.008219,0.021918,0.916895,0.982648,0.0,0.000913,0.000913,0.008219,0.00274,0.003653,0.769863,0.008219,0.205479,0.004566,0.001826,0.003653,0.004566,0.015525,0.829224,0.082192,0.003653,0.001826,0.006393,0.00274,0.003653,0.081279,0.0,0.000913,0.872146,0.996347,0.000913,0.0,1170.582648,354.767123,3.918721,2.896804,448.178082,43.790868,0.421005,0.057534,568.794521,21.118721,0.619178,1.578995,477.539726,1.784475,1978.694311,1531.213699,0.380822,727.669406,1.044749,10747.046575,70.394413,5.863927,57.118721,103.368469,40.52968,6.361644,49.63379,5.56895,6.12968,3.152511,15.917808,6.564384,1060.76347,96.194521,1971.107763,1984.854795,2007.818265
std,0.15515,0.180771,0.16851,0.266256,0.212779,0.15515,0.03022,0.310222,0.285591,0.271982,0.475054,0.15515,0.303159,0.452022,0.222239,0.15515,0.289493,0.452884,0.133969,0.09517,0.171042,0.15515,0.192142,0.343023,0.157923,0.493946,0.15515,0.496958,0.257331,0.225886,0.343023,0.073854,0.112399,0.09517,0.123685,0.03022,0.06745,0.052295,0.090328,0.0,0.042718,0.03022,0.03022,0.0,0.143465,0.052295,0.0,0.03022,0.278907,0.146482,0.298301,0.03022,0.328917,0.099769,0.47097,0.484899,0.03022,0.042718,0.183111,0.03022,0.192142,0.35968,0.03022,0.353387,0.0,0.252706,0.03022,0.130637,0.480166,0.352469,0.130637,0.052295,0.073854,0.11629,0.03022,0.189931,0.347808,0.073854,0.346861,0.0,0.03022,0.292051,0.052295,0.137213,0.477687,0.347808,0.16851,0.194324,0.311374,0.090328,0.399496,0.152321,0.439482,0.499176,0.120048,0.411374,0.494927,0.497652,0.130637,0.06745,0.052295,0.060357,0.15515,0.157923,0.10836,0.0,0.03022,0.263327,0.15515,0.090328,0.224072,0.060357,0.288201,0.224072,0.455415,0.492066,0.175983,0.104157,0.224072,0.03022,0.299527,0.491209,0.11629,0.243099,0.079735,0.440962,0.224072,0.149433,0.11629,0.060357,0.042718,0.052295,0.185417,0.371618,0.03022,0.45458,0.099769,0.500193,0.079735,0.09517,0.462535,0.143465,0.208847,0.16851,0.490491,0.0,0.500223,0.16851,0.152321,0.292051,0.208847,0.090328,0.260353,0.180771,0.052295,0.458666,0.175983,0.085201,0.48513,0.202775,0.0,0.112399,0.406875,0.357011,0.464755,0.493799,0.060357,0.286901,0.194324,0.03022,0.18769,0.03022,0.03022,0.10836,0.196477,0.130637,0.303159,0.185417,0.264797,0.225886,0.149433,0.09517,0.183111,0.35968,0.079735,0.227683,0.165934,0.224072,0.270567,0.127212,0.210824,0.18769,0.231223,0.133969,0.15515,0.090328,0.146482,0.276167,0.130637,0.0,0.03022,0.03022,0.090328,0.052295,0.060357,0.421113,0.090328,0.404236,0.06745,0.042718,0.060357,0.06745,0.123685,0.376485,0.274782,0.060357,0.042718,0.079735,0.052295,0.060357,0.273387,0.0,0.03022,0.33408,0.060357,0.03022,0.0,386.930314,440.752719,32.186042,0.806361,457.898505,153.896464,0.517444,0.236858,444.865511,61.315608,0.644338,0.544976,210.563755,0.733836,24.810675,527.197908,0.496972,424.545069,0.215504,11065.878057,25.219301,48.961376,42.624531,171.644715,328.354566,2.680894,69.134174,1.10448,1.366746,43.008391,56.84411,1.625103,443.87948,130.396336,30.605697,20.732472,1.325752
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,334.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1900.0,334.0,0.0,1.0,0.0,1300.0,21.0,0.0,20.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,2.0,0.0,0.0,1872.0,1950.0,2006.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,892.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,221.5,0.0,0.0,1.0,349.5,1.0,1961.0,1148.0,0.0,360.5,1.0,7589.0,59.0,0.0,20.0,0.0,0.0,5.0,0.0,5.0,5.0,0.0,0.0,5.0,795.5,0.0,1953.5,1966.0,2007.0
50%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1096.0,0.0,0.0,3.0,387.0,0.0,0.0,0.0,474.0,0.0,1.0,2.0,484.0,2.0,1980.0,1478.0,0.0,728.0,1.0,9600.0,70.0,0.0,50.0,0.0,0.0,6.0,27.0,5.0,6.0,0.0,0.0,6.0,996.0,0.0,1972.0,1994.0,2008.0
75%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1393.0,730.5,0.0,3.0,724.0,0.0,1.0,0.0,810.5,0.0,1.0,2.0,576.0,2.0,2002.0,1795.0,1.0,1096.5,1.0,11700.0,80.0,0.0,70.0,166.0,0.0,8.0,74.0,6.0,7.0,0.0,0.0,7.0,1299.5,168.0,2001.0,2004.0,2009.0
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,4692.0,2065.0,508.0,8.0,5644.0,1127.0,3.0,2.0,2336.0,552.0,3.0,3.0,1418.0,4.0,2010.0,5642.0,2.0,1460.0,3.0,215245.0,313.0,572.0,190.0,1378.0,8300.0,12.0,547.0,9.0,10.0,738.0,480.0,14.0,6110.0,857.0,2010.0,2010.0,2010.0


In [16]:
X_train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1095 entries, 1023 to 1126
Columns: 264 entries, BldgType_2fmCon to YrSold
dtypes: float64(11), int64(26), uint8(227)
memory usage: 567.8 KB


## Predict and create output for the test set