In [218]:
# sklearn packages
import sklearn.metrics
from sklearn.feature_selection import RFE, SelectKBest, f_regression, RFECV
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.dummy import DummyRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler ,minmax_scale, PolynomialFeatures, StandardScaler

# Statsmodels
import statsmodels.api as sm
import statsmodels.formula.api as smf
import statistics as stats
import statsmodels.api as sm

# Geolocation
import geopy
from geopy import distance

import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats

# utility libraries 
from statsmodels.formula.api import ols
from scipy.stats import f_oneway, norm
from statsmodels.stats.outliers_influence import variance_inflation_factor
from scipy import stats
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from itertools import combinations
import pickle

import warnings 

sns.set_style('whitegrid')

# filter warnings
warnings.filterwarnings("ignore")

# magic inline
%matplotlib inline
pd.set_option('display.max_columns', 300)

## Step 1: Read in hold out data, scalers, and best model

In [219]:
df = pd.read_csv('kc_house_data_test_features.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,id,date,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15
0,0,1974300020,20140827T000000,4,2.5,2270,11500,1.0,0,0,3,8,1540,730,1967,0,98034,47.7089,-122.241,2020,10918
1,1,1974300020,20150218T000000,4,2.5,2270,11500,1.0,0,0,3,8,1540,730,1967,0,98034,47.7089,-122.241,2020,10918
2,2,3630020380,20141107T000000,3,2.5,1470,1779,2.0,0,0,3,8,1160,310,2005,0,98029,47.5472,-121.998,1470,1576
3,3,1771000290,20141203T000000,3,1.75,1280,16200,1.0,0,0,3,8,1030,250,1976,0,98077,47.7427,-122.071,1160,10565
4,4,5126310470,20150115T000000,4,2.75,2830,8126,2.0,0,0,3,8,2830,0,2005,0,98059,47.4863,-122.14,2830,7916


In [220]:
df.drop('date', axis = 1, inplace = True)

In [221]:
df.drop('Unnamed: 0', axis = 1, inplace = True)

In [222]:
df.drop(columns = ['id'], axis = 1, inplace = True)

In [223]:
import pickle

final_scaler = pd.read_pickle("transform.pickle")
final_model = pd.read_pickle("model.pickle")

In [224]:
df.head()

Unnamed: 0,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15
0,4,2.5,2270,11500,1.0,0,0,3,8,1540,730,1967,0,98034,47.7089,-122.241,2020,10918
1,4,2.5,2270,11500,1.0,0,0,3,8,1540,730,1967,0,98034,47.7089,-122.241,2020,10918
2,3,2.5,1470,1779,2.0,0,0,3,8,1160,310,2005,0,98029,47.5472,-121.998,1470,1576
3,3,1.75,1280,16200,1.0,0,0,3,8,1030,250,1976,0,98077,47.7427,-122.071,1160,10565
4,4,2.75,2830,8126,2.0,0,0,3,8,2830,0,2005,0,98059,47.4863,-122.14,2830,7916


## Step 2: Feature Engineering for holdout set

Remember we have to perform the same transformations on our holdout data (feature engineering, extreme values, and scaling) that we performed on the original data.

In [225]:
df[df.bedrooms > 15]
#df['bedrooms'] = df['bedrooms'].replace(['33'],'3')

df['bedrooms'] = np.where((df['bedrooms'] + 1)/(df['bathrooms'] + 1) > 4, 
                                      df['bedrooms'].median(), 
                                      df['bedrooms'])

In [226]:
df["renovated"] = df.yr_renovated.apply(lambda x: 1 if x > 0 else 0)
renovated = df['renovated']

In [227]:
df["basement"] = df.sqft_basement.apply(lambda x: 1 if x != 0 else 0)

In [228]:
df['bathrooms'] = np.where((df['bathrooms'] + 1)/(df['bathrooms'] + 1) > 4,
                           df['bathrooms'].median(),
                           df['bathrooms'])

In [229]:
df['bedrooms']

0       4.0
1       4.0
2       3.0
3       3.0
4       4.0
       ... 
4318    3.0
4319    4.0
4320    2.0
4321    3.0
4322    2.0
Name: bedrooms, Length: 4323, dtype: float64

In [230]:
from geopy import Point

In [231]:
def haversine(lat1, lon1, lat2, lon2):
    MILES = 3959
    lat1, lon1, lat2, lon2 = map(np.deg2rad, [lat1, lon1, lat2, lon2])
    dlat = lat2 - lat1 
    dlon = lon2 - lon1 
    a = np.sin(dlat/2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2)**2
    c = 2 * np.arcsin(np.sqrt(a)) 
    total_miles = MILES * c
    return total_miles

In [232]:
df['pike_place'] = haversine(47.6086, -122.3401, df['lat'].values, df['long'].values)

In [233]:
new_features = pd.DataFrame()
new_features['renovated'] = renovated
new_features['pike_place'] = df['pike_place']
new_features.shape

(4323, 2)

In [234]:
# Create dummy variables for zip code 
zip_dummies = pd.get_dummies(df['zipcode'].astype(str), dtype=int, drop_first=True)
df.drop(columns=['zipcode'],inplace=True)
new_cols = 'zip'+zip_dummies.columns
zip_dummies.columns = new_cols
zip_dummies.head()

Unnamed: 0,zip98002,zip98003,zip98004,zip98005,zip98006,zip98007,zip98008,zip98010,zip98011,zip98014,zip98019,zip98022,zip98023,zip98024,zip98027,zip98028,zip98029,zip98030,zip98031,zip98032,zip98033,zip98034,zip98038,zip98039,zip98040,zip98042,zip98045,zip98052,zip98053,zip98055,zip98056,zip98058,zip98059,zip98065,zip98070,zip98072,zip98074,zip98075,zip98077,zip98092,zip98102,zip98103,zip98105,zip98106,zip98107,zip98108,zip98109,zip98112,zip98115,zip98116,zip98117,zip98118,zip98119,zip98122,zip98125,zip98126,zip98133,zip98136,zip98144,zip98146,zip98148,zip98155,zip98166,zip98168,zip98177,zip98178,zip98188,zip98198,zip98199
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [235]:
df.bedrooms

0       4.0
1       4.0
2       3.0
3       3.0
4       4.0
       ... 
4318    3.0
4319    4.0
4320    2.0
4321    3.0
4322    2.0
Name: bedrooms, Length: 4323, dtype: float64

In [236]:
top_54 = ['sqft_living,pike_place', 'grade,pike_place', 'sqft_above,pike_place', 'sqft_living,grade', 'sqft_living15,pike_place', 'bathrooms,grade', 'bathrooms,sqft_living', 'sqft_living,sqft_above', 'grade,sqft_above', 'grade,sqft_basement', 'bathrooms,sqft_above', 'sqft_above,sqft_basement', 'bathrooms,pike_place', 'sqft_living,sqft_basement', 'lat,pike_place', 'sqft_living,view', 'sqft_living,waterfront', 'sqft_living,sqft_living15', 'view,sqft_above', 'sqft_basement,sqft_living15', 'view,grade', 'bathrooms,sqft_basement', 'sqft_living,basement', 'sqft_above,basement', 'waterfront,sqft_above', 'bathrooms,view', 'bathrooms,sqft_living15', 'sqft_living,lat', 'grade,basement', 'grade,long', 'bathrooms,waterfront', 'bedrooms,grade', 'view,sqft_living15', 'grade,lat', 'sqft_living,long', 'sqft_above,long', 'grade,sqft_living15', 'long,sqft_living15', 'waterfront,sqft_living15', 'bedrooms,sqft_living', 'bedrooms,pike_place', 'waterfront,grade', 'bedrooms,sqft_above', 'sqft_living15,basement', 'sqft_above,sqft_living15', 'sqft_above,lat', 'sqft_basement,pike_place', 'waterfront,lat', 'floors,pike_place', 'bedrooms,waterfront', 'sqft_living,yr_renovated', 'sqft_living,renovated', 'floors,sqft_basement', 'lat,sqft_living15']
for feature in top_54:
    feat1,feat2 = feature.split(',')[0],feature.split(',')[1]
    new_features[feat1+'_X_'+feat2] = df[feat1] * df[feat2]

In [237]:
df.bedrooms

0       4.0
1       4.0
2       3.0
3       3.0
4       4.0
       ... 
4318    3.0
4319    4.0
4320    2.0
4321    3.0
4322    2.0
Name: bedrooms, Length: 4323, dtype: float64

## Step 3: Predict the holdout set

In [238]:
X_all = pd.concat([df, zip_dummies, new_features], axis=1)
X_all.head()

Unnamed: 0,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,grade,sqft_above,sqft_basement,yr_built,yr_renovated,lat,long,sqft_living15,sqft_lot15,renovated,basement,pike_place,zip98002,zip98003,zip98004,zip98005,zip98006,zip98007,zip98008,zip98010,zip98011,zip98014,zip98019,zip98022,zip98023,zip98024,zip98027,zip98028,zip98029,zip98030,zip98031,zip98032,zip98033,zip98034,zip98038,zip98039,zip98040,zip98042,zip98045,zip98052,zip98053,zip98055,zip98056,zip98058,zip98059,zip98065,zip98070,zip98072,zip98074,zip98075,zip98077,zip98092,zip98102,zip98103,zip98105,zip98106,zip98107,zip98108,zip98109,zip98112,zip98115,zip98116,zip98117,zip98118,zip98119,zip98122,zip98125,zip98126,zip98133,zip98136,zip98144,zip98146,zip98148,zip98155,zip98166,zip98168,zip98177,zip98178,zip98188,zip98198,zip98199,renovated.1,pike_place.1,sqft_living_X_pike_place,grade_X_pike_place,sqft_above_X_pike_place,sqft_living_X_grade,sqft_living15_X_pike_place,bathrooms_X_grade,bathrooms_X_sqft_living,sqft_living_X_sqft_above,grade_X_sqft_above,grade_X_sqft_basement,bathrooms_X_sqft_above,sqft_above_X_sqft_basement,bathrooms_X_pike_place,sqft_living_X_sqft_basement,lat_X_pike_place,sqft_living_X_view,sqft_living_X_waterfront,sqft_living_X_sqft_living15,view_X_sqft_above,sqft_basement_X_sqft_living15,view_X_grade,bathrooms_X_sqft_basement,sqft_living_X_basement,sqft_above_X_basement,waterfront_X_sqft_above,bathrooms_X_view,bathrooms_X_sqft_living15,sqft_living_X_lat,grade_X_basement,grade_X_long,bathrooms_X_waterfront,bedrooms_X_grade,view_X_sqft_living15,grade_X_lat,sqft_living_X_long,sqft_above_X_long,grade_X_sqft_living15,long_X_sqft_living15,waterfront_X_sqft_living15,bedrooms_X_sqft_living,bedrooms_X_pike_place,waterfront_X_grade,bedrooms_X_sqft_above,sqft_living15_X_basement,sqft_above_X_sqft_living15,sqft_above_X_lat,sqft_basement_X_pike_place,waterfront_X_lat,floors_X_pike_place,bedrooms_X_waterfront,sqft_living_X_yr_renovated,sqft_living_X_renovated,floors_X_sqft_basement,lat_X_sqft_living15
0,4.0,2.5,2270,11500,1.0,0,0,3,8,1540,730,1967,0,47.7089,-122.241,2020,10918,0,1,8.324874,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8.324874,18897.463346,66.59899,12820.30553,18160,16816.244916,20.0,5675.0,3495800,12320,5840,3850.0,1124200,20.812184,1657100,397.170568,0,0,4585400,0,1474600,0,1825.0,2270,1540,0,0.0,5050.0,108299.203,8,-977.928,0.0,32.0,0,381.6712,-277487.07,-188251.14,16160,-246926.82,0,9080.0,33.299495,0,6160.0,2020,3110800,73471.706,6077.157816,0.0,8.324874,0.0,0,0,730.0,96371.978
1,4.0,2.5,2270,11500,1.0,0,0,3,8,1540,730,1967,0,47.7089,-122.241,2020,10918,0,1,8.324874,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8.324874,18897.463346,66.59899,12820.30553,18160,16816.244916,20.0,5675.0,3495800,12320,5840,3850.0,1124200,20.812184,1657100,397.170568,0,0,4585400,0,1474600,0,1825.0,2270,1540,0,0.0,5050.0,108299.203,8,-977.928,0.0,32.0,0,381.6712,-277487.07,-188251.14,16160,-246926.82,0,9080.0,33.299495,0,6160.0,2020,3110800,73471.706,6077.157816,0.0,8.324874,0.0,0,0,730.0,96371.978
2,3.0,2.5,1470,1779,2.0,0,0,3,8,1160,310,2005,0,47.5472,-121.998,1470,1576,0,1,16.500807,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16.500807,24256.186066,132.006455,19140.935943,11760,24256.186066,20.0,3675.0,1705200,9280,2480,2900.0,359600,41.252017,455700,784.567163,0,0,2160900,0,455700,0,775.0,1470,1160,0,0.0,3675.0,69894.384,8,-975.984,0.0,24.0,0,380.3776,-179337.06,-141517.68,11760,-179337.06,0,4410.0,49.502421,0,3480.0,1470,1705200,55154.752,5115.250123,0.0,33.001614,0.0,0,0,620.0,69894.384
3,3.0,1.75,1280,16200,1.0,0,0,3,8,1030,250,1976,0,47.7427,-122.071,1160,10565,0,1,15.575849,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15.575849,19937.086311,124.606789,16043.124141,10240,18067.984469,14.0,2240.0,1318400,8240,2000,1802.5,257500,27.257735,320000,743.633071,0,0,1484800,0,290000,0,437.5,1280,1030,0,0.0,2030.0,61110.656,8,-976.568,0.0,24.0,0,381.9416,-156250.88,-125733.13,9280,-141602.36,0,3840.0,46.727546,0,3090.0,1160,1194800,49174.981,3893.96217,0.0,15.575849,0.0,0,0,250.0,55381.532
4,4.0,2.75,2830,8126,2.0,0,0,3,8,2830,0,2005,0,47.4863,-122.14,2830,7916,0,0,12.590059,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12.590059,35629.865744,100.720469,35629.865744,22640,35629.865744,22.0,7782.5,8008900,22640,0,7782.5,0,34.622661,0,597.855298,0,0,8008900,0,0,0,0.0,0,0,0,0.0,7782.5,134386.229,0,-977.12,0.0,32.0,0,379.8904,-345656.2,-345656.2,22640,-345656.2,0,11320.0,50.360234,0,11320.0,0,8008900,134386.229,0.0,0.0,25.180117,0.0,0,0,0.0,134386.229


In [239]:
#X_all.drop('distance_from_center', axis = 1, inplace = True)
X_all.drop('zipnan', axis = 1, inplace = True)

KeyError: "['zipnan'] not found in axis"

In [240]:
X_all.isna().sum()

bedrooms                      0
bathrooms                     0
sqft_living                   0
sqft_lot                      0
floors                        0
                             ..
bedrooms_X_waterfront         0
sqft_living_X_yr_renovated    0
sqft_living_X_renovated       0
floors_X_sqft_basement        0
lat_X_sqft_living15           0
Length: 145, dtype: int64

In [241]:
X_all[X_all.bathrooms.isna()]

Unnamed: 0,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,grade,sqft_above,sqft_basement,yr_built,yr_renovated,lat,long,sqft_living15,sqft_lot15,renovated,basement,pike_place,zip98002,zip98003,zip98004,zip98005,zip98006,zip98007,zip98008,zip98010,zip98011,zip98014,zip98019,zip98022,zip98023,zip98024,zip98027,zip98028,zip98029,zip98030,zip98031,zip98032,zip98033,zip98034,zip98038,zip98039,zip98040,zip98042,zip98045,zip98052,zip98053,zip98055,zip98056,zip98058,zip98059,zip98065,zip98070,zip98072,zip98074,zip98075,zip98077,zip98092,zip98102,zip98103,zip98105,zip98106,zip98107,zip98108,zip98109,zip98112,zip98115,zip98116,zip98117,zip98118,zip98119,zip98122,zip98125,zip98126,zip98133,zip98136,zip98144,zip98146,zip98148,zip98155,zip98166,zip98168,zip98177,zip98178,zip98188,zip98198,zip98199,renovated.1,pike_place.1,sqft_living_X_pike_place,grade_X_pike_place,sqft_above_X_pike_place,sqft_living_X_grade,sqft_living15_X_pike_place,bathrooms_X_grade,bathrooms_X_sqft_living,sqft_living_X_sqft_above,grade_X_sqft_above,grade_X_sqft_basement,bathrooms_X_sqft_above,sqft_above_X_sqft_basement,bathrooms_X_pike_place,sqft_living_X_sqft_basement,lat_X_pike_place,sqft_living_X_view,sqft_living_X_waterfront,sqft_living_X_sqft_living15,view_X_sqft_above,sqft_basement_X_sqft_living15,view_X_grade,bathrooms_X_sqft_basement,sqft_living_X_basement,sqft_above_X_basement,waterfront_X_sqft_above,bathrooms_X_view,bathrooms_X_sqft_living15,sqft_living_X_lat,grade_X_basement,grade_X_long,bathrooms_X_waterfront,bedrooms_X_grade,view_X_sqft_living15,grade_X_lat,sqft_living_X_long,sqft_above_X_long,grade_X_sqft_living15,long_X_sqft_living15,waterfront_X_sqft_living15,bedrooms_X_sqft_living,bedrooms_X_pike_place,waterfront_X_grade,bedrooms_X_sqft_above,sqft_living15_X_basement,sqft_above_X_sqft_living15,sqft_above_X_lat,sqft_basement_X_pike_place,waterfront_X_lat,floors_X_pike_place,bedrooms_X_waterfront,sqft_living_X_yr_renovated,sqft_living_X_renovated,floors_X_sqft_basement,lat_X_sqft_living15


In [242]:
X_all.head()

Unnamed: 0,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,grade,sqft_above,sqft_basement,yr_built,yr_renovated,lat,long,sqft_living15,sqft_lot15,renovated,basement,pike_place,zip98002,zip98003,zip98004,zip98005,zip98006,zip98007,zip98008,zip98010,zip98011,zip98014,zip98019,zip98022,zip98023,zip98024,zip98027,zip98028,zip98029,zip98030,zip98031,zip98032,zip98033,zip98034,zip98038,zip98039,zip98040,zip98042,zip98045,zip98052,zip98053,zip98055,zip98056,zip98058,zip98059,zip98065,zip98070,zip98072,zip98074,zip98075,zip98077,zip98092,zip98102,zip98103,zip98105,zip98106,zip98107,zip98108,zip98109,zip98112,zip98115,zip98116,zip98117,zip98118,zip98119,zip98122,zip98125,zip98126,zip98133,zip98136,zip98144,zip98146,zip98148,zip98155,zip98166,zip98168,zip98177,zip98178,zip98188,zip98198,zip98199,renovated.1,pike_place.1,sqft_living_X_pike_place,grade_X_pike_place,sqft_above_X_pike_place,sqft_living_X_grade,sqft_living15_X_pike_place,bathrooms_X_grade,bathrooms_X_sqft_living,sqft_living_X_sqft_above,grade_X_sqft_above,grade_X_sqft_basement,bathrooms_X_sqft_above,sqft_above_X_sqft_basement,bathrooms_X_pike_place,sqft_living_X_sqft_basement,lat_X_pike_place,sqft_living_X_view,sqft_living_X_waterfront,sqft_living_X_sqft_living15,view_X_sqft_above,sqft_basement_X_sqft_living15,view_X_grade,bathrooms_X_sqft_basement,sqft_living_X_basement,sqft_above_X_basement,waterfront_X_sqft_above,bathrooms_X_view,bathrooms_X_sqft_living15,sqft_living_X_lat,grade_X_basement,grade_X_long,bathrooms_X_waterfront,bedrooms_X_grade,view_X_sqft_living15,grade_X_lat,sqft_living_X_long,sqft_above_X_long,grade_X_sqft_living15,long_X_sqft_living15,waterfront_X_sqft_living15,bedrooms_X_sqft_living,bedrooms_X_pike_place,waterfront_X_grade,bedrooms_X_sqft_above,sqft_living15_X_basement,sqft_above_X_sqft_living15,sqft_above_X_lat,sqft_basement_X_pike_place,waterfront_X_lat,floors_X_pike_place,bedrooms_X_waterfront,sqft_living_X_yr_renovated,sqft_living_X_renovated,floors_X_sqft_basement,lat_X_sqft_living15
0,4.0,2.5,2270,11500,1.0,0,0,3,8,1540,730,1967,0,47.7089,-122.241,2020,10918,0,1,8.324874,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8.324874,18897.463346,66.59899,12820.30553,18160,16816.244916,20.0,5675.0,3495800,12320,5840,3850.0,1124200,20.812184,1657100,397.170568,0,0,4585400,0,1474600,0,1825.0,2270,1540,0,0.0,5050.0,108299.203,8,-977.928,0.0,32.0,0,381.6712,-277487.07,-188251.14,16160,-246926.82,0,9080.0,33.299495,0,6160.0,2020,3110800,73471.706,6077.157816,0.0,8.324874,0.0,0,0,730.0,96371.978
1,4.0,2.5,2270,11500,1.0,0,0,3,8,1540,730,1967,0,47.7089,-122.241,2020,10918,0,1,8.324874,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8.324874,18897.463346,66.59899,12820.30553,18160,16816.244916,20.0,5675.0,3495800,12320,5840,3850.0,1124200,20.812184,1657100,397.170568,0,0,4585400,0,1474600,0,1825.0,2270,1540,0,0.0,5050.0,108299.203,8,-977.928,0.0,32.0,0,381.6712,-277487.07,-188251.14,16160,-246926.82,0,9080.0,33.299495,0,6160.0,2020,3110800,73471.706,6077.157816,0.0,8.324874,0.0,0,0,730.0,96371.978
2,3.0,2.5,1470,1779,2.0,0,0,3,8,1160,310,2005,0,47.5472,-121.998,1470,1576,0,1,16.500807,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16.500807,24256.186066,132.006455,19140.935943,11760,24256.186066,20.0,3675.0,1705200,9280,2480,2900.0,359600,41.252017,455700,784.567163,0,0,2160900,0,455700,0,775.0,1470,1160,0,0.0,3675.0,69894.384,8,-975.984,0.0,24.0,0,380.3776,-179337.06,-141517.68,11760,-179337.06,0,4410.0,49.502421,0,3480.0,1470,1705200,55154.752,5115.250123,0.0,33.001614,0.0,0,0,620.0,69894.384
3,3.0,1.75,1280,16200,1.0,0,0,3,8,1030,250,1976,0,47.7427,-122.071,1160,10565,0,1,15.575849,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15.575849,19937.086311,124.606789,16043.124141,10240,18067.984469,14.0,2240.0,1318400,8240,2000,1802.5,257500,27.257735,320000,743.633071,0,0,1484800,0,290000,0,437.5,1280,1030,0,0.0,2030.0,61110.656,8,-976.568,0.0,24.0,0,381.9416,-156250.88,-125733.13,9280,-141602.36,0,3840.0,46.727546,0,3090.0,1160,1194800,49174.981,3893.96217,0.0,15.575849,0.0,0,0,250.0,55381.532
4,4.0,2.75,2830,8126,2.0,0,0,3,8,2830,0,2005,0,47.4863,-122.14,2830,7916,0,0,12.590059,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12.590059,35629.865744,100.720469,35629.865744,22640,35629.865744,22.0,7782.5,8008900,22640,0,7782.5,0,34.622661,0,597.855298,0,0,8008900,0,0,0,0.0,0,0,0,0.0,7782.5,134386.229,0,-977.12,0.0,32.0,0,379.8904,-345656.2,-345656.2,22640,-345656.2,0,11320.0,50.360234,0,11320.0,0,8008900,134386.229,0.0,0.0,25.180117,0.0,0,0,0.0,134386.229


In [244]:
#X_test_transformed_2 = final_scaler.transform(X_all)    # Scaling to test data

y_pred_2 = final_model.predict(X_all)

In [207]:
X_all.isna()

Unnamed: 0,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,grade,sqft_above,sqft_basement,yr_built,yr_renovated,lat,long,sqft_living15,sqft_lot15,renovated,basement,distance_from_center,pike_place,zip98002,zip98003,zip98004,zip98005,zip98006,zip98007,zip98008,zip98010,zip98011,zip98014,zip98019,zip98022,zip98023,zip98024,zip98027,zip98028,zip98029,zip98030,zip98031,zip98032,zip98033,zip98034,zip98038,zip98039,zip98040,zip98042,zip98045,zip98052,zip98053,zip98055,zip98056,zip98058,zip98059,zip98065,zip98070,zip98072,zip98074,zip98075,zip98077,zip98092,zip98102,zip98103,zip98105,zip98106,zip98107,zip98108,zip98109,zip98112,zip98115,zip98116,zip98117,zip98118,zip98119,zip98122,zip98125,zip98126,zip98133,zip98136,zip98144,zip98146,zip98148,zip98155,zip98166,zip98168,zip98177,zip98178,zip98188,zip98198,zip98199,renovated.1,pike_place.1,sqft_living_X_pike_place,grade_X_pike_place,sqft_above_X_pike_place,sqft_living_X_grade,sqft_living15_X_pike_place,bathrooms_X_grade,bathrooms_X_sqft_living,sqft_living_X_sqft_above,grade_X_sqft_above,grade_X_sqft_basement,bathrooms_X_sqft_above,sqft_above_X_sqft_basement,bathrooms_X_pike_place,sqft_living_X_sqft_basement,lat_X_pike_place,sqft_living_X_view,sqft_living_X_waterfront,sqft_living_X_sqft_living15,view_X_sqft_above,sqft_basement_X_sqft_living15,view_X_grade,bathrooms_X_sqft_basement,sqft_living_X_basement,sqft_above_X_basement,waterfront_X_sqft_above,bathrooms_X_view,bathrooms_X_sqft_living15,sqft_living_X_lat,grade_X_basement,grade_X_long,bathrooms_X_waterfront,bedrooms_X_grade,view_X_sqft_living15,grade_X_lat,sqft_living_X_long,sqft_above_X_long,grade_X_sqft_living15,long_X_sqft_living15,waterfront_X_sqft_living15,bedrooms_X_sqft_living,bedrooms_X_pike_place,waterfront_X_grade,bedrooms_X_sqft_above,sqft_living15_X_basement,sqft_above_X_sqft_living15,sqft_above_X_lat,sqft_basement_X_pike_place,waterfront_X_lat,floors_X_pike_place,bedrooms_X_waterfront,sqft_living_X_yr_renovated,sqft_living_X_renovated,floors_X_sqft_basement,lat_X_sqft_living15
count,146.0,146.0,146.0,146.0,146.0,146.0,146.0,146.0,146.0,146.0,146.0,146.0,146.0,146.0,146.0,146.0,146.0,146.0,146.0,146.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4324.0,4323.0,4323.0,4323.0,4323.0,4323.0,4323.0,4323.0,4323.0,4323.0,4323.0,4323.0,4323.0,4323.0,4323.0,4323.0,4323.0,4323.0,4323.0,4323.0,4323.0,4323.0,4323.0,4323.0,4323.0,4323.0,4323.0,4323.0,4323.0,4323.0,4323.0,4323.0,4323.0,4323.0,4323.0,4323.0,4323.0,4323.0,4323.0,4323.0,4323.0,4323.0,4323.0,4323.0,4323.0,4323.0,4323.0,4323.0,4323.0,4323.0,4323.0,4323.0,4323.0,4323.0,4323.0,4323.0
mean,3.273973,2.058219,1968.041096,23869.52,1.517123,0.0,0.205479,3.30137,7.513699,1688.041096,280.0,1967.534247,123.191781,47.579773,-122.196822,1940.582192,12764.041096,0.061644,0.363014,11.881701,0.011563,0.008557,0.01457,0.002544,0.018964,0.006013,0.008326,0.002313,0.008788,0.004625,0.01087,0.009713,0.017345,0.003469,0.032377,0.012951,0.017345,0.012951,0.008788,0.004394,0.02197,0.018964,0.02914,0.002544,0.008788,0.026364,0.011332,0.022895,0.02197,0.011101,0.02012,0.011332,0.031684,0.027983,0.0037,0.008326,0.019426,0.014339,0.00555,0.017345,0.006475,0.041397,0.010638,0.02197,0.019889,0.010176,0.006707,0.011563,0.027058,0.016883,0.025439,0.024052,0.009482,0.016883,0.01827,0.015726,0.017345,0.015726,0.021739,0.014107,0.002544,0.012488,0.007401,0.008557,0.008557,0.009019,0.004163,0.009944,0.014801,0.032146,11.48068,26218.730249,90.690432,23980.666721,18094.448068,24751.381169,18.961485,5624.867742,5012297.0,16063.444136,2031.003932,4968.318066,461274.6,27.168601,689123.6,545.458071,613.81957,19.059681,4973862.0,476.608142,555580.9,1.771918,656.549676,859.312977,612.392551,15.198936,0.551758,5021.384802,103841.909098,2.929678,-966.00049,0.016597,27.164238,532.769836,375.967421,-266781.480225,-236597.634398,16595.844784,-248279.697853,16.955355,7952.693963,39.894949,0.056442,7021.897988,743.88619,4418281.0,92091.505444,2238.063529,0.307871,19.53811,0.020125,148298.6,74.24867,363.957784,96637.70482
std,0.906076,0.744544,809.120552,105884.8,0.542708,0.0,0.787119,0.678435,1.121798,688.052172,446.520649,30.562128,482.307061,0.131294,0.157343,666.706137,24268.302084,0.241335,0.482524,6.544927,0.106922,0.092118,0.119837,0.050379,0.136413,0.077319,0.090875,0.04804,0.093343,0.06786,0.103701,0.098087,0.130569,0.058803,0.177021,0.113076,0.130569,0.113076,0.093343,0.06615,0.146604,0.136413,0.168217,0.050379,0.093343,0.160235,0.10586,0.149588,0.146604,0.104786,0.140428,0.10586,0.175177,0.164944,0.060724,0.090875,0.138034,0.118896,0.074303,0.130569,0.080219,0.199229,0.102604,0.146604,0.139635,0.100372,0.081629,0.106922,0.162272,0.128846,0.157474,0.153228,0.096924,0.128846,0.133942,0.124428,0.130569,0.124428,0.145847,0.117947,0.050379,0.111065,0.085717,0.092118,0.092118,0.094552,0.064393,0.099236,0.12077,0.176409,7.002219,20560.136178,56.605338,19775.483859,10705.961303,18775.96287,8.551234,4330.027288,4835118.0,9740.803677,3573.607325,3817.119581,982090.3,19.456035,1484415.0,331.573117,2397.090667,256.298719,4033841.0,1874.310799,1113749.0,6.421438,1261.30879,1295.123962,943.482865,206.106835,2.078959,3161.506652,46000.681616,3.917315,142.458395,0.217853,9.458003,2024.995726,55.659463,118029.720598,109267.23964,8147.428745,88807.198201,217.97094,5170.567859,26.894622,0.713172,4598.650824,1078.937205,3623674.0,42580.887684,4781.976925,3.813497,13.329342,0.263132,884278.1,442.596607,630.127064,34625.603221
min,1.0,0.75,480.0,779.0,1.0,0.0,0.0,2.0,4.0,480.0,0.0,1902.0,0.0,47.282,-122.505,910.0,794.0,0.0,0.0,1.673198,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.001829,725.411804,7.012805,725.411804,290.0,831.518273,0.0,0.0,84100.0,290.0,0.0,0.0,0.0,0.0,0.0,47.692883,0.0,0.0,384400.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,13783.932,0.0,-1590.81,0.0,0.0,0.0,47.5308,-979955.78,-979955.78,1620.0,-757508.22,0.0,0.0,0.0,0.0,0.0,0.0,384400.0,13783.932,0.0,0.0,1.291717,0.0,0.0,0.0,0.0,29468.104
25%,3.0,1.5,1395.0,4906.25,1.0,0.0,0.0,3.0,7.0,1132.5,0.0,1947.25,0.0,47.4963,-122.3295,1432.5,4620.5,0.0,0.0,6.246053,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.544329,8864.059895,41.074564,7506.344102,10710.0,8294.328303,13.5,2877.5,1843200.0,9100.0,0.0,2345.0,0.0,10.856365,0.0,264.237039,0.0,0.0,2183280.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2852.5,69247.07,0.0,-1097.0325,0.0,21.0,0.0,332.99105,-333090.03,-307475.967,10850.0,-300533.28,0.0,4267.5,16.722675,0.0,3720.0,0.0,1890000.0,59175.8825,0.0,0.0,8.763741,0.0,0.0,0.0,0.0,70210.728
50%,3.0,2.25,1935.0,7779.0,1.5,0.0,0.0,3.0,7.0,1535.0,0.0,1975.5,0.0,47.60725,-122.2185,1865.0,7516.0,0.0,0.0,11.112199,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.323203,21957.38594,80.985055,18424.644984,15200.0,20593.887299,18.0,4660.0,3348900.0,13040.0,0.0,4050.0,0.0,23.21043,0.0,491.808694,0.0,0.0,3724000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4425.0,94869.2,0.0,-977.16,0.0,24.0,0.0,380.1376,-244334.0,-206859.38,14400.0,-228063.33,0.0,6680.0,35.180819,0.0,5610.0,0.0,3126600.0,80550.47,0.0,0.0,16.00933,0.0,0.0,0.0,0.0,88738.554
75%,4.0,2.5,2462.5,11470.75,2.0,0.0,0.0,3.0,8.0,2157.5,570.0,1993.0,0.0,47.6852,-122.0735,2337.5,10650.25,0.0,1.0,16.433333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,16.201479,38025.547378,131.845855,35848.904323,22800.0,36912.35328,22.5,7175.0,6708100.0,20880.0,3200.0,6580.0,529700.0,40.012385,725000.0,770.14569,0.0,0.0,6528300.0,0.0,709450.0,0.0,900.0,1700.0,1180.0,0.0,0.0,6435.0,129643.059,7.0,-856.009,0.0,32.0,0.0,425.9646,-177500.157,-152410.0,20330.0,-180912.24,0.0,10720.0,58.137499,0.0,9660.0,1580.0,5941934.0,119729.1305,2216.575043,0.0,27.326156,0.0,0.0,0.0,600.0,117042.7475
max,7.0,4.25,4960.0,1164794.0,3.0,0.0,4.0,5.0,11.0,3680.0,2020.0,2012.0,2013.0,47.7723,-121.698,3920.0,198414.0,1.0,1.0,31.187982,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,45.93843,193032.312189,353.385458,142702.547637,94200.0,138043.890573,78.0,50125.0,64320400.0,94200.0,33720.0,50125.0,11436700.0,168.342133,19332800.0,2191.603042,28480.0,5480.0,48748500.0,21920.0,13178900.0,52.0,16860.0,7710.0,6420.0,5480.0,24.0,28140.0,381491.35,13.0,-121.888,4.5,96.0,17370.0,620.4848,-35347.52,-35347.52,74520.0,-75873.12,4140.0,61680.0,148.538685,11.0,48720.0,5790.0,48748500.0,381491.35,78840.232159,47.7336,66.641425,6.0,10380000.0,5190.0,5670.0,295227.126


In [None]:
# final_answers = final_model.predict(transformed_holdout)

In [159]:
X_all.isna().sum()

bedrooms                      4178
bathrooms                     4178
sqft_living                   4178
sqft_lot                      4178
floors                        4178
                              ... 
bedrooms_X_waterfront            1
sqft_living_X_yr_renovated       1
sqft_living_X_renovated          1
floors_X_sqft_basement           1
lat_X_sqft_living15              1
Length: 145, dtype: int64

In [119]:
len(X_all.columns)

146

In [204]:

X_all.columns = ['bathrooms',
 'sqft_living',
 'sqft_lot',
 'floors',
 'waterfront',
 'view',
 'condition',
 'grade',
 'sqft_above',
 'sqft_basement',
 'yr_built',
 'yr_renovated',
 'lat',
 'long',
 'sqft_living15',
 'sqft_lot15',
 'renovated',
 'basement',
 'distance_from_center',
 'pike_place',
 'zip98002',
 'zip98003',
 'zip98004',
 'zip98005',
 'zip98006',
 'zip98007',
 'zip98008',
 'zip98010',
 'zip98011',
 'zip98014',
 'zip98019',
 'zip98022',
 'zip98023',
 'zip98024',
 'zip98027',
 'zip98028',
 'zip98029',
 'zip98030',
 'zip98031',
 'zip98032',
 'zip98033',
 'zip98034',
 'zip98038',
 'zip98039',
 'zip98040',
 'zip98042',
 'zip98045',
 'zip98052',
 'zip98053',
 'zip98055',
 'zip98056',
 'zip98058',
 'zip98059',
 'zip98065',
 'zip98070',
 'zip98072',
 'zip98074',
 'zip98075',
 'zip98077',
 'zip98092',
 'zip98102',
 'zip98103',
 'zip98105',
 'zip98106',
 'zip98107',
 'zip98108',
 'zip98109',
 'zip98112',
 'zip98115',
 'zip98116',
 'zip98117',
 'zip98118',
 'zip98119',
 'zip98122',
 'zip98125',
 'zip98126',
 'zip98133',
 'zip98136',
 'zip98144',
 'zip98146',
 'zip98148',
 'zip98155',
 'zip98166',
 'zip98168',
 'zip98177',
 'zip98178',
 'zip98188',
 'zip98198',
 'zip98199',
 'renovated',
 'pike_place',
 'sqft_living_X_pike_place',
 'grade_X_pike_place',
 'sqft_above_X_pike_place',
 'sqft_living_X_grade',
 'sqft_living15_X_pike_place',
 'bathrooms_X_grade',
 'bathrooms_X_sqft_living',
 'sqft_living_X_sqft_above',
 'grade_X_sqft_above',
 'grade_X_sqft_basement',
 'bathrooms_X_sqft_above',
 'sqft_above_X_sqft_basement',
 'bathrooms_X_pike_place',
 'sqft_living_X_sqft_basement',
 'lat_X_pike_place',
 'sqft_living_X_view',
 'sqft_living_X_waterfront',
 'sqft_living_X_sqft_living15',
 'view_X_sqft_above',
 'sqft_basement_X_sqft_living15',
 'view_X_grade',
 'bathrooms_X_sqft_basement',
 'sqft_living_X_basement',
 'sqft_above_X_basement',
 'waterfront_X_sqft_above',
 'bathrooms_X_view',
 'bathrooms_X_sqft_living15',
 'sqft_living_X_lat',
 'grade_X_basement',
 'grade_X_long',
 'bathrooms_X_waterfront',
 'bedrooms_X_grade',
 'view_X_sqft_living15',
 'grade_X_lat',
 'sqft_living_X_long',
 'sqft_above_X_long',
 'grade_X_sqft_living15',
 'long_X_sqft_living15',
 'waterfront_X_sqft_living15',
 'bedrooms_X_sqft_living',
 'bedrooms_X_pike_place',
 'waterfront_X_grade',
 'bedrooms_X_sqft_above',
 'sqft_living15_X_basement',
 'sqft_above_X_sqft_living15',
 'sqft_above_X_lat',
 'sqft_basement_X_pike_place',
 'waterfront_X_lat',
 'floors_X_pike_place',
 'bedrooms_X_waterfront',
 'sqft_living_X_yr_renovated',
 'sqft_living_X_renovated',
 'floors_X_sqft_basement',
 'lat_X_sqft_living15']

## Step 4: Export your predictions

In [246]:
pd.DataFrame(y_pred_2).to_csv('housing_pred_Mike_Rozenvasser')
# final_answer.to_csv('housing_preds_your_name.csv')