In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import missingno as msno

%run 00_Functions.ipynb
# Citation: Stack Overflow for the idea to keep functions in a separate notebook so they're accessible everywhere through %run!

# Read in data

In [2]:
props = pd.read_csv('../datasets/train.csv')

In [3]:
props.head(10)

Unnamed: 0,Id,PID,MS SubClass,MS Zoning,Lot Frontage,Lot Area,Street,Alley,Lot Shape,Land Contour,...,Screen Porch,Pool Area,Pool QC,Fence,Misc Feature,Misc Val,Mo Sold,Yr Sold,Sale Type,SalePrice
0,109,533352170,60,RL,,13517,Pave,,IR1,Lvl,...,0,0,,,,0,3,2010,WD,130500
1,544,531379050,60,RL,43.0,11492,Pave,,IR1,Lvl,...,0,0,,,,0,4,2009,WD,220000
2,153,535304180,20,RL,68.0,7922,Pave,,Reg,Lvl,...,0,0,,,,0,1,2010,WD,109000
3,318,916386060,60,RL,73.0,9802,Pave,,Reg,Lvl,...,0,0,,,,0,4,2010,WD,174000
4,255,906425045,50,RL,82.0,14235,Pave,,IR1,Lvl,...,0,0,,,,0,3,2010,WD,138500
5,138,535126040,20,RL,137.0,16492,Pave,,IR1,Lvl,...,0,0,,,,0,6,2010,WD,190000
6,2827,908186070,180,RM,35.0,3675,Pave,,Reg,Lvl,...,0,0,,,,0,6,2006,New,140000
7,145,535154050,20,RL,,12160,Pave,,IR1,Lvl,...,0,0,,MnPrv,,0,5,2010,COD,142000
8,1942,535353130,20,RL,,15783,Pave,,Reg,Lvl,...,0,0,,MnPrv,Shed,400,6,2007,WD,112500
9,1956,535426130,60,RL,70.0,11606,Pave,,IR1,HLS,...,0,0,,,,0,9,2007,WD,135000


In [436]:
props.shape

(2051, 81)

In [6]:
props['Yr Sold'].describe()

count    2051.000000
mean     2007.775719
std         1.312014
min      2006.000000
25%      2007.000000
50%      2008.000000
75%      2009.000000
max      2010.000000
Name: Yr Sold, dtype: float64

In [437]:
# Use the listing Id as the row index from here on
props = props.set_index('Id')

# Check and categorize numeric vs categorical columns

## Numeric columns

In [438]:
# Sub-frame holding only the columns pandas parsed as numeric
initial_numeric_columns = props.select_dtypes(include='number')

## Non-numeric (presumably categorical) columns

In [439]:
# Sub-frame holding every column that is not numeric
initial_nonnumeric_columns = props.select_dtypes(exclude='number')

# Look at missing values

In [440]:
# Count missing values once per column, then keep only the columns that have any
null_counts = props.isnull().sum()
null_cols = [(column, n_missing) for column, n_missing in null_counts.items() if n_missing > 0]
null_cols_df = pd.DataFrame(null_cols, columns=['Feature', 'Missing Values'])
null_cols_df

Unnamed: 0,Feature,Missing Values
0,Lot Frontage,330
1,Alley,1911
2,Mas Vnr Type,22
3,Mas Vnr Area,22
4,Bsmt Qual,55
5,Bsmt Cond,55
6,Bsmt Exposure,58
7,BsmtFin Type 1,55
8,BsmtFin SF 1,1
9,BsmtFin Type 2,56


In [441]:
# Check nulls: count missing values once per column, then keep only the columns that have any
null_counts = props.isnull().sum()
null_cols = [(column, n_missing) for column, n_missing in null_counts.items() if n_missing > 0]
null_cols_df = pd.DataFrame(null_cols, columns=['Feature', 'Missing Values'])
null_cols_df

Unnamed: 0,Feature,Missing Values
0,Lot Frontage,330
1,Alley,1911
2,Mas Vnr Type,22
3,Mas Vnr Area,22
4,Bsmt Qual,55
5,Bsmt Cond,55
6,Bsmt Exposure,58
7,BsmtFin Type 1,55
8,BsmtFin SF 1,1
9,BsmtFin Type 2,56


### Investigate and address missing Fireplace Qu values 

In [442]:
# Pull the fireplace count alongside the quality rating for rows with no rating recorded
fireplace_qu_missing = props['Fireplace Qu'].isnull()
NaN_fireplaces = props.loc[fireplace_qu_missing, ['Fireplaces', 'Fireplace Qu']]
NaN_fireplaces.shape

(1000, 2)

In [443]:
# How many of the rows with a missing rating report zero fireplaces?
NaN_fireplaces.loc[NaN_fireplaces['Fireplaces'].eq(0)].shape

(1000, 2)

From the above we can determine that every NaN in Fireplace Qu corresponds to a property with 0 fireplaces, indicating that these values should have been recorded as "NA" (per the data dictionary) rather than left missing.

**Decision: Replace NaN with 'NA'**

In [444]:
# Fill the missing Fireplace Qu entries with the string 'NA' using the shared helper.
# Presumably equivalent to props['Fireplace Qu'].replace(np.NaN, 'NA', inplace = True) — confirm against the helper definition.

replace_NaN_with_NA(props, ['Fireplace Qu'])

In [445]:
props['Fireplace Qu'].value_counts()

NA    1000
Gd     523
TA     407
Fa      59
Po      31
Ex      31
Name: Fireplace Qu, dtype: int64

### Investigate and address missing Lot Frontage values

In [446]:
# Condition 1 / Condition 2 (abutting streets/railroads) look like the closest categories to lot frontage,
# so inspect them for the rows where Lot Frontage is missing
lot_frontage_missing = props['Lot Frontage'].isnull()
NaN_LF = props.loc[lot_frontage_missing, ['Condition 1', 'Condition 2']]
NaN_LF['Condition 1'].value_counts()

Norm      290
Feedr      14
PosN        9
RRAe        5
RRNn        3
RRAn        3
Artery      2
RRNe        2
PosA        2
Name: Condition 1, dtype: int64

It is difficult to infer why these are missing and there are too many to lose ( > 10% of the data). 

**Decision: Will likely be imputed or not included in model**

### Investigate and address missing Alley values

In [447]:
# Same inspection for rows where Alley is missing
alley_missing = props['Alley'].isnull()
NaN_Alley = props.loc[alley_missing, ['Condition 1', 'Condition 2']]
NaN_Alley['Condition 1'].value_counts()

Norm      1644
Feedr      105
Artery      58
RRAn        36
PosN        26
RRAe        21
PosA        12
RRNn         6
RRNe         3
Name: Condition 1, dtype: int64

It is difficult to infer why these are missing and there are too many to lose ( > 90% of the data). 

**Decision: Will likely not be included in model**

### Investigate and address missing Garage values

In [448]:
# Based on MissingNo matrix above, it seems very likely these missing values are on same rows

In [449]:
# For rows with no Garage Qual recorded, tally each distinct (Garage Cars, Garage Area) pair.
# Note: DataFrame.value_counts() leaves out rows containing NaN by default, so a row missing either value is not counted here.
props.loc[(props['Garage Qual'].isnull()), ['Garage Cars', 'Garage Area']].value_counts()

Garage Cars  Garage Area
0.0          0.0            113
dtype: int64

Seems that these were deliberately marked as 0 car garages, which indicates No Garage or "NA" rather than NaN. Also, garage year is dependent on the year the house was built.

**Decision: Garage NaNs will be replaced with NA, with the exception of Garage Yr Blt which will in all likelihood be dropped**

In [450]:
replace_NaN_with_NA(props, ['Garage Type', 'Garage Finish', 'Garage Qual', 'Garage Cond'])

In [451]:
# Check nulls: count missing values once per column, then keep only the columns that have any
null_counts = props.isnull().sum()
null_cols = [(column, n_missing) for column, n_missing in null_counts.items() if n_missing > 0]
null_cols_df = pd.DataFrame(null_cols, columns=['Feature', 'Missing Values'])
null_cols_df

Unnamed: 0,Feature,Missing Values
0,Lot Frontage,330
1,Alley,1911
2,Mas Vnr Type,22
3,Mas Vnr Area,22
4,Bsmt Qual,55
5,Bsmt Cond,55
6,Bsmt Exposure,58
7,BsmtFin Type 1,55
8,BsmtFin SF 1,1
9,BsmtFin Type 2,56


### Investigate and address missing Basement values

In [452]:
# None of the basement values had NA recorded, indicating that the NaN is likely intended to be NA but was left blank.
# NOTE(review): the null table above lists 58 NaNs for Bsmt Exposure and 56 for BsmtFin Type 2 versus 55 for the
# other basement columns, so a few rows may have a basement with a genuinely missing value — confirm before
# treating every NaN as "no basement".

replace_NaN_with_NA(props, ['Bsmt Qual', 'Bsmt Cond', 'Bsmt Exposure', 'BsmtFin Type 1', 'BsmtFin Type 2'])

### Investigate and address missing Pool QC values

In [453]:
props.loc[(props['Pool QC'].notnull()), ['Pool Area']]

Unnamed: 0_level_0,Pool Area
Id,Unnamed: 1_level_1
2351,519
1567,576
2499,800
2231,228
1499,480
2358,648
2736,738
1941,368
2279,561


This pool data seems too sparse to be of use. There are only 9 properties with pools.

**Decision: Pool QC and Pool Area to be dropped**

### Investigate and address missing Fence values

In [454]:
props['Fence'].value_counts()

MnPrv    227
GdPrv     83
GdWo      80
MnWw      10
Name: Fence, dtype: int64

There are no naturally occurring "NA", which would suggest a missing value was likely meant to be NA


**Decision: Replace NaN with NA, Fence may or may not be used in model**

In [455]:
replace_NaN_with_NA(props, ['Fence'])

### Investigate and address missing "MiscFeature" values

In [456]:
props['Misc Feature'].value_counts()

Shed    56
Gar2     4
Othr     3
TenC     1
Elev     1
Name: Misc Feature, dtype: int64

Misc Feature data is too sparse to be of use, with values recorded for only 65 of the 2,051 rows.


**Decision: No cleaning needed, will be dropped**

In [457]:
# Check nulls: count missing values once per column, then keep only the columns that have any
null_counts = props.isnull().sum()
null_cols = [(column, n_missing) for column, n_missing in null_counts.items() if n_missing > 0]
null_cols_df = pd.DataFrame(null_cols, columns=['Feature', 'Missing Values'])
null_cols_df

Unnamed: 0,Feature,Missing Values
0,Lot Frontage,330
1,Alley,1911
2,Mas Vnr Type,22
3,Mas Vnr Area,22
4,BsmtFin SF 1,1
5,BsmtFin SF 2,1
6,Bsmt Unf SF,1
7,Total Bsmt SF,1
8,Bsmt Full Bath,2
9,Bsmt Half Bath,2


### Drop missing rows for certain columns only

**Which columns and why?** 
- Mas Vnr Type, Mas Vnr Area
    - Because "None" is already a value on this scale, and appears in the dataset already, indicating these were not mistaken but truly missing.  
- Bsmt SF fields
    - These are just a handful of missing numeric values, can't be interpreted as "NA" or "None"
- Garage Cars, Garage Area
    - There is only one row missing these values. Could be imputed but given it's a single line, will drop

In [458]:
props['Bsmt Cond'].value_counts()

TA    1834
Gd      89
Fa      65
NA      55
Po       5
Ex       3
Name: Bsmt Cond, dtype: int64

In [459]:
# Rows missing any of these values are dropped rather than imputed or relabelled.
# 'Bsmt Unf SF' is listed alongside the other basement square-footage fields so that
# every "Bsmt SF" column named in the rationale is actually covered by the drop.
columns_with_NaN_rows_to_drop = [
    'Mas Vnr Type', 'Mas Vnr Area',
    'Garage Cars', 'Garage Area',
    'BsmtFin SF 1', 'BsmtFin SF 2', 'Bsmt Unf SF', 'Total Bsmt SF',
    'Bsmt Full Bath', 'Bsmt Half Bath',
]

# Drop a row if it has a NaN in any of the listed columns
props.dropna(axis=0, subset=columns_with_NaN_rows_to_drop, inplace=True)




In [460]:
# Check nulls: count missing values once per column, then keep only the columns that have any
null_counts = props.isnull().sum()
null_cols = [(column, n_missing) for column, n_missing in null_counts.items() if n_missing > 0]
null_cols_df = pd.DataFrame(null_cols, columns=['Feature', 'Missing Values'])
null_cols_df

Unnamed: 0,Feature,Missing Values
0,Lot Frontage,324
1,Alley,1888
2,Garage Yr Blt,112
3,Pool QC,2017
4,Misc Feature,1961


### What we have left
- Lot frontage will likely be imputed in the model
- Alley will not be included in the model due to its sparse nature
- Garage Yr Blt will likely be dropped because it is dependent on the age of the house
- Pool QC will not be included in the model due to its sparse nature
- Misc Feature will not be included in the model due to its sparse nature

## A fresh look at the dataframe, with relevant NaNs cleaned 

In [461]:
# props.drop(['PID','Alley', 'Garage Yr Blt', 'Pool QC', 'Misc Feature'], axis = 1,  inplace = True)

### Up first, non-numeric columns

In [462]:
# Snapshot the cleaned frame by dtype: the numeric part is examined further down,
# the non-numeric part is examined first
post_clean_numeric_columns = props.select_dtypes(include='number')
post_clean_nonnumeric_columns = props.select_dtypes(exclude='number')

### Look through the number of unique values for each non-numeric column and see if we can make it boolean or simplify it

In [463]:
# (column name, number of distinct values) for every non-numeric column; NaN counts as a value
unique_values_per_nonnumeric_column = [
    (name, values.nunique(dropna=False))
    for name, values in post_clean_nonnumeric_columns.items()
]

In [464]:
unique_values_per_nonnumeric_column

[('MS Zoning', 7),
 ('Street', 2),
 ('Alley', 3),
 ('Lot Shape', 4),
 ('Land Contour', 4),
 ('Utilities', 3),
 ('Lot Config', 5),
 ('Land Slope', 3),
 ('Neighborhood', 28),
 ('Condition 1', 9),
 ('Condition 2', 8),
 ('Bldg Type', 5),
 ('House Style', 8),
 ('Roof Style', 6),
 ('Roof Matl', 6),
 ('Exterior 1st', 15),
 ('Exterior 2nd', 15),
 ('Mas Vnr Type', 4),
 ('Exter Qual', 4),
 ('Exter Cond', 5),
 ('Foundation', 6),
 ('Bsmt Qual', 6),
 ('Bsmt Cond', 6),
 ('Bsmt Exposure', 5),
 ('BsmtFin Type 1', 7),
 ('BsmtFin Type 2', 7),
 ('Heating', 5),
 ('Heating QC', 5),
 ('Central Air', 2),
 ('Electrical', 5),
 ('Kitchen Qual', 4),
 ('Functional', 8),
 ('Fireplace Qu', 6),
 ('Garage Type', 7),
 ('Garage Finish', 4),
 ('Garage Qual', 6),
 ('Garage Cond', 6),
 ('Paved Drive', 3),
 ('Pool QC', 5),
 ('Fence', 5),
 ('Misc Feature', 6),
 ('Sale Type', 9)]

In [465]:
# Columns with only two or three distinct values are candidates for collapsing into a boolean
possible_bools = [
    (column, n_unique)
    for column, n_unique in unique_values_per_nonnumeric_column
    if n_unique in (2, 3)
]
possible_bools

[('Street', 2),
 ('Alley', 3),
 ('Utilities', 3),
 ('Land Slope', 3),
 ('Central Air', 2),
 ('Paved Drive', 3)]

In [466]:
list_of_columns_to_binarize = []

## Change Street into a bool called Paved Street

In [467]:
# j = pd.DataFrame(columns=['A', 'B'], index= [1, 2, 3])
# j.iloc[:2, 0] = 'Hungry'
# j.iloc[2, 0] = 'Hippo'
# j.iloc[:1, 1] = 'Rock'
# j.iloc[1:, 1] = 'Star'
# ct = [('A', 'C'), ('B', None)]
# binarize_to_most_common_str_val(j, ct)
# j

In [468]:
props['Street'].value_counts()

Pave    2019
Grvl       7
Name: Street, dtype: int64

In [469]:
list_of_columns_to_binarize.append(('Street', 'Paved Street'))

## Change Utilities into a bool called Has All Utilities

Went this route because there are only two rows without all utilities

In [470]:
props['Utilities'].value_counts()

AllPub    2024
NoSeWa       1
NoSewr       1
Name: Utilities, dtype: int64

In [471]:
list_of_columns_to_binarize.append(('Utilities', 'Has All Utilities'))

## Change Land Slope into a bool called Gentle Slope

In [472]:
props['Land Slope'].value_counts()

Gtl    1928
Mod      88
Sev      10
Name: Land Slope, dtype: int64

In [473]:
list_of_columns_to_binarize.append(('Land Slope', 'Gentle Slope'))

## Transform Central Air into a bool


In [474]:
props['Central Air'].value_counts()

Y    1885
N     141
Name: Central Air, dtype: int64

In [475]:
list_of_columns_to_binarize.append(('Central Air', None))

## Transform Paved Drive into a bool

In [476]:
props['Paved Drive'].value_counts()

Y    1837
N     150
P      39
Name: Paved Drive, dtype: int64

In [477]:
list_of_columns_to_binarize.append(('Paved Drive', None))

In [478]:
list_of_columns_to_binarize

[('Street', 'Paved Street'),
 ('Utilities', 'Has All Utilities'),
 ('Land Slope', 'Gentle Slope'),
 ('Central Air', None),
 ('Paved Drive', None)]

In [479]:
binarize_to_most_common_str_val(props, list_of_columns_to_binarize)

In [480]:
# Summarise the non-numeric columns from position 20 onward.
# NOTE: post_clean_nonnumeric_columns was captured before binarize_to_most_common_str_val ran,
# so this view still shows the pre-binarization string values (e.g. Central Air, Paved Drive).
post_clean_nonnumeric_columns.iloc[:, 20:].describe()

Unnamed: 0,Foundation,Bsmt Qual,Bsmt Cond,Bsmt Exposure,BsmtFin Type 1,BsmtFin Type 2,Heating,Heating QC,Central Air,Electrical,...,Fireplace Qu,Garage Type,Garage Finish,Garage Qual,Garage Cond,Paved Drive,Pool QC,Fence,Misc Feature,Sale Type
count,2026,2026,2026,2026,2026,2026,2026,2026,2026,2026,...,2026.0,2026,2026,2026,2026,2026,9,2026.0,65,2026
unique,6,6,6,5,7,7,5,5,2,5,...,6.0,7,4,6,6,3,4,5.0,5,9
top,PConc,TA,TA,No,GLQ,Unf,GasA,Ex,Y,SBrkr,...,,Attchd,Unf,TA,TA,Y,Gd,,Shed,WD
freq,904,886,1813,1321,604,1727,1993,1043,1885,1844,...,990.0,1194,845,1809,1845,1837,4,1628.0,56,1768


## Explore numeric data 

In [481]:
post_clean_numeric_columns.shape

(2026, 38)

In [482]:
post_clean_numeric_columns.iloc[:, :19].describe()

Unnamed: 0,PID,MS SubClass,Lot Frontage,Lot Area,Overall Qual,Overall Cond,Year Built,Year Remod/Add,Mas Vnr Area,BsmtFin SF 1,BsmtFin SF 2,Bsmt Unf SF,Total Bsmt SF,1st Flr SF,2nd Flr SF,Low Qual Fin SF,Gr Liv Area,Bsmt Full Bath,Bsmt Half Bath
count,2026.0,2026.0,1702.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0
mean,714551700.0,57.134748,68.960047,10038.427937,6.101185,5.567127,1971.415597,1983.98075,99.843534,442.010365,48.380553,566.524186,1056.915104,1162.434353,328.574531,5.580948,1496.589832,0.427443,0.064166
std,188704900.0,42.943214,23.218783,6707.696525,1.425042,1.10741,30.145229,21.03601,175.050567,460.792112,165.804215,444.362373,449.006257,391.544288,425.284319,51.379446,498.902093,0.522971,0.253039
min,526301100.0,20.0,21.0,1300.0,1.0,1.0,1872.0,1950.0,0.0,0.0,0.0,0.0,0.0,334.0,0.0,0.0,334.0,0.0,0.0
25%,528477000.0,20.0,58.0,7500.0,5.0,5.0,1953.0,1964.0,0.0,0.0,0.0,220.0,793.0,879.25,0.0,0.0,1126.25,0.0,0.0
50%,535454100.0,50.0,68.0,9402.5,6.0,5.0,1974.0,1993.0,0.0,368.0,0.0,474.0,994.0,1092.0,0.0,0.0,1442.0,0.0,0.0
75%,907180100.0,70.0,80.0,11498.0,7.0,6.0,2000.0,2004.0,161.75,733.0,0.0,810.0,1315.5,1402.0,689.0,0.0,1728.0,1.0,0.0
max,924152000.0,190.0,313.0,159000.0,10.0,9.0,2010.0,2010.0,1600.0,5644.0,1474.0,2336.0,6110.0,5095.0,1862.0,1064.0,5642.0,3.0,2.0


In [483]:
post_clean_numeric_columns.iloc[:, 19:].describe()

Unnamed: 0,Full Bath,Half Bath,Bedroom AbvGr,Kitchen AbvGr,TotRms AbvGrd,Fireplaces,Garage Yr Blt,Garage Cars,Garage Area,Wood Deck SF,Open Porch SF,Enclosed Porch,3Ssn Porch,Screen Porch,Pool Area,Misc Val,Mo Sold,Yr Sold,SalePrice
count,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,1914.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0
mean,1.57157,0.36772,2.843534,1.042942,6.434353,0.589339,1978.356322,1.773445,472.959526,94.153998,47.002468,22.548371,2.623396,16.715202,2.427443,52.21076,6.210267,2007.778875,180903.863771
std,0.547101,0.499406,0.826535,0.209955,1.559957,0.638025,24.898844,0.765623,216.184467,128.923298,66.076293,59.760393,25.383222,57.697759,38.014135,576.893785,2.74248,1.31486,79112.847055
min,0.0,0.0,0.0,0.0,2.0,0.0,1895.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2006.0,12789.0
25%,1.0,0.0,2.0,1.0,5.0,0.0,1961.0,1.0,317.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,2007.0,129500.0
50%,2.0,0.0,3.0,1.0,6.0,1.0,1980.0,2.0,480.0,0.0,26.0,0.0,0.0,0.0,0.0,0.0,6.0,2008.0,161000.0
75%,2.0,1.0,3.0,1.0,7.0,1.0,2002.0,2.0,576.0,168.0,70.0,0.0,0.0,0.0,0.0,0.0,8.0,2009.0,213430.0
max,4.0,2.0,8.0,3.0,15.0,4.0,2010.0,5.0,1418.0,1424.0,547.0,432.0,508.0,490.0,800.0,17000.0,12.0,2010.0,611657.0


In [484]:
# PID, convert years to ages, month

### Convert numeric data to categorical data where appropriate

In [485]:
# Columns that are stored as numbers but are to be treated as categorical labels rather than quantities
not_actually_numeric = ['PID', 'MS SubClass', 'Overall Qual', 'Overall Cond', 'Mo Sold']

# Presumably casts each listed column to str in place, i.e. props[col] = props[col].astype(str)
# for every col in the list — confirm against the helper definition.
convert_num_to_cat(props, not_actually_numeric)

In [486]:
props.dtypes

PID              object
MS SubClass      object
MS Zoning        object
Lot Frontage    float64
Lot Area          int64
                 ...   
Misc Val          int64
Mo Sold          object
Yr Sold           int64
Sale Type        object
SalePrice         int64
Length: 80, dtype: object

### Transform years into ages

In [487]:
props['Year Built'].max()

2010

In [488]:
# Pairs handed to convert_years_to_ages; presumably (existing year column, name of the resulting age column) — confirm against the helper
list_of_year_columns = [
    ('Year Built', 'Years since built'),
    ('Year Remod/Add', 'Years since remodel'),
    ('Yr Sold', 'Years since last sold')
]

In [489]:
# 2010 is the largest Year Built in the data (see the max() check above); it is passed to the helper as the reference year
reference_year = 2010
convert_years_to_ages(props, reference_year, list_of_year_columns)

In [490]:
props['Years since built'].describe()

count    2026.000000
mean       38.584403
std        30.145229
min         0.000000
25%        10.000000
50%        36.000000
75%        57.000000
max       138.000000
Name: Years since built, dtype: float64

### Re-check our re-casted columns

In [491]:
# Refresh both dtype snapshots so they reflect the binarized, re-cast and age-converted columns
post_clean_numeric_columns = props.select_dtypes(include='number')
post_clean_nonnumeric_columns = props.select_dtypes(exclude='number')

In [492]:
post_clean_numeric_columns.shape

(2026, 38)

In [493]:
post_clean_nonnumeric_columns.dtypes

PID               object
MS SubClass       object
MS Zoning         object
Alley             object
Lot Shape         object
Land Contour      object
Lot Config        object
Neighborhood      object
Condition 1       object
Condition 2       object
Bldg Type         object
House Style       object
Overall Qual      object
Overall Cond      object
Roof Style        object
Roof Matl         object
Exterior 1st      object
Exterior 2nd      object
Mas Vnr Type      object
Exter Qual        object
Exter Cond        object
Foundation        object
Bsmt Qual         object
Bsmt Cond         object
Bsmt Exposure     object
BsmtFin Type 1    object
BsmtFin Type 2    object
Heating           object
Heating QC        object
Electrical        object
Kitchen Qual      object
Functional        object
Fireplace Qu      object
Garage Type       object
Garage Finish     object
Garage Qual       object
Garage Cond       object
Pool QC           object
Fence             object
Misc Feature      object


In [494]:
post_clean_numeric_columns.dtypes

Lot Frontage             float64
Lot Area                   int64
Paved Street               int32
Has All Utilities          int32
Gentle Slope               int32
Years since built          int64
Years since remodel        int64
Mas Vnr Area             float64
BsmtFin SF 1             float64
BsmtFin SF 2             float64
Bsmt Unf SF              float64
Total Bsmt SF            float64
Central Air                int32
1st Flr SF                 int64
2nd Flr SF                 int64
Low Qual Fin SF            int64
Gr Liv Area                int64
Bsmt Full Bath           float64
Bsmt Half Bath           float64
Full Bath                  int64
Half Bath                  int64
Bedroom AbvGr              int64
Kitchen AbvGr              int64
TotRms AbvGrd              int64
Fireplaces                 int64
Garage Yr Blt            float64
Garage Cars              float64
Garage Area              float64
Paved Drive                int32
Wood Deck SF               int64
Open Porch

### Save our data in CSV form

In [495]:
# uncomment to repeat CSV creation
# props.to_csv('../datasets/model-ready-data.csv', index = True)