# Select Features Notebook
---

## This notebook is to help select features for future models.

If you want to see benchmarks after selecting, click [here](https://git.generalassemb.ly/laternader/project_2/blob/master/deliverables/code/4%20-%20Modeling-Benchmarks.ipynb) or [here for logged features](https://git.generalassemb.ly/laternader/project_2/blob/master/deliverables/code/4.5%20-%20Log-Benchmarks.ipynb).

##### Skeleton for executing AFTER mapping    
    # Load the cleaned frames and drop the 'Unnamed: 0' column, which is not a feature
    # (presumably the index saved alongside the CSV -- confirm).
    df = pd.read_csv('./datasets/train_cleaned.csv').drop(columns='Unnamed: 0')
    df_test = pd.read_csv('./datasets/test_cleaned.csv').drop(columns='Unnamed: 0')
    X_features = ['col1', 'col2']            # placeholder: every model input
    dummy_cols = ['cat_col1', 'cat_col2']    # placeholder: the categorical subset of X_features
    X = pd.get_dummies(data=df[X_features], columns=dummy_cols, drop_first=True)
    X_test = pd.get_dummies(data=df_test[X_features], columns=dummy_cols, drop_first=True)
    # Encoding the two frames separately can produce different dummy columns, so force the
    # test frame into the training layout (missing levels become 0, unseen levels are dropped).
    X_test = X_test.reindex(columns=X.columns, fill_value=0)
    y = df['target_col']
    # The hold-out split is named X_val/y_val so it does not overwrite the real test set X_test.
    X_train, X_val, y_train, y_val = train_test_split(X, y, random_state=42)
    lr = LinearRegression()
    lr.fit(X_train, y_train)
    train_pred = lr.predict(X_train)   # Optional: only needed for additional training metrics
    val_pred = lr.predict(X_val)       # Hold-out predictions, to be scored against y_val
    SalePrice = lr.predict(X_test)     # Predictions for the real test set
    print(SalePrice)

### Import Packages

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# Lift pandas' display limits so that every column and every row is rendered.
pd.options.display.max_columns = None
pd.options.display.max_rows = None

### Import Original Dataframes `train.csv` and `test.csv` 

In [3]:
# Read the original train and test CSVs (paths are relative to the working directory).
train, test = (
    pd.read_csv(csv_path)
    for csv_path in ('datasets/train.csv', 'datasets/test.csv')
)

In [4]:
# Normalise column labels to snake_case: lower-case them and replace spaces with underscores.
test.columns = test.columns.str.lower().str.replace(' ', '_', regex=False)
train.columns = train.columns.str.lower().str.replace(' ', '_', regex=False)

In [5]:
# Preview the first five training rows to check the renamed columns.
train.head(n=5)

Unnamed: 0,id,pid,ms_subclass,ms_zoning,lot_frontage,lot_area,street,alley,lot_shape,land_contour,utilities,lot_config,land_slope,neighborhood,condition_1,condition_2,bldg_type,house_style,overall_qual,overall_cond,year_built,year_remod/add,roof_style,roof_matl,exterior_1st,exterior_2nd,mas_vnr_type,mas_vnr_area,exter_qual,exter_cond,foundation,bsmt_qual,bsmt_cond,bsmt_exposure,bsmtfin_type_1,bsmtfin_sf_1,bsmtfin_type_2,bsmtfin_sf_2,bsmt_unf_sf,total_bsmt_sf,heating,heating_qc,central_air,electrical,1st_flr_sf,2nd_flr_sf,low_qual_fin_sf,gr_liv_area,bsmt_full_bath,bsmt_half_bath,full_bath,half_bath,bedroom_abvgr,kitchen_abvgr,kitchen_qual,totrms_abvgrd,functional,fireplaces,fireplace_qu,garage_type,garage_yr_blt,garage_finish,garage_cars,garage_area,garage_qual,garage_cond,paved_drive,wood_deck_sf,open_porch_sf,enclosed_porch,3ssn_porch,screen_porch,pool_area,pool_qc,fence,misc_feature,misc_val,mo_sold,yr_sold,sale_type,saleprice
0,109,533352170,60,RL,,13517,Pave,,IR1,Lvl,AllPub,CulDSac,Gtl,Sawyer,RRAe,Norm,1Fam,2Story,6,8,1976,2005,Gable,CompShg,HdBoard,Plywood,BrkFace,289.0,Gd,TA,CBlock,TA,TA,No,GLQ,533.0,Unf,0.0,192.0,725.0,GasA,Ex,Y,SBrkr,725,754,0,1479,0.0,0.0,2,1,3,1,Gd,6,Typ,0,,Attchd,1976.0,RFn,2.0,475.0,TA,TA,Y,0,44,0,0,0,0,,,,0,3,2010,WD,130500
1,544,531379050,60,RL,43.0,11492,Pave,,IR1,Lvl,AllPub,CulDSac,Gtl,SawyerW,Norm,Norm,1Fam,2Story,7,5,1996,1997,Gable,CompShg,VinylSd,VinylSd,BrkFace,132.0,Gd,TA,PConc,Gd,TA,No,GLQ,637.0,Unf,0.0,276.0,913.0,GasA,Ex,Y,SBrkr,913,1209,0,2122,1.0,0.0,2,1,4,1,Gd,8,Typ,1,TA,Attchd,1997.0,RFn,2.0,559.0,TA,TA,Y,0,74,0,0,0,0,,,,0,4,2009,WD,220000
2,153,535304180,20,RL,68.0,7922,Pave,,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,7,1953,2007,Gable,CompShg,VinylSd,VinylSd,,0.0,TA,Gd,CBlock,TA,TA,No,GLQ,731.0,Unf,0.0,326.0,1057.0,GasA,TA,Y,SBrkr,1057,0,0,1057,1.0,0.0,1,0,3,1,Gd,5,Typ,0,,Detchd,1953.0,Unf,1.0,246.0,TA,TA,Y,0,52,0,0,0,0,,,,0,1,2010,WD,109000
3,318,916386060,60,RL,73.0,9802,Pave,,Reg,Lvl,AllPub,Inside,Gtl,Timber,Norm,Norm,1Fam,2Story,5,5,2006,2007,Gable,CompShg,VinylSd,VinylSd,,0.0,TA,TA,PConc,Gd,TA,No,Unf,0.0,Unf,0.0,384.0,384.0,GasA,Gd,Y,SBrkr,744,700,0,1444,0.0,0.0,2,1,3,1,TA,7,Typ,0,,BuiltIn,2007.0,Fin,2.0,400.0,TA,TA,Y,100,0,0,0,0,0,,,,0,4,2010,WD,174000
4,255,906425045,50,RL,82.0,14235,Pave,,IR1,Lvl,AllPub,Inside,Gtl,SawyerW,Norm,Norm,1Fam,1.5Fin,6,8,1900,1993,Gable,CompShg,Wd Sdng,Plywood,,0.0,TA,TA,PConc,Fa,Gd,No,Unf,0.0,Unf,0.0,676.0,676.0,GasA,TA,Y,SBrkr,831,614,0,1445,0.0,0.0,2,0,3,1,TA,6,Typ,0,,Detchd,1957.0,Unf,2.0,484.0,TA,TA,N,0,59,0,0,0,0,,,,0,3,2010,WD,138500


In [6]:
# Preview the first five test rows to check the renamed columns.
test.head(n=5)

Unnamed: 0,id,pid,ms_subclass,ms_zoning,lot_frontage,lot_area,street,alley,lot_shape,land_contour,utilities,lot_config,land_slope,neighborhood,condition_1,condition_2,bldg_type,house_style,overall_qual,overall_cond,year_built,year_remod/add,roof_style,roof_matl,exterior_1st,exterior_2nd,mas_vnr_type,mas_vnr_area,exter_qual,exter_cond,foundation,bsmt_qual,bsmt_cond,bsmt_exposure,bsmtfin_type_1,bsmtfin_sf_1,bsmtfin_type_2,bsmtfin_sf_2,bsmt_unf_sf,total_bsmt_sf,heating,heating_qc,central_air,electrical,1st_flr_sf,2nd_flr_sf,low_qual_fin_sf,gr_liv_area,bsmt_full_bath,bsmt_half_bath,full_bath,half_bath,bedroom_abvgr,kitchen_abvgr,kitchen_qual,totrms_abvgrd,functional,fireplaces,fireplace_qu,garage_type,garage_yr_blt,garage_finish,garage_cars,garage_area,garage_qual,garage_cond,paved_drive,wood_deck_sf,open_porch_sf,enclosed_porch,3ssn_porch,screen_porch,pool_area,pool_qc,fence,misc_feature,misc_val,mo_sold,yr_sold,sale_type
0,2658,902301120,190,RM,69.0,9142,Pave,Grvl,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,2fmCon,2Story,6,8,1910,1950,Gable,CompShg,AsbShng,AsbShng,,0.0,TA,Fa,Stone,Fa,TA,No,Unf,0,Unf,0,1020,1020,GasA,Gd,N,FuseP,908,1020,0,1928,0,0,2,0,4,2,Fa,9,Typ,0,,Detchd,1910.0,Unf,1,440,Po,Po,Y,0,60,112,0,0,0,,,,0,4,2006,WD
1,2718,905108090,90,RL,,9662,Pave,,IR1,Lvl,AllPub,Inside,Gtl,Sawyer,Norm,Norm,Duplex,1Story,5,4,1977,1977,Gable,CompShg,Plywood,Plywood,,0.0,TA,TA,CBlock,Gd,TA,No,Unf,0,Unf,0,1967,1967,GasA,TA,Y,SBrkr,1967,0,0,1967,0,0,2,0,6,2,TA,10,Typ,0,,Attchd,1977.0,Fin,2,580,TA,TA,Y,170,0,0,0,0,0,,,,0,8,2006,WD
2,2414,528218130,60,RL,58.0,17104,Pave,,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,7,5,2006,2006,Gable,CompShg,VinylSd,VinylSd,,0.0,Gd,TA,PConc,Gd,Gd,Av,GLQ,554,Unf,0,100,654,GasA,Ex,Y,SBrkr,664,832,0,1496,1,0,2,1,3,1,Gd,7,Typ,1,Gd,Attchd,2006.0,RFn,2,426,TA,TA,Y,100,24,0,0,0,0,,,,0,9,2006,New
3,1989,902207150,30,RM,60.0,8520,Pave,,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,1Story,5,6,1923,2006,Gable,CompShg,Wd Sdng,Wd Sdng,,0.0,Gd,TA,CBlock,TA,TA,No,Unf,0,Unf,0,968,968,GasA,TA,Y,SBrkr,968,0,0,968,0,0,1,0,2,1,TA,5,Typ,0,,Detchd,1935.0,Unf,2,480,Fa,TA,N,0,0,184,0,0,0,,,,0,7,2007,WD
4,625,535105100,20,RL,,9500,Pave,,IR1,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,5,1963,1963,Gable,CompShg,Plywood,Plywood,BrkFace,247.0,TA,TA,CBlock,Gd,TA,No,BLQ,609,Unf,0,785,1394,GasA,Gd,Y,SBrkr,1394,0,0,1394,1,0,1,1,3,1,TA,6,Typ,2,Gd,Attchd,1963.0,RFn,2,514,TA,TA,Y,0,76,0,0,185,0,,,,0,7,2009,WD


# Data dictionary 

<i>This data dictionary is formatted to help me decide which features to select. The null counts come from the original datasets we were given and do not change. The only thing that changes is whether a line is commented out, which tells me which features I selected.</i>
    
    <column name> [Number of nulls] (Decision, if needed): small description (number of unique values)

In [1]:
# Data Dictionary
## column_name(Decision)[null values]: description(unique values)
#
# Every entry is a comment so that this cell always runs cleanly.
# The selection state that used to be tracked by (un)commenting a line is kept as a marker:
#   [x] = entry was left uncommented (selected)     [ ] = entry was commented out (not selected)

# [x] mssubclass(Add): building class (16)
# [ ] mszoning(Ignore)
# [ ] lotfrontage[160]: linear feet of street connected to property
# [ ] lotarea: lot size in sq ft
# [ ] street: type of road access to property (2)
# [ ] alley[820]: type of alley access (3)
# [ ] lot_shape(add): shape of property (4)
# [ ] landcontour(Ignore): flatness of property
# [ ] utilities(Add): (4)
# [ ] lotconfig(Maybe): (5)
# [ ] landslope(Ignore): slope of property(3)
# [x] neighborhood(Add): street names(25)
# [ ] condition1(Maybe): proximity to main(railroad)(9)
# [ ] condition2(Maybe): railroad(second)(9)
# [x] bldgtype(Add): building type(5)
# [x] housestyle(Add): style of dwelling(8)
# [ ] overallqual(Add): overall material and finish quality(10)
# [x] overallcond(Add): overall condition rating(10)
# [x] yearbuilt(Maybe):
# [ ] yearremodadd: remodel date(same as yearbuilt if none)
# [ ] roofstyle: type of roof(6)
# [ ] roofmatl: roof material(8)
# [ ] exterior1st: exterior covering house(17)
# [ ] exterior2nd: second layer(17)
# [ ] masvnrtype[1]: masonry veneer type(5)
# [ ] masvnrarea[1]: masonry veneer area in sqft
# [ ] exterqual: exterior material quality(5)
# [ ] foundation: type of foundation(6)
# [ ] bsmtqual[25]: height of basement(6)
    ##Ex Excellent (100+ inches)
    ##Gd Good (90-99 inches)
    ##TA Typical (80-89 inches)
    ##Fa Fair (70-79 inches)
    ##Po Poor (<70 inches)
    ##NA No Basement
# [ ] bsmtcond[25]: general condition
# [ ] bsmtexposure[25]: walkout or garden level basement walls
# [ ] bsmtfintype1[25]: quality of basement finished area
# [ ] bsmtfinsf1[]: type 1 finished sqft
# [ ] bsmtfintype2[25]: second area
# [ ] bsmtfinsf2[]: type2 area
# [ ] bsmtunfsf[]: unfinished sqft area
# [ ] totalbsmtsf[]: total sqft of basement area
# [ ] heating(Add): type of heating(6)
# [x] heatingqc(Add): heating quality and condition(5)
# [x] centralair: central air conditioning(replace yes/no to 1/0)
# [x] electrical(Maybe)[1]: electrical system(5)
# [ ] 1stfloorsf: first floor square feet
# [ ] 2ndfloorsf: second floor sqft
# [ ] lowqualfinsf: low quality finished square feet (all floors)
# [ ] grlivarea: above grade (ground) living area sqft
# [ ] bsmtfullbath[2]: basement full bathrooms
# [ ] bsmthalfbath[2]: basement half bath
# [x] fullbath: full bathrooms
# [x] halfbath: half bathrooms
# [x] bedroom(add): number of bedrooms
# [ ] kitchen(add): number of kitchens
# [x] kitchenqual(add): kitchen quality(5)
# [x] totrmsabvgrd(add): total rooms above grade (no bathrooms)
# [x] functional(add): home functionality rating(8)
# [x] fireplaces(add)[422]: number of fireplaces
# [ ] fireplacequ[1000]: fireplace quality(6)
    ## check if nulls are no fireplace
# [ ] garagetype[44]: garage location(7)
# [ ] garageyrblt[45]: year garage built
# [ ] garagefinish[45]: interior finish of the garage(4)
# [x] garagecars[](add): size of garage in car capacity
# [ ] garagearea[](add): size of garage
# [x] garagequal[45]: garage quality(6)
# [x] garagecond[45]: garage condition(6)
# [x] paveddrive(add): paved driveway(3)
# [ ] wooddecksf: wood deck area in sqft
# [ ] openporchsf: open porch area in sqft
# [ ] 3ssnporch(ignore): enclosed porch area in sqft
    ## NOTE(review): 3ssnporch is presumably the three-season porch area; the text above looks
    ## like the description of enclosed_porch, which has no entry of its own here -- confirm.
# [ ] screenporch(ignore): screen porch area in sqft
# [ ] poolarea: pool area in sqft
# [ ] poolqc[874]: pool quality(5)
# [ ] fence[706]: fence quality(5)
# [ ] miscfeature[837]: miscellaneous feature not covered in categories(6)
# [ ] miscval: $value of miscellaneous feature
# [x] mosold(add): month sold
# [x] yrsold(maybe): year sold
# [x] saletype: type of sale(10)

### Feature Selection

The features selected below do not reflect the features used in the final presentation. They are kept purely for organizational purposes.

In [24]:
# ctrl + ? easy (un)comment

# Working feature selection: uncomment a line to include that column.
features = [
    # 'id',
    # 'pid',
    # 'ms_subclass',
    # 'ms_zoning',
    # 'lot_frontage',
    # 'lot_area',
    # 'street',
    # 'alley',
    # 'lot_shape',
    # 'land_contour',
    # 'utilities',
    # 'lot_config',
    # 'land_slope',
    # 'neighborhood',
    # 'condition_1',
    # 'condition_2',
    # 'bldg_type',
    # 'house_style',
    'overall_qual',
    # 'overall_cond',
    'year_built',
    'year_remod/add',
    # 'roof_style',
    # 'roof_matl',
    # 'exterior_1st',
    # 'exterior_2nd',
    # 'mas_vnr_type',
    'mas_vnr_area',
    # 'exter_qual',
    # 'exter_cond',
    # 'foundation',
    # 'bsmt_qual',
    # 'bsmt_cond',
    # 'bsmt_exposure',
    # 'bsmtfin_type_1',
    # 'bsmtfin_sf_1',
    # 'bsmtfin_type_2',
    # 'bsmtfin_sf_2',
    # 'bsmt_unf_sf',
    'total_bsmt_sf',
    # 'heating',
    # 'heating_qc',
    # 'central_air',
    # 'electrical',
    '1st_flr_sf',
    # '2nd_flr_sf',
    # 'low_qual_fin_sf',
    'gr_liv_area',
    # 'bsmt_full_bath',
    # 'bsmt_half_bath',
    'full_bath',
    # 'half_bath',
    # 'bedroom_abvgr',
    # 'kitchen_abvgr',
    # 'kitchen_qual',
    # 'totrms_abvgrd',
    # 'functional',
    # 'fireplaces',
    # 'fireplace_qu',
    # 'garage_type',
    'garage_yr_blt',
    # 'garage_finish',
    'garage_cars',
    'garage_area',
    # 'garage_qual',
    # 'garage_cond',
    # 'paved_drive',
    # 'wood_deck_sf',
    # 'open_porch_sf',
    # 'enclosed_porch',
    # '3ssn_porch',
    # 'screen_porch',
    # 'pool_area',
    # 'pool_qc',
    # 'fence',
    # 'misc_feature',
    # 'misc_val',
    # 'mo_sold',
    # 'yr_sold',
    # 'sale_type'
]

### Mapping Tracker

In [None]:
# ## WHICH ONES HAVE BEEN MAPPED

# Tracker of the columns that have been mapped so far: an uncommented entry means "mapped".
# The entries are wrapped in a list so the cell is valid Python; the bare indented strings
# it contained before raised "IndentationError: unexpected indent" when run.
mapped_features = [
#             'id', 
#             'pid', 
            'ms_subclass', 
#             'ms_zoning', 
#             'lot_frontage', 
#             'lot_area',
#             'street', 
#             'alley', 
#             'lot_shape', 
#             'land_contour', 
#             'utilities',
#             'lot_config', 
#             'land_slope', 
            'neighborhood', 
#             'condition_1',
#             'condition_2', 
#             'bldg_type', 
#             'house_style', 
            'overall_qual',
#             'overall_cond', 
#             'year_built', 
#             'year_remod/add', 
#             'roof_style',
#             'roof_matl', 
#             'exterior_1st',
#             'exterior_2nd',
#             'mas_vnr_type',
#             'mas_vnr_area', 
#             'exter_qual', 
#             'exter_cond', 
#             'foundation', 
#             'bsmt_qual',
#             'bsmt_cond', 
#             'bsmt_exposure',
#             'bsmtfin_type_1', 
#             'bsmtfin_sf_1',
#             'bsmtfin_type_2',
#             'bsmtfin_sf_2',
#             'bsmt_unf_sf',
#             'total_bsmt_sf',
            'heating', 
#             'heating_qc', 
#             'central_air', 
            'electrical', 
#             '1st_flr_sf',
#             '2nd_flr_sf',
#             'low_qual_fin_sf', 
#             'gr_liv_area',
#             'bsmt_full_bath',
#             'bsmt_half_bath',
#             'full_bath',
#             'half_bath',
#             'bedroom_abvgr',
#             'kitchen_abvgr',
            'kitchen_qual',
#             'totrms_abvgrd',
            'functional',
#             'fireplaces',
#             'fireplace_qu', 
#             'garage_type',
            'garage_yr_blt',
#             'garage_finish',
#             'garage_cars',
            'garage_area',
            'garage_qual',
#             'garage_cond',
#             'paved_drive',
#             'wood_deck_sf',
#             'open_porch_sf',
#             'enclosed_porch', 
#             '3ssn_porch',
#             'screen_porch',
#             'pool_area',
#             'pool_qc',
#             'fence',
#             'misc_feature',
#             'misc_val',
#             'mo_sold',
#             'yr_sold',
            'sale_type'
            ]

#### The code below helped determine which categorical columns needed to be converted into dummy (one-hot encoded) variables.

In [8]:
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.select_dtypes.html
# Kept for reference only (not executed): lists the object-dtype columns among the selected features.
# NOTE(review): `X_features` is not defined in the visible cells of this notebook -- presumably
# the `features` list was meant; confirm before re-enabling the line below.
# dummies_columns = list(train[X_features].select_dtypes(include='object'))

# Archived Features

These cells contain a history of the features I have used.

## FEATURES 1

In [24]:
# Archived selection #1, grouped by theme; the order matches the original list.
features1 = [
    # building / location
    'ms_subclass', 'neighborhood', 'bldg_type', 'house_style',
    # condition and age
    'overall_cond', 'year_built',
    # utilities
    'heating_qc', 'central_air', 'electrical',
    # rooms
    'full_bath', 'half_bath', 'bedroom_abvgr', 'kitchen_qual',
    'totrms_abvgrd', 'functional', 'fireplaces',
    # garage
    'garage_cars', 'garage_qual', 'garage_cond',
    # sale
    'mo_sold', 'yr_sold', 'sale_type',
]
# Display the list together with its length.
(features1, len(features1))

(['ms_subclass',
  'neighborhood',
  'bldg_type',
  'house_style',
  'overall_cond',
  'year_built',
  'heating_qc',
  'central_air',
  'electrical',
  'full_bath',
  'half_bath',
  'bedroom_abvgr',
  'kitchen_qual',
  'totrms_abvgrd',
  'functional',
  'fireplaces',
  'garage_cars',
  'garage_qual',
  'garage_cond',
  'mo_sold',
  'yr_sold',
  'sale_type'],
 22)

## FEATURES 2

In [26]:
# Archived selection #2, grouped by theme; the order matches the original list.
features2 = [
    # building / location
    'ms_subclass', 'street', 'alley', 'neighborhood', 'bldg_type', 'house_style',
    # quality, condition and age
    'overall_qual', 'overall_cond', 'year_built',
    # basement
    'bsmt_qual', 'bsmt_cond',
    # utilities
    'heating_qc', 'central_air', 'electrical',
    # rooms
    'full_bath', 'half_bath', 'bedroom_abvgr', 'kitchen_qual',
    'totrms_abvgrd', 'functional', 'fireplaces',
    # garage
    'garage_cars', 'garage_qual', 'garage_cond',
    # sale
    'mo_sold', 'yr_sold', 'sale_type',
]
# Display the list together with its length.
(features2, len(features2))

(['ms_subclass',
  'street',
  'alley',
  'neighborhood',
  'bldg_type',
  'house_style',
  'overall_qual',
  'overall_cond',
  'year_built',
  'bsmt_qual',
  'bsmt_cond',
  'heating_qc',
  'central_air',
  'electrical',
  'full_bath',
  'half_bath',
  'bedroom_abvgr',
  'kitchen_qual',
  'totrms_abvgrd',
  'functional',
  'fireplaces',
  'garage_cars',
  'garage_qual',
  'garage_cond',
  'mo_sold',
  'yr_sold',
  'sale_type'],
 27)

## FEATURES 3

In [29]:
# Archived selection #3, pinned as a literal list.
# It used to be `features3 = features`, an alias of the working selection. Once that
# selection was edited, a fresh Restart & Run All no longer reproduced the 33 columns
# recorded in this cell's output and relied on by the CHECKING cells.
features3 = [
    'ms_subclass', 'street', 'alley', 'neighborhood', 'bldg_type', 'house_style',
    'overall_qual', 'overall_cond', 'year_built', 'year_remod/add',
    'bsmt_qual', 'bsmt_cond', 'total_bsmt_sf',
    'heating_qc', 'central_air', 'electrical',
    '1st_flr_sf', 'gr_liv_area',
    'full_bath', 'half_bath', 'bedroom_abvgr', 'kitchen_qual',
    'totrms_abvgrd', 'functional', 'fireplaces',
    'garage_yr_blt', 'garage_cars', 'garage_area', 'garage_qual', 'garage_cond',
    'mo_sold', 'yr_sold', 'sale_type',
]
features3, len(features3)

(['ms_subclass',
  'street',
  'alley',
  'neighborhood',
  'bldg_type',
  'house_style',
  'overall_qual',
  'overall_cond',
  'year_built',
  'year_remod/add',
  'bsmt_qual',
  'bsmt_cond',
  'total_bsmt_sf',
  'heating_qc',
  'central_air',
  'electrical',
  '1st_flr_sf',
  'gr_liv_area',
  'full_bath',
  'half_bath',
  'bedroom_abvgr',
  'kitchen_qual',
  'totrms_abvgrd',
  'functional',
  'fireplaces',
  'garage_yr_blt',
  'garage_cars',
  'garage_area',
  'garage_qual',
  'garage_cond',
  'mo_sold',
  'yr_sold',
  'sale_type'],
 33)

## FEATURES 4

In [25]:
# Archived selection #4, pinned as a literal list.
# It used to be `features4 = features`, an alias of the working selection, so any later
# edit to that selection silently rewrote this archive. The entries below are the 11
# columns recorded in this cell's output.
features4 = [
    'overall_qual', 'year_built', 'year_remod/add', 'mas_vnr_area',
    'total_bsmt_sf', '1st_flr_sf', 'gr_liv_area', 'full_bath',
    'garage_yr_blt', 'garage_cars', 'garage_area',
]
features4, len(features4)

(['overall_qual',
  'year_built',
  'year_remod/add',
  'mas_vnr_area',
  'total_bsmt_sf',
  '1st_flr_sf',
  'gr_liv_area',
  'full_bath',
  'garage_yr_blt',
  'garage_cars',
  'garage_area'],
 11)

## CHECKING

The information in the next few cells is not indicative of the final work. These cells are only there to help me check for mistakes and see what my selected dataframe looks like.

In [85]:
# Set X = dataframe containing features you want
X = train[features3]
X.head()

Unnamed: 0,ms_subclass,street,alley,neighborhood,bldg_type,house_style,overall_qual,overall_cond,year_built,year_remod/add,bsmt_qual,bsmt_cond,total_bsmt_sf,heating_qc,central_air,electrical,1st_flr_sf,gr_liv_area,full_bath,half_bath,bedroom_abvgr,kitchen_qual,totrms_abvgrd,functional,fireplaces,garage_yr_blt,garage_cars,garage_area,garage_qual,garage_cond,mo_sold,yr_sold,sale_type
0,60,Pave,,Sawyer,1Fam,2Story,6,8,1976,2005,TA,TA,725.0,Ex,Y,SBrkr,725,1479,2,1,3,Gd,6,Typ,0,1976.0,2.0,475.0,TA,TA,3,2010,WD
1,60,Pave,,SawyerW,1Fam,2Story,7,5,1996,1997,Gd,TA,913.0,Ex,Y,SBrkr,913,2122,2,1,4,Gd,8,Typ,1,1997.0,2.0,559.0,TA,TA,4,2009,WD
2,20,Pave,,NAmes,1Fam,1Story,5,7,1953,2007,TA,TA,1057.0,TA,Y,SBrkr,1057,1057,1,0,3,Gd,5,Typ,0,1953.0,1.0,246.0,TA,TA,1,2010,WD
3,60,Pave,,Timber,1Fam,2Story,5,5,2006,2007,Gd,TA,384.0,Gd,Y,SBrkr,744,1444,2,1,3,TA,7,Typ,0,2007.0,2.0,400.0,TA,TA,4,2010,WD
4,50,Pave,,SawyerW,1Fam,1.5Fin,6,8,1900,1993,Fa,Gd,676.0,TA,Y,SBrkr,831,1445,2,0,3,TA,6,Typ,0,1957.0,2.0,484.0,TA,TA,3,2010,WD


In [86]:
# Show the dtype and non-null count of every selected column.
X.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2051 entries, 0 to 2050
Data columns (total 33 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   ms_subclass     2051 non-null   int64  
 1   street          2051 non-null   object 
 2   alley           140 non-null    object 
 3   neighborhood    2051 non-null   object 
 4   bldg_type       2051 non-null   object 
 5   house_style     2051 non-null   object 
 6   overall_qual    2051 non-null   int64  
 7   overall_cond    2051 non-null   int64  
 8   year_built      2051 non-null   int64  
 9   year_remod/add  2051 non-null   int64  
 10  bsmt_qual       1996 non-null   object 
 11  bsmt_cond       1996 non-null   object 
 12  total_bsmt_sf   2050 non-null   float64
 13  heating_qc      2051 non-null   object 
 14  central_air     2051 non-null   object 
 15  electrical      2051 non-null   object 
 16  1st_flr_sf      2051 non-null   int64  
 17  gr_liv_area     2051 non-null   i

In [87]:
# Count the missing values in each selected column.
X.isna().sum()

ms_subclass          0
street               0
alley             1911
neighborhood         0
bldg_type            0
house_style          0
overall_qual         0
overall_cond         0
year_built           0
year_remod/add       0
bsmt_qual           55
bsmt_cond           55
total_bsmt_sf        1
heating_qc           0
central_air          0
electrical           0
1st_flr_sf           0
gr_liv_area          0
full_bath            0
half_bath            0
bedroom_abvgr        0
kitchen_qual         0
totrms_abvgrd        0
functional           0
fireplaces           0
garage_yr_blt      114
garage_cars          1
garage_area          1
garage_qual        114
garage_cond        114
mo_sold              0
yr_sold              0
sale_type            0
dtype: int64