# House Prices - Advanced Regression Techniques

In [1]:
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')


df = pd.read_csv('./train.csv')

# df.info() # 미친 ㅅㅂ 컬럼이 80개네
# 결측치 개수와 비율 정리
na_info = (
    df.isnull().sum()
      .to_frame("결측치개수")
      .assign(결측치비율=lambda x: (x["결측치개수"] / len(df) * 100).round(2))
      .query("결측치개수 > 0")
      .sort_values("결측치비율", ascending=False)
)

print(na_info)

#               결측치개수  결측치비율
# PoolQC         1453  99.52 # 수영장 퀄리티?
# MiscFeature    1406  96.30 # 기타 특성(비고)
# Alley          1369  93.77 # 이게 뭐임.
# Fence          1179  80.75 # 울타리
# 여기까지 drop
# MasVnrType      872  59.73 # 벽돌 유형(채워서 진행)
# FireplaceQu     690  47.26 # 벽난로 퀄리티
# LotFrontage     259  17.74 # 도로 접한 면적: 이거 결측값 0으로 채워
# GarageType       81   5.55 # 차고
# GarageYrBlt      81   5.55
# GarageFinish     81   5.55
# GarageQual       81   5.55
# GarageCond       81   5.55
# BsmtFinType2     38   2.60 # 지하실 # No Bsmt
# BsmtExposure     38   2.60
# BsmtFinType1     37   2.53
# BsmtCond         37   2.53
# BsmtQual         37   2.53
# MasVnrArea        8   0.55 # 벽돌외장처리: 0으로 채워.
# Electrical        1   0.07 # 이거는 최빈값으로 채우고

# df[df['BsmtFinType2'].isna()][['BsmtFinType2', 'BsmtExposure', 'BsmtFinType1', 'BsmtCond']]

              결측치개수  결측치비율
PoolQC         1453  99.52
MiscFeature    1406  96.30
Alley          1369  93.77
Fence          1179  80.75
MasVnrType      872  59.73
FireplaceQu     690  47.26
LotFrontage     259  17.74
GarageType       81   5.55
GarageYrBlt      81   5.55
GarageFinish     81   5.55
GarageQual       81   5.55
GarageCond       81   5.55
BsmtFinType2     38   2.60
BsmtExposure     38   2.60
BsmtFinType1     37   2.53
BsmtCond         37   2.53
BsmtQual         37   2.53
MasVnrArea        8   0.55
Electrical        1   0.07


결측값 채우기

In [None]:
def fill_missing_values(df_o: pd.DataFrame) -> pd.DataFrame:
    df = df_o.copy()
    
    # 1) Drop 대상
    drop_cols = ['PoolQC', 'MiscFeature', 'Alley', 'Fence']
    df = df.drop(columns=drop_cols, errors='ignore')
    
    # 2) 벽돌 외장
    df['MasVnrType'] = df['MasVnrType'].fillna('None')
    df['MasVnrArea'] = df['MasVnrArea'].fillna(0)
    
    # 3) 벽난로
    df['FireplaceQu'] = df['FireplaceQu'].fillna('None')
    
    # 4) LotFrontage → 0으로
    df['LotFrontage'] = df['LotFrontage'].fillna(0)
    
    # 5) Garage 관련
    garage_cat = ['GarageType','GarageFinish','GarageQual','GarageCond']
    for col in garage_cat:
        df[col] = df[col].fillna('NoGarage')
    df['GarageYrBlt'] = df['GarageYrBlt'].fillna(0)
    
    # 6) Basement 관련
    bsmt_cat = ['BsmtFinType2','BsmtExposure','BsmtFinType1','BsmtCond','BsmtQual']
    for col in bsmt_cat:
        df[col] = df[col].fillna('NoBsmt')
    
    # 7) Electrical → 최빈값
    if df['Electrical'].isnull().any():
        mode_val = df['Electrical'].mode()[0]
        df['Electrical'] = df['Electrical'].fillna(mode_val)
    
    return df


df = fill_missing_values(df)

na_info = (
    df.isnull().sum()
      .to_frame("결측치개수")
      .assign(결측치비율=lambda x: (x["결측치개수"] / len(df) * 100).round(2))
      .query("결측치개수 > 0")
      .sort_values("결측치비율", ascending=False)
)

df

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,LotShape,LandContour,Utilities,LotConfig,...,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,1,60,RL,65.0,8450,Pave,Reg,Lvl,AllPub,Inside,...,0,0,0,0,0,2,2008,WD,Normal,208500
1,2,20,RL,80.0,9600,Pave,Reg,Lvl,AllPub,FR2,...,0,0,0,0,0,5,2007,WD,Normal,181500
2,3,60,RL,68.0,11250,Pave,IR1,Lvl,AllPub,Inside,...,0,0,0,0,0,9,2008,WD,Normal,223500
3,4,70,RL,60.0,9550,Pave,IR1,Lvl,AllPub,Corner,...,272,0,0,0,0,2,2006,WD,Abnorml,140000
4,5,60,RL,84.0,14260,Pave,IR1,Lvl,AllPub,FR2,...,0,0,0,0,0,12,2008,WD,Normal,250000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1455,1456,60,RL,62.0,7917,Pave,Reg,Lvl,AllPub,Inside,...,0,0,0,0,0,8,2007,WD,Normal,175000
1456,1457,20,RL,85.0,13175,Pave,Reg,Lvl,AllPub,Inside,...,0,0,0,0,0,2,2010,WD,Normal,210000
1457,1458,70,RL,66.0,9042,Pave,Reg,Lvl,AllPub,Inside,...,0,0,0,0,2500,5,2010,WD,Normal,266500
1458,1459,20,RL,68.0,9717,Pave,Reg,Lvl,AllPub,Inside,...,112,0,0,0,0,4,2010,WD,Normal,142125


원핫인코딩

In [None]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

# num, cat 구분
# 숫자형 컬럼
num_cols = df.select_dtypes(include=['int64', 'float64']).columns.tolist()

# 범주형 컬럼
cat_cols = df.select_dtypes(include=['object']).columns.tolist()

print("숫자형:", len(num_cols), num_cols[:10])   # 앞 10개만 확인
print("범주형:", len(cat_cols), cat_cols[:10])




ohe = OneHotEncoder(sparse_output=False, drop=None)

cat_encoded = ohe.fit_transform(df[cat_cols])

# 인코딩 결과 → DataFrame 변환
cat_encoded_df = pd.DataFrame(cat_encoded, columns=ohe.get_feature_names_out(cat_cols), index=df.index)

# 숫자형 + 인코딩된 범주형 합치기
df_encoded = pd.concat([df[num_cols], cat_encoded_df], axis=1)


print(df_encoded.shape)

X = df_encoded.drop(columns=['SalePrice'])

y = np.log1p(df['SalePrice'])

X_train, X_test, y_train, y_test = train_test_split(X, y)




숫자형: 38 ['Id', 'MSSubClass', 'LotFrontage', 'LotArea', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd', 'MasVnrArea', 'BsmtFinSF1']
범주형: 39 ['MSZoning', 'Street', 'LotShape', 'LandContour', 'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1', 'Condition2']
(1460, 287)


LinearRegression 

In [4]:
from sklearn.linear_model import LinearRegression

lr = LinearRegression()
lr.fit(X_train, y_train)
print('train score: ', lr.score(X_train, y_train))
print('test score: ', lr.score(X_test, y_test))

from sklearn.linear_model import Ridge, Lasso

ridge = Ridge(alpha=1.0)
ridge.fit(X_train, y_train)
print('train score: ', ridge.score(X_train, y_train))
print('test score: ', ridge.score(X_test, y_test))

train score:  0.9516024255163575
test score:  0.8654227338684897
train score:  0.9425742905133341
test score:  0.8643650089032844


In [5]:
from sklearn.model_selection import cross_validate
from sklearn.ensemble import RandomForestRegressor

rf = RandomForestRegressor(n_jobs=-1, )

scores = cross_validate(rf, X_train, y_train,scoring="neg_root_mean_squared_error",  return_train_score=True, cv=5)

# #      훈련셋의 일부                      검증셋(훈련셋의 일부)
print(np.mean(-scores['train_score']), np.mean(-scores['test_score']))

# # 진짜 학습 시키기
rf.fit(X_train, y_train)
print(rf.score(X_train, y_train))
print(rf.score(X_test, y_test))


# # 트리(100개)들의 특성별 중요도 수치화
# # print(rf.feature_importances_)
# 0.08979389468885608 0.1520311633457924

0.054109350010370295 0.14548079984570156
0.9819652061081366
0.834566533227729


In [21]:
from scipy.stats import randint, uniform

# RandomForest
params_rf = {
    "n_estimators": randint(300, 1200),
    "max_depth": randint(10, 40),          # None은 randint로 못 줌, 일단 수치 제한
    "min_samples_leaf": randint(1, 10),
    "min_samples_split": randint(2, 20),
    "max_features": uniform(0.5, 0.5),     # 0.5~1.0
    "bootstrap": [True, False]
}

params_hgb = {
    "learning_rate": uniform(0.03, 0.12-0.03),   # 0.03~0.12
    "max_iter": randint(400, 1201),              # 상한 포함하려면 +1
    "max_leaf_nodes": randint(15, 64),           # 15~63
    "min_samples_leaf": randint(5, 31),          # 5~30
    "l2_regularization": uniform(0.0, 0.3),      # 0.0~0.3
    "max_bins": randint(128, 256),               # 128~255
    "early_stopping": [True],
    "validation_fraction": uniform(0.1, 0.2-0.1) # 0.1~0.2
}

# XGBoost
params_xgb = {
    "n_estimators": randint(800, 2001),     # 800~2000
    "learning_rate": uniform(0.03, 0.17),   # 0.03~0.20
    "max_depth": randint(3, 11),            # 3~10
    "subsample": uniform(0.6, 0.4),         # 0.6~1.0
    "colsample_bytree": uniform(0.6, 0.4),  # 0.6~1.0
    "min_child_weight": randint(1, 11),     # 1~10
    "gamma": uniform(0.0, 5.0),             # 0~5
    "reg_lambda": uniform(0.0, 10.0),       # 0~10
    "reg_alpha": uniform(0.0, 2.0)          # 0~2
}

In [None]:
from sklearn.model_selection import RandomizedSearchCV

gs = RandomizedSearchCV(
    RandomForestRegressor(random_state=42), # rf
    params_rf,
    n_iter = 100,
    n_jobs = -1,
    random_state=42
    )

gs.fit(X_train, y_train)
print(gs.best_params_)

# {'bootstrap': True, 'max_depth': 27, 'max_features': np.float64(0.5804040257087493), 'min_samples_leaf': 2, 'min_samples_split': 3, 'n_estimators': 1121}

{'bootstrap': True, 'max_depth': 27, 'max_features': np.float64(0.5804040257087493), 'min_samples_leaf': 2, 'min_samples_split': 3, 'n_estimators': 1121}


In [None]:
best_raw = {'bootstrap': True, 'max_depth': 27,
            'max_features': np.float64(0.5804040257087493),
            'min_samples_leaf': 2, 'min_samples_split': 3,
            'n_estimators': 1121}

# numpy → 파이썬 기본형
best_params = {
    'bootstrap': bool(best_raw['bootstrap']),
    'max_depth' : int(best_raw['max_depth']),
    'max_features': float(best_raw['max_features']),
    'min_samples_leaf': int(best_raw['min_samples_leaf']),
    'min_samples_split': int(best_raw['min_samples_split']),
    'n_estimators': int(best_raw['n_estimators']),
    'n_jobs': -1,
    'random_state': 42,
}

rf = RandomForestRegressor(**best_params)

scores = cross_validate(rf, X_train, y_train,scoring="neg_root_mean_squared_error",  return_train_score=True, cv=5)

# #      훈련셋의 일부                      검증셋(훈련셋의 일부)
print(np.mean(-scores['train_score']), np.mean(-scores['test_score']))

# # 진짜 학습 시키기
rf.fit(X_train, y_train)
print(rf.score(X_train, y_train)) # 0.975628325040225
print(rf.score(X_test, y_test)) # 0.8502624511660225



0.06308621729312208 0.13924146903979348
0.9756283250402253
0.8502624511660225


### HGB

In [None]:
from sklearn.ensemble import HistGradientBoostingRegressor

hgb = HistGradientBoostingRegressor()
scores = cross_validate(hgb, X_train, y_train, return_train_score=True, n_jobs=-1)

print(np.mean(scores['train_score']), np.mean(scores['test_score']))
# 0.9900170923050708 0.8815417009129604

hgb.fit(X_train, y_train)
print(hgb.score(X_train, y_train)) # 0.9901513072880985
print(hgb.score(X_test, y_test)) # 0.8688289758306995

0.9900170923050708 0.8815417009129604
0.9901513072880985
0.8688289758306995


In [None]:
search = RandomizedSearchCV(
    HistGradientBoostingRegressor(random_state=42),
    param_distributions=params_hgb,
    n_iter=30, cv=5, scoring="neg_root_mean_squared_error",
    n_jobs=-1, random_state=42, verbose=1
)

search.fit(X_train, y_train)
print(search.best_params_)
# Fitting 5 folds for each of 30 candidates, totalling 150 fits
# {'early_stopping': True, 'l2_regularization': np.float64(0.04442607898601997), 'learning_rate': np.float64(0.11979664365440476), 'max_bins': 186, 'max_iter': 512, 'max_leaf_nodes': 16, 'min_samples_leaf': 6, 'validation_fraction': np.float64(0.14110370133182315)}

In [None]:
best_params = {
    "early_stopping": True,
    "l2_regularization": float(0.04442607898601997),
    "learning_rate": float(0.11979664365440476),
    "max_bins": 186,
    "max_iter": 512,
    "max_leaf_nodes": 16,
    "min_samples_leaf": 6,
    "validation_fraction": float(0.14110370133182315),
}

hgb = HistGradientBoostingRegressor(random_state=42, **best_params)
hgb.fit(X_train, y_train)
print("train:", hgb.score(X_train, y_train)) # train: 0.9422398250810949
print("test :", hgb.score(X_test, y_test)) # test : 0.8488937101740044

train: 0.9422398250810949
test : 0.8488937101740044


In [20]:
from xgboost import XGBRegressor

xgb = XGBRegressor()
scores = cross_validate(xgb, X_train, y_train, return_train_score=True, n_jobs=-1)

print(np.mean(scores['train_score']), np.mean(scores['test_score']))
# 0.9900170923050708 0.8815417009129604

xgb.fit(X_train, y_train)
print(xgb.score(X_train, y_train)) # 0.9901513072880985
print(xgb.score(X_test, y_test)) # 0.8688289758306995

0.999931099568545 0.8702618937321709
0.9997896573836578
0.8365975189010924


In [31]:
search = RandomizedSearchCV(
    XGBRegressor(random_state=42),
    param_distributions=params_xgb,
    n_iter=30, cv=5, scoring="neg_root_mean_squared_error",
    n_jobs=-1, random_state=42, verbose=1
)

search.fit(X_train, y_train)
print(search.best_params_)

Fitting 5 folds for each of 30 candidates, totalling 150 fits
{'colsample_bytree': np.float64(0.8654007076432223), 'gamma': np.float64(0.025307919231093434), 'learning_rate': np.float64(0.05733736874097477), 'max_depth': 4, 'min_child_weight': 2, 'n_estimators': 1621, 'reg_alpha': np.float64(0.8968482859724947), 'reg_lambda': np.float64(9.944574626108206), 'subsample': np.float64(0.6703701010709381)}


In [None]:
best_params = {'colsample_bytree': np.float64(0.8654007076432223), 'gamma': np.float64(0.025307919231093434), 'learning_rate': np.float64(0.05733736874097477), 'max_depth': 4, 'min_child_weight': 2, 'n_estimators': 1621, 'reg_alpha': np.float64(0.8968482859724947), 'reg_lambda': np.float64(9.944574626108206), 'subsample': np.float64(0.6703701010709381)}
xgb = XGBRegressor(random_state=42, **best_params)
xgb.fit(X_train, y_train)
print("train:", xgb.score(X_train, y_train)) # train: 0.9533430408898599
print("test :", xgb.score(X_test, y_test)) # test : 0.8836256427243503

train: 0.9533430408898599
test : 0.8836256427243503


In [24]:
import numpy as np
from sklearn.model_selection import KFold, cross_val_score
from sklearn.metrics import mean_squared_error

def rmse(y_true, y_pred):
    return np.sqrt(mean_squared_error(y_true, y_pred))

# 준비: 너가 만든 최종 모델 인스턴스들로 채워라
models = {
    "rf": rf,
    "hgb": hgb,
    "xgb": xgb,
}

kf = KFold(n_splits=5, shuffle=True, random_state=42)

results = {}
for name, model in models.items():
    # 1) CV log RMSE (학습은 내부에서 다시 함)
    cv_scores = -cross_val_score(model, X_train, y_train,
                                 scoring="neg_root_mean_squared_error",
                                 cv=kf, n_jobs=-1)
    cv_mean, cv_std = cv_scores.mean(), cv_scores.std()

    # 2) 홀드아웃 log RMSE (현재 split 기준)
    model.fit(X_train, y_train)
    holdout = rmse(y_test, model.predict(X_test))

    results[name] = {"cv_logRMSE_mean": cv_mean,
                     "cv_logRMSE_std": cv_std,
                     "holdout_logRMSE": holdout}

for k,v in results.items():
    print(f"{k:>3} | CV {v['cv_logRMSE_mean']:.4f} ±{v['cv_logRMSE_std']:.4f} | Holdout {v['holdout_logRMSE']:.4f}")

 rf | CV 0.1392 ±0.0134 | Holdout 0.1594
hgb | CV 0.1338 ±0.0148 | Holdout 0.1601
xgb | CV 0.1352 ±0.0111 | Holdout 0.1557


테스트 전처리

In [37]:
final_test = pd.read_csv('./Test Data.csv')
final_test


ft = fill_missing_values(final_test)

def encode_dataframe(df: pd.DataFrame) -> pd.DataFrame:
    """
    DataFrame을 받아서:
    - 숫자형 컬럼은 그대로 두고
    - 범주형 컬럼은 원핫인코딩(OHE)하여 합친 뒤
    - 인코딩된 DataFrame을 반환
    """
    # 숫자형, 범주형 구분
    num_cols = df.select_dtypes(include=['int64', 'float64']).columns.tolist()
    cat_cols = df.select_dtypes(include=['object']).columns.tolist()

    # 원핫인코딩
    ohe = OneHotEncoder(sparse_output=False, drop=None, handle_unknown="ignore")
    cat_encoded = ohe.fit_transform(df[cat_cols])

    # OHE 결과 DataFrame
    cat_encoded_df = pd.DataFrame(
        cat_encoded,
        columns=ohe.get_feature_names_out(cat_cols),
        index=df.index
    )

    # 숫자형 + 인코딩 결과 합치기
    df_encoded = pd.concat([df[num_cols], cat_encoded_df], axis=1)

    return df_encoded

ft = encode_dataframe(ft)

# 결측값 있는 수치형 컬럼
fill_zero_cols = [
    "BsmtFullBath", "BsmtHalfBath",
    "BsmtFinSF1", "BsmtFinSF2", "BsmtUnfSF", "TotalBsmtSF",
    "GarageCars", "GarageArea"
]

ft[fill_zero_cols] = ft[fill_zero_cols].fillna(0)

ft.columns.unique() # 277
df.columns.unique()

Index(['Id', 'MSSubClass', 'MSZoning', 'LotFrontage', 'LotArea', 'Street',
       'LotShape', 'LandContour', 'Utilities', 'LotConfig', 'LandSlope',
       'Neighborhood', 'Condition1', 'Condition2', 'BldgType', 'HouseStyle',
       'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd', 'RoofStyle',
       'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType', 'MasVnrArea',
       'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual', 'BsmtCond',
       'BsmtExposure', 'BsmtFinType1', 'BsmtFinSF1', 'BsmtFinType2',
       'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', 'Heating', 'HeatingQC',
       'CentralAir', 'Electrical', '1stFlrSF', '2ndFlrSF', 'LowQualFinSF',
       'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath', 'HalfBath',
       'BedroomAbvGr', 'KitchenAbvGr', 'KitchenQual', 'TotRmsAbvGrd',
       'Functional', 'Fireplaces', 'FireplaceQu', 'GarageType', 'GarageYrBlt',
       'GarageFinish', 'GarageCars', 'GarageArea', 'GarageQual', 'GarageCond',
       'PavedDrive', 'WoodD

In [34]:
xgb = XGBRegressor(random_state=42, **best_params)
xgb.fit(X_train, y_train)
xgb.predict(ft)

ValueError: feature_names mismatch: ['Id', 'MSSubClass', 'LotFrontage', 'LotArea', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd', 'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', '1stFlrSF', '2ndFlrSF', 'LowQualFinSF', 'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'TotRmsAbvGrd', 'Fireplaces', 'GarageYrBlt', 'GarageCars', 'GarageArea', 'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'PoolArea', 'MiscVal', 'MoSold', 'YrSold', 'MSZoning_C (all)', 'MSZoning_FV', 'MSZoning_RH', 'MSZoning_RL', 'MSZoning_RM', 'Street_Grvl', 'Street_Pave', 'LotShape_IR1', 'LotShape_IR2', 'LotShape_IR3', 'LotShape_Reg', 'LandContour_Bnk', 'LandContour_HLS', 'LandContour_Low', 'LandContour_Lvl', 'Utilities_AllPub', 'Utilities_NoSeWa', 'LotConfig_Corner', 'LotConfig_CulDSac', 'LotConfig_FR2', 'LotConfig_FR3', 'LotConfig_Inside', 'LandSlope_Gtl', 'LandSlope_Mod', 'LandSlope_Sev', 'Neighborhood_Blmngtn', 'Neighborhood_Blueste', 'Neighborhood_BrDale', 'Neighborhood_BrkSide', 'Neighborhood_ClearCr', 'Neighborhood_CollgCr', 'Neighborhood_Crawfor', 'Neighborhood_Edwards', 'Neighborhood_Gilbert', 'Neighborhood_IDOTRR', 'Neighborhood_MeadowV', 'Neighborhood_Mitchel', 'Neighborhood_NAmes', 'Neighborhood_NPkVill', 'Neighborhood_NWAmes', 'Neighborhood_NoRidge', 'Neighborhood_NridgHt', 'Neighborhood_OldTown', 'Neighborhood_SWISU', 'Neighborhood_Sawyer', 'Neighborhood_SawyerW', 'Neighborhood_Somerst', 'Neighborhood_StoneBr', 'Neighborhood_Timber', 'Neighborhood_Veenker', 'Condition1_Artery', 'Condition1_Feedr', 'Condition1_Norm', 'Condition1_PosA', 'Condition1_PosN', 'Condition1_RRAe', 'Condition1_RRAn', 'Condition1_RRNe', 'Condition1_RRNn', 'Condition2_Artery', 'Condition2_Feedr', 'Condition2_Norm', 'Condition2_PosA', 'Condition2_PosN', 'Condition2_RRAe', 'Condition2_RRAn', 'Condition2_RRNn', 'BldgType_1Fam', 'BldgType_2fmCon', 'BldgType_Duplex', 'BldgType_Twnhs', 'BldgType_TwnhsE', 'HouseStyle_1.5Fin', 'HouseStyle_1.5Unf', 'HouseStyle_1Story', 'HouseStyle_2.5Fin', 'HouseStyle_2.5Unf', 'HouseStyle_2Story', 'HouseStyle_SFoyer', 'HouseStyle_SLvl', 'RoofStyle_Flat', 'RoofStyle_Gable', 'RoofStyle_Gambrel', 'RoofStyle_Hip', 'RoofStyle_Mansard', 'RoofStyle_Shed', 'RoofMatl_ClyTile', 'RoofMatl_CompShg', 'RoofMatl_Membran', 'RoofMatl_Metal', 'RoofMatl_Roll', 'RoofMatl_Tar&Grv', 'RoofMatl_WdShake', 'RoofMatl_WdShngl', 'Exterior1st_AsbShng', 'Exterior1st_AsphShn', 'Exterior1st_BrkComm', 'Exterior1st_BrkFace', 'Exterior1st_CBlock', 'Exterior1st_CemntBd', 'Exterior1st_HdBoard', 'Exterior1st_ImStucc', 'Exterior1st_MetalSd', 'Exterior1st_Plywood', 'Exterior1st_Stone', 'Exterior1st_Stucco', 'Exterior1st_VinylSd', 'Exterior1st_Wd Sdng', 'Exterior1st_WdShing', 'Exterior2nd_AsbShng', 'Exterior2nd_AsphShn', 'Exterior2nd_Brk Cmn', 'Exterior2nd_BrkFace', 'Exterior2nd_CBlock', 'Exterior2nd_CmentBd', 'Exterior2nd_HdBoard', 'Exterior2nd_ImStucc', 'Exterior2nd_MetalSd', 'Exterior2nd_Other', 'Exterior2nd_Plywood', 'Exterior2nd_Stone', 'Exterior2nd_Stucco', 'Exterior2nd_VinylSd', 'Exterior2nd_Wd Sdng', 'Exterior2nd_Wd Shng', 'MasVnrType_BrkCmn', 'MasVnrType_BrkFace', 'MasVnrType_None', 'MasVnrType_Stone', 'ExterQual_Ex', 'ExterQual_Fa', 'ExterQual_Gd', 'ExterQual_TA', 'ExterCond_Ex', 'ExterCond_Fa', 'ExterCond_Gd', 'ExterCond_Po', 'ExterCond_TA', 'Foundation_BrkTil', 'Foundation_CBlock', 'Foundation_PConc', 'Foundation_Slab', 'Foundation_Stone', 'Foundation_Wood', 'BsmtQual_Ex', 'BsmtQual_Fa', 'BsmtQual_Gd', 'BsmtQual_NoBsmt', 'BsmtQual_TA', 'BsmtCond_Fa', 'BsmtCond_Gd', 'BsmtCond_NoBsmt', 'BsmtCond_Po', 'BsmtCond_TA', 'BsmtExposure_Av', 'BsmtExposure_Gd', 'BsmtExposure_Mn', 'BsmtExposure_No', 'BsmtExposure_NoBsmt', 'BsmtFinType1_ALQ', 'BsmtFinType1_BLQ', 'BsmtFinType1_GLQ', 'BsmtFinType1_LwQ', 'BsmtFinType1_NoBsmt', 'BsmtFinType1_Rec', 'BsmtFinType1_Unf', 'BsmtFinType2_ALQ', 'BsmtFinType2_BLQ', 'BsmtFinType2_GLQ', 'BsmtFinType2_LwQ', 'BsmtFinType2_NoBsmt', 'BsmtFinType2_Rec', 'BsmtFinType2_Unf', 'Heating_Floor', 'Heating_GasA', 'Heating_GasW', 'Heating_Grav', 'Heating_OthW', 'Heating_Wall', 'HeatingQC_Ex', 'HeatingQC_Fa', 'HeatingQC_Gd', 'HeatingQC_Po', 'HeatingQC_TA', 'CentralAir_N', 'CentralAir_Y', 'Electrical_FuseA', 'Electrical_FuseF', 'Electrical_FuseP', 'Electrical_Mix', 'Electrical_SBrkr', 'KitchenQual_Ex', 'KitchenQual_Fa', 'KitchenQual_Gd', 'KitchenQual_TA', 'Functional_Maj1', 'Functional_Maj2', 'Functional_Min1', 'Functional_Min2', 'Functional_Mod', 'Functional_Sev', 'Functional_Typ', 'FireplaceQu_Ex', 'FireplaceQu_Fa', 'FireplaceQu_Gd', 'FireplaceQu_None', 'FireplaceQu_Po', 'FireplaceQu_TA', 'GarageType_2Types', 'GarageType_Attchd', 'GarageType_Basment', 'GarageType_BuiltIn', 'GarageType_CarPort', 'GarageType_Detchd', 'GarageType_NoGarage', 'GarageFinish_Fin', 'GarageFinish_NoGarage', 'GarageFinish_RFn', 'GarageFinish_Unf', 'GarageQual_Ex', 'GarageQual_Fa', 'GarageQual_Gd', 'GarageQual_NoGarage', 'GarageQual_Po', 'GarageQual_TA', 'GarageCond_Ex', 'GarageCond_Fa', 'GarageCond_Gd', 'GarageCond_NoGarage', 'GarageCond_Po', 'GarageCond_TA', 'PavedDrive_N', 'PavedDrive_P', 'PavedDrive_Y', 'SaleType_COD', 'SaleType_CWD', 'SaleType_Con', 'SaleType_ConLD', 'SaleType_ConLI', 'SaleType_ConLw', 'SaleType_New', 'SaleType_Oth', 'SaleType_WD', 'SaleCondition_Abnorml', 'SaleCondition_AdjLand', 'SaleCondition_Alloca', 'SaleCondition_Family', 'SaleCondition_Normal', 'SaleCondition_Partial'] ['Id', 'MSSubClass', 'LotFrontage', 'LotArea', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd', 'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', '1stFlrSF', '2ndFlrSF', 'LowQualFinSF', 'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'TotRmsAbvGrd', 'Fireplaces', 'GarageYrBlt', 'GarageCars', 'GarageArea', 'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'PoolArea', 'MiscVal', 'MoSold', 'YrSold', 'MSZoning_C (all)', 'MSZoning_FV', 'MSZoning_RH', 'MSZoning_RL', 'MSZoning_RM', 'MSZoning_nan', 'Street_Grvl', 'Street_Pave', 'LotShape_IR1', 'LotShape_IR2', 'LotShape_IR3', 'LotShape_Reg', 'LandContour_Bnk', 'LandContour_HLS', 'LandContour_Low', 'LandContour_Lvl', 'Utilities_AllPub', 'Utilities_nan', 'LotConfig_Corner', 'LotConfig_CulDSac', 'LotConfig_FR2', 'LotConfig_FR3', 'LotConfig_Inside', 'LandSlope_Gtl', 'LandSlope_Mod', 'LandSlope_Sev', 'Neighborhood_Blmngtn', 'Neighborhood_Blueste', 'Neighborhood_BrDale', 'Neighborhood_BrkSide', 'Neighborhood_ClearCr', 'Neighborhood_CollgCr', 'Neighborhood_Crawfor', 'Neighborhood_Edwards', 'Neighborhood_Gilbert', 'Neighborhood_IDOTRR', 'Neighborhood_MeadowV', 'Neighborhood_Mitchel', 'Neighborhood_NAmes', 'Neighborhood_NPkVill', 'Neighborhood_NWAmes', 'Neighborhood_NoRidge', 'Neighborhood_NridgHt', 'Neighborhood_OldTown', 'Neighborhood_SWISU', 'Neighborhood_Sawyer', 'Neighborhood_SawyerW', 'Neighborhood_Somerst', 'Neighborhood_StoneBr', 'Neighborhood_Timber', 'Neighborhood_Veenker', 'Condition1_Artery', 'Condition1_Feedr', 'Condition1_Norm', 'Condition1_PosA', 'Condition1_PosN', 'Condition1_RRAe', 'Condition1_RRAn', 'Condition1_RRNe', 'Condition1_RRNn', 'Condition2_Artery', 'Condition2_Feedr', 'Condition2_Norm', 'Condition2_PosA', 'Condition2_PosN', 'BldgType_1Fam', 'BldgType_2fmCon', 'BldgType_Duplex', 'BldgType_Twnhs', 'BldgType_TwnhsE', 'HouseStyle_1.5Fin', 'HouseStyle_1.5Unf', 'HouseStyle_1Story', 'HouseStyle_2.5Unf', 'HouseStyle_2Story', 'HouseStyle_SFoyer', 'HouseStyle_SLvl', 'RoofStyle_Flat', 'RoofStyle_Gable', 'RoofStyle_Gambrel', 'RoofStyle_Hip', 'RoofStyle_Mansard', 'RoofStyle_Shed', 'RoofMatl_CompShg', 'RoofMatl_Tar&Grv', 'RoofMatl_WdShake', 'RoofMatl_WdShngl', 'Exterior1st_AsbShng', 'Exterior1st_AsphShn', 'Exterior1st_BrkComm', 'Exterior1st_BrkFace', 'Exterior1st_CBlock', 'Exterior1st_CemntBd', 'Exterior1st_HdBoard', 'Exterior1st_MetalSd', 'Exterior1st_Plywood', 'Exterior1st_Stucco', 'Exterior1st_VinylSd', 'Exterior1st_Wd Sdng', 'Exterior1st_WdShing', 'Exterior1st_nan', 'Exterior2nd_AsbShng', 'Exterior2nd_AsphShn', 'Exterior2nd_Brk Cmn', 'Exterior2nd_BrkFace', 'Exterior2nd_CBlock', 'Exterior2nd_CmentBd', 'Exterior2nd_HdBoard', 'Exterior2nd_ImStucc', 'Exterior2nd_MetalSd', 'Exterior2nd_Plywood', 'Exterior2nd_Stone', 'Exterior2nd_Stucco', 'Exterior2nd_VinylSd', 'Exterior2nd_Wd Sdng', 'Exterior2nd_Wd Shng', 'Exterior2nd_nan', 'MasVnrType_BrkCmn', 'MasVnrType_BrkFace', 'MasVnrType_None', 'MasVnrType_Stone', 'ExterQual_Ex', 'ExterQual_Fa', 'ExterQual_Gd', 'ExterQual_TA', 'ExterCond_Ex', 'ExterCond_Fa', 'ExterCond_Gd', 'ExterCond_Po', 'ExterCond_TA', 'Foundation_BrkTil', 'Foundation_CBlock', 'Foundation_PConc', 'Foundation_Slab', 'Foundation_Stone', 'Foundation_Wood', 'BsmtQual_Ex', 'BsmtQual_Fa', 'BsmtQual_Gd', 'BsmtQual_NoBsmt', 'BsmtQual_TA', 'BsmtCond_Fa', 'BsmtCond_Gd', 'BsmtCond_NoBsmt', 'BsmtCond_Po', 'BsmtCond_TA', 'BsmtExposure_Av', 'BsmtExposure_Gd', 'BsmtExposure_Mn', 'BsmtExposure_No', 'BsmtExposure_NoBsmt', 'BsmtFinType1_ALQ', 'BsmtFinType1_BLQ', 'BsmtFinType1_GLQ', 'BsmtFinType1_LwQ', 'BsmtFinType1_NoBsmt', 'BsmtFinType1_Rec', 'BsmtFinType1_Unf', 'BsmtFinType2_ALQ', 'BsmtFinType2_BLQ', 'BsmtFinType2_GLQ', 'BsmtFinType2_LwQ', 'BsmtFinType2_NoBsmt', 'BsmtFinType2_Rec', 'BsmtFinType2_Unf', 'Heating_GasA', 'Heating_GasW', 'Heating_Grav', 'Heating_Wall', 'HeatingQC_Ex', 'HeatingQC_Fa', 'HeatingQC_Gd', 'HeatingQC_Po', 'HeatingQC_TA', 'CentralAir_N', 'CentralAir_Y', 'Electrical_FuseA', 'Electrical_FuseF', 'Electrical_FuseP', 'Electrical_SBrkr', 'KitchenQual_Ex', 'KitchenQual_Fa', 'KitchenQual_Gd', 'KitchenQual_TA', 'KitchenQual_nan', 'Functional_Maj1', 'Functional_Maj2', 'Functional_Min1', 'Functional_Min2', 'Functional_Mod', 'Functional_Sev', 'Functional_Typ', 'Functional_nan', 'FireplaceQu_Ex', 'FireplaceQu_Fa', 'FireplaceQu_Gd', 'FireplaceQu_None', 'FireplaceQu_Po', 'FireplaceQu_TA', 'GarageType_2Types', 'GarageType_Attchd', 'GarageType_Basment', 'GarageType_BuiltIn', 'GarageType_CarPort', 'GarageType_Detchd', 'GarageType_NoGarage', 'GarageFinish_Fin', 'GarageFinish_NoGarage', 'GarageFinish_RFn', 'GarageFinish_Unf', 'GarageQual_Fa', 'GarageQual_Gd', 'GarageQual_NoGarage', 'GarageQual_Po', 'GarageQual_TA', 'GarageCond_Ex', 'GarageCond_Fa', 'GarageCond_Gd', 'GarageCond_NoGarage', 'GarageCond_Po', 'GarageCond_TA', 'PavedDrive_N', 'PavedDrive_P', 'PavedDrive_Y', 'SaleType_COD', 'SaleType_CWD', 'SaleType_Con', 'SaleType_ConLD', 'SaleType_ConLI', 'SaleType_ConLw', 'SaleType_New', 'SaleType_Oth', 'SaleType_WD', 'SaleType_nan', 'SaleCondition_Abnorml', 'SaleCondition_AdjLand', 'SaleCondition_Alloca', 'SaleCondition_Family', 'SaleCondition_Normal', 'SaleCondition_Partial']
expected GarageQual_Ex, Condition2_RRNn, Utilities_NoSeWa, RoofMatl_Metal, RoofMatl_Membran, Exterior2nd_Other, Electrical_Mix, Exterior1st_Stone, Condition2_RRAe, Heating_OthW, Exterior1st_ImStucc, HouseStyle_2.5Fin, Heating_Floor, Condition2_RRAn, RoofMatl_ClyTile, RoofMatl_Roll in input data
training data did not have the following fields: KitchenQual_nan, Functional_nan, Exterior2nd_nan, Utilities_nan, MSZoning_nan, Exterior1st_nan, SaleType_nan