In [103]:
!pip install category_encoders

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import category_encoders as ce

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error

%matplotlib inline



In [104]:
df_train = pd.read_csv("train.csv")
df_test = pd.read_csv("test.csv")

In [105]:
df_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1460 entries, 0 to 1459
Data columns (total 81 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Id             1460 non-null   int64  
 1   MSSubClass     1460 non-null   int64  
 2   MSZoning       1460 non-null   object 
 3   LotFrontage    1201 non-null   float64
 4   LotArea        1460 non-null   int64  
 5   Street         1460 non-null   object 
 6   Alley          91 non-null     object 
 7   LotShape       1460 non-null   object 
 8   LandContour    1460 non-null   object 
 9   Utilities      1460 non-null   object 
 10  LotConfig      1460 non-null   object 
 11  LandSlope      1460 non-null   object 
 12  Neighborhood   1460 non-null   object 
 13  Condition1     1460 non-null   object 
 14  Condition2     1460 non-null   object 
 15  BldgType       1460 non-null   object 
 16  HouseStyle     1460 non-null   object 
 17  OverallQual    1460 non-null   int64  
 18  OverallC

In [106]:
# separete train data to X, y
X_train = df_train.drop("SalePrice", axis=1)
y_train = df_train["SalePrice"]

In [107]:
X_train.columns

Index(['Id', 'MSSubClass', 'MSZoning', 'LotFrontage', 'LotArea', 'Street',
       'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig',
       'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType',
       'HouseStyle', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd',
       'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType',
       'MasVnrArea', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual',
       'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinSF1',
       'BsmtFinType2', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', 'Heating',
       'HeatingQC', 'CentralAir', 'Electrical', '1stFlrSF', '2ndFlrSF',
       'LowQualFinSF', 'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath',
       'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'KitchenQual',
       'TotRmsAbvGrd', 'Functional', 'Fireplaces', 'FireplaceQu', 'GarageType',
       'GarageYrBlt', 'GarageFinish', 'GarageCars', 'GarageArea', 'GarageQual',
       'GarageCond', 'PavedDrive

In [108]:
# separate X_train in type(int, float, or object)
X_train_obj = pd.DataFrame()
X_train_int = pd.DataFrame()
X_train_flo = pd.DataFrame()

for col in X_train.columns:
    if X_train[col].dtype == "O":
        #print("object")
        X_train_obj = pd.concat([X_train_obj,X_train[col]], axis=1)
    elif X_train[col].dtype == "int64":
        #print("int64")
        X_train_int = pd.concat([X_train_int,X_train[col]], axis=1)
    else:
        #print("float64")
        X_train_flo = pd.concat([X_train_flo,X_train[col]], axis=1)


In [109]:
# set encoding function(how to chage object to int)
list_cols = X_train_obj.columns
encoding = ce.OrdinalEncoder(cols=list_cols)

In [110]:
# fit X_train_obj to encoding function
X_train_obj_ec = encoding.fit_transform(X_train_obj)
X_train_obj_ec

Unnamed: 0,MSZoning,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,...,GarageType,GarageFinish,GarageQual,GarageCond,PavedDrive,PoolQC,Fence,MiscFeature,SaleType,SaleCondition
0,1,1,3,1,1,1,1,1,1,1,...,1,1,1,1,1,4,5,5,1,1
1,1,1,3,1,1,1,2,1,2,2,...,1,1,1,1,1,4,5,5,1,1
2,1,1,3,2,1,1,1,1,1,1,...,1,1,1,1,1,4,5,5,1,1
3,1,1,3,2,1,1,3,1,3,1,...,2,2,1,1,1,4,5,5,1,2
4,1,1,3,2,1,1,2,1,4,1,...,1,1,1,1,1,4,5,5,1,1
5,1,1,3,2,1,1,1,1,5,1,...,1,2,1,1,1,4,1,1,1,1
6,1,1,3,1,1,1,1,1,6,1,...,1,1,1,1,1,4,5,5,1,1
7,1,1,3,2,1,1,3,1,7,3,...,1,1,1,1,1,4,5,1,1,1
8,2,1,3,1,1,1,1,1,8,4,...,2,2,2,1,1,4,5,5,1,2
9,1,1,3,1,1,1,3,1,9,4,...,1,1,3,1,1,4,5,5,1,1


In [111]:
X_train_obj_ec["PoolQC"].unique()

array([4, 1, 2, 3])

In [112]:
X_train_coc = pd.concat([X_train_obj_ec, X_train_int, X_train_flo], axis=1)

In [113]:
X_train_coc[['OverallQual', 'Id']]

Unnamed: 0,OverallQual,Id
0,7,1
1,6,2
2,7,3
3,7,4
4,8,5
5,5,6
6,8,7
7,7,8
8,7,9
9,5,10


In [114]:
X_train_coc.shape

(1460, 80)

In [115]:
X_train_coc.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1460 entries, 0 to 1459
Data columns (total 80 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   MSZoning       1460 non-null   int64  
 1   Street         1460 non-null   int64  
 2   Alley          1460 non-null   int64  
 3   LotShape       1460 non-null   int64  
 4   LandContour    1460 non-null   int64  
 5   Utilities      1460 non-null   int64  
 6   LotConfig      1460 non-null   int64  
 7   LandSlope      1460 non-null   int64  
 8   Neighborhood   1460 non-null   int64  
 9   Condition1     1460 non-null   int64  
 10  Condition2     1460 non-null   int64  
 11  BldgType       1460 non-null   int64  
 12  HouseStyle     1460 non-null   int64  
 13  RoofStyle      1460 non-null   int64  
 14  RoofMatl       1460 non-null   int64  
 15  Exterior1st    1460 non-null   int64  
 16  Exterior2nd    1460 non-null   int64  
 17  MasVnrType     1460 non-null   int64  
 18  ExterQual    

In [147]:
X_train_coc.corr()

Unnamed: 0,MSZoning,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,...,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold,LotFrontage,MasVnrArea,GarageYrBlt
MSZoning,1.000000,0.039678,-0.250108,-0.167918,-0.013396,-0.011167,-0.094747,-0.043444,-0.067889,-0.026021,...,0.075025,-0.045145,-0.067541,-0.029301,-0.021573,0.022231,-0.010621,-0.248800,-0.040722,0.029842
Street,0.039678,1.000000,0.015720,0.010129,0.097236,-0.001682,0.004458,0.179360,0.053445,0.023914,...,-0.023082,-0.007473,0.033160,-0.004413,0.022733,-0.003690,0.025043,0.040886,-0.017326,-0.027145
Alley,-0.250108,0.015720,1.000000,0.097078,-0.017513,0.006407,0.065500,0.030096,0.043543,-0.025739,...,-0.185081,0.028470,0.023216,0.016810,0.001162,0.013094,0.020944,0.138101,0.071665,0.213135
LotShape,-0.167918,0.010129,0.097078,1.000000,0.201047,0.026616,0.308073,0.144248,-0.013295,0.066612,...,-0.095092,0.033801,0.065182,0.047100,0.028638,0.026617,-0.037391,0.231878,0.089178,0.200238
LandContour,-0.013396,0.097236,-0.017513,0.201047,1.000000,-0.007963,0.021107,0.507203,0.113336,-0.021478,...,-0.002514,0.048478,0.003305,-0.008139,-0.014892,0.063280,-0.026117,0.096766,-0.019594,-0.022793
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
MoSold,0.022231,-0.003690,0.013094,0.026617,0.063280,-0.051552,-0.018916,0.007072,-0.003504,-0.033828,...,-0.028887,0.029474,0.023217,-0.033737,-0.006495,1.000000,-0.145721,0.011200,-0.005965,0.005337
YrSold,-0.010621,0.025043,0.020944,-0.037391,-0.026117,0.023353,0.002288,-0.002305,0.036299,0.010751,...,-0.009916,0.018645,0.010694,-0.059689,0.004906,-0.145721,1.000000,0.007450,-0.008201,-0.001014
LotFrontage,-0.248800,0.040886,0.138101,0.231878,0.096766,,0.150762,0.088345,-0.123530,0.083031,...,0.010700,0.070029,0.041383,0.206167,0.003368,0.011200,0.007450,1.000000,0.193458,0.070250
MasVnrArea,-0.040722,-0.017326,0.071665,0.089178,-0.019594,0.063427,0.044888,-0.021815,-0.080062,-0.043386,...,-0.110204,0.018796,0.061466,0.011723,-0.029815,-0.005965,-0.008201,0.193458,1.000000,0.252691


In [146]:
df.corr()

Unnamed: 0,MSZoning,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,...,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold,LotFrontage,MasVnrArea,GarageYrBlt,SalePrice
MSZoning,1.000000,0.039678,-0.250108,-0.167918,-0.013396,-0.011167,-0.094747,-0.043444,-0.067889,-0.026021,...,-0.045145,-0.067541,-0.029301,-0.021573,0.022231,-0.010621,-0.248800,-0.040722,0.029842,-0.116047
Street,0.039678,1.000000,0.015720,0.010129,0.097236,-0.001682,0.004458,0.179360,0.053445,0.023914,...,-0.007473,0.033160,-0.004413,0.022733,-0.003690,0.025043,0.040886,-0.017326,-0.027145,-0.041036
Alley,-0.250108,0.015720,1.000000,0.097078,-0.017513,0.006407,0.065500,0.030096,0.043543,-0.025739,...,0.028470,0.023216,0.016810,0.001162,0.013094,0.020944,0.138101,0.071665,0.213135,0.139868
LotShape,-0.167918,0.010129,0.097078,1.000000,0.201047,0.026616,0.308073,0.144248,-0.013295,0.066612,...,0.033801,0.065182,0.047100,0.028638,0.026617,-0.037391,0.231878,0.089178,0.200238,0.267759
LandContour,-0.013396,0.097236,-0.017513,0.201047,1.000000,-0.007963,0.021107,0.507203,0.113336,-0.021478,...,0.048478,0.003305,-0.008139,-0.014892,0.063280,-0.026117,0.096766,-0.019594,-0.022793,0.092009
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
YrSold,-0.010621,0.025043,0.020944,-0.037391,-0.026117,0.023353,0.002288,-0.002305,0.036299,0.010751,...,0.018645,0.010694,-0.059689,0.004906,-0.145721,1.000000,0.007450,-0.008201,-0.001014,-0.028923
LotFrontage,-0.248800,0.040886,0.138101,0.231878,0.096766,,0.150762,0.088345,-0.123530,0.083031,...,0.070029,0.041383,0.206167,0.003368,0.011200,0.007450,1.000000,0.193458,0.070250,0.351799
MasVnrArea,-0.040722,-0.017326,0.071665,0.089178,-0.019594,0.063427,0.044888,-0.021815,-0.080062,-0.043386,...,0.018796,0.061466,0.011723,-0.029815,-0.005965,-0.008201,0.193458,1.000000,0.252691,0.477493
GarageYrBlt,0.029842,-0.027145,0.213135,0.200238,-0.022793,-0.022382,0.029105,-0.074875,-0.105479,-0.067387,...,0.023544,-0.075418,-0.014501,-0.032417,0.005337,-0.001014,0.070250,0.252691,1.000000,0.486362


In [145]:
pd.set_option('display.max_rows', 10)

In [119]:
df = pd.concat([X_train_coc, y_train], axis=1)
df.corr().iloc[:, 80].sort_values()

FireplaceQu     -0.442834
Foundation      -0.429678
HeatingQC       -0.427649
BsmtFinType1    -0.387251
GarageType      -0.273539
ExterQual       -0.265015
GarageCond      -0.261527
CentralAir      -0.251328
GarageQual      -0.245912
Electrical      -0.226145
PavedDrive      -0.208954
HouseStyle      -0.188688
BsmtCond        -0.176352
Exterior2nd     -0.164716
Neighborhood    -0.143621
BsmtFinType2    -0.137066
KitchenAbvGr    -0.135907
EnclosedPorch   -0.128578
PoolQC          -0.126070
ExterCond       -0.121706
Exterior1st     -0.120586
MSZoning        -0.116047
KitchenQual     -0.114746
BldgType        -0.112611
Functional      -0.108367
Heating         -0.106673
BsmtQual        -0.084708
MSSubClass      -0.084284
OverallCond     -0.077856
Condition1      -0.044820
Street          -0.041036
YrSold          -0.028923
LowQualFinSF    -0.025606
Id              -0.021917
MiscVal         -0.021190
BsmtHalfBath    -0.016844
Utilities       -0.014314
BsmtFinSF2      -0.011378
GarageFinish

In [120]:
import lightgbm as lgb

In [121]:
# LightGBMのモデル構築
params = {
    "objective": "regression",
    "metric": "rmse",  # RMSEを評価指標に使用
    "boosting_type": "gbdt",
    "num_leaves": 31,
    "learning_rate": 0.05,
    "feature_fraction": 0.9,
    "bagging_fraction": 0.8,
    "bagging_freq": 5,
    "verbose": 0
}

In [122]:
X_train_pra, X_train_val, y_train_pra, y_train_val = train_test_split(X_train_coc, y_train, test_size=0.2)

In [123]:
train_data = lgb.Dataset(X_train_pra, label=y_train_pra)
eval_data = lgb.Dataset(X_train_val, label=y_train_val, reference=train_data)

In [124]:
model = lgb.train(params, train_data, valid_sets=[eval_data], num_boost_round=1000)

In [125]:
y_pred = model.predict(X_train_val)
score = r2_score(y_train_val, y_pred)
score

0.8914077957818839

In [126]:
print(type(y_train_val))
print(type(y_pred))

<class 'pandas.core.series.Series'>
<class 'numpy.ndarray'>


In [127]:
df_test

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition
0,1461,20,RH,80.0,11622,Pave,,Reg,Lvl,AllPub,...,120,0,,MnPrv,,0,6,2010,WD,Normal
1,1462,20,RL,81.0,14267,Pave,,IR1,Lvl,AllPub,...,0,0,,,Gar2,12500,6,2010,WD,Normal
2,1463,60,RL,74.0,13830,Pave,,IR1,Lvl,AllPub,...,0,0,,MnPrv,,0,3,2010,WD,Normal
3,1464,60,RL,78.0,9978,Pave,,IR1,Lvl,AllPub,...,0,0,,,,0,6,2010,WD,Normal
4,1465,120,RL,43.0,5005,Pave,,IR1,HLS,AllPub,...,144,0,,,,0,1,2010,WD,Normal
5,1466,60,RL,75.0,10000,Pave,,IR1,Lvl,AllPub,...,0,0,,,,0,4,2010,WD,Normal
6,1467,20,RL,,7980,Pave,,IR1,Lvl,AllPub,...,0,0,,GdPrv,Shed,500,3,2010,WD,Normal
7,1468,60,RL,63.0,8402,Pave,,IR1,Lvl,AllPub,...,0,0,,,,0,5,2010,WD,Normal
8,1469,20,RL,85.0,10176,Pave,,Reg,Lvl,AllPub,...,0,0,,,,0,2,2010,WD,Normal
9,1470,20,RL,70.0,8400,Pave,,Reg,Lvl,AllPub,...,0,0,,MnPrv,,0,4,2010,WD,Normal


In [128]:
df_test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1459 entries, 0 to 1458
Data columns (total 80 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Id             1459 non-null   int64  
 1   MSSubClass     1459 non-null   int64  
 2   MSZoning       1455 non-null   object 
 3   LotFrontage    1232 non-null   float64
 4   LotArea        1459 non-null   int64  
 5   Street         1459 non-null   object 
 6   Alley          107 non-null    object 
 7   LotShape       1459 non-null   object 
 8   LandContour    1459 non-null   object 
 9   Utilities      1457 non-null   object 
 10  LotConfig      1459 non-null   object 
 11  LandSlope      1459 non-null   object 
 12  Neighborhood   1459 non-null   object 
 13  Condition1     1459 non-null   object 
 14  Condition2     1459 non-null   object 
 15  BldgType       1459 non-null   object 
 16  HouseStyle     1459 non-null   object 
 17  OverallQual    1459 non-null   int64  
 18  OverallC

In [129]:
df_test.columns

Index(['Id', 'MSSubClass', 'MSZoning', 'LotFrontage', 'LotArea', 'Street',
       'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig',
       'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType',
       'HouseStyle', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd',
       'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType',
       'MasVnrArea', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual',
       'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinSF1',
       'BsmtFinType2', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', 'Heating',
       'HeatingQC', 'CentralAir', 'Electrical', '1stFlrSF', '2ndFlrSF',
       'LowQualFinSF', 'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath',
       'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'KitchenQual',
       'TotRmsAbvGrd', 'Functional', 'Fireplaces', 'FireplaceQu', 'GarageType',
       'GarageYrBlt', 'GarageFinish', 'GarageCars', 'GarageArea', 'GarageQual',
       'GarageCond', 'PavedDrive

In [130]:

X_test_obj = pd.DataFrame()
X_test_int = pd.DataFrame()
X_test_flo = pd.DataFrame()

for col in df_test.columns:
    if df_test[col].dtype == "O":
        #print("object")
        X_test_obj = pd.concat([X_test_obj,df_test[col]], axis=1)
    elif X_train[col].dtype == "int64":
        #print("int64")
        X_test_int = pd.concat([X_test_int,df_test[col]], axis=1)
    else:
        #print("float64")
        X_test_flo = pd.concat([X_test_flo,df_test[col]], axis=1)


In [131]:
X_test_obj_ec = encoding.fit_transform(X_train_obj)
X_test_obj_ec

Unnamed: 0,MSZoning,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,...,GarageType,GarageFinish,GarageQual,GarageCond,PavedDrive,PoolQC,Fence,MiscFeature,SaleType,SaleCondition
0,1,1,3,1,1,1,1,1,1,1,...,1,1,1,1,1,4,5,5,1,1
1,1,1,3,1,1,1,2,1,2,2,...,1,1,1,1,1,4,5,5,1,1
2,1,1,3,2,1,1,1,1,1,1,...,1,1,1,1,1,4,5,5,1,1
3,1,1,3,2,1,1,3,1,3,1,...,2,2,1,1,1,4,5,5,1,2
4,1,1,3,2,1,1,2,1,4,1,...,1,1,1,1,1,4,5,5,1,1
5,1,1,3,2,1,1,1,1,5,1,...,1,2,1,1,1,4,1,1,1,1
6,1,1,3,1,1,1,1,1,6,1,...,1,1,1,1,1,4,5,5,1,1
7,1,1,3,2,1,1,3,1,7,3,...,1,1,1,1,1,4,5,1,1,1
8,2,1,3,1,1,1,1,1,8,4,...,2,2,2,1,1,4,5,5,1,2
9,1,1,3,1,1,1,3,1,9,4,...,1,1,3,1,1,4,5,5,1,1


In [132]:
X_test_coc = pd.concat([X_test_obj_ec, X_test_int, X_test_flo], axis=1)

In [133]:
df_test.shape

(1459, 80)

In [134]:
X_test_coc.shape

(1460, 80)

In [135]:
X_test_coc.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1460 entries, 0 to 1459
Data columns (total 80 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   MSZoning       1460 non-null   int64  
 1   Street         1460 non-null   int64  
 2   Alley          1460 non-null   int64  
 3   LotShape       1460 non-null   int64  
 4   LandContour    1460 non-null   int64  
 5   Utilities      1460 non-null   int64  
 6   LotConfig      1460 non-null   int64  
 7   LandSlope      1460 non-null   int64  
 8   Neighborhood   1460 non-null   int64  
 9   Condition1     1460 non-null   int64  
 10  Condition2     1460 non-null   int64  
 11  BldgType       1460 non-null   int64  
 12  HouseStyle     1460 non-null   int64  
 13  RoofStyle      1460 non-null   int64  
 14  RoofMatl       1460 non-null   int64  
 15  Exterior1st    1460 non-null   int64  
 16  Exterior2nd    1460 non-null   int64  
 17  MasVnrType     1460 non-null   int64  
 18  ExterQual    

In [148]:
X_test_coc[['Id', 'OverallQual']]

Unnamed: 0,Id,OverallQual
0,1461.0,5.0
1,1462.0,6.0
2,1463.0,5.0
3,1464.0,6.0
4,1465.0,8.0
...,...,...
1454,2915.0,4.0
1455,2916.0,4.0
1456,2917.0,5.0
1457,2918.0,5.0


In [137]:
X_test_coc = X_test_coc.drop(X_test_coc.index[1459])

In [138]:
y_test_pred = model.predict(X_test_coc)
y_test_pred


array([135297.13485431, 175116.52876133, 181744.56450085, ...,
       171352.95313794, 140117.09965098, 207263.92897028])

In [139]:
submission = pd.DataFrame({'Id': df_test['Id'], 'SalePrice': y_test_pred})
submission.to_csv('submission_Advanced_Housing_Price_01.csv', index=False)

In [140]:
X_train_coc.columns

Index(['MSZoning', 'Street', 'Alley', 'LotShape', 'LandContour', 'Utilities',
       'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1', 'Condition2',
       'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl', 'Exterior1st',
       'Exterior2nd', 'MasVnrType', 'ExterQual', 'ExterCond', 'Foundation',
       'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2',
       'Heating', 'HeatingQC', 'CentralAir', 'Electrical', 'KitchenQual',
       'Functional', 'FireplaceQu', 'GarageType', 'GarageFinish', 'GarageQual',
       'GarageCond', 'PavedDrive', 'PoolQC', 'Fence', 'MiscFeature',
       'SaleType', 'SaleCondition', 'Id', 'MSSubClass', 'LotArea',
       'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd', 'BsmtFinSF1',
       'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', '1stFlrSF', '2ndFlrSF',
       'LowQualFinSF', 'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath',
       'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'TotRmsAbvGrd',
       'Fireplaces', 'Garag

In [141]:
X_train_coc.shape

(1460, 80)

In [142]:
X_test_coc.columns[79]

'GarageYrBlt'

In [143]:
if X_train_coc.columns[40] == X_test_coc.columns[40]:
  print('OK')

OK


In [144]:
for i in range(80):
  if X_train_coc.columns[i] == X_test_coc.columns[i]:
    continue
  else:
    print(X_test_coc.columns[i])
    break