In [None]:
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import itertools
from tensorflow import keras

# Load the training and test CSV files from the working directory.
train, test = (pd.read_csv(csv_name) for csv_name in ('train.csv', 'test.csv'))

# Remove every training row whose complex code is on the known-bad list.
error_data = ['C2085', 'C1397', 'C2431', 'C1649', 'C1095', 'C2051', 'C1218', 'C1894', 'C2483', 'C1502', 'C1988']
train = train[~train['단지코드'].isin(error_data)]

# Drop exact duplicate rows from both frames.
train = train.drop_duplicates()
test = test.drop_duplicates()

# Keep only rows whose building type is '아파트' and whose supply type is not '공공분양'.
train_keep = train['임대건물구분'].eq('아파트') & train['공급유형'].ne('공공분양')
train = train[train_keep]
test_keep = test['임대건물구분'].eq('아파트') & test['공급유형'].ne('공공분양')
test = test[test_keep]

# Convert the deposit and rent columns to float: the '-' placeholder becomes NaN first
# so that the cast succeeds.
for frame in (train, test):
    for money_col in ('임대보증금', '임대료'):
        is_placeholder = frame[money_col] == '-'
        frame.loc[is_placeholder, money_col] = np.nan
        frame[money_col] = frame[money_col].astype(float)

# For supply type '장기전세', a missing rent is set to 0 (train only).
jeonse_without_rent = train['공급유형'].eq('장기전세') & train['임대료'].isnull()
train.loc[jeonse_without_rent, '임대료'] = 0

# Missing subway-station / bus-stop counts are treated as 0. (These two columns are
# renamed to '지하철' and '버스' further down.)
#
# The filled Series is assigned back to the frame instead of calling
# `frame[col].fillna(0, inplace=True)`: an in-place call on a column selection is a
# chained assignment, which is deprecated in pandas 2.x and leaves the frame untouched
# once Copy-on-Write is enabled.
for frame in (train, test):
    for transit_col in (
        '도보 10분거리 내 지하철역 수(환승노선 수 반영)',
        '도보 10분거리 내 버스정류장 수',
    ):
        frame[transit_col] = frame[transit_col].fillna(0)

# Fill the missing qualification type in the test set for two specific complexes.
for complex_code, qualification in (('C2411', 'A'), ('C2253', 'C')):
    needs_fill = test['단지코드'].eq(complex_code) & test['자격유형'].isnull()
    test.loc[needs_fill, '자격유형'] = qualification

# train: Gangwon-do "행복주택" complex C1786 -- hand-entered deposit / rent per floor area.
#
# NOTE(review): the original cell wrote three different (deposit, rent) pairs through
# the *same* mask (단지코드 == 'C1786' and 전용면적 == 26.9), so only the last pair ever
# survived. The overwritten pairs were (19700000, 96000) and (19150000, 94000).
# They were presumably meant for distinct rows, but nothing in this cell tells those
# rows apart -- TODO: add the missing discriminator and restore them.
# Until then the dead writes are removed and the effective (last) pair is kept, so the
# resulting frame is unchanged.
for area, deposit, rent in (
    (16.91, 13450000, 65500),
    (26.9, 21400000, 105000),
):
    rows = (train['단지코드'] == 'C1786') & (train['전용면적'] == area)
    train.loc[rows, '임대보증금'] = deposit
    train.loc[rows, '임대료'] = rent

# train: Busan "국민임대" complex C1326 -- hand-entered deposit / rent per floor area.
for area, deposit, rent in (
    (24.72, 7000000, 135000),
    (24.79, 7000000, 135000),
    (26.83, 7600000, 142000),
    (37.7, 14800000, 198000),
    (46.94, 23100000, 259000),
):
    rows = (train['단지코드'] == 'C1326') & (train['전용면적'] == area)
    train.loc[rows, '임대보증금'] = deposit
    train.loc[rows, '임대료'] = rent

# train: Daegu "국민임대" complex C2186 -- hand-entered deposit / rent per floor area.
for area, deposit, rent in (
    (29.17, 10847000, 138600),
    (29.34, 10847000, 138600),
    (37.43, 17338000, 197500),
):
    rows = (train['단지코드'] == 'C2186') & (train['전용면적'] == area)
    train.loc[rows, '임대보증금'] = deposit
    train.loc[rows, '임대료'] = rent

# test: Daejeon complex C1006 -- hand-entered deposit / rent per floor area; the
# qualification type of the same rows is set to 'C'.
for area, deposit, rent in (
    (26.37, 5787000, 79980),
    (52.74, 11574000, 159960),
):
    rows = (test['단지코드'] == 'C1006') & (test['전용면적'] == area)
    test.loc[rows, '임대보증금'] = deposit
    test.loc[rows, '임대료'] = rent
    test.loc[rows, '자격유형'] = 'C'

# test: Gangwon-do "영구임대" complex C2152 -- hand-entered deposit / rent per floor area.
for area, deposit, rent in (
    (24.83, 2129000, 42350),
    (33.84, 2902000, 57730),
):
    rows = (test['단지코드'] == 'C2152') & (test['전용면적'] == area)
    test.loc[rows, '임대보증금'] = deposit
    test.loc[rows, '임대료'] = rent

# test: Gyeongsangnam-do "행복주택" complex C1267 -- hand-entered deposit / rent per floor area.
#
# NOTE(review): the original cell wrote two different (deposit, rent) pairs through the
# *same* mask (단지코드 == 'C1267' and 전용면적 == 26.85), so only the second pair ever
# survived. The overwritten pair was (16333330, 77580). It was presumably meant for
# a different row, but nothing in this cell tells the rows apart -- TODO: add the
# missing discriminator and restore it.
# Until then the dead write is removed and the effective (last) pair is kept, so the
# resulting frame is unchanged.
for area, deposit, rent in (
    (16.94, 11200000, 53200),
    (26.85, 18620000, 88440),
    (36.77, 23760000, 112860),
):
    rows = (test['단지코드'] == 'C1267') & (test['전용면적'] == area)
    test.loc[rows, '임대보증금'] = deposit
    test.loc[rows, '임대료'] = rent

# Replace the column labels positionally (the subway and bus column names were too
# long, so they become '지하철' and '버스'). The train frame carries one extra trailing
# column, the target '등록차량수'.
shared_column_names = [
    '단지코드', '총세대수', '임대건물구분', '지역', '공급유형',
    '전용면적', '전용면적별세대수', '공가수', '자격유형', '임대보증금',
    '임대료', '지하철', '버스', '단지내주차면수',
]
train.columns = shared_column_names + ['등록차량수']
test.columns = list(shared_column_names)

# Collapse the supply type into four coarser groups, stored in a new '공급유형그룹'
# column; supply types not listed here end up as NaN. The original column is
# dropped afterwards.
supply_type_to_group = {
    '국민임대': '국민임대',
    '공공임대(50년)': '장기공공임대',
    '공공임대(5년)': '단기공공임대',
    '공공임대(10년)': '단기공공임대',
    '공공임대(분납)': '단기공공임대',
    '행복주택': '저소득층',
    '영구임대': '저소득층',
    '장기전세': '저소득층',
}

train['공급유형그룹'] = train['공급유형'].map(supply_type_to_group)
test['공급유형그룹'] = test['공급유형'].map(supply_type_to_group)

train = train.drop(columns='공급유형')
test = test.drop(columns='공급유형')

# Bucket each region into one of five integer groups (0-4), inserted as a new 'n지역'
# column at position 4. Regions that are not listed keep the empty-string
# placeholder. The original '지역' column is dropped afterwards.
region_to_group = {
    '강원도': 0, '제주특별자치도': 0, '충청남도': 0, '울산광역시': 0,
    '전라남도': 1, '전라북도': 1, '경상남도': 1,
    '부산광역시': 2, '충청북도': 2, '경상북도': 2,
    '대전광역시': 3, '광주광역시': 3, '서울특별시': 3,
    '경기도': 4, '대구광역시': 4, '세종특별자치시': 4,
}

train.insert(4, 'n지역', train['지역'].map(lambda region: region_to_group.get(region, '')))
test.insert(4, 'n지역', test['지역'].map(lambda region: region_to_group.get(region, '')))

train = train.drop(columns='지역')
test = test.drop(columns='지역')

# Collapse the qualification type letters into five groups (q1-q5), stored in a new
# '자격유형그룹' column; letters not listed here end up as NaN. The original column is
# dropped afterwards.
qualification_groups = {
    'q1': ['E', 'H', 'I', 'J'],
    'q2': ['B', 'L'],
    'q3': ['G', 'K', 'M', 'N', 'O'],
    'q4': ['D', 'C', 'F'],
    'q5': ['A'],
}
letter_to_group = {
    letter: group
    for group, letters in qualification_groups.items()
    for letter in letters
}

train['자격유형그룹'] = train['자격유형'].map(letter_to_group)
test['자격유형그룹'] = test['자격유형'].map(letter_to_group)

train = train.drop(columns='자격유형')
test = test.drop(columns='자격유형')

# Renumber the rows from 0; with drop=False the previous index is kept as an
# 'index' column.
train = train.reset_index(drop=False)
test = test.reset_index(drop=False)

# 원-핫 인코딩
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the three grouped categorical columns.
onehot_source_cols = ['n지역', '공급유형그룹', '자격유형그룹']
train = pd.get_dummies(train, columns=onehot_source_cols)
test = pd.get_dummies(test, columns=onehot_source_cols)

# Keep the test complex codes as a list of single-element lists for later use.
code = test[['단지코드']]
code = code.values.tolist()

base_features = [
    '총세대수', '전용면적', '전용면적별세대수', '공가수', '임대보증금', '임대료',
    '지하철', '버스', '단지내주차면수',
]
onehot_features = [
    'n지역_0', 'n지역_1', 'n지역_2', 'n지역_3', 'n지역_4',
    '공급유형그룹_국민임대', '공급유형그룹_단기공공임대', '공급유형그룹_장기공공임대', '공급유형그룹_저소득층',
    '자격유형그룹_q1', '자격유형그룹_q2', '자격유형그룹_q3', '자격유형그룹_q4', '자격유형그룹_q5',
]

# train and test are encoded independently, so a category that never occurs in one
# of them produces no dummy column there and the fixed selection below would raise
# KeyError. Add any missing dummy as an all-zero column so both frames share the
# same schema. Base features are deliberately NOT back-filled: if one of those is
# missing the selection should still fail loudly.
for frame in (train, test):
    for dummy_col in onehot_features:
        if dummy_col not in frame.columns:
            frame[dummy_col] = 0

# Fixed feature order shared by both frames; train additionally keeps the target.
feature_cols = base_features + onehot_features
train = train[feature_cols + ['등록차량수']]
test = test[feature_cols]

############################################################
#######################전처리 완료##########################
############################################################

# train데이터에서  data, target 지정 
from sklearn.model_selection import train_test_split 

# data = train.drop(columns=['등록차량수'])
# target = train['등록차량수']

# # train세트와 test세트로 나누기
# train_input, test_input, train_target, test_target = train_test_split(data, target, test_size=0.2, random_state=42)

  uniques = Index(uniques)
  uniques = Index(uniques)


In [None]:
# catboost를 포함하기 위하여 pycaret 업그레이드
!pip install --upgrade pycaret

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pycaret
  Downloading pycaret-3.0.0-py3-none-any.whl (481 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m481.8/481.8 KB[0m [31m10.6 MB/s[0m eta [36m0:00:00[0m
Collecting tbats>=1.1.0
  Downloading tbats-1.1.2-py3-none-any.whl (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.8/43.8 KB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting wurlitzer
  Downloading wurlitzer-3.0.3-py3-none-any.whl (7.3 kB)
Collecting scikit-plot>=0.3.7
  Downloading scikit_plot-0.3.7-py3-none-any.whl (33 kB)
Collecting joblib>=1.2.0
  Downloading joblib-1.2.0-py3-none-any.whl (297 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m298.0/298.0 KB[0m [31m36.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting category-encoders>=2.4.0
  Downloading category_encoders-2.6.0-py2.py3-none-any.whl (81 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━

In [None]:
from pycaret.regression import *

In [None]:
# Initialise the PyCaret regression experiment with an 80/20 train/hold-out split.
# session_id fixes the random seed so the split and every later step are
# reproducible; 765 is the session id shown in this notebook's saved setup output
# (it was drawn at random when no seed was passed).
# NOTE(review): the saved prediction output shows several rows with identical
# complex-level features and the same target value, so a purely random split may
# place near-duplicate rows on both sides and inflate the scores -- consider a
# grouped split by complex code; TODO confirm.
s = setup(
    data=train,
    target='등록차량수',
    train_size=0.8,
    session_id=765,
)

Unnamed: 0,Description,Value
0,Session id,765
1,Target,등록차량수
2,Target type,Regression
3,Original data shape,"(2264, 24)"
4,Transformed data shape,"(2264, 24)"
5,Transformed train set shape,"(1811, 24)"
6,Transformed test set shape,"(453, 24)"
7,Numeric features,23
8,Preprocess,True
9,Imputation type,simple


In [None]:
# Candidate selection for blending: cross-validate the available regressors with
# 10 folds, rank them by RMSE and keep the top five.
best = compare_models(
    fold=10,
    n_select=5,
    sort='RMSE',
)

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
et,Extra Trees Regressor,10.0199,1198.653,31.8151,0.9926,0.1024,0.0344,0.471
xgboost,Extreme Gradient Boosting,28.0903,2851.9743,52.8277,0.9824,0.1521,0.0819,0.334
rf,Random Forest Regressor,29.9223,3259.0749,56.6379,0.9799,0.1615,0.0884,0.948
dt,Decision Tree Regressor,13.2708,3643.8881,59.0516,0.9775,0.1321,0.0378,0.071
catboost,CatBoost Regressor,39.4561,3699.6117,60.2848,0.9773,0.187,0.1157,2.802
lightgbm,Light Gradient Boosting Machine,43.3005,4522.7895,66.8946,0.9719,0.1948,0.1243,0.269
gbr,Gradient Boosting Regressor,81.0017,12697.4905,112.416,0.9218,0.2773,0.212,0.554
ada,AdaBoost Regressor,137.3587,29593.49,171.911,0.8188,0.4409,0.4479,0.214
ridge,Ridge Regression,134.8746,35605.8614,188.4694,0.7834,0.4404,0.3199,0.089
lr,Linear Regression,135.0288,35608.1357,188.4761,0.7834,0.4537,0.3214,0.424


Processing:   0%|          | 0/89 [00:00<?, ?it/s]

In [None]:
# Tune each selected model, then blend the tuned models into one ensemble.
# The candidates were ranked by RMSE in compare_models, so tuning optimises RMSE as
# well; tune_model otherwise falls back to its default metric (R2), which would make
# selection and tuning pull towards different objectives.
best_tune = [tune_model(candidate, optimize='RMSE') for candidate in best]
blender_5 = blend_models(best_tune)

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,65.3923,13094.3543,114.4306,0.9108,0.2543,0.1632
1,62.1162,9208.49,95.9609,0.9507,0.2866,0.214
2,61.5327,9212.1281,95.9798,0.9446,0.2145,0.1526
3,52.7552,6820.085,82.5838,0.9563,0.25,0.1733
4,65.5566,9370.3301,96.8005,0.9365,0.2397,0.1724
5,62.8581,11532.5643,107.3898,0.9411,0.1874,0.1274
6,69.5768,11063.3825,105.1826,0.9199,0.2481,0.1764
7,62.7567,9724.7437,98.6141,0.9551,0.2657,0.1953
8,68.9441,11184.3645,105.7562,0.9324,0.2129,0.1503
9,61.4054,8478.6683,92.0797,0.9406,0.231,0.1622


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 10 candidates, totalling 100 fits


Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,35.8757,4255.876,65.2371,0.971,0.1608,0.0866
1,26.847,3335.2314,57.7515,0.9821,0.1541,0.0775
2,28.8436,2957.4551,54.3825,0.9822,0.1188,0.0659
3,28.6183,4204.0225,64.8384,0.973,0.1397,0.0686
4,24.9461,1911.8605,43.7248,0.987,0.1202,0.0645
5,27.0619,3794.1233,61.5965,0.9806,0.1131,0.0572
6,33.4499,4918.4736,70.1318,0.9644,0.2058,0.1051
7,34.2667,3819.5774,61.8027,0.9824,0.1941,0.1109
8,33.3936,4206.8506,64.8602,0.9746,0.1638,0.0848
9,30.5678,3342.9607,57.8183,0.9766,0.1135,0.0626


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 10 candidates, totalling 100 fits


Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,58.4305,10870.4071,104.2612,0.926,0.2154,0.1355
1,50.0018,7734.4442,87.9457,0.9586,0.2616,0.1796
2,49.2581,8007.8427,89.4866,0.9519,0.1733,0.1131
3,43.3267,4900.3283,70.0023,0.9686,0.2239,0.1427
4,51.8028,6453.2105,80.3319,0.9562,0.2133,0.1395
5,51.1109,7782.0606,88.216,0.9602,0.1583,0.1022
6,56.3726,8587.3715,92.6681,0.9379,0.2261,0.1478
7,52.0161,7682.4099,87.6494,0.9646,0.2429,0.1641
8,52.7227,7343.6226,85.6949,0.9556,0.1805,0.1118
9,50.911,7347.3035,85.7164,0.9485,0.2146,0.1363


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 10 candidates, totalling 100 fits


Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,88.0672,21598.6429,146.9648,0.8529,0.3064,0.2124
1,88.6843,22469.8152,149.8994,0.8797,0.3482,0.2919
2,68.2912,12640.3115,112.4291,0.924,0.2312,0.1502
3,83.4399,25595.4285,159.9857,0.8358,0.3462,0.2634
4,86.4423,16886.313,129.9473,0.8855,0.2904,0.2082
5,81.6913,23350.0419,152.8072,0.8807,0.2982,0.1726
6,100.3888,26013.749,161.2878,0.8117,0.3423,0.238
7,107.396,28516.127,168.8672,0.8684,0.3234,0.2527
8,89.1223,24442.769,156.3418,0.8524,0.2688,0.1688
9,86.5774,19744.2717,140.5143,0.8616,0.2835,0.1888


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 10 candidates, totalling 100 fits


Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,35.3709,3714.0408,60.9429,0.9747,0.1912,0.1099
1,27.4418,2141.2671,46.2738,0.9885,0.195,0.1098
2,31.832,2359.8206,48.578,0.9858,0.1904,0.1131
3,33.05,2659.9988,51.5752,0.9829,0.207,0.1175
4,31.6228,2168.1132,46.563,0.9853,0.1406,0.0865
5,35.1213,10393.1493,101.9468,0.9469,0.1671,0.0729
6,33.9333,2989.9538,54.6805,0.9784,0.1766,0.1011
7,32.7924,2849.0386,53.3764,0.9869,0.2149,0.1309
8,39.292,3253.2333,57.0371,0.9803,0.1456,0.0908
9,28.8566,2155.7012,46.4295,0.9849,0.1663,0.0894


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 10 candidates, totalling 100 fits


Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,23.6888,2247.3807,47.4065,0.9847,0.1422,0.0686
1,18.574,1201.7719,34.6666,0.9936,0.1743,0.0859
2,20.5512,2206.2319,46.9705,0.9867,0.1101,0.0556
3,19.4719,1582.8088,39.7845,0.9898,0.1261,0.0622
4,19.6031,1135.9183,33.7034,0.9923,0.1005,0.0557
5,20.8021,3075.1141,55.4537,0.9843,0.0738,0.0397
6,21.7674,3103.3242,55.7075,0.9775,0.1691,0.0763
7,22.3793,1848.3178,42.9921,0.9915,0.1667,0.0821
8,21.608,1466.5927,38.2961,0.9911,0.1066,0.0517
9,18.1806,1269.7217,35.6332,0.9911,0.1279,0.0539


Processing:   0%|          | 0/6 [00:00<?, ?it/s]

In [None]:
# Display the blended ensemble object.
blender_5

In [None]:
# Lock in the final model: refit the blended ensemble on the entire dataset.
final_model = finalize_model(estimator=blender_5)

In [None]:
# Display the finalized model object.
final_model

In [None]:
# Predict on the training set, from row position 100 onwards.
# NOTE(review): final_model comes from finalize_model, so these rows were presumably
# part of its fitting data and the reported metrics are in-sample -- confirm before
# reading them as generalisation performance.
train_rows_from_100 = train.iloc[100:]
prediction = predict_model(estimator=final_model, data=train_rows_from_100)

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Voting Regressor,3.798,32.4412,5.6957,0.9998,0.031,0.0109


In [None]:
# Display the prediction frame returned by predict_model.
prediction

Unnamed: 0,총세대수,전용면적,전용면적별세대수,공가수,임대보증금,임대료,지하철,버스,단지내주차면수,n지역_0,...,공급유형그룹_단기공공임대,공급유형그룹_장기공공임대,공급유형그룹_저소득층,자격유형그룹_q1,자격유형그룹_q2,자격유형그룹_q3,자격유형그룹_q4,자격유형그룹_q5,등록차량수,prediction_label
100,1396,26.370001,141,26.0,5787000.0,79980.0,1.0,4.0,277.0,0,...,0,0,1,0,0,0,1,0,133.0,132.508093
101,1396,26.370001,20,26.0,5787000.0,79980.0,1.0,4.0,277.0,0,...,0,0,1,0,0,0,1,0,133.0,134.235190
102,1396,31.320000,298,26.0,6873000.0,94990.0,1.0,4.0,277.0,0,...,0,0,1,0,0,0,1,0,133.0,132.743566
103,1396,52.740002,9,26.0,11574000.0,159960.0,1.0,4.0,277.0,0,...,0,0,1,0,0,0,1,0,133.0,137.766873
104,1396,52.740002,4,26.0,11574000.0,159960.0,1.0,4.0,277.0,0,...,0,0,1,0,0,0,1,0,133.0,137.384289
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2259,239,49.200001,19,7.0,11346000.0,116090.0,0.0,1.0,166.0,1,...,0,0,0,0,0,0,0,1,146.0,151.456109
2260,239,51.080002,34,7.0,14005000.0,142310.0,0.0,1.0,166.0,1,...,0,0,0,0,0,0,0,1,146.0,143.487484
2261,239,51.730000,34,7.0,14005000.0,142310.0,0.0,1.0,166.0,1,...,0,0,0,0,0,0,0,1,146.0,145.102649
2262,239,51.959999,114,7.0,14005000.0,142310.0,0.0,1.0,166.0,1,...,0,0,0,0,0,0,0,1,146.0,145.568978
