<h1> Library 

In [275]:
# Standard library imports
import os 
import glob
import numpy as np
import itertools

# Third party imports
import pandas as pd
from sklearn.model_selection import train_test_split
from lightgbm import LGBMRegressor
from sklearn.model_selection import KFold 
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
from xgboost import XGBRegressor
from catboost import CatBoostRegressor
import lightgbm as LGB
from sklearn.cross_decomposition import PLSRegression
from sklearn.linear_model import Lasso,ElasticNet,Ridge
from sklearn.svm import SVR
import re
from tqdm import tqdm
import plotly 
import plotly.express as px
import plotly.graph_objects as go 
from plotly.subplots import make_subplots
import matplotlib
from matplotlib import font_manager, rc
import shap
from sklearn.feature_selection import RFE
from sklearn.feature_selection import SelectPercentile

# Dacon plotly 그림 업로드 
pd.options.plotting.backend = 'plotly'
## plotly.io를 import 한 후 renderers 기본값을 꼭 "notebook_connected" 로 설정해주시기 바랍니다.
import plotly.io as pio
pio.renderers.default = "notebook_connected"

#options 
pd.set_option('display.max_columns', None)
rc('font', family='AppleGothic')
matplotlib.rcParams['axes.unicode_minus'] = False


<h1> Data Prep 

In [276]:
df_train = pd.read_csv("train_processing.csv",index_col  = 0)
df_test = pd.read_csv("test_processing.csv",index_col  = 0)
df_gender = pd.read_csv("age_gender_info.csv")
df_sub = pd.read_csv("sample_submission.csv")

In [277]:
df_test.공급유형.unique()

array(['국민임대', '영구임대', '임대상가', '공공임대(50년)', '공공임대(10년)', '공공임대(분납)',
       '행복주택'], dtype=object)

In [278]:
df_train = df_train.drop(df_train[df_train.지역 == "서울특별시"].index,axis = 0)

In [279]:
#train_error = ['C1095', 'C2051', 'C1218', 'C1894', 'C2483', 'C1502', 'C1988' ,
#              'C2431', 'C1649', 'C2085', 'C1397' ,'C1036' ]

#df_train = df_train.query(' 단지코드 not in @train_error')

In [280]:
df_train.임대료 = df_train.임대료.astype("float")

In [281]:
df_train[df_train.임대료.isna()]

Unnamed: 0,단지코드,총세대수,임대건물구분,지역,공급유형,전용면적,전용면적별세대수,공가수,자격유형,임대보증금,임대료,도보 10분거리 내 지하철역 수(환승노선 수 반영),도보 10분거리 내 버스정류장 수,단지내주차면수,등록차량수


단지 코드 중첩, 임대건물 구분, 지역, 공급유형, 전용면적 등 정보 병합 필요 

In [282]:
display(df_train.describe())
display(df_test.describe())

Unnamed: 0,총세대수,전용면적,전용면적별세대수,공가수,임대보증금,임대료,도보 10분거리 내 지하철역 수(환승노선 수 반영),도보 10분거리 내 버스정류장 수,단지내주차면수,등록차량수
count,2884.0,2884.0,2884.0,2884.0,2884.0,2884.0,2673.0,2880.0,2884.0,2884.0
mean,887.848821,44.652458,103.306172,12.997573,22363780.0,202376.3,0.175084,3.687847,596.24896,553.148058
std,517.236487,32.110945,133.337497,10.815307,17100290.0,318532.1,0.427384,2.659721,393.928735,431.272884
min,26.0,12.62,1.0,0.0,0.0,0.0,0.0,0.0,13.0,13.0
25%,511.0,32.1,14.0,4.0,11530000.0,109960.0,0.0,2.0,277.0,209.0
50%,775.0,39.91,60.0,11.0,18061000.0,161020.0,0.0,3.0,515.0,479.0
75%,1116.0,51.49,144.0,20.0,27780750.0,236492.5,0.0,4.0,812.0,763.0
max,2568.0,583.4,1865.0,55.0,194080000.0,15154400.0,3.0,20.0,1798.0,2550.0


Unnamed: 0,총세대수,전용면적,전용면적별세대수,공가수,임대보증금,임대료,도보 10분거리 내 지하철역 수(환승노선 수 반영),도보 10분거리 내 버스정류장 수,단지내주차면수
count,1022.0,1022.0,1022.0,1022.0,1003.0,1003.0,980.0,1022.0,1022.0
mean,862.080235,43.712084,100.414873,15.544031,22649530.0,188660.2,0.136735,4.626223,548.771037
std,536.340894,35.8879,125.997855,11.07014,17006630.0,177111.4,0.4355,5.414568,342.636703
min,75.0,9.96,1.0,0.0,1250000.0,0.0,0.0,1.0,29.0
25%,488.0,33.135,14.0,6.0,11827000.0,109110.0,0.0,2.0,286.0
50%,745.0,39.72,60.0,15.0,18726000.0,153180.0,0.0,3.0,458.0
75%,1161.0,47.4,140.0,23.0,28723000.0,223550.0,0.0,5.0,711.0
max,2572.0,583.4,1341.0,45.0,151120000.0,4022370.0,2.0,50.0,1696.0


<h2> Step 1

In [283]:
train_tmp = df_train.iloc[:,0:-1]
train_tmp['type'] = 'train'
test_tmp = df_test
test_tmp['type'] = 'test'

df_all = pd.concat([train_tmp, test_tmp], axis=0)

names = ["자격유형", "공급유형", "임대건물구분", "지역"]
fig = make_subplots(rows=2, cols=2, subplot_titles=("자격유형", "공급유형", "임대건물구분", "지역"))
postion = {'0' : [1,1], '1' : [1,2], '2' : [2,1], '3' : [2,2]}

for order, name in enumerate(names):
    row, col = postion[str(order)][0], postion[str(order)][1]
    fig.add_trace(go.Bar(name='Train', x=df_all[name], y=df_all[name]), row=row, col=col)
    fig.add_trace(go.Bar(name='Test', x=df_all[name], y=df_all[name]), row=row, col=col)

fig.update_layout(barmode='stack', 
    autosize=False,
    width=800,
    height=600,)
# fig.show()

del train_tmp,test_tmp,df_all

<h2> Train vs Test 명목 변수 차집합 

In [255]:
columns = ['임대건물구분', '지역', '공급유형', '자격유형']
for col in columns:
    complement = list(set(df_train[col].unique()) - set(df_test[col].unique()))
    print(f"Train 데이터 기준 차집합 {col} : {complement}")
    

Train 데이터 기준 차집합 임대건물구분 : []
Train 데이터 기준 차집합 지역 : []
Train 데이터 기준 차집합 공급유형 : ['공공임대(5년)', '공공분양']
Train 데이터 기준 차집합 자격유형 : ['O', 'F', 'B']


Train 데이터에만 있는 값들 제거 필요할까? 
그럼 얼마나 포함하고 있을까? <br>
100개 정도 포함하고 있네 어떻게 처리하지?

버릴까? 다른 이름으로 병합할까?



In [256]:
options = True 

if options:
    # 방안 1  값 통일 
    df_train.loc[df_train.지역.isin(['서울특별시']), '지역'] = '이외'
    df_train.loc[df_train.공급유형.isin(['공공임대(5년)', '장기전세', '공공분양']), '공급유형'] = '이외'
    df_train.loc[df_train.자격유형.isin(['O', 'B', 'F']), '공급유형'] = '이외'
else:
    #방안 2 Drop

    df_train = df_train.loc[df_train.지역 != '서울특별시']

    mask = df_train.공급유형 != '공공분양'
    mask &= df_train.공급유형 != '장기전세'
    mask &= df_train.공급유형 != '공공임대(5년)'
    df_train = df_train.loc[mask]

    mask = df_train.자격유형 != 'F'
    mask &= df_train.자격유형 != 'B'
    mask &= df_train.자격유형 != 'O'
    df_train = df_train.loc[mask]


<h3> for Train data

In [257]:
print (f'train data \n{df_train.isnull().sum()}')

train data 
단지코드                              0
총세대수                              0
임대건물구분                            0
지역                                0
공급유형                              0
전용면적                              0
전용면적별세대수                          0
공가수                               0
자격유형                              0
임대보증금                             0
임대료                               0
도보 10분거리 내 지하철역 수(환승노선 수 반영)    211
도보 10분거리 내 버스정류장 수                4
단지내주차면수                           0
등록차량수                             0
dtype: int64


공란이 제법 있군

In [258]:
## 공란 처리 및 기타 작업 
## 문자열 처리 
df_train.rename(columns={"도보 10분거리 내 지하철역 수(환승노선 수 반영)" : "지하철", "도보 10분거리 내 버스정류장 수" : "버스"}, inplace=True)
#df_train[['임대보증금','임대료']] = df_train[['임대보증금', '임대료']].fillna("0").replace("-", "0").astype(int)

## 지하철 버스 공란 변경하기 
df_train['지하철'].fillna(0,inplace=True)
df_train['버스'].fillna(0,inplace=True)

##면적은 게속 변경 하면서 확인할것 
df_train['전용면적'] = df_train['전용면적'] //3*3
df_train['전용면적'] = np.where(df_train['전용면적'] > 100, 100, df_train['전용면적'])
df_train['전용면적'] = np.where(df_train['전용면적'] < 15, 15, df_train['전용면적'])

if df_train.isnull().sum().sum() != 0:
    print (df_train.isnull().sum())
else:
    print("공란 없어요")

## 카데코리화 
df_train.loc[:,'임대용총전용면적'] = df_train.loc[:,'전용면적'] * df_train.loc[:,'전용면적별세대수']
df_train.loc[:,"임대건물구분"] = df_train.loc[:,"임대건물구분"].astype('category').cat.codes
df_train.loc[:,'지역'] = df_train.loc[:,'지역'].astype('category').cat.codes
df_train.loc[:,'공급유형'] = df_train.loc[:,'공급유형'].astype('category').cat.codes
df_train.loc[:,'자격유형'] = df_train.loc[:,'자격유형'].astype('category').cat.codes
df_train['key'] = df_train['임대건물구분'].astype(str).str.cat(df_train['공급유형'].astype(str), sep='-').str.cat(df_train['자격유형'].astype(str), sep='-')


공란 없어요


<h3> for df_test

임대료 있고, 임대보증금 있는데 공란이네, c2411은 A로, c2253은 C로 넣으면 되겠네 

In [259]:
## 빠진 자격 유형 
df_test.loc[(df_test.자격유형.isnull()) & (df_test.단지코드 == "C2411"), '자격유형'] = 'A'
df_test.loc[(df_test.자격유형.isnull()) & (df_test.단지코드 == "C2253"), '자격유형'] = 'C'

## 문자열 처리 
df_test.rename(columns={"도보 10분거리 내 지하철역 수(환승노선 수 반영)" : "지하철", "도보 10분거리 내 버스정류장 수" : "버스"}, inplace=True)
df_test[['임대보증금','임대료']] = df_test[['임대보증금', '임대료']].fillna("0").replace("-", "0").astype(int)

## 지하철 버스 공란 변경하기 
df_test['지하철'].fillna(0,inplace=True)
df_test['버스'].fillna(0,inplace=True)

##면적은 게속 변경 하면서 확인할것 
df_test['전용면적'] = df_test['전용면적'] //3*3
df_test['전용면적'] = np.where(df_test['전용면적'] > 100, 100, df_test['전용면적'])
df_test['전용면적'] = np.where(df_test['전용면적'] < 15, 15, df_test['전용면적'])


if df_test.isnull().sum().sum() != 0:
    print(df_test.isnull().sum())
else:
    print("공란 없어요")

df_test.loc[:,'임대용총전용면적'] = df_test.loc[:,'전용면적'] * df_test.loc[:,'전용면적별세대수']
df_test.loc[:,"임대건물구분"] = df_test.loc[:,"임대건물구분"].astype('category').cat.codes
df_test.loc[:,'지역'] = df_test.loc[:,'지역'].astype('category').cat.codes
df_test.loc[:,'공급유형'] = df_test.loc[:,'공급유형'].astype('category').cat.codes
df_test.loc[:,'자격유형'] = df_test.loc[:,'자격유형'].astype('category').cat.codes
df_test['key'] = df_test['임대건물구분'].astype(str).str.cat(df_test['공급유형'].astype(str), sep='-').str.cat(df_test['자격유형'].astype(str), sep='-')

# df_test.loc[df_test.자격유형.isnull()]

공란 없어요


<h2>성별 정보 

In [260]:
# 성별 구성 지역 카테고리 
df_gender.loc[:,'지역'] = df_gender.loc[:,'지역'].astype('category').cat.codes



<h2> 명목변수 확인

어떻게 병합할 것인가? 명목 변수 중복은 없니? 


단지코드 601 88~90 모든 행열의 값이 동일 단순 실수? 
87과 비교하면 공급유형과 자격유형이 다르네, 공급유형과 자격유형에 따라 구분하기 위함인듯 

"임대건물구분", "공금유형", "자격유형"으로 Key를 구성하였을 때 같은 단지코드라도 다른 key값이 발생됨 

key 값마다 예상 등록 차량수를 예측하면 좋겠지만 훈련 데이터의 동록 차량수는 총합만 기록되어 있음 

"임대건물구분", "공금유형", "자격유형"은 제외해야 하나?

우선은 신경쓰지 말자. 나중에 고민

<h1> Test Data, Test Data <br> 
<h3> 주차 면수와 나머지 변수의 상관관계

<h1> Step 2

<h2> 면적 처리 & 코드 그룹

In [119]:
df_train.loc[:,'총전용면적'] = df_train.loc[:,'전용면적'] * df_train.loc[:,'전용면적별세대수']

codes = df_train.단지코드.unique()
areas = np.sort(df_train.전용면적.unique())

df_train_edited = pd.DataFrame()
columns = ['단지코드', '등록차량수', '총세대수', '지역', '공가수','지하철', '버스', '단지내주차면수']

for order, code in enumerate(codes):
    temp_by_code = df_train.loc[df_train.단지코드==code].reset_index(drop=True)
    ## 원 계열 값 그냥 가져오기 
    df_train_edited.loc[order, columns] = temp_by_code.loc[0, columns]             
    df_train_edited.loc[order, "총임대가구수"] = temp_by_code.전용면적별세대수.sum()

    for area in areas:
        temp_by_code_areas = temp_by_code.loc[temp_by_code.전용면적==area].reset_index(drop=True)

        if temp_by_code_areas.shape[0] !=0:
            df_train_edited.loc[order, f'면적_{int(area)}'] = temp_by_code_areas.전용면적별세대수.sum() / temp_by_code_areas.총세대수[0]
        else:
            df_train_edited.loc[order, f'면적_{int(area)}'] = 0


df_train_edited["임대비율"] = df_train_edited.총임대가구수 / df_train_edited.총세대수
df_train_edited["가구당주차면수"] = df_train_edited.단지내주차면수 / df_train_edited.총세대수

## gender 정보 병합 
df_train_edited = pd.merge(df_train_edited, df_gender, left_on= [ "지역"], right_on= ["지역"], how='left')


df_train_edited

Unnamed: 0,단지코드,등록차량수,총세대수,지역,공가수,지하철,버스,단지내주차면수,총임대가구수,면적_15,면적_18,면적_21,면적_24,면적_27,면적_30,면적_33,면적_36,면적_39,면적_42,면적_45,면적_48,면적_51,면적_54,면적_57,면적_60,면적_63,면적_66,면적_72,면적_75,면적_78,면적_81,면적_84,면적_100,임대비율,가구당주차면수,10대미만(여자),10대미만(남자),10대(여자),10대(남자),20대(여자),20대(남자),30대(여자),30대(남자),40대(여자),40대(남자),50대(여자),50대(남자),60대(여자),60대(남자),70대(여자),70대(남자),80대(여자),80대(남자),90대(여자),90대(남자),100대(여자),100대(남자)
0,C2483,1015.0,900.0,3.0,38.0,0.0,3.0,1425.0,900.0,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.165556,0.0,0.000000,0.000000,0.738889,0.000000,0.095556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.583333,0.030158,0.033195,0.056346,0.061360,0.060096,0.067859,0.053433,0.049572,0.083660,0.072613,0.087149,0.072146,0.082684,0.063889,0.047717,0.030172,0.029361,0.011211,0.005578,0.001553,0.000234,0.000014
1,C2515,205.0,545.0,2.0,17.0,0.0,3.0,624.0,545.0,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.506422,0.000000,0.146789,0.0,0.104587,0.000000,0.242202,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.144954,0.027400,0.026902,0.053257,0.055568,0.064920,0.070618,0.056414,0.057550,0.077092,0.067600,0.086873,0.072570,0.087201,0.069562,0.048357,0.033277,0.027361,0.011295,0.004910,0.001086,0.000179,0.000010
2,C1407,1064.0,1216.0,6.0,13.0,1.0,1.0,1285.0,1216.0,0.000000,0.0,0.0,0.000000,0.000000,0.320724,0.000000,0.000000,0.297697,0.0,0.279605,0.000000,0.101974,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.056743,0.028197,0.029092,0.040490,0.042793,0.060834,0.064247,0.068654,0.066848,0.074667,0.067925,0.085751,0.068819,0.088468,0.070261,0.051010,0.037143,0.032455,0.013751,0.006494,0.001740,0.000298,0.000066
3,C1945,730.0,755.0,1.0,6.0,1.0,3.0,734.0,755.0,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.317881,0.0,0.000000,0.000000,0.401325,0.000000,0.280795,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.972185,0.038030,0.039507,0.052546,0.053990,0.058484,0.059894,0.072331,0.068704,0.083208,0.078355,0.077345,0.064859,0.074237,0.058419,0.042422,0.032725,0.025136,0.012354,0.005390,0.001707,0.000290,0.000067
4,C1470,553.0,696.0,11.0,14.0,0.0,2.0,645.0,696.0,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.364943,0.000000,0.0,0.281609,0.000000,0.353448,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.926724,0.031930,0.034647,0.059154,0.063379,0.065517,0.070339,0.057977,0.053471,0.078358,0.075724,0.079966,0.077795,0.071465,0.062352,0.042535,0.030704,0.027340,0.009312,0.005965,0.001458,0.000149,0.000464
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
410,C2586,57.0,90.0,12.0,7.0,0.0,3.0,66.0,90.0,0.133333,0.0,0.0,0.466667,0.000000,0.000000,0.000000,0.400000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.733333,0.028089,0.029065,0.059685,0.060080,0.066262,0.070322,0.052027,0.046596,0.077005,0.066645,0.081408,0.074382,0.076636,0.068042,0.051025,0.035748,0.035049,0.012641,0.007223,0.001898,0.000158,0.000013
411,C2035,246.0,492.0,0.0,24.0,0.0,1.0,521.0,492.0,0.000000,0.0,0.0,0.000000,0.317073,0.000000,0.000000,0.317073,0.000000,0.0,0.365854,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.058943,0.028888,0.030682,0.051287,0.052712,0.060374,0.063157,0.059821,0.054321,0.076201,0.068002,0.087266,0.072177,0.088423,0.070014,0.047732,0.032760,0.033515,0.013027,0.007628,0.001677,0.000319,0.000017
412,C2020,19.0,40.0,7.0,7.0,1.0,2.0,25.0,40.0,0.500000,0.0,0.0,0.125000,0.000000,0.000000,0.000000,0.375000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.625000,0.022003,0.022947,0.032681,0.035512,0.053796,0.057233,0.047049,0.048866,0.061952,0.060769,0.082899,0.068855,0.109297,0.085294,0.078743,0.053388,0.047908,0.020228,0.008043,0.002240,0.000268,0.000028
413,C2437,16.0,90.0,14.0,12.0,0.0,1.0,30.0,90.0,0.000000,0.0,0.0,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.333333,0.031369,0.031711,0.059077,0.062422,0.067975,0.072622,0.065095,0.067303,0.078860,0.073418,0.079299,0.069167,0.070278,0.057692,0.042296,0.028682,0.024514,0.010900,0.005429,0.001549,0.000219,0.000123


In [17]:
df_test.loc[:,'총전용면적'] = df_test.loc[:,'전용면적'] * df_test.loc[:,'전용면적별세대수']

codes = df_test.단지코드.unique()
areas = np.sort(df_test.전용면적.unique())

df_test_edited = pd.DataFrame()
columns = ['단지코드', '총세대수', '지역', '공가수','지하철', '버스', '단지내주차면수']

for order, code in enumerate(codes):
    temp_by_code = df_test.loc[df_test.단지코드==code].reset_index(drop=True)
    ## 원 계열 값 그냥 가져오기 
    df_test_edited.loc[order, columns] = temp_by_code.loc[0, columns]             
    df_test_edited.loc[order, "총임대가구수"] = temp_by_code.전용면적별세대수.sum()

    for area in areas:
        temp_by_code_areas = temp_by_code.loc[temp_by_code.전용면적==area].reset_index(drop=True)

        if temp_by_code_areas.shape[0] !=0:
            df_test_edited.loc[order, f'면적_{int(area)}'] = temp_by_code_areas.전용면적별세대수.sum() / temp_by_code_areas.총세대수[0]
        else:
            df_test_edited.loc[order, f'면적_{int(area)}'] = 0


df_test_edited["임대비율"] = df_test_edited.총임대가구수 / df_test_edited.총세대수
df_test_edited["가구당주차면수"] = df_test_edited.단지내주차면수 / df_test_edited.총세대수
## gender 정보 병합 
df_test_edited = pd.merge(df_test_edited, df_gender, left_on= [ "지역"], right_on= ["지역"], how='left')


df_test_edited

Unnamed: 0,단지코드,총세대수,지역,공가수,지하철,버스,단지내주차면수,총임대가구수,면적_15,면적_18,면적_21,면적_24,면적_27,면적_30,면적_33,면적_36,면적_39,면적_42,면적_45,면적_48,면적_51,면적_54,면적_57,면적_60,면적_72,면적_75,면적_81,면적_84,면적_100,임대비율,가구당주차면수,10대미만(여자),10대미만(남자),10대(여자),10대(남자),20대(여자),20대(남자),30대(여자),30대(남자),40대(여자),40대(남자),50대(여자),50대(남자),60대(여자),60대(남자),70대(여자),70대(남자),80대(여자),80대(남자),90대(여자),90대(남자),100대(여자),100대(남자)
0,C1072,754.0,1.0,14.0,0.0,2.0,683.0,754.0,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.153846,0.0,0.347480,0.0,0.498674,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,1.000000,0.905836,0.038030,0.039507,0.052546,0.053990,0.058484,0.059894,0.072331,0.068704,0.083208,0.078355,0.077345,0.064859,0.074237,0.058419,0.042422,0.032725,0.025136,0.012354,0.005390,0.001707,0.000290,0.000067
1,C1128,1354.0,1.0,9.0,0.0,3.0,1216.0,1354.0,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.310192,0.0,0.262925,0.0,0.426883,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,1.000000,0.898080,0.038030,0.039507,0.052546,0.053990,0.058484,0.059894,0.072331,0.068704,0.083208,0.078355,0.077345,0.064859,0.074237,0.058419,0.042422,0.032725,0.025136,0.012354,0.005390,0.001707,0.000290,0.000067
2,C1456,619.0,7.0,18.0,0.0,16.0,547.0,619.0,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.132472,0.000000,0.266559,0.0,0.316640,0.0,0.213247,0.0,0.071082,0.0,0.0,0.0,0.0,0.0,0.0,1.000000,0.883683,0.022003,0.022947,0.032681,0.035512,0.053796,0.057233,0.047049,0.048866,0.061952,0.060769,0.082899,0.068855,0.109297,0.085294,0.078743,0.053388,0.047908,0.020228,0.008043,0.002240,0.000268,0.000028
3,C1840,593.0,11.0,7.0,0.0,3.0,543.0,593.0,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.564924,0.0,0.293423,0.0,0.141653,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,1.000000,0.915683,0.031930,0.034647,0.059154,0.063379,0.065517,0.070339,0.057977,0.053471,0.078358,0.075724,0.079966,0.077795,0.071465,0.062352,0.042535,0.030704,0.027340,0.009312,0.005965,0.001458,0.000149,0.000464
4,C1332,1297.0,1.0,11.0,0.0,2.0,1112.0,1297.0,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.327679,0.0,0.437934,0.0,0.234387,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,1.000000,0.857363,0.038030,0.039507,0.052546,0.053990,0.058484,0.059894,0.072331,0.068704,0.083208,0.078355,0.077345,0.064859,0.074237,0.058419,0.042422,0.032725,0.025136,0.012354,0.005390,0.001707,0.000290,0.000067
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145,C2456,349.0,12.0,17.0,0.0,4.0,270.0,346.0,0.000000,0.0,0.000000,0.320917,0.000000,0.0,0.000000,0.395415,0.000000,0.0,0.275072,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.991404,0.773639,0.028089,0.029065,0.059685,0.060080,0.066262,0.070322,0.052027,0.046596,0.077005,0.066645,0.081408,0.074382,0.076636,0.068042,0.051025,0.035748,0.035049,0.012641,0.007223,0.001898,0.000158,0.000013
146,C1266,596.0,14.0,35.0,0.0,1.0,593.0,591.0,0.000000,0.0,0.000000,0.302013,0.000000,0.0,0.000000,0.510067,0.000000,0.0,0.179530,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.991611,0.994966,0.031369,0.031711,0.059077,0.062422,0.067975,0.072622,0.065095,0.067303,0.078860,0.073418,0.079299,0.069167,0.070278,0.057692,0.042296,0.028682,0.024514,0.010900,0.005429,0.001549,0.000219,0.000123
147,C2152,120.0,0.0,9.0,0.0,1.0,40.0,120.0,0.000000,0.0,0.000000,0.550000,0.000000,0.0,0.450000,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,1.000000,0.333333,0.028888,0.030682,0.051287,0.052712,0.060374,0.063157,0.059821,0.054321,0.076201,0.068002,0.087266,0.072177,0.088423,0.070014,0.047732,0.032760,0.033515,0.013027,0.007628,0.001677,0.000319,0.000017
148,C1267,675.0,2.0,38.0,0.0,1.0,467.0,670.0,0.074074,0.0,0.162963,0.302222,0.000000,0.0,0.000000,0.186667,0.000000,0.0,0.266667,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.992593,0.691852,0.027400,0.026902,0.053257,0.055568,0.064920,0.070618,0.056414,0.057550,0.077092,0.067600,0.086873,0.072570,0.087201,0.069562,0.048357,0.033277,0.027361,0.011295,0.004910,0.001086,0.000179,0.000010


# Features 추가 (numeric)

In [285]:
data = pd.concat([df_train,df_test],axis = 0).reset_index(drop = True)

In [313]:
data[data.지역=='세종특별자치시']

Unnamed: 0,단지코드,총세대수,임대건물구분,지역,공급유형,전용면적,전용면적별세대수,공가수,자격유형,임대보증금,임대료,도보 10분거리 내 지하철역 수(환승노선 수 반영),도보 10분거리 내 버스정류장 수,단지내주차면수,등록차량수,type,비율,총세대수비율,비율2
2370,C2051,1164,아파트,세종특별자치시,공공임대(10년),59.39,2,0.0,A,39000000.0,440000.0,0.0,3.0,755.0,780.0,,1.033113,1.541722,0.967949
2371,C2051,1164,아파트,세종특별자치시,공공임대(10년),59.87,48,0.0,A,39000000.0,440000.0,0.0,3.0,755.0,780.0,,1.033113,1.541722,0.967949
2372,C2051,1164,아파트,세종특별자치시,공공임대(10년),59.87,241,0.0,A,39000000.0,440000.0,0.0,3.0,755.0,780.0,,1.033113,1.541722,0.967949
2373,C2051,1164,아파트,세종특별자치시,공공임대(10년),59.96,3,0.0,A,39000000.0,440000.0,0.0,3.0,755.0,780.0,,1.033113,1.541722,0.967949
2374,C2051,1164,아파트,세종특별자치시,공공임대(10년),65.39,24,0.0,A,39000000.0,480000.0,0.0,3.0,755.0,780.0,,1.033113,1.541722,0.967949
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3809,C1229,453,아파트,세종특별자치시,행복주택,38.20,68,23.0,J,30101000.0,120400.0,0.0,2.0,317.0,,test,,1.429022,
3810,C1229,453,아파트,세종특별자치시,행복주택,38.20,8,23.0,J,30101000.0,120400.0,0.0,2.0,317.0,,test,,1.429022,
3811,C1229,453,아파트,세종특별자치시,행복주택,38.25,3,23.0,J,30101000.0,120400.0,0.0,2.0,317.0,,test,,1.429022,
3812,C1229,453,아파트,세종특별자치시,행복주택,38.28,4,23.0,J,30101000.0,120400.0,0.0,2.0,317.0,,test,,1.429022,


In [32]:
#양주옥정 7
#http://www.k-apt.go.kr/kaptinfo/openkaptinfo.do
data[data.단지코드=='C1095']
data.loc[2214:2221,'단지내주차면수']=1323
#주차면수 1323대

Unnamed: 0,단지코드,총세대수,임대건물구분,지역,공급유형,전용면적,전용면적별세대수,공가수,자격유형,임대보증금,임대료,도보 10분거리 내 지하철역 수(환승노선 수 반영),도보 10분거리 내 버스정류장 수,단지내주차면수,등록차량수
2214,C1095,1256,아파트,경기도,국민임대,29.95,66,37.0,A,11586000.0,151930.0,0.0,3.0,528.0,505.0
2215,C1095,1256,아파트,경기도,국민임대,36.9,36,37.0,A,13663000.0,189090.0,0.0,3.0,528.0,505.0
2216,C1095,1256,아파트,경기도,국민임대,36.98,102,37.0,A,13663000.0,189090.0,0.0,3.0,528.0,505.0
2217,C1095,1256,아파트,경기도,국민임대,36.98,320,37.0,A,13663000.0,189090.0,0.0,3.0,528.0,505.0
2218,C1095,1256,아파트,경기도,국민임대,46.94,178,37.0,A,25140000.0,240470.0,0.0,3.0,528.0,505.0
2219,C1095,1256,아파트,경기도,국민임대,46.96,240,37.0,A,25140000.0,240470.0,0.0,3.0,528.0,505.0
2220,C1095,1256,아파트,경기도,국민임대,51.7,202,37.0,A,30605000.0,262330.0,0.0,3.0,528.0,505.0
2221,C1095,1256,아파트,경기도,국민임대,59.94,112,37.0,A,38256000.0,318070.0,0.0,3.0,528.0,505.0


In [33]:
#세종 새롬동 새뜸마을7단지
#http://www.k-apt.go.kr/kaptinfo/openkaptinfo.do
data[data.단지코드=='C2051']
data.loc[2370:2392,'단지내주차면수']=1425
1425대

Unnamed: 0,단지코드,총세대수,임대건물구분,지역,공급유형,전용면적,전용면적별세대수,공가수,자격유형,임대보증금,임대료,도보 10분거리 내 지하철역 수(환승노선 수 반영),도보 10분거리 내 버스정류장 수,단지내주차면수,등록차량수
2370,C2051,1164,아파트,세종특별자치시,공공임대(10년),59.39,2,0.0,A,39000000.0,440000.0,0.0,3.0,755.0,780.0
2371,C2051,1164,아파트,세종특별자치시,공공임대(10년),59.87,48,0.0,A,39000000.0,440000.0,0.0,3.0,755.0,780.0
2372,C2051,1164,아파트,세종특별자치시,공공임대(10년),59.87,241,0.0,A,39000000.0,440000.0,0.0,3.0,755.0,780.0
2373,C2051,1164,아파트,세종특별자치시,공공임대(10년),59.96,3,0.0,A,39000000.0,440000.0,0.0,3.0,755.0,780.0
2374,C2051,1164,아파트,세종특별자치시,공공임대(10년),65.39,24,0.0,A,39000000.0,480000.0,0.0,3.0,755.0,780.0
2375,C2051,1164,아파트,세종특별자치시,공공임대(10년),72.82,64,0.0,A,49000000.0,520000.0,0.0,3.0,755.0,780.0
2376,C2051,1164,아파트,세종특별자치시,공공임대(10년),74.55,267,0.0,A,49000000.0,535000.0,0.0,3.0,755.0,780.0
2377,C2051,1164,아파트,세종특별자치시,공공임대(10년),79.61,7,0.0,A,55000000.0,570000.0,0.0,3.0,755.0,780.0
2378,C2051,1164,아파트,세종특별자치시,공공임대(10년),79.74,35,0.0,A,55000000.0,570000.0,0.0,3.0,755.0,780.0
2379,C2051,1164,아파트,세종특별자치시,공공임대(10년),79.93,3,0.0,A,55000000.0,570000.0,0.0,3.0,755.0,780.0


In [230]:
data[data.단지코드=='C1218']

Unnamed: 0,단지코드,총세대수,임대건물구분,지역,공급유형,전용면적,전용면적별세대수,공가수,자격유형,임대보증금,임대료,도보 10분거리 내 지하철역 수(환승노선 수 반영),도보 10분거리 내 버스정류장 수,단지내주차면수,등록차량수,type
2781,C1218,1048,아파트,경기도,행복주택,16.45,336,28.0,J,41200000.0,164800.0,0.0,6.0,1140.0,921.0,
2782,C1218,1048,아파트,경기도,행복주택,26.52,180,28.0,J,64400000.0,257600.0,0.0,6.0,1140.0,921.0,
2783,C1218,1048,아파트,경기도,행복주택,36.37,524,28.0,J,86800000.0,347200.0,0.0,6.0,1140.0,921.0,


In [42]:
#성남고등1
data[data.단지코드=='C1218']
data.loc[2781:2783,'총세대수']=1520
data=data.append({'단지코드' : 'C1218' , '총세대수' : 1520, '임대건물구분' : '아파트','지역' : '경기도', '공급유형' : 3,
             '전용면적': 26.52, '전용면적별세대수' : 286, '공가수' : 28.0, '자격유형': 0, '임대보증금' : 15596000,
             '임대료' : 142000, '도보 10분거리 내 지하철역 수(환승노선 수 반영)' : 0, '도보 10분거리 내 버스정류장 수':0,
             '단지내주차면수' : 1140.0,'등록차량수' : 921.0} , ignore_index=True)
data=data.append({'단지코드' : 'C1218' , '총세대수' : 1520, '임대건물구분' : '아파트','지역' : '경기도', '공급유형' : '국민,
             '전용면적': 44.15, '전용면적별세대수' : 194, '공가수' : 28.0, '자격유형': 'A', '임대보증금' : 42583000,
             '임대료' : 242000, '도보 10분거리 내 지하철역 수(환승노선 수 반영)' : 0, '도보 10분거리 내 버스정류장 수':0,
             '단지내주차면수' : 1140.0,'등록차량수' : 921.0} , ignore_index=True)






In [73]:
#평택고덕 ca1,ca2블록 
#http://www.k-apt.go.kr/kaptinfo/openkaptinfo.do

data[data.단지코드=='C1894']
data.loc[2814:2821,'총세대수']=594
data.loc[2814:2821,['총세대수','단지내주차면수']]=[594,427]
data=data.append({'단지코드' : 'C1894' , '총세대수' : 594, '임대건물구분' : '아파트','지역' : '경기도', '공급유형' : 7,
             '전용면적': 16.77, '전용면적별세대수' : 134, '공가수' : 13.0, '자격유형': 9, '임대보증금' : 18915000.0,
             '임대료' : 75660.0, '도보 10분거리 내 지하철역 수(환승노선 수 반영)' : 0, '도보 10분거리 내 버스정류장 수':4,
             '단지내주차면수' : 427.0,'등록차량수' : 419.0} , ignore_index=True)
data=data.append({'단지코드' : 'C1894' , '총세대수' : 594, '임대건물구분' : '아파트','지역' : '경기도', '공급유형' : 7,
             '전용면적': 23.92, '전용면적별세대수' : 60, '공가수' : 13.0, '자격유형': 9, '임대보증금' : 26316000.0,
             '임대료' : 105260.0, '도보 10분거리 내 지하철역 수(환승노선 수 반영)' : 0, '도보 10분거리 내 버스정류장 수':4,
             '단지내주차면수' : 427.0,'등록차량수' : 419.0} , ignore_index=True)
data=data.append({'단지코드' : 'C1894' , '총세대수' : 594, '임대건물구분' : '아파트','지역' : '경기도', '공급유형' : '행복주택,
             '전용면적': 30.10, '전용면적별세대수' : 8, '공가수' : 13.0, '자격유형': 'J', '임대보증금' : 33307000.0,
             '임대료' : 133220.0, '도보 10분거리 내 지하철역 수(환승노선 수 반영)' : 0, '도보 10분거리 내 버스정류장 수':4,
             '단지내주차면수' : 427.0,'등록차량수' : 419.0} , ignore_index=True)
data=data.append({'단지코드' : 'C1894' , '총세대수' : 594, '임대건물구분' : '아파트','지역' : '경기도', '공급유형' : '행복주택',
             '전용면적': 36.87, '전용면적별세대수' : 96, '공가수' : 13.0, '자격유형': 'J', '임대보증금' : 38652000.0,
             '임대료' : 154610.0, '도보 10분거리 내 지하철역 수(환승노선 수 반영)' : 0, '도보 10분거리 내 버스정류장 수':4,
             '단지내주차면수' : 427.0,'등록차량수' : 419.0} , ignore_index=True)






In [77]:
#경북 포항시 양덕동 장량휴먼시아 1단지
#http://www.k-apt.go.kr/kaptinfo/openkaptinfo.do
data[data.단지코드=='C2483']
data.loc[0:7,'단지내주차면수']=813



In [78]:
#울산 울주군 구영주공2단지
#http://www.k-apt.go.kr/kaptinfo/openkaptinfo.do
data[data.단지코드=='C1502']
data.loc[1516:1517,'단지내주차면수']=459

Unnamed: 0,단지코드,총세대수,임대건물구분,지역,공급유형,전용면적,전용면적별세대수,공가수,자격유형,임대보증금,임대료,도보 10분거리 내 지하철역 수(환승노선 수 반영),도보 10분거리 내 버스정류장 수,단지내주차면수,등록차량수
1516,C1502,407,아파트,울산광역시,국민임대,46.9,241,7.0,A,19895000.0,160400.0,0.0,5.0,552.0,438.0
1517,C1502,407,아파트,울산광역시,국민임대,46.9,166,7.0,A,19895000.0,160400.0,0.0,5.0,552.0,438.0


In [80]:
#전라남도 목포시 관해로5번길 29 포미LH 3단지
data[data.단지코드=='C1988']
data.loc[1224:1227,'단지내주차면수']=395




Unnamed: 0,단지코드,총세대수,임대건물구분,지역,공급유형,전용면적,전용면적별세대수,공가수,자격유형,임대보증금,임대료,도보 10분거리 내 지하철역 수(환승노선 수 반영),도보 10분거리 내 버스정류장 수,단지내주차면수,등록차량수
1224,C1988,475,아파트,전라남도,국민임대,36.63,200,12.0,A,12026000.0,87940.0,0.0,2.0,722.0,402.0
1225,C1988,475,아파트,전라남도,국민임대,36.63,43,12.0,A,12026000.0,87940.0,0.0,2.0,722.0,402.0
1226,C1988,475,아파트,전라남도,국민임대,46.22,204,12.0,A,15304000.0,103850.0,0.0,2.0,722.0,402.0
1227,C1988,475,아파트,전라남도,국민임대,46.22,28,12.0,A,15304000.0,103850.0,0.0,2.0,722.0,402.0


In [22]:
data.loc[2316:2317,'단지코드']='C1649'

In [424]:
#C2431 C1649 동일단지
data.loc[2316:2317,'단지코드']='C1649'
data.loc[2259:2262,['총세대수','등록차량수']]=[1047,1214]
data.loc[2316:2317,['총세대수','등록차량수']]=[1047,1214]


In [426]:
#C1804 파주윤정가람5단지
data.loc[2028:2031,['총세대수','단지내주차면수']]=[821,991]

In [427]:
#C1036 , C2675 동일단지
data.loc[1497:1503,['총세대수','등록차량수']]=[1254,1279]
data.loc[3463:3470,'총세대수']=1279

In [92]:
#충청북도 진천군 덕산읍 연미로 131 천년나무 7단지
#https://www.myhome.go.kr/hws/portal/sch/selectRsdtSummaryView.do?pblancId=9229
data[data.단지코드=='C1490']
data.loc[2120,'전용면적별세대수']=105
data.loc[2121,'전용면적별세대수']=84
data.loc[2122,'전용면적별세대수']=472
data.loc[2123,'전용면적별세대수']=88

Unnamed: 0,단지코드,총세대수,임대건물구분,지역,공급유형,전용면적,전용면적별세대수,공가수,자격유형,임대보증금,임대료,도보 10분거리 내 지하철역 수(환승노선 수 반영),도보 10분거리 내 버스정류장 수,단지내주차면수,등록차량수
2120,C1490,749,아파트,충청북도,공공임대(10년),74.42,105,24.0,A,26775000.0,378000.0,0.0,3.0,759.0,899.0
2121,C1490,749,아파트,충청북도,공공임대(10년),74.92,75,24.0,A,26775000.0,378000.0,0.0,3.0,759.0,899.0
2122,C1490,749,아파트,충청북도,공공임대(10년),84.53,415,24.0,A,35910000.0,409500.0,0.0,3.0,759.0,899.0
2123,C1490,749,아파트,충청북도,공공임대(10년),84.86,79,24.0,A,35910000.0,409500.0,0.0,3.0,759.0,899.0


In [95]:
#경상북도 김천시 용전3로 10 천년나무 3단지
data[data.단지코드=='C2497']
data.loc[3613,'전용면적별세대수']=114
data.loc[3614,'전용면적별세대수']=60
data.loc[3615,'전용면적별세대수']=275
data.loc[3616,'전용면적별세대수']=39

Unnamed: 0,단지코드,총세대수,임대건물구분,지역,공급유형,전용면적,전용면적별세대수,공가수,자격유형,임대보증금,임대료,도보 10분거리 내 지하철역 수(환승노선 수 반영),도보 10분거리 내 버스정류장 수,단지내주차면수,등록차량수
3613,C2497,488,아파트,경상북도,공공임대(10년),59.95,89,37.0,A,31000000.0,416000.0,0.0,6.0,624.0,
3614,C2497,488,아파트,경상북도,공공임대(10년),59.99,41,37.0,A,31000000.0,416000.0,0.0,6.0,624.0,
3615,C2497,488,아파트,경상북도,공공임대(10년),74.9,215,37.0,A,35800000.0,550000.0,0.0,6.0,624.0,
3616,C2497,488,아파트,경상북도,공공임대(10년),74.98,21,37.0,A,35800000.0,550000.0,0.0,6.0,624.0,


In [100]:
#전라남도 나주시 그린로 154 빛가람LH 3단지 B8블록
data[data.단지코드=='C2620']
data.loc[2130,'전용면적별세대수']=227
data.loc[2131,'전용면적별세대수']=60
data.loc[2132,'전용면적별세대수']=316
data.loc[2133,'전용면적별세대수']=266
data.loc[2134,'전용면적별세대수']=160

Unnamed: 0,단지코드,총세대수,임대건물구분,지역,공급유형,전용면적,전용면적별세대수,공가수,자격유형,임대보증금,임대료,도보 10분거리 내 지하철역 수(환승노선 수 반영),도보 10분거리 내 버스정류장 수,단지내주차면수,등록차량수
2130,C2620,1029,아파트,전라남도,공공임대(10년),59.89,53,28.0,A,26225000.0,450000.0,0.0,6.0,1061.0,1709.0
2131,C2620,1029,아파트,전라남도,공공임대(10년),59.9,197,28.0,A,26225000.0,450000.0,0.0,6.0,1061.0,1709.0
2132,C2620,1029,아파트,전라남도,공공임대(10년),74.91,271,28.0,A,27274000.0,535000.0,0.0,6.0,1061.0,1709.0
2133,C2620,1029,아파트,전라남도,공공임대(10년),84.72,146,28.0,A,37764000.0,590000.0,0.0,6.0,1061.0,1709.0
2134,C2620,1029,아파트,전라남도,공공임대(10년),84.96,239,28.0,A,37764000.0,590000.0,0.0,6.0,1061.0,1709.0


In [None]:
#train[train.단지코드 == 'C1740']
#김천 LH천년나무2단지아파트
train.loc[2174, '전용면적별세대수'] = 305
train.loc[2175, '전용면적별세대수'] = 126
train.loc[2176, '전용면적별세대수'] = 184
train.loc[2177, '전용면적별세대수'] = 168

#train[train.단지코드 == 'C2405']
#경남혁신도시LH5단지아파트
train.loc[2222, '전용면적별세대수'] = 100
train.loc[2223, '전용면적별세대수'] = 100
train.loc[2224, '전용면적별세대수'] = 400

#train[train.단지코드 == 'C1804']
#경기도 남양주시 별내동 808 별가람마을LH1-4단지
train.loc[2039:2042, '단지내주차면수'] = 681
train.loc[2039, '전용면적별세대수'] = 89
train.loc[2040, '전용면적별세대수'] = 279
train.loc[2041, '전용면적별세대수'] = 82
train.loc[2042, '전용면적별세대수'] = 28

In [304]:
data[data.단지코드=='C1350']

Unnamed: 0,단지코드,총세대수,임대건물구분,지역,공급유형,전용면적,전용면적별세대수,공가수,자격유형,임대보증금,임대료,도보 10분거리 내 지하철역 수(환승노선 수 반영),도보 10분거리 내 버스정류장 수,단지내주차면수,등록차량수,type,비율,총세대수비율,비율2
2275,C1350,1401,아파트,대전광역시,공공분양,74.94,317,2.0,D,0.0,0.0,,6.0,1636.0,2315.0,,1.415037,0.856357,0.706695
2276,C1350,1401,아파트,대전광역시,공공분양,74.94,137,2.0,D,0.0,0.0,,6.0,1636.0,2315.0,,1.415037,0.856357,0.706695
2277,C1350,1401,아파트,대전광역시,공공분양,74.94,22,2.0,D,0.0,0.0,,6.0,1636.0,2315.0,,1.415037,0.856357,0.706695
2278,C1350,1401,아파트,대전광역시,공공분양,84.94,164,2.0,D,0.0,0.0,,6.0,1636.0,2315.0,,1.415037,0.856357,0.706695
2279,C1350,1401,아파트,대전광역시,공공분양,84.94,19,2.0,D,0.0,0.0,,6.0,1636.0,2315.0,,1.415037,0.856357,0.706695
2280,C1350,1401,아파트,대전광역시,공공분양,84.96,26,2.0,D,0.0,0.0,,6.0,1636.0,2315.0,,1.415037,0.856357,0.706695
2281,C1350,1401,아파트,대전광역시,공공분양,84.97,26,2.0,D,0.0,0.0,,6.0,1636.0,2315.0,,1.415037,0.856357,0.706695
2282,C1350,1401,아파트,대전광역시,공공임대(10년),51.99,106,2.0,A,28013000.0,408600.0,,6.0,1636.0,2315.0,,1.415037,0.856357,0.706695
2283,C1350,1401,아파트,대전광역시,공공임대(10년),59.91,13,2.0,A,37474000.0,519350.0,,6.0,1636.0,2315.0,,1.415037,0.856357,0.706695
2284,C1350,1401,아파트,대전광역시,공공임대(10년),59.92,223,2.0,A,37385000.0,516130.0,,6.0,1636.0,2315.0,,1.415037,0.856357,0.706695


In [310]:
data[data.임대료==0]

Unnamed: 0,단지코드,총세대수,임대건물구분,지역,공급유형,전용면적,전용면적별세대수,공가수,자격유형,임대보증금,임대료,도보 10분거리 내 지하철역 수(환승노선 수 반영),도보 10분거리 내 버스정류장 수,단지내주차면수,등록차량수,type,비율,총세대수비율,비율2
140,C2416,560,상가,경상남도,임대상가,317.17,1,0.0,D,2000000.0,0.0,0.0,5.0,154.0,63.0,,0.409091,3.636364,2.444444
249,C2038,785,상가,대전광역시,임대상가,22.95,1,7.0,D,0.0,0.0,1.0,10.0,150.0,108.0,,0.72,5.233333,1.388889
266,C1859,2424,상가,대전광역시,임대상가,38.0,1,1.0,D,99050000.0,0.0,1.0,4.0,487.0,116.0,,0.238193,4.977413,4.198276
291,C1859,2424,상가,대전광역시,임대상가,39.33,1,1.0,D,88635000.0,0.0,1.0,4.0,487.0,116.0,,0.238193,4.977413,4.198276
298,C1859,2424,상가,대전광역시,임대상가,75.98,1,1.0,D,58285000.0,0.0,1.0,4.0,487.0,116.0,,0.238193,4.977413,4.198276
375,C2135,1116,상가,부산광역시,임대상가,21.46,1,10.0,D,5000000.0,0.0,1.0,3.0,262.0,127.0,,0.484733,4.259542,2.062992
395,C2034,1486,상가,부산광역시,임대상가,21.46,1,2.0,D,5000000.0,0.0,1.0,3.0,405.0,132.0,,0.325926,3.669136,3.068182
505,C2310,1988,상가,부산광역시,임대상가,37.41,1,1.0,D,0.0,0.0,0.0,2.0,217.0,189.0,,0.870968,9.16129,1.148148
506,C2310,1988,상가,부산광역시,임대상가,37.49,1,1.0,D,0.0,0.0,0.0,2.0,217.0,189.0,,0.870968,9.16129,1.148148
767,C1004,521,상가,충청남도,임대상가,22.95,1,3.0,D,0.0,0.0,,2.0,153.0,93.0,,0.607843,3.405229,1.645161


In [308]:

data[data.단지코드=='C2470']
230세대
공공분양
임대료 보증금 0원으로<<



Unnamed: 0,단지코드,총세대수,임대건물구분,지역,공급유형,전용면적,전용면적별세대수,공가수,자격유형,임대보증금,임대료,도보 10분거리 내 지하철역 수(환승노선 수 반영),도보 10분거리 내 버스정류장 수,단지내주차면수,등록차량수,type,비율,총세대수비율,비율2
2524,C2470,636,아파트,대전광역시,공공임대(5년),39.9,96,1.0,A,23000000.0,340000.0,1.0,2.0,772.0,887.0,,1.148964,0.823834,0.870349
2525,C2470,636,아파트,대전광역시,공공임대(5년),46.81,264,1.0,A,26000000.0,380000.0,1.0,2.0,772.0,887.0,,1.148964,0.823834,0.870349
2526,C2470,636,아파트,대전광역시,공공임대(5년),59.95,46,1.0,A,32000000.0,480000.0,1.0,2.0,772.0,887.0,,1.148964,0.823834,0.870349


In [307]:
data[data.단지코드=='C1206']

478 267 745 755 1988 200대 

Unnamed: 0,단지코드,총세대수,임대건물구분,지역,공급유형,전용면적,전용면적별세대수,공가수,자격유형,임대보증금,임대료,도보 10분거리 내 지하철역 수(환승노선 수 반영),도보 10분거리 내 버스정류장 수,단지내주차면수,등록차량수,type,비율,총세대수비율,비율2
643,C1206,755,아파트,강원도,영구임대,26.37,239,1.0,C,3141000.0,69900.0,0.0,4.0,88.0,90.0,,1.022727,8.579545,0.977778
644,C1206,755,아파트,강원도,영구임대,31.32,239,1.0,C,3731000.0,83020.0,0.0,4.0,88.0,90.0,,1.022727,8.579545,0.977778
645,C1206,755,상가,강원도,임대상가,31.84,1,1.0,D,13180000.0,311000.0,0.0,4.0,88.0,90.0,,1.022727,8.579545,0.977778
646,C1206,755,상가,강원도,임대상가,31.84,1,1.0,D,13180000.0,311000.0,0.0,4.0,88.0,90.0,,1.022727,8.579545,0.977778
647,C1206,755,상가,강원도,임대상가,31.84,1,1.0,D,13180000.0,311000.0,0.0,4.0,88.0,90.0,,1.022727,8.579545,0.977778
648,C1206,755,상가,강원도,임대상가,31.84,1,1.0,D,13180000.0,311000.0,0.0,4.0,88.0,90.0,,1.022727,8.579545,0.977778
649,C1206,755,상가,강원도,임대상가,31.84,1,1.0,D,13180000.0,311000.0,0.0,4.0,88.0,90.0,,1.022727,8.579545,0.977778
650,C1206,755,상가,강원도,임대상가,31.84,1,1.0,D,6120000.0,151000.0,0.0,4.0,88.0,90.0,,1.022727,8.579545,0.977778
651,C1206,755,상가,강원도,임대상가,31.84,1,1.0,D,6120000.0,151000.0,0.0,4.0,88.0,90.0,,1.022727,8.579545,0.977778
652,C1206,755,상가,강원도,임대상가,31.84,1,1.0,D,6120000.0,151000.0,0.0,4.0,88.0,90.0,,1.022727,8.579545,0.977778


In [498]:
features = pd.concat([df_train_edited, df_test_edited],axis = 0).reset_index(drop = True).fillna(0)

In [499]:
tr=data.drop_duplicates(['단지코드', '임대건물구분']).assign(counter=1).pivot(index='단지코드', columns='임대건물구분', values='counter').fillna(0)

features= pd.merge(features,tr,on='단지코드')
features.rename(columns={0 : "상가", 1 : "아파트"}, inplace=True)

features=pd.concat([features, features.상가+features.아파트], axis=1)
features['아파트_상가']=features[0].apply(lambda x: 1 if x == 2 else 0)
features.drop(0,axis=1,inplace=True)

In [500]:
tr=pd.pivot_table(index='단지코드',columns='임대건물구분',values='지역',aggfunc=np.size, fill_value=0, data=data[['단지코드','임대건물구분','지역']]).reset_index()


features = pd.merge(features, tr, on = '단지코드')
features.rename(columns={0 : "아파트합", 1 : "상가합"}, inplace=True)


In [501]:
train = pd.read_csv('new_train_processing.csv')#.query(' 단지코드 not in @train_error')
train = train.drop(train[train.지역 == "서울특별시"].index,axis = 0)
test = pd.read_csv('new_test_processing.csv')
row = pd.concat([train, test], axis = 0).reset_index(drop = True)

row.loc[(row.자격유형.isnull()) & (row.단지코드 == "C2411"), '자격유형'] = 'A'
row.loc[(row.자격유형.isnull()) & (row.단지코드 == "C2253"), '자격유형'] = 'C'

In [502]:
tr =row.groupby("단지코드")["지역"].agg([("지역","first")])
tr = pd.get_dummies(tr["지역"]).reset_index()
features = pd.merge(features, tr, on = '단지코드')

In [503]:
f_train = row.drop_duplicates(['단지코드', '공급유형']).assign(counter=1).pivot(index='단지코드', columns='공급유형', values='counter').fillna(0)

features = pd.merge(features, f_train, on = '단지코드')



In [504]:
tr=pd.pivot_table(index='단지코드',columns='공급유형',values='지역',aggfunc=np.size, fill_value=0, data=row[['단지코드','공급유형','지역']]).reset_index()

features = pd.merge(features, tr, on = '단지코드')

In [505]:
tr = row.drop_duplicates(['단지코드', '자격유형']).assign(counter=1).pivot(index='단지코드', columns='자격유형', values='counter').fillna(0)


features = pd.merge(features, tr, on = '단지코드')


In [506]:
tr=pd.pivot_table(index='단지코드',columns='자격유형',values='지역',aggfunc=np.size, fill_value=0, data=row[['단지코드','자격유형','지역']]).reset_index()

features = pd.merge(features, tr, on = '단지코드')

In [507]:
tr = data.groupby('단지코드').agg({
    '임대료': [('임대료_최소', 'min'),('임대료_평균', 'mean'),('임대료_최대', 'max'),('임대료_총합','sum')]
}).reset_index()
features = pd.merge(features, tr, on = '단지코드')



merging between different levels can give an unintended result (1 levels on the left,2 on the right)


dropping on a non-lexsorted multi-index without a level parameter may impact performance.



In [508]:
tr = data.groupby('단지코드').agg({
    '임대보증금': [('임대보증금_최소', 'min'),('임대보증금_평균', 'mean'),('임대보증금_최대', 'max'),('임대보증금_총합','sum')]
}).reset_index()

features = pd.merge(features, tr, on = '단지코드')


In [509]:
f_train = data.groupby('단지코드')['총전용면적'].sum().reset_index()
features = pd.merge(features, f_train, on = '단지코드')


In [510]:
#총면적 / 총세대수
#총면적 / 주차면수
features['면적당 주차면수']=features.단지내주차면수/features.총전용면적
features['면적당 세대수']=features.총세대수/features.총전용면적

features['면적당 세대수_2']=features.총전용면적/features.단지내주차면수
features['면적당 세대수_2']=features.총전용면적/features.총세대수

In [511]:
features['세대수_주차면수']=features.총세대수/features.단지내주차면수
features['지하철_주차면수']=features.지하철/features.단지내주차면수
features['버스_주차면수']=features.버스/features.단지내주차면수



In [512]:
features['공가수_주차면수']=features.단지내주차면수-features.공가수
features['공가수_총세대수']=features.총세대수/features.공가수
features['주차면수_공가수']=features.단지내주차면수/features.공가수
features["공가수_세대수"] = features["총세대수"] - features["공가수"]


In [513]:
features["30~60대남자비율"] = features["30대(남자)"]  + features["40대(남자)"]  + features["50대(남자)"]  + features["60대(남자)"] 
features["30~60대여자비율"] = features["30대(여자)"]  + features["40대(여자)"]  + features["50대(여자)"]  + features["60대(여자)"]

In [514]:
for i in np.arange(0.1,1,0.1):
    print(features.공가수_주차면수.quantile(q=i))

177.00000000000006
261.6
347.0
415.20000000000005
504.0
590.4
699.3999999999999
834.2
1050.6


In [515]:
# 10분위별로 (1 ~ 10 범위)
def fnc(m) : 
    if m < 177.00000000000006:
        return 1
    elif m <261.6 :
        return 2 
    elif m < 347.0 :
        return 3 
    elif m < 415.20000000000005:
        return 4 
    elif m < 504.0 :
        return 5 
    elif m < 590.4 :
        return 6 
    elif m < 699.3999999999999:
        return 7 
    elif m < 834.2 :
        return 8
    elif m < 1050.6 :
        return 9 
    else:
        return 10

features['공가수_주차면수10분위']= features.공가수_주차면수.apply(lambda x : fnc(x))

In [516]:
for i in np.arange(0.1,1,0.1):
    print(features.총전용면적.quantile(q=i))

11763.6
15801.000000000002
19318.4
22686.0
25839.0
30196.799999999996
35467.2
42768.00000000002
53350.80000000001


In [517]:
# 10분위별로 (1 ~ 10 범위)
def fnc(m) : 
    if m < 11763.6:
        return 1
    elif m <15801 :
        return 2 
    elif m < 19318.4 :
        return 3 
    elif m < 22686.0:
        return 4 
    elif m < 25839.0 :
        return 5 
    elif m < 30196.799999999996 :
        return 6 
    elif m < 35467.2:
        return 7 
    elif m < 42768.00000000002 :
        return 8
    elif m < 53350.80000000001 :
        return 9 
    else:
        return 10

features['총전용면적 10분위']= features.총전용면적.apply(lambda x : fnc(x))

In [518]:
features=features.rename(columns = lambda x:re.sub('[-=+,#/\?:^$.@*\"※~&%ㆍ!』\\‘|\(\)\[\]\<\>`\'…》]+', '', str(x)))

In [519]:
for i in np.arange(0.1,1,0.1):
    print(features['임대보증금 임대보증금_평균'].quantile(q=i))

12230850.0
13946120.634920634
15764800.0
17741900.0
20148333.333333332
22562400.0
25943799.999999996
31995323.333333336
40052600.0


In [520]:
# 10분위별로 (1 ~ 10 범위)
def fnc(m) : 
    if m < 12230850.0:
        return 1
    elif m <13946120.634920634 :
        return 2 
    elif m < 15764800.0 :
        return 3 
    elif m < 17741900.0 :
        return 4 
    elif m < 20148333.333333332 :
        return 5 
    elif m <  22562400.0:
        return 6 
    elif m < 25943799.999999996:
        return 7 
    elif m < 31995323.333333336 :
        return 8
    elif m < 40052600.0 :
        return 9 
    else:
        return 10

features['임대보증금 10분위']= features['임대보증금 임대보증금_평균'].apply(lambda x : fnc(x))

In [521]:
for i in np.arange(0.1,1,0.1):
    print(features['임대료 임대료_평균'].quantile(q=i))

99352.66666666667
115767.00000000001
129745.15151515152
141036.0
164990.0
184276.3333333333
207534.9714285714
241008.33333333346
292372.0


In [522]:
# 10분위별로 (1 ~ 10 범위)
def fnc(m) : 
    if m < 99352.66666666667:
        return 1
    elif m <115767.00000000001 :
        return 2 
    elif m < 129745.15151515152 :
        return 3 
    elif m < 141036.0 :
        return 4 
    elif m < 164990.0 :
        return 5 
    elif m <  184276.3333333333:
        return 6 
    elif m < 207534.9714285714:
        return 7 
    elif m < 241008.33333333346 :
        return 8
    elif m < 292372.0 :
        return 9 
    else:
        return 10

features['임대료 10분위']= features['임대료 임대료_평균'].apply(lambda x : fnc(x))

In [454]:
features

Unnamed: 0,단지코드,등록차량수,총세대수,지역,공가수,지하철,버스,단지내주차면수,총임대가구수,면적_15,면적_18,면적_21,면적_24,면적_27,면적_30,면적_33,면적_36,면적_39,면적_42,면적_45,면적_48,면적_51,면적_54,면적_57,면적_60,면적_63,면적_66,면적_72,면적_75,면적_78,면적_81,면적_84,면적_100,임대비율,가구당주차면수,10대미만여자,10대미만남자,10대여자,10대남자,20대여자,20대남자,30대여자,30대남자,40대여자,40대남자,50대여자,50대남자,60대여자,60대남자,70대여자,70대남자,80대여자,80대남자,90대여자,90대남자,100대여자,100대남자,상가,아파트,아파트_상가,아파트합,상가합,강원도,경기도,경상남도,경상북도,광주광역시,대구광역시,대전광역시,부산광역시,세종특별자치시,울산광역시,전라남도,전라북도,제주특별자치도,충청남도,충청북도,공공임대50년_x,공공임대5년10년분납분양_x,국민임대장기전세_x,영구임대_x,임대상가_x,행복주택_x,공공임대50년_y,공공임대5년10년분납분양_y,국민임대장기전세_y,영구임대_y,임대상가_y,행복주택_y,A_x,D_x,국민임대장기전세_공급대상_x,영구임대_공급대상_x,행복주택_공급대상_x,A_y,D_y,국민임대장기전세_공급대상_y,영구임대_공급대상_y,행복주택_공급대상_y,임대료 임대료_최소,임대료 임대료_평균,임대료 임대료_최대,임대료 임대료_총합,임대보증금 임대보증금_최소,임대보증금 임대보증금_평균,임대보증금 임대보증금_최대,임대보증금 임대보증금_총합,총전용면적,면적당 주차면수,면적당 세대수,면적당 세대수_2,세대수_주차면수,지하철_주차면수,버스_주차면수,공가수_주차면수,공가수_총세대수,주차면수_공가수,공가수_세대수,3060대남자비율,3060대여자비율,공가수_주차면수10분위,총전용면적 10분위,임대보증금 10분위,임대료 10분위
0,C2483,1015.0,900.0,3.0,38.0,0.0,3.0,1425.0,900.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.165556,0.0,0.000000,0.0,0.738889,0.0,0.095556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.000000,1.583333,0.030158,0.033195,0.056346,0.061360,0.060096,0.067859,0.053433,0.049572,0.083660,0.072613,0.087149,0.072146,0.082684,0.063889,0.047717,0.030172,0.029361,0.011211,0.005578,0.001553,0.000234,0.000014,0.0,1.0,0,0,8,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0,0,8,0,0,0,1.0,0.0,0.0,0.0,0.0,8,0,0,0,0,103680.0,167910.000000,214270.0,1343280.0,15667000.0,2.477638e+07,30357000.0,198211000.0,44628.0,0.031931,0.020167,49.586667,0.631579,0.000000,0.002105,1387.0,23.684211,37.500000,862.0,0.258220,0.306926,10,9,7,6
1,C2515,205.0,545.0,2.0,17.0,0.0,3.0,624.0,545.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.506422,0.000000,0.146789,0.0,0.104587,0.0,0.242202,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.000000,1.144954,0.027400,0.026902,0.053257,0.055568,0.064920,0.070618,0.056414,0.057550,0.077092,0.067600,0.086873,0.072570,0.087201,0.069562,0.048357,0.033277,0.027361,0.011295,0.004910,0.001086,0.000179,0.000010,0.0,1.0,0,0,7,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0,0,7,0,0,0,1.0,0.0,0.0,0.0,0.0,7,0,0,0,0,82940.0,139557.142857,190090.0,976900.0,9216000.0,1.678714e+07,23042000.0,117510000.0,21525.0,0.028990,0.025319,39.495413,0.873397,0.000000,0.004808,607.0,32.058824,36.705882,528.0,0.267281,0.307580,7,4,4,4
2,C1407,1064.0,1216.0,6.0,13.0,1.0,1.0,1285.0,1216.0,0.000000,0.0,0.000000,0.000000,0.000000,0.320724,0.000000,0.000000,0.297697,0.0,0.279605,0.0,0.101974,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.000000,1.056743,0.028197,0.029092,0.040490,0.042793,0.060834,0.064247,0.068654,0.066848,0.074667,0.067925,0.085751,0.068819,0.088468,0.070261,0.051010,0.037143,0.032455,0.013751,0.006494,0.001740,0.000298,0.000066,0.0,1.0,0,0,11,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0,0,11,0,0,0,1.0,0.0,0.0,0.0,0.0,11,0,0,0,0,127350.0,176730.000000,253520.0,1944030.0,15620000.0,2.851055e+07,42056000.0,313616000.0,47442.0,0.027086,0.025631,39.014803,0.946304,0.000778,0.000778,1272.0,93.538462,98.846154,1203.0,0.273852,0.317539,10,9,8,6
3,C1945,730.0,755.0,1.0,6.0,1.0,3.0,734.0,755.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.317881,0.0,0.000000,0.0,0.401325,0.0,0.280795,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.000000,0.972185,0.038030,0.039507,0.052546,0.053990,0.058484,0.059894,0.072331,0.068704,0.083208,0.078355,0.077345,0.064859,0.074237,0.058419,0.042422,0.032725,0.025136,0.012354,0.005390,0.001707,0.000290,0.000067,0.0,1.0,0,0,6,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0,0,6,0,0,0,0.0,0.0,1.0,0.0,0.0,0,0,6,0,0,130070.0,189106.666667,244020.0,1134640.0,15607000.0,2.265167e+07,29209000.0,135910000.0,36897.0,0.019893,0.020462,48.870199,1.028610,0.001362,0.004087,728.0,125.833333,122.333333,749.0,0.270337,0.307120,8,8,7,7
4,C1470,553.0,696.0,11.0,14.0,0.0,2.0,645.0,696.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.364943,0.000000,0.0,0.281609,0.0,0.353448,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.000000,0.926724,0.031930,0.034647,0.059154,0.063379,0.065517,0.070339,0.057977,0.053471,0.078358,0.075724,0.079966,0.077795,0.071465,0.062352,0.042535,0.030704,0.027340,0.009312,0.005965,0.001458,0.000149,0.000464,0.0,1.0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0,0,4,0,0,0,1.0,0.0,0.0,0.0,0.0,4,0,0,0,0,102130.0,120150.000000,144180.0,480600.0,12015000.0,1.772275e+07,25233000.0,70891000.0,30510.0,0.021141,0.022812,43.836207,1.079070,0.000000,0.003101,631.0,49.714286,46.071429,682.0,0.269341,0.287767,7,7,4,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
559,C2456,0.0,349.0,12.0,17.0,0.0,4.0,270.0,346.0,0.000000,0.0,0.000000,0.320917,0.000000,0.000000,0.000000,0.395415,0.000000,0.0,0.275072,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.991404,0.773639,0.028089,0.029065,0.059685,0.060080,0.066262,0.070322,0.052027,0.046596,0.077005,0.066645,0.081408,0.074382,0.076636,0.068042,0.051025,0.035748,0.035049,0.012641,0.007223,0.001898,0.000158,0.000013,0.0,1.0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0,0,4,0,0,0,0.0,0.0,1.0,0.0,0.0,0,0,4,0,0,117000.0,155082.500000,217100.0,620330.0,6992000.0,1.222875e+07,21246000.0,48915000.0,11976.0,0.022545,0.029142,34.315186,1.292593,0.000000,0.014815,253.0,20.529412,15.882353,332.0,0.255665,0.287076,2,2,1,5
560,C1266,0.0,596.0,14.0,35.0,0.0,1.0,593.0,591.0,0.000000,0.0,0.000000,0.302013,0.000000,0.000000,0.000000,0.510067,0.000000,0.0,0.179530,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.991611,0.994966,0.031369,0.031711,0.059077,0.062422,0.067975,0.072622,0.065095,0.067303,0.078860,0.073418,0.079299,0.069167,0.070278,0.057692,0.042296,0.028682,0.024514,0.010900,0.005429,0.001549,0.000219,0.000123,0.0,1.0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0.0,0.0,1.0,0.0,0.0,0.0,0,0,5,0,0,0,0.0,0.0,1.0,0.0,0.0,0,0,5,0,0,149910.0,199878.000000,274570.0,999390.0,8084000.0,1.435520e+07,24190000.0,71776000.0,20079.0,0.029533,0.029683,33.689597,1.005059,0.000000,0.001686,558.0,17.028571,16.942857,561.0,0.267580,0.293533,6,4,3,7
561,C2152,0.0,120.0,0.0,9.0,0.0,1.0,40.0,120.0,0.000000,0.0,0.000000,0.550000,0.000000,0.000000,0.450000,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.000000,0.333333,0.028888,0.030682,0.051287,0.052712,0.060374,0.063157,0.059821,0.054321,0.076201,0.068002,0.087266,0.072177,0.088423,0.070014,0.047732,0.032760,0.033515,0.013027,0.007628,0.001677,0.000319,0.000017,0.0,1.0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,1.0,0.0,0.0,0,0,0,2,0,0,0.0,0.0,0.0,1.0,0.0,0,0,0,2,0,42350.0,50040.000000,57730.0,100080.0,2129000.0,2.515500e+06,2902000.0,5031000.0,3366.0,0.011884,0.035651,28.050000,3.000000,0.000000,0.025000,31.0,13.333333,4.444444,111.0,0.264515,0.311711,1,1,1,1
562,C1267,0.0,675.0,2.0,38.0,0.0,1.0,467.0,670.0,0.074074,0.0,0.162963,0.302222,0.000000,0.000000,0.000000,0.186667,0.000000,0.0,0.266667,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.992593,0.691852,0.027400,0.026902,0.053257,0.055568,0.064920,0.070618,0.056414,0.057550,0.077092,0.067600,0.086873,0.072570,0.087201,0.069562,0.048357,0.033277,0.027361,0.011295,0.004910,0.001086,0.000179,0.000010,0.0,1.0,0,0,11,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,1.0,1.0,0.0,1.0,0,0,5,2,0,4,0.0,0.0,1.0,1.0,1.0,0,0,5,2,4,52896.0,114454.454545,219230.0,1258999.0,6882000.0,1.481329e+07,25247619.0,162946164.0,20592.0,0.022679,0.032780,30.506667,1.445396,0.000000,0.002141,429.0,17.763158,12.289474,637.0,0.267281,0.307580,5,4,3,2


# features 분할

In [523]:
features.drop('지역',axis=1,inplace=True)

In [524]:
features = features.replace([np.inf, -np.inf], np.nan) # replace 메서드로 np.inf를 None(np.nan)으로 변경
features.fillna(0,inplace=True)
features=features.rename(columns = lambda x:re.sub('[-=+,#/\?:^$.@*\"※~&%ㆍ!』\\‘|\(\)\[\]\<\>`\'…》]+', '', str(x)))


In [457]:
features

Unnamed: 0,단지코드,등록차량수,총세대수,공가수,지하철,버스,단지내주차면수,총임대가구수,면적_15,면적_18,면적_21,면적_24,면적_27,면적_30,면적_33,면적_36,면적_39,면적_42,면적_45,면적_48,면적_51,면적_54,면적_57,면적_60,면적_63,면적_66,면적_72,면적_75,면적_78,면적_81,면적_84,면적_100,임대비율,가구당주차면수,10대미만여자,10대미만남자,10대여자,10대남자,20대여자,20대남자,30대여자,30대남자,40대여자,40대남자,50대여자,50대남자,60대여자,60대남자,70대여자,70대남자,80대여자,80대남자,90대여자,90대남자,100대여자,100대남자,상가,아파트,아파트_상가,아파트합,상가합,강원도,경기도,경상남도,경상북도,광주광역시,대구광역시,대전광역시,부산광역시,세종특별자치시,울산광역시,전라남도,전라북도,제주특별자치도,충청남도,충청북도,공공임대50년_x,공공임대5년10년분납분양_x,국민임대장기전세_x,영구임대_x,임대상가_x,행복주택_x,공공임대50년_y,공공임대5년10년분납분양_y,국민임대장기전세_y,영구임대_y,임대상가_y,행복주택_y,A_x,D_x,국민임대장기전세_공급대상_x,영구임대_공급대상_x,행복주택_공급대상_x,A_y,D_y,국민임대장기전세_공급대상_y,영구임대_공급대상_y,행복주택_공급대상_y,임대료 임대료_최소,임대료 임대료_평균,임대료 임대료_최대,임대료 임대료_총합,임대보증금 임대보증금_최소,임대보증금 임대보증금_평균,임대보증금 임대보증금_최대,임대보증금 임대보증금_총합,총전용면적,면적당 주차면수,면적당 세대수,면적당 세대수_2,세대수_주차면수,지하철_주차면수,버스_주차면수,공가수_주차면수,공가수_총세대수,주차면수_공가수,공가수_세대수,3060대남자비율,3060대여자비율,공가수_주차면수10분위,총전용면적 10분위,임대보증금 10분위,임대료 10분위
0,C2483,1015.0,900.0,38.0,0.0,3.0,1425.0,900.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.165556,0.0,0.000000,0.0,0.738889,0.0,0.095556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.000000,1.583333,0.030158,0.033195,0.056346,0.061360,0.060096,0.067859,0.053433,0.049572,0.083660,0.072613,0.087149,0.072146,0.082684,0.063889,0.047717,0.030172,0.029361,0.011211,0.005578,0.001553,0.000234,0.000014,0.0,1.0,0,0,8,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0,0,8,0,0,0,1.0,0.0,0.0,0.0,0.0,8,0,0,0,0,103680.0,167910.000000,214270.0,1343280.0,15667000.0,2.477638e+07,30357000.0,198211000.0,44628.0,0.031931,0.020167,49.586667,0.631579,0.000000,0.002105,1387.0,23.684211,37.500000,862.0,0.258220,0.306926,10,9,7,6
1,C2515,205.0,545.0,17.0,0.0,3.0,624.0,545.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.506422,0.000000,0.146789,0.0,0.104587,0.0,0.242202,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.000000,1.144954,0.027400,0.026902,0.053257,0.055568,0.064920,0.070618,0.056414,0.057550,0.077092,0.067600,0.086873,0.072570,0.087201,0.069562,0.048357,0.033277,0.027361,0.011295,0.004910,0.001086,0.000179,0.000010,0.0,1.0,0,0,7,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0,0,7,0,0,0,1.0,0.0,0.0,0.0,0.0,7,0,0,0,0,82940.0,139557.142857,190090.0,976900.0,9216000.0,1.678714e+07,23042000.0,117510000.0,21525.0,0.028990,0.025319,39.495413,0.873397,0.000000,0.004808,607.0,32.058824,36.705882,528.0,0.267281,0.307580,7,4,4,4
2,C1407,1064.0,1216.0,13.0,1.0,1.0,1285.0,1216.0,0.000000,0.0,0.000000,0.000000,0.000000,0.320724,0.000000,0.000000,0.297697,0.0,0.279605,0.0,0.101974,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.000000,1.056743,0.028197,0.029092,0.040490,0.042793,0.060834,0.064247,0.068654,0.066848,0.074667,0.067925,0.085751,0.068819,0.088468,0.070261,0.051010,0.037143,0.032455,0.013751,0.006494,0.001740,0.000298,0.000066,0.0,1.0,0,0,11,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0,0,11,0,0,0,1.0,0.0,0.0,0.0,0.0,11,0,0,0,0,127350.0,176730.000000,253520.0,1944030.0,15620000.0,2.851055e+07,42056000.0,313616000.0,47442.0,0.027086,0.025631,39.014803,0.946304,0.000778,0.000778,1272.0,93.538462,98.846154,1203.0,0.273852,0.317539,10,9,8,6
3,C1945,730.0,755.0,6.0,1.0,3.0,734.0,755.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.317881,0.0,0.000000,0.0,0.401325,0.0,0.280795,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.000000,0.972185,0.038030,0.039507,0.052546,0.053990,0.058484,0.059894,0.072331,0.068704,0.083208,0.078355,0.077345,0.064859,0.074237,0.058419,0.042422,0.032725,0.025136,0.012354,0.005390,0.001707,0.000290,0.000067,0.0,1.0,0,0,6,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0,0,6,0,0,0,0.0,0.0,1.0,0.0,0.0,0,0,6,0,0,130070.0,189106.666667,244020.0,1134640.0,15607000.0,2.265167e+07,29209000.0,135910000.0,36897.0,0.019893,0.020462,48.870199,1.028610,0.001362,0.004087,728.0,125.833333,122.333333,749.0,0.270337,0.307120,8,8,7,7
4,C1470,553.0,696.0,14.0,0.0,2.0,645.0,696.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.364943,0.000000,0.0,0.281609,0.0,0.353448,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.000000,0.926724,0.031930,0.034647,0.059154,0.063379,0.065517,0.070339,0.057977,0.053471,0.078358,0.075724,0.079966,0.077795,0.071465,0.062352,0.042535,0.030704,0.027340,0.009312,0.005965,0.001458,0.000149,0.000464,0.0,1.0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0,0,4,0,0,0,1.0,0.0,0.0,0.0,0.0,4,0,0,0,0,102130.0,120150.000000,144180.0,480600.0,12015000.0,1.772275e+07,25233000.0,70891000.0,30510.0,0.021141,0.022812,43.836207,1.079070,0.000000,0.003101,631.0,49.714286,46.071429,682.0,0.269341,0.287767,7,7,4,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
559,C2456,0.0,349.0,17.0,0.0,4.0,270.0,346.0,0.000000,0.0,0.000000,0.320917,0.000000,0.000000,0.000000,0.395415,0.000000,0.0,0.275072,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.991404,0.773639,0.028089,0.029065,0.059685,0.060080,0.066262,0.070322,0.052027,0.046596,0.077005,0.066645,0.081408,0.074382,0.076636,0.068042,0.051025,0.035748,0.035049,0.012641,0.007223,0.001898,0.000158,0.000013,0.0,1.0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0,0,4,0,0,0,0.0,0.0,1.0,0.0,0.0,0,0,4,0,0,117000.0,155082.500000,217100.0,620330.0,6992000.0,1.222875e+07,21246000.0,48915000.0,11976.0,0.022545,0.029142,34.315186,1.292593,0.000000,0.014815,253.0,20.529412,15.882353,332.0,0.255665,0.287076,2,2,1,5
560,C1266,0.0,596.0,35.0,0.0,1.0,593.0,591.0,0.000000,0.0,0.000000,0.302013,0.000000,0.000000,0.000000,0.510067,0.000000,0.0,0.179530,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.991611,0.994966,0.031369,0.031711,0.059077,0.062422,0.067975,0.072622,0.065095,0.067303,0.078860,0.073418,0.079299,0.069167,0.070278,0.057692,0.042296,0.028682,0.024514,0.010900,0.005429,0.001549,0.000219,0.000123,0.0,1.0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0.0,0.0,1.0,0.0,0.0,0.0,0,0,5,0,0,0,0.0,0.0,1.0,0.0,0.0,0,0,5,0,0,149910.0,199878.000000,274570.0,999390.0,8084000.0,1.435520e+07,24190000.0,71776000.0,20079.0,0.029533,0.029683,33.689597,1.005059,0.000000,0.001686,558.0,17.028571,16.942857,561.0,0.267580,0.293533,6,4,3,7
561,C2152,0.0,120.0,9.0,0.0,1.0,40.0,120.0,0.000000,0.0,0.000000,0.550000,0.000000,0.000000,0.450000,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.000000,0.333333,0.028888,0.030682,0.051287,0.052712,0.060374,0.063157,0.059821,0.054321,0.076201,0.068002,0.087266,0.072177,0.088423,0.070014,0.047732,0.032760,0.033515,0.013027,0.007628,0.001677,0.000319,0.000017,0.0,1.0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,1.0,0.0,0.0,0,0,0,2,0,0,0.0,0.0,0.0,1.0,0.0,0,0,0,2,0,42350.0,50040.000000,57730.0,100080.0,2129000.0,2.515500e+06,2902000.0,5031000.0,3366.0,0.011884,0.035651,28.050000,3.000000,0.000000,0.025000,31.0,13.333333,4.444444,111.0,0.264515,0.311711,1,1,1,1
562,C1267,0.0,675.0,38.0,0.0,1.0,467.0,670.0,0.074074,0.0,0.162963,0.302222,0.000000,0.000000,0.000000,0.186667,0.000000,0.0,0.266667,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.992593,0.691852,0.027400,0.026902,0.053257,0.055568,0.064920,0.070618,0.056414,0.057550,0.077092,0.067600,0.086873,0.072570,0.087201,0.069562,0.048357,0.033277,0.027361,0.011295,0.004910,0.001086,0.000179,0.000010,0.0,1.0,0,0,11,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,1.0,1.0,0.0,1.0,0,0,5,2,0,4,0.0,0.0,1.0,1.0,1.0,0,0,5,2,4,52896.0,114454.454545,219230.0,1258999.0,6882000.0,1.481329e+07,25247619.0,162946164.0,20592.0,0.022679,0.032780,30.506667,1.445396,0.000000,0.002141,429.0,17.763158,12.289474,637.0,0.267281,0.307580,5,4,3,2


In [459]:
df_train_edited

Unnamed: 0,단지코드,등록차량수,총세대수,지역,공가수,지하철,버스,단지내주차면수,총임대가구수,면적_15,면적_18,면적_21,면적_24,면적_27,면적_30,면적_33,면적_36,면적_39,면적_42,면적_45,면적_48,면적_51,면적_54,면적_57,면적_60,면적_63,면적_66,면적_72,면적_75,면적_78,면적_81,면적_84,면적_100,임대비율,가구당주차면수,10대미만(여자),10대미만(남자),10대(여자),10대(남자),20대(여자),20대(남자),30대(여자),30대(남자),40대(여자),40대(남자),50대(여자),50대(남자),60대(여자),60대(남자),70대(여자),70대(남자),80대(여자),80대(남자),90대(여자),90대(남자),100대(여자),100대(남자)
0,C2483,1015.0,900.0,3.0,38.0,0.0,3.0,1425.0,900.0,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.165556,0.0,0.000000,0.000000,0.738889,0.000000,0.095556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.583333,0.030158,0.033195,0.056346,0.061360,0.060096,0.067859,0.053433,0.049572,0.083660,0.072613,0.087149,0.072146,0.082684,0.063889,0.047717,0.030172,0.029361,0.011211,0.005578,0.001553,0.000234,0.000014
1,C2515,205.0,545.0,2.0,17.0,0.0,3.0,624.0,545.0,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.506422,0.000000,0.146789,0.0,0.104587,0.000000,0.242202,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.144954,0.027400,0.026902,0.053257,0.055568,0.064920,0.070618,0.056414,0.057550,0.077092,0.067600,0.086873,0.072570,0.087201,0.069562,0.048357,0.033277,0.027361,0.011295,0.004910,0.001086,0.000179,0.000010
2,C1407,1064.0,1216.0,6.0,13.0,1.0,1.0,1285.0,1216.0,0.000000,0.0,0.0,0.000000,0.000000,0.320724,0.000000,0.000000,0.297697,0.0,0.279605,0.000000,0.101974,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.056743,0.028197,0.029092,0.040490,0.042793,0.060834,0.064247,0.068654,0.066848,0.074667,0.067925,0.085751,0.068819,0.088468,0.070261,0.051010,0.037143,0.032455,0.013751,0.006494,0.001740,0.000298,0.000066
3,C1945,730.0,755.0,1.0,6.0,1.0,3.0,734.0,755.0,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.317881,0.0,0.000000,0.000000,0.401325,0.000000,0.280795,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.972185,0.038030,0.039507,0.052546,0.053990,0.058484,0.059894,0.072331,0.068704,0.083208,0.078355,0.077345,0.064859,0.074237,0.058419,0.042422,0.032725,0.025136,0.012354,0.005390,0.001707,0.000290,0.000067
4,C1470,553.0,696.0,11.0,14.0,0.0,2.0,645.0,696.0,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.364943,0.000000,0.0,0.281609,0.000000,0.353448,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.926724,0.031930,0.034647,0.059154,0.063379,0.065517,0.070339,0.057977,0.053471,0.078358,0.075724,0.079966,0.077795,0.071465,0.062352,0.042535,0.030704,0.027340,0.009312,0.005965,0.001458,0.000149,0.000464
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
410,C2586,57.0,90.0,12.0,7.0,0.0,3.0,66.0,90.0,0.133333,0.0,0.0,0.466667,0.000000,0.000000,0.000000,0.400000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.733333,0.028089,0.029065,0.059685,0.060080,0.066262,0.070322,0.052027,0.046596,0.077005,0.066645,0.081408,0.074382,0.076636,0.068042,0.051025,0.035748,0.035049,0.012641,0.007223,0.001898,0.000158,0.000013
411,C2035,246.0,492.0,0.0,24.0,0.0,1.0,521.0,492.0,0.000000,0.0,0.0,0.000000,0.317073,0.000000,0.000000,0.317073,0.000000,0.0,0.365854,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.058943,0.028888,0.030682,0.051287,0.052712,0.060374,0.063157,0.059821,0.054321,0.076201,0.068002,0.087266,0.072177,0.088423,0.070014,0.047732,0.032760,0.033515,0.013027,0.007628,0.001677,0.000319,0.000017
412,C2020,19.0,40.0,7.0,7.0,1.0,2.0,25.0,40.0,0.500000,0.0,0.0,0.125000,0.000000,0.000000,0.000000,0.375000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.625000,0.022003,0.022947,0.032681,0.035512,0.053796,0.057233,0.047049,0.048866,0.061952,0.060769,0.082899,0.068855,0.109297,0.085294,0.078743,0.053388,0.047908,0.020228,0.008043,0.002240,0.000268,0.000028
413,C2437,16.0,90.0,14.0,12.0,0.0,1.0,30.0,90.0,0.000000,0.0,0.0,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.333333,0.031369,0.031711,0.059077,0.062422,0.067975,0.072622,0.065095,0.067303,0.078860,0.073418,0.079299,0.069167,0.070278,0.057692,0.042296,0.028682,0.024514,0.010900,0.005429,0.001549,0.000219,0.000123


In [525]:
df_train_edited = features.iloc[:415].fillna(0)
df_test_edited = features.iloc[415:].reset_index(drop = True).fillna(0)

In [527]:
df_test_edited

Unnamed: 0,단지코드,등록차량수,총세대수,공가수,지하철,버스,단지내주차면수,총임대가구수,면적_15,면적_18,면적_21,면적_24,면적_27,면적_30,면적_33,면적_36,면적_39,면적_42,면적_45,면적_48,면적_51,면적_54,면적_57,면적_60,면적_63,면적_66,면적_72,면적_75,면적_78,면적_81,면적_84,면적_100,임대비율,가구당주차면수,10대미만여자,10대미만남자,10대여자,10대남자,20대여자,20대남자,30대여자,30대남자,40대여자,40대남자,50대여자,50대남자,60대여자,60대남자,70대여자,70대남자,80대여자,80대남자,90대여자,90대남자,100대여자,100대남자,상가,아파트,아파트_상가,아파트합,상가합,강원도,경기도,경상남도,경상북도,광주광역시,대구광역시,대전광역시,부산광역시,세종특별자치시,울산광역시,전라남도,전라북도,제주특별자치도,충청남도,충청북도,공공임대50년_x,공공임대5년10년분납분양_x,국민임대장기전세_x,영구임대_x,임대상가_x,행복주택_x,공공임대50년_y,공공임대5년10년분납분양_y,국민임대장기전세_y,영구임대_y,임대상가_y,행복주택_y,A_x,D_x,국민임대장기전세_공급대상_x,영구임대_공급대상_x,행복주택_공급대상_x,A_y,D_y,국민임대장기전세_공급대상_y,영구임대_공급대상_y,행복주택_공급대상_y,임대료 임대료_최소,임대료 임대료_평균,임대료 임대료_최대,임대료 임대료_총합,임대보증금 임대보증금_최소,임대보증금 임대보증금_평균,임대보증금 임대보증금_최대,임대보증금 임대보증금_총합,총전용면적,면적당 주차면수,면적당 세대수,면적당 세대수_2,세대수_주차면수,지하철_주차면수,버스_주차면수,공가수_주차면수,공가수_총세대수,주차면수_공가수,공가수_세대수,3060대남자비율,3060대여자비율,공가수_주차면수10분위,총전용면적 10분위,임대보증금 10분위,임대료 10분위
0,C1072,0.0,754.0,14.0,0.0,2.0,683.0,754.0,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.153846,0.0,0.347480,0.0,0.498674,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.000000,0.905836,0.038030,0.039507,0.052546,0.053990,0.058484,0.059894,0.072331,0.068704,0.083208,0.078355,0.077345,0.064859,0.074237,0.058419,0.042422,0.032725,0.025136,0.012354,0.005390,0.001707,0.000290,0.000067,0.0,1.0,0,0,8,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0,0,8,0,0,0,1.0,0.0,1.0,0.0,0.0,1,0,7,0,0,189840.0,265843.750000,296780.0,2126750.0,22830000.0,3.812025e+07,43497000.0,304962000.0,35490.0,0.019245,0.021245,47.068966,1.103953,0.0,0.002928,669.0,53.857143,48.785714,740.0,0.270337,0.307120,7,8,9,9
1,C1128,0.0,1354.0,9.0,0.0,3.0,1216.0,1354.0,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.310192,0.0,0.262925,0.0,0.426883,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.000000,0.898080,0.038030,0.039507,0.052546,0.053990,0.058484,0.059894,0.072331,0.068704,0.083208,0.078355,0.077345,0.064859,0.074237,0.058419,0.042422,0.032725,0.025136,0.012354,0.005390,0.001707,0.000290,0.000067,0.0,1.0,0,0,9,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0,0,9,0,0,0,1.0,0.0,1.0,0.0,0.0,4,0,5,0,0,189840.0,245516.666667,296780.0,2209650.0,22830000.0,3.412500e+07,43497000.0,307125000.0,61878.0,0.019652,0.021882,45.700148,1.113487,0.0,0.002467,1207.0,150.444444,135.111111,1345.0,0.270337,0.307120,10,10,9,9
2,C1456,0.0,619.0,18.0,0.0,16.0,547.0,619.0,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.132472,0.000000,0.266559,0.0,0.316640,0.0,0.213247,0.0,0.071082,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.000000,0.883683,0.022003,0.022947,0.032681,0.035512,0.053796,0.057233,0.047049,0.048866,0.061952,0.060769,0.082899,0.068855,0.109297,0.085294,0.078743,0.053388,0.047908,0.020228,0.008043,0.002240,0.000268,0.000028,0.0,1.0,0,0,9,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0,0,9,0,0,0,1.0,0.0,0.0,0.0,0.0,9,0,0,0,0,156200.0,223624.444444,312420.0,2012620.0,19706000.0,3.396522e+07,55275000.0,305687000.0,27201.0,0.020110,0.022757,43.943457,1.131627,0.0,0.029250,529.0,34.388889,30.388889,601.0,0.263784,0.301197,6,6,9,8
3,C1840,0.0,593.0,7.0,0.0,3.0,543.0,593.0,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.564924,0.0,0.293423,0.0,0.141653,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.000000,0.915683,0.031930,0.034647,0.059154,0.063379,0.065517,0.070339,0.057977,0.053471,0.078358,0.075724,0.079966,0.077795,0.071465,0.062352,0.042535,0.030704,0.027340,0.009312,0.005965,0.001458,0.000149,0.000464,0.0,1.0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0,0,4,0,0,0,1.0,0.0,0.0,0.0,0.0,4,0,0,0,0,108130.0,123150.000000,144180.0,492600.0,14418000.0,1.892425e+07,25233000.0,75697000.0,25179.0,0.021566,0.023551,42.460371,1.092081,0.0,0.005525,536.0,84.714286,77.571429,586.0,0.269341,0.287767,6,5,5,3
4,C1332,0.0,1297.0,11.0,0.0,2.0,1112.0,1297.0,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.327679,0.0,0.437934,0.0,0.234387,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.000000,0.857363,0.038030,0.039507,0.052546,0.053990,0.058484,0.059894,0.072331,0.068704,0.083208,0.078355,0.077345,0.064859,0.074237,0.058419,0.042422,0.032725,0.025136,0.012354,0.005390,0.001707,0.000290,0.000067,0.0,1.0,0,0,8,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0,0,8,0,0,0,0.0,0.0,1.0,0.0,0.0,0,0,8,0,0,203050.0,292880.000000,362870.0,2343040.0,28598000.0,4.133450e+07,51188000.0,330676000.0,57639.0,0.019292,0.022502,44.440247,1.166367,0.0,0.001799,1101.0,117.909091,101.090909,1286.0,0.270337,0.307120,10,10,10,10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145,C2456,0.0,349.0,17.0,0.0,4.0,270.0,346.0,0.000000,0.0,0.000000,0.320917,0.000000,0.0,0.000000,0.395415,0.000000,0.0,0.275072,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.991404,0.773639,0.028089,0.029065,0.059685,0.060080,0.066262,0.070322,0.052027,0.046596,0.077005,0.066645,0.081408,0.074382,0.076636,0.068042,0.051025,0.035748,0.035049,0.012641,0.007223,0.001898,0.000158,0.000013,0.0,1.0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0,0,4,0,0,0,0.0,0.0,1.0,0.0,0.0,0,0,4,0,0,117000.0,155082.500000,217100.0,620330.0,6992000.0,1.222875e+07,21246000.0,48915000.0,11976.0,0.022545,0.029142,34.315186,1.292593,0.0,0.014815,253.0,20.529412,15.882353,332.0,0.255665,0.287076,2,2,1,5
146,C1266,0.0,596.0,35.0,0.0,1.0,593.0,591.0,0.000000,0.0,0.000000,0.302013,0.000000,0.0,0.000000,0.510067,0.000000,0.0,0.179530,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.991611,0.994966,0.031369,0.031711,0.059077,0.062422,0.067975,0.072622,0.065095,0.067303,0.078860,0.073418,0.079299,0.069167,0.070278,0.057692,0.042296,0.028682,0.024514,0.010900,0.005429,0.001549,0.000219,0.000123,0.0,1.0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0.0,0.0,1.0,0.0,0.0,0.0,0,0,5,0,0,0,0.0,0.0,1.0,0.0,0.0,0,0,5,0,0,149910.0,199878.000000,274570.0,999390.0,8084000.0,1.435520e+07,24190000.0,71776000.0,20079.0,0.029533,0.029683,33.689597,1.005059,0.0,0.001686,558.0,17.028571,16.942857,561.0,0.267580,0.293533,6,4,3,7
147,C2152,0.0,120.0,9.0,0.0,1.0,40.0,120.0,0.000000,0.0,0.000000,0.550000,0.000000,0.0,0.450000,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.000000,0.333333,0.028888,0.030682,0.051287,0.052712,0.060374,0.063157,0.059821,0.054321,0.076201,0.068002,0.087266,0.072177,0.088423,0.070014,0.047732,0.032760,0.033515,0.013027,0.007628,0.001677,0.000319,0.000017,0.0,1.0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,1.0,0.0,0.0,0,0,0,2,0,0,0.0,0.0,0.0,1.0,0.0,0,0,0,2,0,42350.0,50040.000000,57730.0,100080.0,2129000.0,2.515500e+06,2902000.0,5031000.0,3366.0,0.011884,0.035651,28.050000,3.000000,0.0,0.025000,31.0,13.333333,4.444444,111.0,0.264515,0.311711,1,1,1,1
148,C1267,0.0,675.0,38.0,0.0,1.0,467.0,670.0,0.074074,0.0,0.162963,0.302222,0.000000,0.0,0.000000,0.186667,0.000000,0.0,0.266667,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.992593,0.691852,0.027400,0.026902,0.053257,0.055568,0.064920,0.070618,0.056414,0.057550,0.077092,0.067600,0.086873,0.072570,0.087201,0.069562,0.048357,0.033277,0.027361,0.011295,0.004910,0.001086,0.000179,0.000010,0.0,1.0,0,0,11,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,1.0,1.0,0.0,1.0,0,0,5,2,0,4,0.0,0.0,1.0,1.0,1.0,0,0,5,2,4,52896.0,114454.454545,219230.0,1258999.0,6882000.0,1.481329e+07,25247619.0,162946164.0,20592.0,0.022679,0.032780,30.506667,1.445396,0.0,0.002141,429.0,17.763158,12.289474,637.0,0.267281,0.307580,5,4,3,2


In [147]:
X

Unnamed: 0,단지코드,총세대수,공가수,지하철,버스,단지내주차면수,총임대가구수,면적_15,면적_18,면적_21,면적_24,면적_27,면적_30,면적_33,면적_36,면적_39,면적_42,면적_45,면적_48,면적_51,면적_54,면적_57,면적_60,면적_63,면적_66,면적_72,면적_75,면적_78,면적_81,면적_84,면적_100,임대비율,가구당주차면수,10대미만여자,10대미만남자,10대여자,10대남자,20대여자,20대남자,30대여자,30대남자,40대여자,40대남자,50대여자,50대남자,60대여자,60대남자,70대여자,70대남자,80대여자,80대남자,90대여자,90대남자,100대여자,100대남자,상가,아파트,아파트_상가,아파트합,상가합,강원도,경기도,경상남도,경상북도,광주광역시,대구광역시,대전광역시,부산광역시,세종특별자치시,울산광역시,전라남도,전라북도,제주특별자치도,충청남도,충청북도,공공임대50년_x,공공임대5년10년분납분양_x,국민임대장기전세_x,영구임대_x,임대상가_x,행복주택_x,공공임대50년_y,공공임대5년10년분납분양_y,국민임대장기전세_y,영구임대_y,임대상가_y,행복주택_y,A_x,D_x,국민임대장기전세_공급대상_x,영구임대_공급대상_x,행복주택_공급대상_x,A_y,D_y,국민임대장기전세_공급대상_y,영구임대_공급대상_y,행복주택_공급대상_y,임대료 임대료_최소,임대료 임대료_평균,임대료 임대료_최대,임대료 임대료_총합,임대보증금 임대보증금_최소,임대보증금 임대보증금_평균,임대보증금 임대보증금_최대,임대보증금 임대보증금_총합,총전용면적,면적당 주차면수,면적당 세대수,면적당 세대수_2,세대수_주차면수,지하철_주차면수,버스_주차면수,공가수_주차면수,공가수_총세대수,주차면수_공가수,공가수_세대수,3060대남자비율,3060대여자비율,공가수_주차면수10분위,총전용면적 10분위,임대보증금 10분위,임대료 10분위
0,C2483,900.0,38.0,0.0,3.0,1425.0,900.0,0.0,0.0,0.0,0.000,0.000000,0.000000,0.000000,0.000000,0.165556,0.0,0.000000,0.000000,0.738889,0.000000,0.095556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.583333,0.030158,0.033195,0.056346,0.061360,0.060096,0.067859,0.053433,0.049572,0.083660,0.072613,0.087149,0.072146,0.082684,0.063889,0.047717,0.030172,0.029361,0.011211,0.005578,0.001553,0.000234,0.000014,0.0,1.0,0,0,8,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0,0,8,0,0,0,1.0,0.0,0.0,0.0,0.0,8,0,0,0,0,103680.0,167910.000000,214270.0,1343280.0,15667000.0,2.477638e+07,30357000.0,198211000.0,44628.0,0.031931,0.020167,49.586667,0.631579,0.000000,0.002105,1387.0,23.684211,37.500000,862.0,0.258220,0.306926,10,9,7,6
1,C2515,545.0,17.0,0.0,3.0,624.0,545.0,0.0,0.0,0.0,0.000,0.000000,0.000000,0.506422,0.000000,0.146789,0.0,0.104587,0.000000,0.242202,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.144954,0.027400,0.026902,0.053257,0.055568,0.064920,0.070618,0.056414,0.057550,0.077092,0.067600,0.086873,0.072570,0.087201,0.069562,0.048357,0.033277,0.027361,0.011295,0.004910,0.001086,0.000179,0.000010,0.0,1.0,0,0,7,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0,0,7,0,0,0,1.0,0.0,0.0,0.0,0.0,7,0,0,0,0,82940.0,139557.142857,190090.0,976900.0,9216000.0,1.678714e+07,23042000.0,117510000.0,21525.0,0.028990,0.025319,39.495413,0.873397,0.000000,0.004808,607.0,32.058824,36.705882,528.0,0.267281,0.307580,7,4,4,4
2,C1407,1216.0,13.0,1.0,1.0,1285.0,1216.0,0.0,0.0,0.0,0.000,0.000000,0.320724,0.000000,0.000000,0.297697,0.0,0.279605,0.000000,0.101974,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.056743,0.028197,0.029092,0.040490,0.042793,0.060834,0.064247,0.068654,0.066848,0.074667,0.067925,0.085751,0.068819,0.088468,0.070261,0.051010,0.037143,0.032455,0.013751,0.006494,0.001740,0.000298,0.000066,0.0,1.0,0,0,11,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0,0,11,0,0,0,1.0,0.0,0.0,0.0,0.0,11,0,0,0,0,127350.0,176730.000000,253520.0,1944030.0,15620000.0,2.851055e+07,42056000.0,313616000.0,47442.0,0.027086,0.025631,39.014803,0.946304,0.000778,0.000778,1272.0,93.538462,98.846154,1203.0,0.273852,0.317539,10,9,8,6
3,C1945,755.0,6.0,1.0,3.0,734.0,755.0,0.0,0.0,0.0,0.000,0.000000,0.000000,0.000000,0.000000,0.317881,0.0,0.000000,0.000000,0.401325,0.000000,0.280795,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.972185,0.038030,0.039507,0.052546,0.053990,0.058484,0.059894,0.072331,0.068704,0.083208,0.078355,0.077345,0.064859,0.074237,0.058419,0.042422,0.032725,0.025136,0.012354,0.005390,0.001707,0.000290,0.000067,0.0,1.0,0,0,6,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0,0,6,0,0,0,0.0,0.0,1.0,0.0,0.0,0,0,6,0,0,130070.0,189106.666667,244020.0,1134640.0,15607000.0,2.265167e+07,29209000.0,135910000.0,36897.0,0.019893,0.020462,48.870199,1.028610,0.001362,0.004087,728.0,125.833333,122.333333,749.0,0.270337,0.307120,8,8,7,7
4,C1470,696.0,14.0,0.0,2.0,645.0,696.0,0.0,0.0,0.0,0.000,0.000000,0.000000,0.000000,0.364943,0.000000,0.0,0.281609,0.000000,0.353448,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.926724,0.031930,0.034647,0.059154,0.063379,0.065517,0.070339,0.057977,0.053471,0.078358,0.075724,0.079966,0.077795,0.071465,0.062352,0.042535,0.030704,0.027340,0.009312,0.005965,0.001458,0.000149,0.000464,0.0,1.0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0,0,4,0,0,0,1.0,0.0,0.0,0.0,0.0,4,0,0,0,0,102130.0,120150.000000,144180.0,480600.0,12015000.0,1.772275e+07,25233000.0,70891000.0,30510.0,0.021141,0.022812,43.836207,1.079070,0.000000,0.003101,631.0,49.714286,46.071429,682.0,0.269341,0.287767,7,7,4,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
410,C2035,492.0,24.0,0.0,1.0,521.0,492.0,0.0,0.0,0.0,0.000,0.317073,0.000000,0.000000,0.317073,0.000000,0.0,0.365854,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.058943,0.028888,0.030682,0.051287,0.052712,0.060374,0.063157,0.059821,0.054321,0.076201,0.068002,0.087266,0.072177,0.088423,0.070014,0.047732,0.032760,0.033515,0.013027,0.007628,0.001677,0.000319,0.000017,0.0,1.0,0,0,5,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0,0,5,0,0,0,1.0,0.0,0.0,0.0,0.0,5,0,0,0,0,117000.0,158800.000000,200000.0,794000.0,10000000.0,1.573000e+07,27650000.0,78650000.0,17928.0,0.029061,0.027443,36.439024,0.944338,0.000000,0.001919,497.0,20.500000,21.708333,468.0,0.264515,0.311711,5,3,3,5
411,C2020,40.0,7.0,1.0,2.0,25.0,40.0,0.5,0.0,0.0,0.125,0.000000,0.000000,0.000000,0.375000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.625000,0.022003,0.022947,0.032681,0.035512,0.053796,0.057233,0.047049,0.048866,0.061952,0.060769,0.082899,0.068855,0.109297,0.085294,0.078743,0.053388,0.047908,0.020228,0.008043,0.002240,0.000268,0.000028,0.0,1.0,0,0,3,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0,0,0,0,0,0,3,0.0,0.0,0.0,0.0,1.0,0,0,0,0,3,87000.0,135976.666667,185600.0,407930.0,18000000.0,2.813333e+07,38400000.0,84400000.0,960.0,0.026042,0.041667,24.000000,1.600000,0.040000,0.080000,18.0,5.714286,3.571429,33.0,0.263784,0.301197,1,1,8,4
412,C2437,90.0,12.0,0.0,1.0,30.0,90.0,0.0,0.0,0.0,1.000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.333333,0.031369,0.031711,0.059077,0.062422,0.067975,0.072622,0.065095,0.067303,0.078860,0.073418,0.079299,0.069167,0.070278,0.057692,0.042296,0.028682,0.024514,0.010900,0.005429,0.001549,0.000219,0.000123,0.0,1.0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0.0,0.0,0.0,1.0,0.0,0.0,0,0,0,1,0,0,0.0,0.0,0.0,1.0,0.0,0,0,0,1,0,107530.0,107530.000000,107530.0,107530.0,10346000.0,1.034600e+07,10346000.0,10346000.0,2160.0,0.013889,0.041667,24.000000,3.000000,0.000000,0.033333,18.0,7.500000,2.500000,78.0,0.267580,0.293533,1,1,1,2
413,C2532,239.0,7.0,0.0,1.0,166.0,239.0,0.0,0.0,0.0,0.000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.079498,0.079498,0.761506,0.079498,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.694561,0.028888,0.030682,0.051287,0.052712,0.060374,0.063157,0.059821,0.054321,0.076201,0.068002,0.087266,0.072177,0.088423,0.070014,0.047732,0.032760,0.033515,0.013027,0.007628,0.001677,0.000319,0.000017,0.0,1.0,0,0,6,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0,0,6,0,0,0,1.0,0.0,0.0,0.0,0.0,6,0,0,0,0,116090.0,135023.333333,151030.0,810140.0,11346000.0,1.325617e+07,14830000.0,79537000.0,12075.0,0.013747,0.019793,50.523013,1.439759,0.000000,0.006024,159.0,34.142857,23.714286,232.0,0.264515,0.311711,1,2,2,4


In [461]:
df_train_edited

Unnamed: 0,단지코드,등록차량수,총세대수,공가수,지하철,버스,단지내주차면수,총임대가구수,면적_15,면적_18,면적_21,면적_24,면적_27,면적_30,면적_33,면적_36,면적_39,면적_42,면적_45,면적_48,면적_51,면적_54,면적_57,면적_60,면적_63,면적_66,면적_72,면적_75,면적_78,면적_81,면적_84,면적_100,임대비율,가구당주차면수,10대미만여자,10대미만남자,10대여자,10대남자,20대여자,20대남자,30대여자,30대남자,40대여자,40대남자,50대여자,50대남자,60대여자,60대남자,70대여자,70대남자,80대여자,80대남자,90대여자,90대남자,100대여자,100대남자,상가,아파트,아파트_상가,아파트합,상가합,강원도,경기도,경상남도,경상북도,광주광역시,대구광역시,대전광역시,부산광역시,세종특별자치시,울산광역시,전라남도,전라북도,제주특별자치도,충청남도,충청북도,공공임대50년_x,공공임대5년10년분납분양_x,국민임대장기전세_x,영구임대_x,임대상가_x,행복주택_x,공공임대50년_y,공공임대5년10년분납분양_y,국민임대장기전세_y,영구임대_y,임대상가_y,행복주택_y,A_x,D_x,국민임대장기전세_공급대상_x,영구임대_공급대상_x,행복주택_공급대상_x,A_y,D_y,국민임대장기전세_공급대상_y,영구임대_공급대상_y,행복주택_공급대상_y,임대료 임대료_최소,임대료 임대료_평균,임대료 임대료_최대,임대료 임대료_총합,임대보증금 임대보증금_최소,임대보증금 임대보증금_평균,임대보증금 임대보증금_최대,임대보증금 임대보증금_총합,총전용면적,면적당 주차면수,면적당 세대수,면적당 세대수_2,세대수_주차면수,지하철_주차면수,버스_주차면수,공가수_주차면수,공가수_총세대수,주차면수_공가수,공가수_세대수,3060대남자비율,3060대여자비율,공가수_주차면수10분위,총전용면적 10분위,임대보증금 10분위,임대료 10분위
0,C2483,1015.0,900.0,38.0,0.0,3.0,1425.0,900.0,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.165556,0.0,0.000000,0.000000,0.738889,0.000000,0.095556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.583333,0.030158,0.033195,0.056346,0.061360,0.060096,0.067859,0.053433,0.049572,0.083660,0.072613,0.087149,0.072146,0.082684,0.063889,0.047717,0.030172,0.029361,0.011211,0.005578,0.001553,0.000234,0.000014,0.0,1.0,0,0,8,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0,0,8,0,0,0,1.0,0.0,0.0,0.0,0.0,8,0,0,0,0,103680.0,167910.000000,214270.0,1343280.0,15667000.0,2.477638e+07,30357000.0,198211000.0,44628.0,0.031931,0.020167,49.586667,0.631579,0.000000,0.002105,1387.0,23.684211,37.500000,862.0,0.258220,0.306926,10,9,7,6
1,C2515,205.0,545.0,17.0,0.0,3.0,624.0,545.0,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.506422,0.000000,0.146789,0.0,0.104587,0.000000,0.242202,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.144954,0.027400,0.026902,0.053257,0.055568,0.064920,0.070618,0.056414,0.057550,0.077092,0.067600,0.086873,0.072570,0.087201,0.069562,0.048357,0.033277,0.027361,0.011295,0.004910,0.001086,0.000179,0.000010,0.0,1.0,0,0,7,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0,0,7,0,0,0,1.0,0.0,0.0,0.0,0.0,7,0,0,0,0,82940.0,139557.142857,190090.0,976900.0,9216000.0,1.678714e+07,23042000.0,117510000.0,21525.0,0.028990,0.025319,39.495413,0.873397,0.000000,0.004808,607.0,32.058824,36.705882,528.0,0.267281,0.307580,7,4,4,4
2,C1407,1064.0,1216.0,13.0,1.0,1.0,1285.0,1216.0,0.000000,0.0,0.0,0.000000,0.000000,0.320724,0.000000,0.000000,0.297697,0.0,0.279605,0.000000,0.101974,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.056743,0.028197,0.029092,0.040490,0.042793,0.060834,0.064247,0.068654,0.066848,0.074667,0.067925,0.085751,0.068819,0.088468,0.070261,0.051010,0.037143,0.032455,0.013751,0.006494,0.001740,0.000298,0.000066,0.0,1.0,0,0,11,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0,0,11,0,0,0,1.0,0.0,0.0,0.0,0.0,11,0,0,0,0,127350.0,176730.000000,253520.0,1944030.0,15620000.0,2.851055e+07,42056000.0,313616000.0,47442.0,0.027086,0.025631,39.014803,0.946304,0.000778,0.000778,1272.0,93.538462,98.846154,1203.0,0.273852,0.317539,10,9,8,6
3,C1945,730.0,755.0,6.0,1.0,3.0,734.0,755.0,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.317881,0.0,0.000000,0.000000,0.401325,0.000000,0.280795,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.972185,0.038030,0.039507,0.052546,0.053990,0.058484,0.059894,0.072331,0.068704,0.083208,0.078355,0.077345,0.064859,0.074237,0.058419,0.042422,0.032725,0.025136,0.012354,0.005390,0.001707,0.000290,0.000067,0.0,1.0,0,0,6,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0,0,6,0,0,0,0.0,0.0,1.0,0.0,0.0,0,0,6,0,0,130070.0,189106.666667,244020.0,1134640.0,15607000.0,2.265167e+07,29209000.0,135910000.0,36897.0,0.019893,0.020462,48.870199,1.028610,0.001362,0.004087,728.0,125.833333,122.333333,749.0,0.270337,0.307120,8,8,7,7
4,C1470,553.0,696.0,14.0,0.0,2.0,645.0,696.0,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.364943,0.000000,0.0,0.281609,0.000000,0.353448,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.926724,0.031930,0.034647,0.059154,0.063379,0.065517,0.070339,0.057977,0.053471,0.078358,0.075724,0.079966,0.077795,0.071465,0.062352,0.042535,0.030704,0.027340,0.009312,0.005965,0.001458,0.000149,0.000464,0.0,1.0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0,0,4,0,0,0,1.0,0.0,0.0,0.0,0.0,4,0,0,0,0,102130.0,120150.000000,144180.0,480600.0,12015000.0,1.772275e+07,25233000.0,70891000.0,30510.0,0.021141,0.022812,43.836207,1.079070,0.000000,0.003101,631.0,49.714286,46.071429,682.0,0.269341,0.287767,7,7,4,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
409,C2586,57.0,90.0,7.0,0.0,3.0,66.0,90.0,0.133333,0.0,0.0,0.466667,0.000000,0.000000,0.000000,0.400000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.733333,0.028089,0.029065,0.059685,0.060080,0.066262,0.070322,0.052027,0.046596,0.077005,0.066645,0.081408,0.074382,0.076636,0.068042,0.051025,0.035748,0.035049,0.012641,0.007223,0.001898,0.000158,0.000013,0.0,1.0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0.0,0.0,0.0,0.0,0.0,1.0,0,0,0,0,0,5,0.0,0.0,0.0,0.0,1.0,0,0,0,0,5,71400.0,111180.000000,153000.0,555900.0,16800000.0,2.616000e+07,36000000.0,130800000.0,2484.0,0.026570,0.036232,27.600000,1.363636,0.000000,0.045455,59.0,12.857143,9.428571,83.0,0.255665,0.287076,1,1,8,2
410,C2035,246.0,492.0,24.0,0.0,1.0,521.0,492.0,0.000000,0.0,0.0,0.000000,0.317073,0.000000,0.000000,0.317073,0.000000,0.0,0.365854,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.058943,0.028888,0.030682,0.051287,0.052712,0.060374,0.063157,0.059821,0.054321,0.076201,0.068002,0.087266,0.072177,0.088423,0.070014,0.047732,0.032760,0.033515,0.013027,0.007628,0.001677,0.000319,0.000017,0.0,1.0,0,0,5,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0,0,5,0,0,0,1.0,0.0,0.0,0.0,0.0,5,0,0,0,0,117000.0,158800.000000,200000.0,794000.0,10000000.0,1.573000e+07,27650000.0,78650000.0,17928.0,0.029061,0.027443,36.439024,0.944338,0.000000,0.001919,497.0,20.500000,21.708333,468.0,0.264515,0.311711,5,3,3,5
411,C2020,19.0,40.0,7.0,1.0,2.0,25.0,40.0,0.500000,0.0,0.0,0.125000,0.000000,0.000000,0.000000,0.375000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.625000,0.022003,0.022947,0.032681,0.035512,0.053796,0.057233,0.047049,0.048866,0.061952,0.060769,0.082899,0.068855,0.109297,0.085294,0.078743,0.053388,0.047908,0.020228,0.008043,0.002240,0.000268,0.000028,0.0,1.0,0,0,3,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0,0,0,0,0,0,3,0.0,0.0,0.0,0.0,1.0,0,0,0,0,3,87000.0,135976.666667,185600.0,407930.0,18000000.0,2.813333e+07,38400000.0,84400000.0,960.0,0.026042,0.041667,24.000000,1.600000,0.040000,0.080000,18.0,5.714286,3.571429,33.0,0.263784,0.301197,1,1,8,4
412,C2437,16.0,90.0,12.0,0.0,1.0,30.0,90.0,0.000000,0.0,0.0,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.333333,0.031369,0.031711,0.059077,0.062422,0.067975,0.072622,0.065095,0.067303,0.078860,0.073418,0.079299,0.069167,0.070278,0.057692,0.042296,0.028682,0.024514,0.010900,0.005429,0.001549,0.000219,0.000123,0.0,1.0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0.0,0.0,0.0,1.0,0.0,0.0,0,0,0,1,0,0,0.0,0.0,0.0,1.0,0.0,0,0,0,1,0,107530.0,107530.000000,107530.0,107530.0,10346000.0,1.034600e+07,10346000.0,10346000.0,2160.0,0.013889,0.041667,24.000000,3.000000,0.000000,0.033333,18.0,7.500000,2.500000,78.0,0.267580,0.293533,1,1,1,2


In [462]:
df_test_edited

Unnamed: 0,단지코드,등록차량수,총세대수,공가수,지하철,버스,단지내주차면수,총임대가구수,면적_15,면적_18,면적_21,면적_24,면적_27,면적_30,면적_33,면적_36,면적_39,면적_42,면적_45,면적_48,면적_51,면적_54,면적_57,면적_60,면적_63,면적_66,면적_72,면적_75,면적_78,면적_81,면적_84,면적_100,임대비율,가구당주차면수,10대미만여자,10대미만남자,10대여자,10대남자,20대여자,20대남자,30대여자,30대남자,40대여자,40대남자,50대여자,50대남자,60대여자,60대남자,70대여자,70대남자,80대여자,80대남자,90대여자,90대남자,100대여자,100대남자,상가,아파트,아파트_상가,아파트합,상가합,강원도,경기도,경상남도,경상북도,광주광역시,대구광역시,대전광역시,부산광역시,세종특별자치시,울산광역시,전라남도,전라북도,제주특별자치도,충청남도,충청북도,공공임대50년_x,공공임대5년10년분납분양_x,국민임대장기전세_x,영구임대_x,임대상가_x,행복주택_x,공공임대50년_y,공공임대5년10년분납분양_y,국민임대장기전세_y,영구임대_y,임대상가_y,행복주택_y,A_x,D_x,국민임대장기전세_공급대상_x,영구임대_공급대상_x,행복주택_공급대상_x,A_y,D_y,국민임대장기전세_공급대상_y,영구임대_공급대상_y,행복주택_공급대상_y,임대료 임대료_최소,임대료 임대료_평균,임대료 임대료_최대,임대료 임대료_총합,임대보증금 임대보증금_최소,임대보증금 임대보증금_평균,임대보증금 임대보증금_최대,임대보증금 임대보증금_총합,총전용면적,면적당 주차면수,면적당 세대수,면적당 세대수_2,세대수_주차면수,지하철_주차면수,버스_주차면수,공가수_주차면수,공가수_총세대수,주차면수_공가수,공가수_세대수,3060대남자비율,3060대여자비율,공가수_주차면수10분위,총전용면적 10분위,임대보증금 10분위,임대료 10분위
0,C1072,0.0,754.0,14.0,0.0,2.0,683.0,754.0,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.153846,0.0,0.347480,0.0,0.498674,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.000000,0.905836,0.038030,0.039507,0.052546,0.053990,0.058484,0.059894,0.072331,0.068704,0.083208,0.078355,0.077345,0.064859,0.074237,0.058419,0.042422,0.032725,0.025136,0.012354,0.005390,0.001707,0.000290,0.000067,0.0,1.0,0,0,8,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0,0,8,0,0,0,1.0,0.0,1.0,0.0,0.0,1,0,7,0,0,189840.0,265843.750000,296780.0,2126750.0,22830000.0,3.812025e+07,43497000.0,304962000.0,35490.0,0.019245,0.021245,47.068966,1.103953,0.0,0.002928,669.0,53.857143,48.785714,740.0,0.270337,0.307120,7,8,9,9
1,C1128,0.0,1354.0,9.0,0.0,3.0,1216.0,1354.0,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.310192,0.0,0.262925,0.0,0.426883,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.000000,0.898080,0.038030,0.039507,0.052546,0.053990,0.058484,0.059894,0.072331,0.068704,0.083208,0.078355,0.077345,0.064859,0.074237,0.058419,0.042422,0.032725,0.025136,0.012354,0.005390,0.001707,0.000290,0.000067,0.0,1.0,0,0,9,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0,0,9,0,0,0,1.0,0.0,1.0,0.0,0.0,4,0,5,0,0,189840.0,245516.666667,296780.0,2209650.0,22830000.0,3.412500e+07,43497000.0,307125000.0,61878.0,0.019652,0.021882,45.700148,1.113487,0.0,0.002467,1207.0,150.444444,135.111111,1345.0,0.270337,0.307120,10,10,9,9
2,C1456,0.0,619.0,18.0,0.0,16.0,547.0,619.0,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.132472,0.000000,0.266559,0.0,0.316640,0.0,0.213247,0.0,0.071082,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.000000,0.883683,0.022003,0.022947,0.032681,0.035512,0.053796,0.057233,0.047049,0.048866,0.061952,0.060769,0.082899,0.068855,0.109297,0.085294,0.078743,0.053388,0.047908,0.020228,0.008043,0.002240,0.000268,0.000028,0.0,1.0,0,0,9,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0,0,9,0,0,0,1.0,0.0,0.0,0.0,0.0,9,0,0,0,0,156200.0,223624.444444,312420.0,2012620.0,19706000.0,3.396522e+07,55275000.0,305687000.0,27201.0,0.020110,0.022757,43.943457,1.131627,0.0,0.029250,529.0,34.388889,30.388889,601.0,0.263784,0.301197,6,6,9,8
3,C1840,0.0,593.0,7.0,0.0,3.0,543.0,593.0,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.564924,0.0,0.293423,0.0,0.141653,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.000000,0.915683,0.031930,0.034647,0.059154,0.063379,0.065517,0.070339,0.057977,0.053471,0.078358,0.075724,0.079966,0.077795,0.071465,0.062352,0.042535,0.030704,0.027340,0.009312,0.005965,0.001458,0.000149,0.000464,0.0,1.0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0,0,4,0,0,0,1.0,0.0,0.0,0.0,0.0,4,0,0,0,0,108130.0,123150.000000,144180.0,492600.0,14418000.0,1.892425e+07,25233000.0,75697000.0,25179.0,0.021566,0.023551,42.460371,1.092081,0.0,0.005525,536.0,84.714286,77.571429,586.0,0.269341,0.287767,6,5,5,3
4,C1332,0.0,1297.0,11.0,0.0,2.0,1112.0,1297.0,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.327679,0.0,0.437934,0.0,0.234387,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.000000,0.857363,0.038030,0.039507,0.052546,0.053990,0.058484,0.059894,0.072331,0.068704,0.083208,0.078355,0.077345,0.064859,0.074237,0.058419,0.042422,0.032725,0.025136,0.012354,0.005390,0.001707,0.000290,0.000067,0.0,1.0,0,0,8,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0,0,8,0,0,0,0.0,0.0,1.0,0.0,0.0,0,0,8,0,0,203050.0,292880.000000,362870.0,2343040.0,28598000.0,4.133450e+07,51188000.0,330676000.0,57639.0,0.019292,0.022502,44.440247,1.166367,0.0,0.001799,1101.0,117.909091,101.090909,1286.0,0.270337,0.307120,10,10,10,10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145,C2456,0.0,349.0,17.0,0.0,4.0,270.0,346.0,0.000000,0.0,0.000000,0.320917,0.000000,0.0,0.000000,0.395415,0.000000,0.0,0.275072,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.991404,0.773639,0.028089,0.029065,0.059685,0.060080,0.066262,0.070322,0.052027,0.046596,0.077005,0.066645,0.081408,0.074382,0.076636,0.068042,0.051025,0.035748,0.035049,0.012641,0.007223,0.001898,0.000158,0.000013,0.0,1.0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0,0,4,0,0,0,0.0,0.0,1.0,0.0,0.0,0,0,4,0,0,117000.0,155082.500000,217100.0,620330.0,6992000.0,1.222875e+07,21246000.0,48915000.0,11976.0,0.022545,0.029142,34.315186,1.292593,0.0,0.014815,253.0,20.529412,15.882353,332.0,0.255665,0.287076,2,2,1,5
146,C1266,0.0,596.0,35.0,0.0,1.0,593.0,591.0,0.000000,0.0,0.000000,0.302013,0.000000,0.0,0.000000,0.510067,0.000000,0.0,0.179530,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.991611,0.994966,0.031369,0.031711,0.059077,0.062422,0.067975,0.072622,0.065095,0.067303,0.078860,0.073418,0.079299,0.069167,0.070278,0.057692,0.042296,0.028682,0.024514,0.010900,0.005429,0.001549,0.000219,0.000123,0.0,1.0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0.0,0.0,1.0,0.0,0.0,0.0,0,0,5,0,0,0,0.0,0.0,1.0,0.0,0.0,0,0,5,0,0,149910.0,199878.000000,274570.0,999390.0,8084000.0,1.435520e+07,24190000.0,71776000.0,20079.0,0.029533,0.029683,33.689597,1.005059,0.0,0.001686,558.0,17.028571,16.942857,561.0,0.267580,0.293533,6,4,3,7
147,C2152,0.0,120.0,9.0,0.0,1.0,40.0,120.0,0.000000,0.0,0.000000,0.550000,0.000000,0.0,0.450000,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.000000,0.333333,0.028888,0.030682,0.051287,0.052712,0.060374,0.063157,0.059821,0.054321,0.076201,0.068002,0.087266,0.072177,0.088423,0.070014,0.047732,0.032760,0.033515,0.013027,0.007628,0.001677,0.000319,0.000017,0.0,1.0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,1.0,0.0,0.0,0,0,0,2,0,0,0.0,0.0,0.0,1.0,0.0,0,0,0,2,0,42350.0,50040.000000,57730.0,100080.0,2129000.0,2.515500e+06,2902000.0,5031000.0,3366.0,0.011884,0.035651,28.050000,3.000000,0.0,0.025000,31.0,13.333333,4.444444,111.0,0.264515,0.311711,1,1,1,1
148,C1267,0.0,675.0,38.0,0.0,1.0,467.0,670.0,0.074074,0.0,0.162963,0.302222,0.000000,0.0,0.000000,0.186667,0.000000,0.0,0.266667,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.992593,0.691852,0.027400,0.026902,0.053257,0.055568,0.064920,0.070618,0.056414,0.057550,0.077092,0.067600,0.086873,0.072570,0.087201,0.069562,0.048357,0.033277,0.027361,0.011295,0.004910,0.001086,0.000179,0.000010,0.0,1.0,0,0,11,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,1.0,1.0,0.0,1.0,0,0,5,2,0,4,0.0,0.0,1.0,1.0,1.0,0,0,5,2,4,52896.0,114454.454545,219230.0,1258999.0,6882000.0,1.481329e+07,25247619.0,162946164.0,20592.0,0.022679,0.032780,30.506667,1.445396,0.0,0.002141,429.0,17.763158,12.289474,637.0,0.267281,0.307580,5,4,3,2


In [463]:
df_test_edited=df_test_edited.set_index('단지코드')

In [466]:
X_test = df_test_edited[feature_names]
X_importance = X_test

# Explain model predictions using shap library:
model = LGBMRegressor(random_state=0).fit(X, y)
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X_importance)

# Plot summary_plot as barplot:
#shap.summary_plot(shap_values, X_importance, plot_type='bar')

shap_sum = np.abs(shap_values).mean(axis=0)
importance_df = pd.DataFrame([X_importance.columns.tolist(), shap_sum.tolist()]).T
importance_df.columns = ['column_name', 'shap_importance']
importance_df = importance_df.sort_values('shap_importance', ascending=False)
importance_df

Unnamed: 0,column_name,shap_importance
111,공가수_주차면수,146.89802
4,단지내주차면수,72.594297
104,총전용면적,52.302925
110,버스_주차면수,11.779166
100,임대보증금 임대보증금_최소,11.689082
...,...,...
63,광주광역시,0.0
62,경상북도,0.0
61,경상남도,0.0
59,강원도,0.0


In [467]:
# 과적합을 피하기 위해 최적의 p값 주변의 값을 선택하는게 더 나은 결과를 얻을 수 있다. 
features_selected = importance_df.query('shap_importance > 0').column_name
X= X[features_selected]
X_test = X_test[features_selected]
print(X.shape)

(414, 62)


In [465]:
df_train_edited=df_train_edited.set_index('단지코드')
X = df_train_edited.copy()
#X = X.loc[:,corr.index[0:20]]
X.drop(['등록차량수',  ],axis=1, inplace=True)
feature_names = X.columns.to_list()
#scaler = StandardScaler()
#X = scaler.fit(X).transform(X)

y = df_train_edited.iloc[:,0]
#X=X.values
# 피쳐 선택은 나중에 우선 상관도 높은 20개 사용 

In [468]:
models = {'RF' : RandomForestRegressor(), 'LR': LinearRegression() , 'RD' : Ridge(), 'LS' : Lasso(), 'ET' : ElasticNet(),
          'XGB' : XGBRegressor(), 
          'LGB' : LGB.LGBMRegressor(), 
          'CB' : CatBoostRegressor(logging_level='Silent'), 
          'PLS' : PLSRegression()}

#CatBoostRegressor은 시끄러운 녀석이라 닥쳐
kfold = KFold(n_splits=5, shuffle = True, random_state=0)
# n_split : 몇개로 분할할지
# shuffle : Fold를 나누기 전에 무작위로 섞을지
# random_state : 나눈 Fold를 그대로 사용할지
answer = []
for model in models.keys():
    print(model)
    scores = cross_val_score(models[model] , X, y, cv=kfold, scoring='neg_mean_absolute_error')
    answer.append(scores)

corss_val_result = pd.DataFrame(answer)
corss_val_result.index = models.keys()
corss_val_result['mean'] = corss_val_result.mean(axis=1)




RF
LR
RD
LS



Ill-conditioned matrix (rcond=9.03309e-20): result may not be accurate.


Ill-conditioned matrix (rcond=8.80205e-20): result may not be accurate.


Ill-conditioned matrix (rcond=8.85884e-20): result may not be accurate.


Ill-conditioned matrix (rcond=1.04148e-19): result may not be accurate.


Ill-conditioned matrix (rcond=8.78208e-20): result may not be accurate.


Objective did not converge. You might want to increase the number of iterations. Duality gap: 4549468.419947619, tolerance: 5272.56140060423


Objective did not converge. You might want to increase the number of iterations. Duality gap: 4352158.104311803, tolerance: 5182.873963141995


Objective did not converge. You might want to increase the number of iterations. Duality gap: 4570576.759775229, tolerance: 4946.319734743203


Objective did not converge. You might want to increase the number of iterations. Duality gap: 4197670.528062156, tolerance: 4700.724982477342


Objective did not converge. You might want to increase

ET



Objective did not converge. You might want to increase the number of iterations. Duality gap: 4324793.236351046, tolerance: 4700.724982477342


Objective did not converge. You might want to increase the number of iterations. Duality gap: 4394773.505150417, tolerance: 4725.915538554217



XGB
LGB
CB
PLS


In [469]:
corss_val_result

Unnamed: 0,0,1,2,3,4,mean
RF,-111.900482,-132.425301,-116.892892,-112.168916,-135.20378,-121.718274
LR,-185.131641,-121.645663,-133.708009,-132.329614,-146.00465,-143.763915
RD,-135.167601,-130.440642,-127.503686,-120.506044,-131.048845,-128.933363
LS,-131.686814,-131.295676,-125.255045,-123.198522,-129.997535,-128.286718
ET,-124.255267,-131.111553,-120.351122,-124.318075,-133.033055,-126.613814
XGB,-122.468311,-146.517721,-124.897067,-135.070733,-160.067574,-137.804281
LGB,-121.774266,-136.894049,-123.416946,-130.480395,-151.60282,-132.833695
CB,-106.027371,-126.524142,-113.125613,-111.569346,-134.23445,-118.296184
PLS,-119.843947,-153.445134,-148.282018,-146.832783,-136.878731,-141.056522


In [166]:
df_sub

Unnamed: 0,code,num
0,C1072,0
1,C1128,0
2,C1456,0
3,C1840,0
4,C1332,0
...,...,...
145,C2456,0
146,C1266,0
147,C2152,0
148,C1267,0


In [164]:
X_test

Unnamed: 0_level_0,공가수_주차면수,단지내주차면수,총전용면적,총세대수,임대보증금 임대보증금_최소,버스_주차면수,임대료 임대료_최대,임대료 임대료_최소,공가수_총세대수,주차면수_공가수,임대보증금 임대보증금_평균,100대남자,공가수,면적당 세대수,상가합,공가수_세대수,임대보증금 임대보증금_총합,면적_45,70대남자,총임대가구수,30대여자,A_y,임대보증금 임대보증금_최대,버스,가구당주차면수,80대여자,면적당 주차면수,면적_39,임대료 임대료_총합,임대료 임대료_평균,면적_27,면적_36,국민임대장기전세_y,40대여자,50대남자,20대여자,면적_57,공공임대5년10년분납분양_y,세대수_주차면수,면적_51,50대여자,40대남자,80대남자,임대비율,면적당 세대수_2,3060대여자비율,90대남자,20대남자,30대남자,10대여자,10대미만남자,90대여자,10대미만여자,100대여자,임대보증금 10분위,국민임대장기전세_공급대상_y,총전용면적 10분위,면적_24,70대여자,3060대남자비율,면적_21,60대남자,60대여자
단지코드,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1
C1128,1207.0,1216.0,61878.0,1354.0,22830000.0,0.002467,296780.0,189840.0,150.444444,135.111111,3.412500e+07,0.000067,9.0,0.021882,9,1345.0,307125000.0,0.262925,0.032725,1354.0,0.072331,4,43497000.0,3.0,0.898080,0.025136,0.019652,0.310192,2209650.0,245516.666667,0.000000,0.000000,9,0.083208,0.064859,0.058484,0.000000,0,1.113487,0.426883,0.077345,0.078355,0.012354,1.000000,45.700148,0.307120,0.001707,0.059894,0.068704,0.052546,0.039507,0.005390,0.038030,0.000290,9,5,10,0.000000,0.042422,0.270337,0.000000,0.058419,0.074237
C1456,529.0,547.0,27201.0,619.0,19706000.0,0.029250,312420.0,156200.0,34.388889,30.388889,3.396522e+07,0.000028,18.0,0.022757,9,601.0,305687000.0,0.316640,0.053388,619.0,0.047049,9,55275000.0,16.0,0.883683,0.047908,0.020110,0.266559,2012620.0,223624.444444,0.000000,0.000000,9,0.061952,0.068855,0.053796,0.071082,0,1.131627,0.213247,0.082899,0.060769,0.020228,1.000000,43.943457,0.301197,0.002240,0.057233,0.048866,0.032681,0.022947,0.008043,0.022003,0.000268,9,0,6,0.000000,0.078743,0.263784,0.000000,0.085294,0.109297
C1840,536.0,543.0,25179.0,593.0,14418000.0,0.005525,144180.0,108130.0,84.714286,77.571429,1.892425e+07,0.000464,7.0,0.023551,4,586.0,75697000.0,0.293423,0.030704,593.0,0.057977,4,25233000.0,3.0,0.915683,0.027340,0.021566,0.564924,492600.0,123150.000000,0.000000,0.000000,4,0.078358,0.077795,0.065517,0.000000,0,1.092081,0.141653,0.079966,0.075724,0.009312,1.000000,42.460371,0.287767,0.001458,0.070339,0.053471,0.059154,0.034647,0.005965,0.031930,0.000149,5,0,5,0.000000,0.042535,0.269341,0.000000,0.062352,0.071465
C1332,1101.0,1112.0,57639.0,1297.0,28598000.0,0.001799,362870.0,203050.0,117.909091,101.090909,4.133450e+07,0.000067,11.0,0.022502,8,1286.0,330676000.0,0.437934,0.032725,1297.0,0.072331,0,51188000.0,2.0,0.857363,0.025136,0.019292,0.327679,2343040.0,292880.000000,0.000000,0.000000,8,0.083208,0.064859,0.058484,0.000000,0,1.166367,0.234387,0.077345,0.078355,0.012354,1.000000,44.440247,0.307120,0.001707,0.059894,0.068704,0.052546,0.039507,0.005390,0.038030,0.000290,10,8,10,0.000000,0.042422,0.270337,0.000000,0.058419,0.074237
C1563,1681.0,1696.0,88110.0,1974.0,30040000.0,0.003538,360480.0,206380.0,131.600000,113.066667,4.269218e+07,0.000067,15.0,0.022404,11,1959.0,469614000.0,0.332320,0.032725,1974.0,0.072331,9,53110000.0,6.0,0.859169,0.025136,0.019249,0.364235,3225360.0,293214.545455,0.000000,0.000000,11,0.083208,0.064859,0.058484,0.000000,0,1.163915,0.303445,0.077345,0.078355,0.012354,1.000000,44.635258,0.307120,0.001707,0.059894,0.068704,0.052546,0.039507,0.005390,0.038030,0.000290,10,2,10,0.000000,0.042422,0.270337,0.000000,0.058419,0.074237
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
C2456,253.0,270.0,11976.0,349.0,6992000.0,0.014815,217100.0,117000.0,20.529412,15.882353,1.222875e+07,0.000013,17.0,0.029142,4,332.0,48915000.0,0.275072,0.035748,346.0,0.052027,0,21246000.0,4.0,0.773639,0.035049,0.022545,0.000000,620330.0,155082.500000,0.000000,0.395415,4,0.077005,0.074382,0.066262,0.000000,0,1.292593,0.000000,0.081408,0.066645,0.012641,0.991404,34.315186,0.287076,0.001898,0.070322,0.046596,0.059685,0.029065,0.007223,0.028089,0.000158,1,4,2,0.320917,0.051025,0.255665,0.000000,0.068042,0.076636
C1266,558.0,593.0,20079.0,596.0,8084000.0,0.001686,274570.0,149910.0,17.028571,16.942857,1.435520e+07,0.000123,35.0,0.029683,5,561.0,71776000.0,0.179530,0.028682,591.0,0.065095,0,24190000.0,1.0,0.994966,0.024514,0.029533,0.000000,999390.0,199878.000000,0.000000,0.510067,5,0.078860,0.069167,0.067975,0.000000,0,1.005059,0.000000,0.079299,0.073418,0.010900,0.991611,33.689597,0.293533,0.001549,0.072622,0.067303,0.059077,0.031711,0.005429,0.031369,0.000219,3,5,4,0.302013,0.042296,0.267580,0.000000,0.057692,0.070278
C2152,31.0,40.0,3366.0,120.0,2129000.0,0.025000,57730.0,42350.0,13.333333,4.444444,2.515500e+06,0.000017,9.0,0.035651,2,111.0,5031000.0,0.000000,0.032760,120.0,0.059821,0,2902000.0,1.0,0.333333,0.033515,0.011884,0.000000,100080.0,50040.000000,0.000000,0.000000,0,0.076201,0.072177,0.060374,0.000000,0,3.000000,0.000000,0.087266,0.068002,0.013027,1.000000,28.050000,0.311711,0.001677,0.063157,0.054321,0.051287,0.030682,0.007628,0.028888,0.000319,1,0,1,0.550000,0.047732,0.264515,0.000000,0.070014,0.088423
C1267,429.0,467.0,20592.0,675.0,6882000.0,0.002141,219230.0,52896.0,17.763158,12.289474,1.481329e+07,0.000010,38.0,0.032780,11,637.0,162946164.0,0.266667,0.033277,670.0,0.056414,0,25247619.0,1.0,0.691852,0.027361,0.022679,0.000000,1258999.0,114454.454545,0.000000,0.186667,5,0.077092,0.072570,0.064920,0.000000,0,1.445396,0.000000,0.086873,0.067600,0.011295,0.992593,30.506667,0.307580,0.001086,0.070618,0.057550,0.053257,0.026902,0.004910,0.027400,0.000179,3,5,4,0.302222,0.048357,0.267281,0.162963,0.069562,0.087201


In [471]:
df_sub

Unnamed: 0,code,num
0,C1072,762.223142
1,C1128,1339.288960
2,C1456,511.635147
3,C1840,519.178528
4,C1332,1267.756269
...,...,...
145,C2456,248.172090
146,C1266,452.427358
147,C2152,0.256270
148,C1267,384.801333


In [470]:
model = CB = CatBoostRegressor(random_state=0)
model.fit(X, y)

#X_test = df_test_edited[feature_names]

pred = CB.predict(X_test)
#submission = df_sub['num']

df_sub['num'] = pred
df_sub.to_csv('moonjoo_0722_10.csv', index=False)



Learning rate set to 0.03336
0:	learn: 380.5049458	total: 23ms	remaining: 22.9s
1:	learn: 374.1638020	total: 25.5ms	remaining: 12.7s
2:	learn: 368.2923734	total: 27.9ms	remaining: 9.28s
3:	learn: 361.7946605	total: 30.5ms	remaining: 7.6s
4:	learn: 355.7166890	total: 33ms	remaining: 6.56s
5:	learn: 349.2223604	total: 35.6ms	remaining: 5.89s
6:	learn: 343.5915838	total: 37.9ms	remaining: 5.38s
7:	learn: 337.0180246	total: 40.5ms	remaining: 5.02s
8:	learn: 331.2154472	total: 43.3ms	remaining: 4.76s
9:	learn: 325.6437767	total: 45.7ms	remaining: 4.53s
10:	learn: 321.0837255	total: 48.5ms	remaining: 4.36s
11:	learn: 316.1298018	total: 51ms	remaining: 4.2s
12:	learn: 310.8121470	total: 53.2ms	remaining: 4.04s
13:	learn: 306.3270718	total: 55.3ms	remaining: 3.9s
14:	learn: 301.8454135	total: 57.5ms	remaining: 3.77s
15:	learn: 296.7120742	total: 59.7ms	remaining: 3.67s
16:	learn: 291.6036683	total: 61.8ms	remaining: 3.57s
17:	learn: 286.9388916	total: 64.4ms	remaining: 3.51s
18:	learn: 283.046

217:	learn: 101.7934016	total: 507ms	remaining: 1.82s
218:	learn: 101.6148136	total: 509ms	remaining: 1.82s
219:	learn: 101.3205294	total: 512ms	remaining: 1.81s
220:	learn: 100.8864619	total: 514ms	remaining: 1.81s
221:	learn: 100.6770560	total: 516ms	remaining: 1.81s
222:	learn: 100.3812554	total: 519ms	remaining: 1.81s
223:	learn: 100.2287745	total: 521ms	remaining: 1.8s
224:	learn: 99.9623154	total: 524ms	remaining: 1.8s
225:	learn: 99.7968467	total: 526ms	remaining: 1.8s
226:	learn: 99.5166404	total: 528ms	remaining: 1.8s
227:	learn: 99.2869352	total: 531ms	remaining: 1.8s
228:	learn: 99.0483527	total: 533ms	remaining: 1.79s
229:	learn: 98.7391164	total: 536ms	remaining: 1.79s
230:	learn: 98.6694853	total: 538ms	remaining: 1.79s
231:	learn: 98.2930633	total: 540ms	remaining: 1.79s
232:	learn: 98.1064233	total: 542ms	remaining: 1.78s
233:	learn: 98.0043013	total: 545ms	remaining: 1.78s
234:	learn: 97.8037568	total: 547ms	remaining: 1.78s
235:	learn: 97.6298870	total: 549ms	remainin

442:	learn: 59.2922104	total: 1.02s	remaining: 1.28s
443:	learn: 59.1189020	total: 1.02s	remaining: 1.28s
444:	learn: 59.0373742	total: 1.02s	remaining: 1.27s
445:	learn: 58.8710066	total: 1.02s	remaining: 1.27s
446:	learn: 58.5686857	total: 1.03s	remaining: 1.27s
447:	learn: 58.5401081	total: 1.03s	remaining: 1.27s
448:	learn: 58.3896242	total: 1.03s	remaining: 1.27s
449:	learn: 58.2466204	total: 1.03s	remaining: 1.26s
450:	learn: 58.1064293	total: 1.04s	remaining: 1.26s
451:	learn: 58.0844249	total: 1.04s	remaining: 1.26s
452:	learn: 57.9130277	total: 1.04s	remaining: 1.26s
453:	learn: 57.8922396	total: 1.04s	remaining: 1.25s
454:	learn: 57.7047117	total: 1.05s	remaining: 1.25s
455:	learn: 57.5599999	total: 1.05s	remaining: 1.25s
456:	learn: 57.4792987	total: 1.05s	remaining: 1.25s
457:	learn: 57.3366711	total: 1.05s	remaining: 1.25s
458:	learn: 57.2626529	total: 1.05s	remaining: 1.24s
459:	learn: 57.0929448	total: 1.06s	remaining: 1.24s
460:	learn: 56.9388811	total: 1.06s	remaining:

662:	learn: 36.5753811	total: 1.53s	remaining: 778ms
663:	learn: 36.4976322	total: 1.53s	remaining: 775ms
664:	learn: 36.4556754	total: 1.53s	remaining: 773ms
665:	learn: 36.4429582	total: 1.54s	remaining: 771ms
666:	learn: 36.3020389	total: 1.54s	remaining: 768ms
667:	learn: 36.1808415	total: 1.54s	remaining: 766ms
668:	learn: 36.0559219	total: 1.54s	remaining: 764ms
669:	learn: 36.0023790	total: 1.55s	remaining: 762ms
670:	learn: 35.9540043	total: 1.55s	remaining: 759ms
671:	learn: 35.8551423	total: 1.55s	remaining: 757ms
672:	learn: 35.8320868	total: 1.55s	remaining: 755ms
673:	learn: 35.7482799	total: 1.55s	remaining: 752ms
674:	learn: 35.6146740	total: 1.56s	remaining: 750ms
675:	learn: 35.5857910	total: 1.56s	remaining: 748ms
676:	learn: 35.4973499	total: 1.56s	remaining: 745ms
677:	learn: 35.3924864	total: 1.56s	remaining: 743ms
678:	learn: 35.3688509	total: 1.57s	remaining: 741ms
679:	learn: 35.3267783	total: 1.57s	remaining: 738ms
680:	learn: 35.3146793	total: 1.57s	remaining:

891:	learn: 23.4921212	total: 2.04s	remaining: 247ms
892:	learn: 23.4857740	total: 2.04s	remaining: 245ms
893:	learn: 23.4003171	total: 2.05s	remaining: 243ms
894:	learn: 23.3731616	total: 2.05s	remaining: 240ms
895:	learn: 23.3279962	total: 2.05s	remaining: 238ms
896:	learn: 23.3004330	total: 2.05s	remaining: 236ms
897:	learn: 23.2139378	total: 2.06s	remaining: 234ms
898:	learn: 23.1977565	total: 2.06s	remaining: 231ms
899:	learn: 23.1536269	total: 2.06s	remaining: 229ms
900:	learn: 23.1222180	total: 2.06s	remaining: 227ms
901:	learn: 23.0905289	total: 2.08s	remaining: 226ms
902:	learn: 23.0370826	total: 2.08s	remaining: 224ms
903:	learn: 22.9854897	total: 2.08s	remaining: 221ms
904:	learn: 22.9802185	total: 2.09s	remaining: 219ms
905:	learn: 22.9712969	total: 2.09s	remaining: 217ms
906:	learn: 22.9197517	total: 2.09s	remaining: 214ms
907:	learn: 22.8551853	total: 2.09s	remaining: 212ms
908:	learn: 22.7958198	total: 2.1s	remaining: 210ms
909:	learn: 22.7791910	total: 2.1s	remaining: 2

<h3> 상관도 확인

In [281]:
df_train_edited=df_train_edited.reset_index()

In [473]:
corr = df_train_edited.drop(['단지코드'],1,).corr()["등록차량수"].abs().sort_values(ascending=False)
corr

등록차량수           1.000000
공가수_주차면수        0.850513
단지내주차면수         0.846272
공가수_주차면수10분위    0.802941
총전용면적           0.794874
                  ...   
90대여자           0.005897
40대여자           0.004259
면적_63           0.002011
80대여자           0.001065
아파트                  NaN
Name: 등록차량수, Length: 122, dtype: float64

In [477]:
corr[:60]

등록차량수              1.000000
공가수_주차면수           0.850513
단지내주차면수            0.846272
공가수_주차면수10분위       0.802941
총전용면적              0.794874
총전용면적 10분위         0.735142
총세대수               0.591772
공가수_세대수            0.591369
총임대가구수             0.579381
가구당주차면수            0.503476
임대보증금 10분위         0.438906
공공임대5년10년분납분양_y    0.407375
임대보증금 임대보증금_평균     0.405811
공공임대5년10년분납분양_x    0.404852
상가합                0.402697
임대료 임대료_최소         0.402058
세대수_주차면수           0.382912
면적당 세대수_2          0.376403
임대보증금 임대보증금_최소     0.373301
A_y                0.360991
버스_주차면수            0.351646
임대료 임대료_평균         0.346319
임대보증금 임대보증금_총합     0.331438
임대료 10분위           0.323197
아파트_상가             0.317305
상가                 0.317305
임대상가_x             0.317305
면적_72              0.299609
D_x                0.273319
임대상가_y             0.263332
아파트합               0.263332
면적_100             0.261697
주차면수_공가수           0.257433
면적_24              0.256134
국민임대장기전세_y         0.254790
임대보증금 임대보증금_최대     0

In [78]:
corr = df_train_edited.drop(['단지코드'],1,).corr()["등록차량수"].abs().sort_values(ascending=False)
corr

등록차량수       1.000000
단지내주차면수     0.842500
주차공간분위수     0.833174
총전용면적의합     0.796403
총세대수        0.592093
              ...   
100대(남자)    0.006599
40대(여자)     0.004380
보증금임대료비율    0.002749
면적_63       0.002098
80대(여자)     0.001474
Name: 등록차량수, Length: 82, dtype: float64

In [79]:
corr[:30]

등록차량수                 1.000000
단지내주차면수               0.842500
주차공간분위수               0.833174
총전용면적의합               0.796403
총세대수                  0.592093
실제세대수                 0.591694
총임대가구수                0.579705
가구당주차면수               0.477503
공공임대(5년/10년/분납/분양)    0.407516
평균임대보증금               0.406089
아파트                   0.402050
A                     0.361548
임대료평균                 0.345247
총임대보증금                0.331808
면적_72                 0.299753
공가수의비율                0.290711
상가                    0.262898
임대상가                  0.262898
면적_100                0.261291
국민임대/장기전세             0.255430
면적_24                 0.255357
면적_30                 0.250530
D                     0.248464
면적_57                 0.232326
국민임대/장기전세_공급대상        0.208869
기성세대남자비율              0.203962
20대(여자)               0.200972
면적_66                 0.194661
40대(남자)               0.188929
면적_84                 0.188834
Name: 등록차량수, dtype: float64

In [528]:
from sklearn.preprocessing import PowerTransformer
from sklearn.preprocessing import MinMaxScaler
X = df_train_edited.copy()
X = X.loc[:,corr.index[0:50]]
X.drop(['등록차량수',  ],axis=1, inplace=True)
feature_names = X.columns.to_list()
#scaler = MinMaxScaler()
#X = scaler.fit(X).transform(X)
#scaler = PowerTransformer()
#X = scaler.fit(X).transform(X)
X = pd.DataFrame(X)
y = df_train_edited.iloc[:,1]


# 피쳐 선택은 나중에 우선 상관도 높은 20개 사용 

<h2> 바닐라 모델 교차 검증

In [529]:
models = {'RF' : RandomForestRegressor(), 'LR': LinearRegression() , 'RD' : Ridge(), 'LS' : Lasso(), 'ET' : ElasticNet(),
          'XGB' : XGBRegressor(), 
          'LGB' : LGB.LGBMRegressor(), 
          'CB' : CatBoostRegressor(logging_level='Silent' ), 
          'PLS' : PLSRegression()
}

#CatBoostRegressor은 시끄러운 녀석이라 닥쳐
kfold = KFold(n_splits=5, shuffle = True, random_state=0)
# n_split : 몇개로 분할할지
# shuffle : Fold를 나누기 전에 무작위로 섞을지
# random_state : 나눈 Fold를 그대로 사용할지
answer = []
for model in models.keys():
    print(model)
    scores = cross_val_score(models[model] , X, y, cv=kfold, scoring='neg_mean_absolute_error')
    answer.append(scores)

corss_val_result = pd.DataFrame(answer)
corss_val_result.index = models.keys()
corss_val_result['mean'] = corss_val_result.mean(axis=1)




RF
LR
RD
LS
ET



Ill-conditioned matrix (rcond=1.21919e-19): result may not be accurate.


Ill-conditioned matrix (rcond=1.35426e-19): result may not be accurate.


Ill-conditioned matrix (rcond=1.29643e-19): result may not be accurate.


Ill-conditioned matrix (rcond=1.56302e-19): result may not be accurate.


Ill-conditioned matrix (rcond=1.28265e-19): result may not be accurate.


Objective did not converge. You might want to increase the number of iterations. Duality gap: 4735847.417885592, tolerance: 5235.640740662651


Objective did not converge. You might want to increase the number of iterations. Duality gap: 4049094.338558245, tolerance: 5029.392087650604


Objective did not converge. You might want to increase the number of iterations. Duality gap: 4983982.46740484, tolerance: 5132.9296385542175


Objective did not converge. You might want to increase the number of iterations. Duality gap: 4125652.5461214636, tolerance: 4488.534227409638


Objective did not converge. You might want to increa

XGB
LGB
CB
PLS


In [530]:
#104
corss_val_result

Unnamed: 0,0,1,2,3,4,mean
RF,-123.074096,-141.232048,-105.686988,-127.800964,-129.278916,-125.414602
LR,-127.418885,-154.11893,-113.328363,-147.279125,-156.369734,-139.703008
RD,-120.442844,-154.47787,-104.835406,-141.083833,-144.662977,-133.100586
LS,-109.946381,-158.592985,-106.167123,-138.175946,-142.468849,-131.070257
ET,-106.405934,-154.374749,-102.314363,-133.156376,-139.101053,-127.070495
XGB,-140.182943,-153.211744,-106.387818,-142.377035,-159.982519,-140.428412
LGB,-127.404793,-165.344922,-123.859131,-133.003385,-147.118336,-139.346114
CB,-114.100799,-141.33359,-100.812967,-126.591382,-134.861758,-123.540099
PLS,-118.044964,-153.190844,-132.737757,-158.297347,-130.291174,-138.512417


In [481]:
#104
corss_val_result

Unnamed: 0,0,1,2,3,4,mean
RF,-108.350241,-136.509398,-122.244699,-114.138313,-134.77878,-123.204286
LR,-146.195444,-134.515056,-123.44053,-131.840902,-161.837029,-139.565792
RD,-127.14404,-136.890727,-120.122591,-130.78345,-146.419844,-132.27213
LS,-114.482663,-139.53001,-119.752602,-129.37477,-142.862397,-129.200489
ET,-114.450639,-137.711806,-116.120395,-124.14175,-140.519353,-126.588789
XGB,-129.589672,-138.098736,-131.488322,-131.644217,-154.711761,-137.106542
LGB,-120.030601,-147.148294,-134.208103,-131.382679,-151.655126,-136.884961
CB,-107.791525,-135.72236,-112.260102,-116.192745,-147.408107,-123.874968
PLS,-110.06281,-150.241753,-144.28016,-144.050659,-141.078584,-137.942793


In [308]:
df_test_edited=df_test_edited.rename(columns = lambda x:re.sub('[-=+,#/\?:^$.@*\"※~&%ㆍ!』\\‘|\(\)\[\]\<\>`\'…》]+', '', str(x)))

In [318]:
X_test.drop('단지내주차면수',axis=1,inplace=True)



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [319]:
X_test

Unnamed: 0,공가수_주차면수,총전용면적,총세대수,총임대가구수,가구당주차면수,공공임대5년10년분납분양_y,임대보증금 임대보증금_평균,공공임대5년10년분납분양_x,상가합,임대료 임대료_최소,세대수_주차면수,임대보증금 임대보증금_최소,A_y,버스_주차면수,임대료 임대료_평균,임대보증금 임대보증금_총합,아파트_상가,임대상가_x,상가,면적_72,D_x,임대상가_y,아파트합,면적_100,주차면수_공가수,국민임대장기전세_y,면적_24,임대보증금 임대보증금_최대,면적_30,D_y,A_x,면적_57,영구임대_x,국민임대장기전세_공급대상_y,영구임대_공급대상_x,20대여자,면적_66,면적당 세대수,40대남자,면적_84,공가수,20대남자,10대남자,50대남자,10대여자,영구임대_공급대상_y,영구임대_y,면적_15
0,669.0,35490.0,754.0,754.0,0.905836,0,3.812025e+07,0.0,8,189840.0,1.103953,22830000.0,1,0.002928,265843.750000,304962000.0,0,0.0,0.0,0.0,0.0,0,0,0.0,48.785714,8,0.000000,43497000.0,0.0,0,1.0,0.000000,0.0,7,0.0,0.058484,0.0,0.021245,0.078355,0.0,14.0,0.059894,0.053990,0.064859,0.052546,0,0,0.000000
1,1207.0,61878.0,1354.0,1354.0,0.898080,0,3.412500e+07,0.0,9,189840.0,1.113487,22830000.0,4,0.002467,245516.666667,307125000.0,0,0.0,0.0,0.0,0.0,0,0,0.0,135.111111,9,0.000000,43497000.0,0.0,0,1.0,0.000000,0.0,5,0.0,0.058484,0.0,0.021882,0.078355,0.0,9.0,0.059894,0.053990,0.064859,0.052546,0,0,0.000000
2,529.0,27201.0,619.0,619.0,0.883683,0,3.396522e+07,0.0,9,156200.0,1.131627,19706000.0,9,0.029250,223624.444444,305687000.0,0,0.0,0.0,0.0,0.0,0,0,0.0,30.388889,9,0.000000,55275000.0,0.0,0,1.0,0.071082,0.0,0,0.0,0.053796,0.0,0.022757,0.060769,0.0,18.0,0.057233,0.035512,0.068855,0.032681,0,0,0.000000
3,536.0,25179.0,593.0,593.0,0.915683,0,1.892425e+07,0.0,4,108130.0,1.092081,14418000.0,4,0.005525,123150.000000,75697000.0,0,0.0,0.0,0.0,0.0,0,0,0.0,77.571429,4,0.000000,25233000.0,0.0,0,1.0,0.000000,0.0,0,0.0,0.065517,0.0,0.023551,0.075724,0.0,7.0,0.070339,0.063379,0.077795,0.059154,0,0,0.000000
4,1101.0,57639.0,1297.0,1297.0,0.857363,0,4.133450e+07,0.0,8,203050.0,1.166367,28598000.0,0,0.001799,292880.000000,330676000.0,0,0.0,0.0,0.0,0.0,0,0,0.0,101.090909,8,0.000000,51188000.0,0.0,0,0.0,0.000000,0.0,8,0.0,0.058484,0.0,0.022502,0.078355,0.0,11.0,0.059894,0.053990,0.064859,0.052546,0,0,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145,253.0,11976.0,349.0,346.0,0.773639,0,1.222875e+07,0.0,4,117000.0,1.292593,6992000.0,0,0.014815,155082.500000,48915000.0,0,0.0,0.0,0.0,0.0,0,0,0.0,15.882353,4,0.320917,21246000.0,0.0,0,0.0,0.000000,0.0,4,0.0,0.066262,0.0,0.029142,0.066645,0.0,17.0,0.070322,0.060080,0.074382,0.059685,0,0,0.000000
146,558.0,20079.0,596.0,591.0,0.994966,0,1.435520e+07,0.0,5,149910.0,1.005059,8084000.0,0,0.001686,199878.000000,71776000.0,0,0.0,0.0,0.0,0.0,0,0,0.0,16.942857,5,0.302013,24190000.0,0.0,0,0.0,0.000000,0.0,5,0.0,0.067975,0.0,0.029683,0.073418,0.0,35.0,0.072622,0.062422,0.069167,0.059077,0,0,0.000000
147,31.0,3366.0,120.0,120.0,0.333333,0,2.515500e+06,0.0,2,42350.0,3.000000,2129000.0,0,0.025000,50040.000000,5031000.0,0,0.0,0.0,0.0,0.0,0,0,0.0,4.444444,0,0.550000,2902000.0,0.0,0,0.0,0.000000,1.0,0,1.0,0.060374,0.0,0.035651,0.068002,0.0,9.0,0.063157,0.052712,0.072177,0.051287,2,2,0.000000
148,429.0,20592.0,675.0,670.0,0.691852,0,1.481329e+07,0.0,11,52896.0,1.445396,6882000.0,0,0.002141,114454.454545,162946164.0,0,0.0,0.0,0.0,0.0,0,0,0.0,12.289474,5,0.302222,25247619.0,0.0,0,0.0,0.000000,1.0,5,1.0,0.064920,0.0,0.032780,0.067600,0.0,38.0,0.070618,0.055568,0.072570,0.053257,2,2,0.074074


In [None]:
df_sub

In [531]:
model = CB = CatBoostRegressor(random_state=0)
model.fit(X, y)

X_test = df_test_edited[feature_names]

pred = CB.predict(X_test)
#submission = df_sub['num']

df_sub['num'] = pred
df_sub.to_csv('moonjoo_0723_7.csv', index=False)



Learning rate set to 0.033374
0:	learn: 379.2203678	total: 30.1ms	remaining: 30s
1:	learn: 372.8499221	total: 32.5ms	remaining: 16.2s
2:	learn: 365.7312020	total: 34.6ms	remaining: 11.5s
3:	learn: 359.0560861	total: 36.6ms	remaining: 9.11s
4:	learn: 352.5998450	total: 38.7ms	remaining: 7.7s
5:	learn: 346.0573780	total: 40.7ms	remaining: 6.75s
6:	learn: 340.1325312	total: 42.8ms	remaining: 6.07s
7:	learn: 333.7703537	total: 44.5ms	remaining: 5.52s
8:	learn: 328.1702707	total: 46.4ms	remaining: 5.11s
9:	learn: 321.8414883	total: 48.1ms	remaining: 4.76s
10:	learn: 316.3052092	total: 50ms	remaining: 4.5s
11:	learn: 310.7198724	total: 51.3ms	remaining: 4.22s
12:	learn: 305.1423962	total: 53ms	remaining: 4.02s
13:	learn: 300.1180577	total: 54.8ms	remaining: 3.86s
14:	learn: 295.3966027	total: 56.6ms	remaining: 3.72s
15:	learn: 290.8957034	total: 58.5ms	remaining: 3.6s
16:	learn: 286.4932225	total: 60.3ms	remaining: 3.49s
17:	learn: 282.3334080	total: 62.2ms	remaining: 3.39s
18:	learn: 278.45

160:	learn: 125.2158613	total: 333ms	remaining: 1.73s
161:	learn: 124.9055809	total: 335ms	remaining: 1.73s
162:	learn: 124.4387989	total: 337ms	remaining: 1.73s
163:	learn: 124.2754409	total: 339ms	remaining: 1.73s
164:	learn: 124.0786581	total: 341ms	remaining: 1.72s
165:	learn: 123.9352920	total: 343ms	remaining: 1.72s
166:	learn: 123.5094145	total: 345ms	remaining: 1.72s
167:	learn: 123.2417687	total: 347ms	remaining: 1.72s
168:	learn: 123.0183910	total: 348ms	remaining: 1.71s
169:	learn: 122.8369491	total: 350ms	remaining: 1.71s
170:	learn: 122.7188416	total: 352ms	remaining: 1.71s
171:	learn: 122.4468113	total: 354ms	remaining: 1.71s
172:	learn: 122.2417720	total: 356ms	remaining: 1.7s
173:	learn: 121.8899699	total: 358ms	remaining: 1.7s
174:	learn: 121.7310036	total: 360ms	remaining: 1.7s
175:	learn: 121.3263873	total: 362ms	remaining: 1.7s
176:	learn: 121.1011135	total: 364ms	remaining: 1.69s
177:	learn: 120.9176332	total: 366ms	remaining: 1.69s
178:	learn: 120.7425108	total: 3

334:	learn: 86.2523801	total: 673ms	remaining: 1.34s
335:	learn: 86.0150798	total: 676ms	remaining: 1.33s
336:	learn: 85.8821428	total: 678ms	remaining: 1.33s
337:	learn: 85.5819495	total: 680ms	remaining: 1.33s
338:	learn: 85.3962464	total: 682ms	remaining: 1.33s
339:	learn: 85.0964554	total: 684ms	remaining: 1.33s
340:	learn: 84.8220006	total: 686ms	remaining: 1.32s
341:	learn: 84.5224822	total: 688ms	remaining: 1.32s
342:	learn: 84.2405504	total: 690ms	remaining: 1.32s
343:	learn: 84.0440179	total: 692ms	remaining: 1.32s
344:	learn: 83.8128687	total: 694ms	remaining: 1.32s
345:	learn: 83.7162543	total: 696ms	remaining: 1.32s
346:	learn: 83.6254197	total: 698ms	remaining: 1.31s
347:	learn: 83.4720352	total: 701ms	remaining: 1.31s
348:	learn: 83.4295523	total: 703ms	remaining: 1.31s
349:	learn: 83.3111079	total: 705ms	remaining: 1.31s
350:	learn: 83.0910624	total: 707ms	remaining: 1.31s
351:	learn: 82.8796412	total: 732ms	remaining: 1.35s
352:	learn: 82.7936684	total: 734ms	remaining:

497:	learn: 61.8891840	total: 1.01s	remaining: 1.02s
498:	learn: 61.7441352	total: 1.01s	remaining: 1.02s
499:	learn: 61.5542346	total: 1.02s	remaining: 1.02s
500:	learn: 61.5402297	total: 1.02s	remaining: 1.01s
501:	learn: 61.5266790	total: 1.02s	remaining: 1.01s
502:	learn: 61.3256910	total: 1.02s	remaining: 1.01s
503:	learn: 61.2851102	total: 1.02s	remaining: 1.01s
504:	learn: 61.1836150	total: 1.02s	remaining: 1s
505:	learn: 61.0948186	total: 1.03s	remaining: 1s
506:	learn: 61.0798732	total: 1.03s	remaining: 1s
507:	learn: 61.0409484	total: 1.03s	remaining: 999ms
508:	learn: 60.8842644	total: 1.03s	remaining: 997ms
509:	learn: 60.7561194	total: 1.03s	remaining: 995ms
510:	learn: 60.6034024	total: 1.04s	remaining: 993ms
511:	learn: 60.5052541	total: 1.04s	remaining: 990ms
512:	learn: 60.3566733	total: 1.04s	remaining: 988ms
513:	learn: 60.2616183	total: 1.04s	remaining: 986ms
514:	learn: 60.1576790	total: 1.04s	remaining: 983ms
515:	learn: 60.0811649	total: 1.05s	remaining: 981ms
51

665:	learn: 45.8759187	total: 1.35s	remaining: 678ms
666:	learn: 45.7635481	total: 1.35s	remaining: 676ms
667:	learn: 45.7361590	total: 1.36s	remaining: 674ms
668:	learn: 45.6458193	total: 1.36s	remaining: 672ms
669:	learn: 45.6046517	total: 1.36s	remaining: 670ms
670:	learn: 45.4475323	total: 1.36s	remaining: 668ms
671:	learn: 45.3884373	total: 1.36s	remaining: 666ms
672:	learn: 45.2804440	total: 1.37s	remaining: 664ms
673:	learn: 45.2731282	total: 1.37s	remaining: 662ms
674:	learn: 45.2324194	total: 1.37s	remaining: 660ms
675:	learn: 45.1140480	total: 1.37s	remaining: 658ms
676:	learn: 45.0654669	total: 1.37s	remaining: 656ms
677:	learn: 44.8959145	total: 1.38s	remaining: 653ms
678:	learn: 44.8496537	total: 1.38s	remaining: 651ms
679:	learn: 44.7820254	total: 1.38s	remaining: 649ms
680:	learn: 44.7665929	total: 1.38s	remaining: 647ms
681:	learn: 44.6831165	total: 1.38s	remaining: 645ms
682:	learn: 44.6307350	total: 1.39s	remaining: 643ms
683:	learn: 44.6227139	total: 1.39s	remaining:

846:	learn: 34.2067300	total: 1.69s	remaining: 305ms
847:	learn: 34.1859023	total: 1.69s	remaining: 303ms
848:	learn: 34.1417773	total: 1.69s	remaining: 301ms
849:	learn: 34.1354054	total: 1.7s	remaining: 299ms
850:	learn: 34.0324653	total: 1.7s	remaining: 297ms
851:	learn: 33.9920391	total: 1.7s	remaining: 295ms
852:	learn: 33.8869125	total: 1.7s	remaining: 293ms
853:	learn: 33.8129351	total: 1.7s	remaining: 291ms
854:	learn: 33.7685735	total: 1.71s	remaining: 289ms
855:	learn: 33.7521050	total: 1.71s	remaining: 287ms
856:	learn: 33.6855908	total: 1.71s	remaining: 285ms
857:	learn: 33.6150701	total: 1.71s	remaining: 283ms
858:	learn: 33.5313771	total: 1.71s	remaining: 281ms
859:	learn: 33.4934092	total: 1.72s	remaining: 280ms
860:	learn: 33.4090140	total: 1.72s	remaining: 278ms
861:	learn: 33.4028903	total: 1.72s	remaining: 276ms
862:	learn: 33.3383731	total: 1.72s	remaining: 274ms
863:	learn: 33.3032895	total: 1.73s	remaining: 272ms
864:	learn: 33.2407227	total: 1.73s	remaining: 270m

In [532]:
df_sub

Unnamed: 0,code,num
0,C1072,727.988823
1,C1128,1309.705516
2,C1456,530.080531
3,C1840,550.804839
4,C1332,1120.543177
...,...,...
145,C2456,227.843336
146,C1266,452.712613
147,C2152,9.666678
148,C1267,448.385979


In [321]:
df_sub

Unnamed: 0,code,num
0,C1072,747.599520
1,C1128,1403.535375
2,C1456,531.826773
3,C1840,521.381965
4,C1332,1295.170585
...,...,...
145,C2456,238.399581
146,C1266,463.542455
147,C2152,22.062274
148,C1267,402.250415


In [None]:
from sklearn.ensemble import VotingRegressor
cat = CatBoostRegressor(random_state=1000 )
reg_ridge = Ridge(random_state = 1000)
rf = RandomForestRegressor()

vr = VotingRegressor(estimators=[('rf', rf), ('cat', cat) , ('reg_ridge', reg_ridge)],  n_jobs=-1)
vr = vr.fit(X, y)

kfold = KFold(n_splits=5, shuffle = True, random_state=0)


scores = cross_val_score(vr , X, y, cv=kfold, scoring='neg_mean_absolute_error')
scores


In [None]:
a123

<h1> 결과 파일 생성 

In [None]:
#scaler = MinMaxScaler()
#X_test = scaler.fit(X_test).transform(X_test)
#scaler = PowerTransformer()
#X_test = scaler.fit(X_test).transform(X_test)
#X_test = pd.DataFrame(X_test)

In [84]:
X_test = df_test_edited[feature_names]

In [85]:
X_test

Unnamed: 0,단지내주차면수,주차공간분위수,총전용면적의합,총세대수,실제세대수,총임대가구수,가구당주차면수,공공임대(5년/10년/분납/분양),평균임대보증금,아파트,A,임대료평균,총임대보증금,면적_72,공가수의비율,상가,임대상가,면적_100,국민임대/장기전세,면적_24,면적_30,D,면적_57,국민임대/장기전세_공급대상,기성세대남자비율,20대(여자),면적_66,40대(남자),면적_84,공가수,20대(남자),10대(남자),50대(남자),10대(여자),영구임대_공급대상,영구임대,면적_15,30대(남자),면적_18,90대(남자),100대(여자),30대(여자),행복주택_공급대상,행복주택,50대(여자),총면적별주차면수,지역,면적_36,버스
0,683.0,593.0,35490.0,754.0,740.0,754.0,0.905836,0,3.812025e+07,8,1,265843.750000,304962000.0,0.0,0.018568,0,0,0.0,8,0.000000,0.0,0,0.000000,7,0.270337,0.058484,0.0,0.078355,0.0,14.0,0.059894,0.053990,0.064859,0.052546,0,0,0.000000,0.068704,0.0,0.001707,0.000290,0.072331,0,0,0.077345,0.019245,1.0,0.000000,2.0
1,1216.0,1036.7,61878.0,1354.0,1345.0,1354.0,0.898080,0,3.412500e+07,9,4,245516.666667,307125000.0,0.0,0.006647,0,0,0.0,9,0.000000,0.0,0,0.000000,5,0.270337,0.058484,0.0,0.078355,0.0,9.0,0.059894,0.053990,0.064859,0.052546,0,0,0.000000,0.068704,0.0,0.001707,0.000290,0.072331,0,0,0.077345,0.019652,1.0,0.000000,3.0
2,547.0,503.0,27201.0,619.0,601.0,619.0,0.883683,0,3.396522e+07,9,9,223624.444444,305687000.0,0.0,0.029079,0,0,0.0,9,0.000000,0.0,0,0.071082,0,0.263784,0.053796,0.0,0.060769,0.0,18.0,0.057233,0.035512,0.068855,0.032681,0,0,0.000000,0.048866,0.0,0.002240,0.000268,0.047049,0,0,0.082899,0.020110,7.0,0.000000,16.0
3,543.0,503.0,25179.0,593.0,586.0,593.0,0.915683,0,1.892425e+07,4,4,123150.000000,75697000.0,0.0,0.011804,0,0,0.0,4,0.000000,0.0,0,0.000000,0,0.269341,0.065517,0.0,0.075724,0.0,7.0,0.070339,0.063379,0.077795,0.059154,0,0,0.000000,0.053471,0.0,0.001458,0.000149,0.057977,0,0,0.079966,0.021566,11.0,0.000000,3.0
4,1112.0,1036.7,57639.0,1297.0,1286.0,1297.0,0.857363,0,4.133450e+07,8,0,292880.000000,330676000.0,0.0,0.008481,0,0,0.0,8,0.000000,0.0,0,0.000000,8,0.270337,0.058484,0.0,0.078355,0.0,11.0,0.059894,0.053990,0.064859,0.052546,0,0,0.000000,0.068704,0.0,0.001707,0.000290,0.072331,0,0,0.077345,0.019292,1.0,0.000000,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145,270.0,273.2,11976.0,349.0,332.0,346.0,0.773639,0,1.222875e+07,4,0,155082.500000,48915000.0,0.0,0.048711,0,0,0.0,4,0.320917,0.0,0,0.000000,4,0.255665,0.066262,0.0,0.066645,0.0,17.0,0.070322,0.060080,0.074382,0.059685,0,0,0.000000,0.046596,0.0,0.001898,0.000158,0.052027,0,0,0.081408,0.022545,12.0,0.395415,4.0
146,593.0,503.0,20079.0,596.0,561.0,591.0,0.994966,0,1.435520e+07,5,0,199878.000000,71776000.0,0.0,0.058725,0,0,0.0,5,0.302013,0.0,0,0.000000,5,0.267580,0.067975,0.0,0.073418,0.0,35.0,0.072622,0.062422,0.069167,0.059077,0,0,0.000000,0.067303,0.0,0.001549,0.000219,0.065095,0,0,0.079299,0.029533,14.0,0.510067,1.0
147,40.0,186.8,3366.0,120.0,111.0,120.0,0.333333,0,2.515500e+06,2,0,50040.000000,5031000.0,0.0,0.075000,0,0,0.0,0,0.550000,0.0,0,0.000000,0,0.264515,0.060374,0.0,0.068002,0.0,9.0,0.063157,0.052712,0.072177,0.051287,2,2,0.000000,0.054321,0.0,0.001677,0.000319,0.059821,0,0,0.087266,0.011884,0.0,0.000000,1.0
148,467.0,426.8,20592.0,675.0,637.0,670.0,0.691852,0,1.481329e+07,11,0,114454.454545,162946164.0,0.0,0.056296,0,0,0.0,5,0.302222,0.0,0,0.000000,5,0.267281,0.064920,0.0,0.067600,0.0,38.0,0.070618,0.055568,0.072570,0.053257,2,2,0.074074,0.057550,0.0,0.001086,0.000179,0.056414,4,4,0.086873,0.022679,2.0,0.186667,1.0


In [86]:
X_test = pd.DataFrame(X_test)

In [87]:
X_test = pd.concat( [X_test,cat_test.iloc[:,1:]], axis = 1)
#X_test = pd.concat([X_test, kmean_id_t],axis = 1)
X_test = X_test.values

In [88]:
catb = CatBoostRegressor()

In [89]:
catb.fit(X,y, verbose=100)

Learning rate set to 0.033374
0:	learn: 380.1332322	total: 3.54ms	remaining: 3.54s
100:	learn: 151.0899720	total: 311ms	remaining: 2.77s
200:	learn: 117.7935733	total: 584ms	remaining: 2.32s
300:	learn: 92.9248678	total: 857ms	remaining: 1.99s
400:	learn: 75.8207855	total: 1.21s	remaining: 1.81s
500:	learn: 62.3820205	total: 1.49s	remaining: 1.48s
600:	learn: 51.9379195	total: 1.76s	remaining: 1.17s
700:	learn: 43.7848542	total: 2.04s	remaining: 870ms
800:	learn: 36.7842593	total: 2.34s	remaining: 582ms
900:	learn: 30.8324458	total: 2.64s	remaining: 290ms
999:	learn: 26.5685873	total: 2.92s	remaining: 0us


<catboost.core.CatBoostRegressor at 0x15c53c44e80>

In [90]:
df_sub["num"] = catb.predict(X_test)

In [91]:
df_sub.to_csv("submission_fuckingerror.csv",index = False)

In [92]:
pd.read_csv("submission_fuckingerror.csv")

Unnamed: 0,code,num
0,C1072,758.797821
1,C1128,1384.556679
2,C1456,497.635644
3,C1840,552.502456
4,C1332,1259.481067
...,...,...
145,C2456,244.077065
146,C1266,452.817505
147,C2152,1.582663
148,C1267,346.191216


<h1> 모델 설명 

In [None]:
explainer = shap.TreeExplainer(RF) # Tree model Shap Value 확인 객체 지정
shap_values = explainer.shap_values(X_test) # Shap Values 계산
shap.initjs() # javascript 초기화 (graph 초기화)
shap.force_plot(explainer.expected_value, shap_values[1,:], feature_names)

## 빨간색 영향도 높음, 파란색 영향도 낮음(음의 영향력)

In [None]:
shap.force_plot(explainer.expected_value, shap_values, X_test,feature_names) 
#x축. y축 title을 클릭하면 drop down 생성, 전 피처에 대하여 영향력 확인 가능 

In [None]:
shap.summary_plot(shap_values, X_test,feature_names)

# 모든 변수들의 shap value를 요약한 것으로 해당 변수가 빨간색을 띄면 target(price)에 대해 양의 영향력, 파란색을 띄면 음의 영향력을 가진다. 


In [None]:
 # 각 변수에 대한 |Shap Values|을 통해 변수 importance 파악
shap.summary_plot(shap_values, X_test, plot_type = "bar")

# 오늘의 추첨룰렛

In [None]:
mun = pd.read_csv("lotto/moonjoo_0714_.csv") # 닥치셈
#sung = pd.read_csv("lotto/sub8.csv") 
seok = pd.read_csv("lotto/submission_0719_3_noseoul_bow_nomalmodel.csv") # ㅈ 댈뻔
#ha = pd.read_csv("lotto/0701002.csv")

In [None]:
ha.head()# 재밌잖아

In [None]:
lotto = (mun["num"]+seok["num"])/2 # 그저 태양

In [None]:
df_sub["num"] = lotto # 닥치셈

In [None]:
df_sub

In [None]:
df_sub.to_csv("submission_향기로운칵테일에취해도보고.csv",index = False)

In [None]:
pd.read_csv("submission_향기로운칵테일에취해도보고.csv")

In [None]:
df_train[df_train.임대보증금 == df_train.임대료]