# 구별로 모델 별도 생성

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.notebook import tqdm
import gc

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler, OneHotEncoder
from sklearn.metrics import mean_squared_error

import lightgbm as lgb

import joblib

# 맥에서 글씨 깨짐 방지
from matplotlib import rc
rc('font', family='AppleGothic')
plt.rcParams['axes.unicode_minus'] = False

# Data Load

In [2]:
df = pd.read_csv('../data/16_over_30_under_50_years.csv')

  df = pd.read_csv('../data/16_over_30_under_50_years.csv')


In [3]:
df.shape

(1128094, 89)

In [4]:
# 일부 feature 제거 - 기준은 내마음대로

df.drop(['해제사유발생일', '등기신청일자', '거래유형', '중개사소재지', 'k-단지분류(아파트,주상복합등등)', 'k-전화번호',
         'k-팩스번호', '단지소개기존clob', 'k-세대타입(분양형태)', 'k-복도유형', 'k-난방방식', 'k-전체동수', 'k-전체세대수',
         'k-사용검사일-사용승인일', 'k-관리비부과면적', 'k-전용면적별세대현황(60이하)', 'k-전용면적별세대현황(60~85이하)',
         'k-85~135이하', 'k-135초과', 'k-홈페이지', 'k-등록일자', 'k-수정일자', '고용보험관리번호', '경비비관리형태',
         '기타/의무/임대/임의=1/2/3/4', '단지승인일', '사용허가여부', '관리비 업로드', '단지신청일'], axis=1, inplace=True)

In [5]:
# 추가로 제거가 필요한 feature 제거
df.drop(['시군구', '번지', '본번', '부번', '아파트명', '도로명', 'k-관리방식', 'k-건설사(시공사)', 'k-시행사', 'k-연면적', 'k-주거전용면적',
         '세대전기계약방법', '청소비관리형태', '건축면적', '주차대수', '시군구 번지', '좌표X,좌표Y', 'index', '역사_ID', '역사명', '위도', '경도',
         '가장 가까운 버스 정류장 index', '가장 가까운 버스 정류장 노드 ID', '가장 가까운 버스 정류소번호', '가장 가까운 버스 정류소명',
         '가장 가까운 버스 정류소 타입', '가장 가까운 버스 정류장 X좌표', '가장 가까운 버스 정류장 Y좌표', '동'], axis=1, inplace=True)

In [6]:
df.columns

Index(['전용면적', '계약년월', '계약일', '층', '건축년도', '좌표X', '좌표Y', 'target', 'is_test',
       '가장 가까운 거리', '호선', '인근 지하철 역 개수', '가장 가까운 버스 정류장 거리', '인근 버스 정류장 개수',
       '계약년', '계약월', 'GDP', '한국은행 기준금리', '기대 인플레이션', '지가지수', '아파트 인허가', '미분양',
       '거래량', '건설사 랭킹', '구', '구별 지가지수', '공시지가 평균', '매수우위지수', '건물나이',
       '30년이상50년이하'],
      dtype='object')

In [7]:
df.drop(['지가지수', '아파트 인허가', '미분양', '건설사 랭킹'], axis=1, inplace=True)

In [8]:
print(df.shape)
df.head(10)

(1128094, 23)


Unnamed: 0,전용면적,계약년월,계약일,층,건축년도,좌표X,좌표Y,target,is_test,가장 가까운 거리,...,인근 버스 정류장 개수,계약년,계약월,GDP,한국은행 기준금리,기대 인플레이션,거래량,구,구별 지가지수,공시지가 평균
0,79.97,201712,8,3,1987,127.05721,37.476763,124000.0,0,1127.738351,...,7.0,2017,12,1623074.1,1.5,2.5,13740.0,강남구,75.121,6858562.0
1,79.97,201712,22,4,1987,127.05721,37.476763,123500.0,0,1127.738351,...,7.0,2017,12,1623074.1,1.5,2.5,13740.0,강남구,75.121,6858562.0
2,54.98,201712,28,5,1987,127.05721,37.476763,91500.0,0,1127.738351,...,7.0,2017,12,1623074.1,1.5,2.5,13740.0,강남구,75.121,6858562.0
3,79.97,201801,3,4,1987,127.05721,37.476763,130000.0,0,1127.738351,...,7.0,2018,1,1725373.4,1.5,2.6,15107.0,강남구,75.461,7322618.0
4,79.97,201801,8,2,1987,127.05721,37.476763,117000.0,0,1127.738351,...,7.0,2018,1,1725373.4,1.5,2.6,15107.0,강남구,75.461,7322618.0
5,79.97,201801,11,1,1987,127.05721,37.476763,130000.0,0,1127.738351,...,7.0,2018,1,1725373.4,1.5,2.6,15107.0,강남구,75.461,7322618.0
6,79.97,201803,19,2,1987,127.05721,37.476763,139500.0,0,1127.738351,...,7.0,2018,3,1725373.4,1.5,2.6,24122.0,강남구,76.313,7322618.0
7,54.98,201804,5,5,1987,127.05721,37.476763,107500.0,0,1127.738351,...,7.0,2018,4,1725373.4,1.5,2.6,12347.0,강남구,76.483,7322618.0
8,79.97,201806,28,3,1987,127.05721,37.476763,145000.0,0,1127.738351,...,7.0,2018,6,1725373.4,1.5,2.6,10401.0,강남구,77.037,7322618.0
9,54.98,201807,9,3,1987,127.05721,37.476763,112000.0,0,1127.738351,...,7.0,2018,7,1725373.4,1.5,2.6,11753.0,강남구,77.567,7322618.0


In [9]:
# 계약월 변수 생성
df['계약월'] = df['계약년월'].astype(str).str[4:].astype(int)

# 계약년월, 계약일, 건축년도, 계약년 변수 제거
df.drop(['계약년월', '계약일'], axis=1, inplace=True)

df['계약월_sin'] = np.sin(2 * np.pi * df['계약월'] / 12)
df.drop(['계약월'], axis=1, inplace=True)

In [10]:
df.head()

Unnamed: 0,전용면적,층,건축년도,좌표X,좌표Y,target,is_test,가장 가까운 거리,호선,인근 지하철 역 개수,...,인근 버스 정류장 개수,계약년,GDP,한국은행 기준금리,기대 인플레이션,거래량,구,구별 지가지수,공시지가 평균,계약월_sin
0,79.97,3,1987,127.05721,37.476763,124000.0,0,1127.738351,분당선,2.0,...,7.0,2017,1623074.1,1.5,2.5,13740.0,강남구,75.121,6858562.0,-2.449294e-16
1,79.97,4,1987,127.05721,37.476763,123500.0,0,1127.738351,분당선,2.0,...,7.0,2017,1623074.1,1.5,2.5,13740.0,강남구,75.121,6858562.0,-2.449294e-16
2,54.98,5,1987,127.05721,37.476763,91500.0,0,1127.738351,분당선,2.0,...,7.0,2017,1623074.1,1.5,2.5,13740.0,강남구,75.121,6858562.0,-2.449294e-16
3,79.97,4,1987,127.05721,37.476763,130000.0,0,1127.738351,분당선,2.0,...,7.0,2018,1725373.4,1.5,2.6,15107.0,강남구,75.461,7322618.0,0.5
4,79.97,2,1987,127.05721,37.476763,117000.0,0,1127.738351,분당선,2.0,...,7.0,2018,1725373.4,1.5,2.6,15107.0,강남구,75.461,7322618.0,0.5


In [11]:
df.columns

Index(['전용면적', '층', '건축년도', '좌표X', '좌표Y', 'target', 'is_test', '가장 가까운 거리',
       '호선', '인근 지하철 역 개수', '가장 가까운 버스 정류장 거리', '인근 버스 정류장 개수', '계약년', 'GDP',
       '한국은행 기준금리', '기대 인플레이션', '거래량', '구', '구별 지가지수', '공시지가 평균', '계약월_sin'],
      dtype='object')

# scaling

In [12]:
def scailing(col, scaler_type):
    if scaler_type == 'min_max': scaler = MinMaxScaler()
    else : scaler = StandardScaler()

    return scaler.fit_transform(df[[col]])

cols = ['전용면적', '층', '건축년도', '좌표X', '좌표Y', '가장 가까운 거리',
       '인근 지하철 역 개수', '가장 가까운 버스 정류장 거리', '인근 버스 정류장 개수', '계약년', 'GDP',
       '한국은행 기준금리', '기대 인플레이션', '거래량', '구별 지가지수', '공시지가 평균', '계약월_sin']

for col in cols:
    print(col)
    scaler_type = 'min_max'
    if col == '계약월_sin': scaler_type = 'standard'

    df[[col]] = scailing(col, scaler_type)

전용면적
층
건축년도
좌표X
좌표Y
가장 가까운 거리
인근 지하철 역 개수
가장 가까운 버스 정류장 거리
인근 버스 정류장 개수
계약년
GDP
한국은행 기준금리
기대 인플레이션
거래량
구별 지가지수
공시지가 평균
계약월_sin


In [13]:
df

Unnamed: 0,전용면적,층,건축년도,좌표X,좌표Y,target,is_test,가장 가까운 거리,호선,인근 지하철 역 개수,...,인근 버스 정류장 개수,계약년,GDP,한국은행 기준금리,기대 인플레이션,거래량,구,구별 지가지수,공시지가 평균,계약월_sin
0,0.168839,0.095890,0.419355,0.678243,0.167126,124000.0,0,0.353721,분당선,0.086957,...,0.2,0.6250,0.776663,0.210526,0.290323,0.466276,강남구,0.395747,0.509398,-0.022272
1,0.168839,0.109589,0.419355,0.678243,0.167126,123500.0,0,0.353721,분당선,0.086957,...,0.2,0.6250,0.776663,0.210526,0.290323,0.466276,강남구,0.395747,0.509398,-0.022272
2,0.108520,0.123288,0.419355,0.678243,0.167126,91500.0,0,0.353721,분당선,0.086957,...,0.2,0.6250,0.776663,0.210526,0.290323,0.466276,강남구,0.395747,0.509398,-0.022272
3,0.168839,0.109589,0.419355,0.678243,0.167126,130000.0,0,0.353721,분당선,0.086957,...,0.2,0.6875,0.893613,0.210526,0.322581,0.522738,강남구,0.403844,0.551484,0.684200
4,0.168839,0.082192,0.419355,0.678243,0.167126,117000.0,0,0.353721,분당선,0.086957,...,0.2,0.6875,0.893613,0.210526,0.322581,0.522738,강남구,0.403844,0.551484,0.684200
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1128089,0.180135,0.232877,0.854839,0.807949,0.728162,,1,0.227756,경춘선,0.173913,...,0.2,1.0000,0.879231,0.631579,0.548387,0.149932,중랑구,0.964896,0.156150,-0.728745
1128090,0.180063,0.219178,0.854839,0.807949,0.728162,,1,0.227756,경춘선,0.173913,...,0.2,1.0000,0.879231,0.631579,0.548387,0.149932,중랑구,0.964896,0.156150,-0.728745
1128091,0.221168,0.219178,0.854839,0.807949,0.728162,,1,0.227756,경춘선,0.173913,...,0.2,1.0000,0.879231,0.631579,0.548387,0.176160,중랑구,0.967372,0.156150,-1.245919
1128092,0.180835,0.301370,0.854839,0.807949,0.728162,,1,0.227756,경춘선,0.173913,...,0.2,1.0000,0.879231,0.631579,0.548387,0.162942,중랑구,0.972255,0.156150,-1.435218


# 호선 One-Hot encoding

In [14]:
ohe = OneHotEncoder(sparse=False)
ohe_re = ohe.fit_transform(df[['호선']])



In [15]:
df = pd.concat([df, pd.DataFrame(ohe_re, columns=[col for col in ohe.categories_[0]])], axis=1)
df

Unnamed: 0,전용면적,층,건축년도,좌표X,좌표Y,target,is_test,가장 가까운 거리,호선,인근 지하철 역 개수,...,경춘선,공항철도1호선,과천선,분당선,신림선,신분당선,신분당선(연장2),우이신설선,일산선,중앙선
0,0.168839,0.095890,0.419355,0.678243,0.167126,124000.0,0,0.353721,분당선,0.086957,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.168839,0.109589,0.419355,0.678243,0.167126,123500.0,0,0.353721,분당선,0.086957,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.108520,0.123288,0.419355,0.678243,0.167126,91500.0,0,0.353721,분당선,0.086957,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.168839,0.109589,0.419355,0.678243,0.167126,130000.0,0,0.353721,분당선,0.086957,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.168839,0.082192,0.419355,0.678243,0.167126,117000.0,0,0.353721,분당선,0.086957,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1128089,0.180135,0.232877,0.854839,0.807949,0.728162,,1,0.227756,경춘선,0.173913,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1128090,0.180063,0.219178,0.854839,0.807949,0.728162,,1,0.227756,경춘선,0.173913,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1128091,0.221168,0.219178,0.854839,0.807949,0.728162,,1,0.227756,경춘선,0.173913,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1128092,0.180835,0.301370,0.854839,0.807949,0.728162,,1,0.227756,경춘선,0.173913,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [16]:
df.drop(['호선'], axis=1, inplace=True)

# Dataset 생성

In [17]:
train_df = df[df['is_test'] == 0]
test_df = df[df['is_test'] == 1]

In [18]:
print(train_df.shape)
print(test_df.shape)

(1118822, 44)
(9272, 44)


In [19]:
train_df.drop(['is_test'], axis=1, inplace=True)
test_df.drop(['is_test'], axis=1, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_df.drop(['is_test'], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df.drop(['is_test'], axis=1, inplace=True)


In [20]:
print(train_df.shape)
print(test_df.shape)

(1118822, 43)
(9272, 43)


## 구별로 분할

In [21]:
district_list = train_df['구'].unique().tolist()
print(len(district_list))
district_list

25


['강남구',
 '강동구',
 '강북구',
 '강서구',
 '관악구',
 '광진구',
 '구로구',
 '금천구',
 '노원구',
 '도봉구',
 '동대문구',
 '동작구',
 '마포구',
 '서대문구',
 '서초구',
 '성동구',
 '성북구',
 '송파구',
 '양천구',
 '영등포구',
 '용산구',
 '은평구',
 '종로구',
 '중구',
 '중랑구']

In [22]:
train_df_list = []

for district in district_list:
    train_df_list.append(train_df[train_df['구'] == district])

In [23]:
train_df_list[0][['구']]

Unnamed: 0,구
0,강남구
1,강남구
2,강남구
3,강남구
4,강남구
...,...
1118450,강남구
1118451,강남구
1118452,강남구
1118453,강남구


In [24]:
test_df_list = []

for district in district_list:
    test_df_list.append(test_df[test_df['구'] == district])

In [25]:
test_df_list[0][['구']]

Unnamed: 0,구
1118822,강남구
1118823,강남구
1118824,강남구
1118825,강남구
1118826,강남구
...,...
1123928,강남구
1123929,강남구
1123930,강남구
1123931,강남구


In [26]:
# train data X, y 분할
train_X_list = []
train_y_list = []

for i in range(25):
    train_X_list.append(train_df_list[i].drop(['target'], axis=1))
    train_y_list.append(train_df_list[i][['target']])

In [30]:
train_X_list[0].columns

Index(['전용면적', '층', '건축년도', '좌표X', '좌표Y', '가장 가까운 거리', '인근 지하철 역 개수',
       '가장 가까운 버스 정류장 거리', '인근 버스 정류장 개수', '계약년', 'GDP', '한국은행 기준금리',
       '기대 인플레이션', '거래량', '구', '구별 지가지수', '공시지가 평균', '계약월_sin', '1호선', '2호선',
       '3호선', '4호선', '5호선', '6호선', '7호선', '8호선', '9호선', '9호선(연장)', '경부선',
       '경원선', '경의중앙선', '경인선', '경춘선', '공항철도1호선', '과천선', '분당선', '신림선', '신분당선',
       '신분당선(연장2)', '우이신설선', '일산선', '중앙선'],
      dtype='object')

## train, valid 분할

In [31]:
valid_X_list = [None for _ in range(25)]
valid_y_list = [None for _ in range(25)]

for i in range(25):
    train_X_list[i], valid_X_list[i], train_y_list[i], valid_y_list[i] = train_test_split(train_X_list[i], train_y_list[i], train_size=0.9, test_size=0.1, random_state=74)

# Model 생성

In [32]:
params = {
    'boosting_type': 'gbdt',
    'objective': 'regression',
    'metric': 'rmse',
    'metric_freq': 20,
    'device': 'gpu',
    'verbosity': 0
}

def make_model(train_X, train_y, valid_X, valid_y, i):
    model = lgb.LGBMRegressor(n_estimators=100000,
                          metric='rmse',
                          data_sample_strategy='goss')
    
    model.fit(
        train_X, train_y,
        eval_set = [(train_X, train_y), (valid_X, valid_y)],
        eval_metric='rmse',
        callbacks=[lgb.early_stopping(stopping_rounds=60),
                   lgb.log_evaluation(period=10, show_stdv=True)]
    )

    joblib.dump(model, f'../models/14/{i}.pkl')

    return model

In [33]:
models = []

for i in range(25):
    models.append(make_model(train_X_list[i].drop(['구'], axis=1), train_y_list[i], valid_X_list[i].drop(['구'], axis=1), valid_y_list[i], i))

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001015 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1890
[LightGBM] [Info] Number of data points in the train set: 62174, number of used features: 25
[LightGBM] [Info] Using GOSS
[LightGBM] [Info] Start training from score 114864.819860
Training until validation scores don't improve for 60 rounds
[10]	training's rmse: 40944.5	valid_1's rmse: 40829.2
[20]	training's rmse: 27631.5	valid_1's rmse: 27720.3
[30]	training's rmse: 21882.4	valid_1's rmse: 22116.3
[40]	training's rmse: 19117.4	valid_1's rmse: 19448.1
[50]	training's rmse: 17438.8	valid_1's rmse: 17856.7
[60]	training's rmse: 16352.9	valid_1's rmse: 16795.8
[70]	training's rmse: 15540.8	valid_1's rmse: 16049
[80]	training's rmse: 14887.1	valid_1's rmse: 15411
[90]	training's rmse: 14401.4	valid_1's rmse: 14981.1
[100]	training's rmse: 13936.7	valid_1's rmse: 14614.5
[110]	training's rmse: 135

# Submission

In [34]:
for i in range(25):
    test_df_list[i].drop(['target'], axis=1, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df_list[i].drop(['target'], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df_list[i].drop(['target'], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df_list[i].drop(['target'], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tes

In [35]:
preds_li = []

for i in range(25):
    preds_li.append(models[i].predict(test_df_list[i].drop(['구'], axis=1)))

In [36]:
preds_li[0]

array([183572.89340515, 295183.47928658, 332229.22505256, 284610.97097172,
       231278.38782391, 236577.53599942, 242468.15896133, 230750.14921783,
       178247.91946989, 408808.31304824, 267277.86376526, 258065.76479585,
       265209.27270135, 188747.59287013, 261746.20262174, 256841.66768034,
       193976.85156962, 269269.28175796, 257450.72648715, 268858.42053874,
       326608.59369546, 341790.20561548, 168880.22988613, 305484.39836168,
        41603.70206432,  43760.42306566, 165267.83017844, 207115.95859262,
       147423.37537123, 157778.8775517 , 223937.87460924, 222109.41434162,
       257182.4696818 , 224048.69848765, 268822.65525468, 269213.8085256 ,
        96285.24083638, 393702.12023254, 265397.04908283, 397776.44142659,
       268765.34175458, 184634.10888684, 186218.41600298, 260362.8841362 ,
       195453.15732228, 198813.42920597, 344572.60478192, 318836.4688166 ,
       193779.18430591, 209359.01910254, 352029.35845419, 190272.04157134,
       203877.16862164, 2

### 결과 후처리

In [37]:
for i in range(25):
    preds_li[i] = np.round(preds_li[i]).astype(int)

In [38]:
for i in range(25):
    test_df_list[i]['target'] = preds_li[i]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df_list[i]['target'] = preds_li[i]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df_list[i]['target'] = preds_li[i]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df_list[i]['target'] = preds_li[i]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row

In [40]:
test_df_list[0]

Unnamed: 0,전용면적,층,건축년도,좌표X,좌표Y,가장 가까운 거리,인근 지하철 역 개수,가장 가까운 버스 정류장 거리,인근 버스 정류장 개수,계약년,...,공항철도1호선,과천선,분당선,신림선,신분당선,신분당선(연장2),우이신설선,일산선,중앙선,target
1118822,0.168839,0.123288,0.419355,0.678243,0.167126,0.353721,0.086957,0.101599,0.200000,1.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,183573
1118823,0.236982,0.191781,0.967742,0.676106,0.199219,0.092077,0.304348,0.209173,0.228571,1.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,295183
1118824,0.364422,0.260274,0.370968,0.675047,0.195279,0.126138,0.304348,0.163092,0.257143,1.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,332229
1118825,0.297948,0.246575,0.370968,0.675047,0.195279,0.126138,0.304348,0.163092,0.257143,1.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,284611
1118826,0.227878,0.136986,0.370968,0.675047,0.195279,0.126138,0.304348,0.163092,0.257143,1.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,231278
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1123928,0.180835,0.150685,0.967742,0.734501,0.220301,0.112839,0.260870,0.319755,0.171429,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,234303
1123929,0.180835,0.136986,0.967742,0.734501,0.220301,0.112839,0.260870,0.319755,0.171429,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,231865
1123930,0.180835,0.178082,0.967742,0.734501,0.220301,0.112839,0.260870,0.319755,0.171429,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,231770
1123931,0.155733,0.095890,0.516129,0.740689,0.187216,0.108642,0.130435,0.207077,0.114286,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,176078


In [41]:
# 결과 병합

re = pd.concat([preds for preds in test_df_list]).sort_index()

In [44]:
re[['target']].reset_index(drop=True).to_csv('../preds/14.csv', index=False)