In [1]:
import pandas as pd
import numpy as np
import os
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib
matplotlib.rcParams['axes.unicode_minus'] = False
pd.options.mode.chained_assignment = None  # default='warn'
plt.rcParams['font.family'] = 'Malgun Gothic'
import datetime
import time

In [2]:
def day_modifier(x):
    # x is a number
    
    if pd.isna(x) == True:
        return x
    else:
        x = str(x)
        if len(x) == 1:
            return '0' + x
        else:
            return x

In [3]:
def landnum_modifier(x):
    # x is a string
    
    if pd.isna(x) == True:
        return x
    else:
        x = str(x).replace(' ','')
        splitted = x.split('-')
        if len(splitted) == 1:
            return x + '-0'
        else:
            return x

In [4]:
def data_prep(bdtype, tradetype):
    # bdtype is a string: one of 아파트, 연립다세대 or 오피스텔
    # tradetype is a string: one of 매매 or 전월세
    
    basedir = './국토교통부_실거래가_공개시스템/{}/{}/'.format(bdtype, tradetype)
    filenames = [f for f in os.listdir(basedir) if (f.endswith('.csv'))&('(' in f)]
    
    dfs_list = []
    for i, f in tqdm(enumerate(filenames)):
        df = pd.read_csv(basedir + f, encoding='euc-kr', header=15)
        if '해제사유발생일' in df.columns.tolist():
            df = df[df['해제사유발생일'].isna()]
            df = df.drop(columns=['해제사유발생일'])

        dfs_list.append(df)
    
    concat_df = pd.concat(dfs_list).reset_index(drop=True)
        
    concat_df['번지'] = concat_df['번지'].apply(landnum_modifier)
    
    concat_df['계약년월'] = concat_df['계약년월'].apply(str)
    concat_df['계약일'] = concat_df['계약일'].apply(str)
    
    concat_df['계약날짜기준_건물연식'] = concat_df['계약년월'].apply(lambda x: int(x[:4])) - concat_df['건축년도']
    
    concat_df['계약일'] = concat_df['계약일'].apply(day_modifier)
    
    concat_df['계약날짜'] = concat_df['계약년월'].apply(lambda x: x[:4]) + '-' + concat_df['계약년월'].apply(lambda x: x[-2:])\
    + '-' + concat_df['계약일']
    
    concat_df['계약날짜'] = pd.to_datetime(concat_df['계약날짜'], format='%Y-%m-%d')
    
    concat_df['거래금액(만원)'] = concat_df['거래금액(만원)'].apply(lambda x: int(x.replace(',','')))
    concat_df['단가(만원/㎡)'] = concat_df['거래금액(만원)'] / concat_df['전용면적(㎡)']
    
    concat_df['지번주소'] = concat_df['시군구'] + ' ' + concat_df['번지']
        
    cols_to_drop = ['시군구', '번지', '본번', '부번', '지번주소', '계약년월', '계약일', '도로명']
        
    concat_df = concat_df[['지번주소', '도로명'] + [col for col in concat_df.columns if col not in cols_to_drop]]
    
    date_today = pd.to_datetime(datetime.datetime.now().strftime('%Y-%m-%d'), format='%Y-%m-%d')
    concat_df['건물연식'] = date_today.year - concat_df['건축년도']
    
    concat_df = concat_df.dropna(subset=['지번주소'])
    
    if '건물명' in concat_df.columns:
        concat_df.rename(columns={'건물명':'건물(단지)명'}, inplace=True)
    elif '단지명' in concat_df.columns:
        concat_df.rename(columns={'단지명':'건물(단지)명'}, inplace=True)
    
    concat_df = concat_df[concat_df['층'] >= 0].reset_index(drop=True)
    
    return concat_df

In [5]:
yunrip_df = data_prep('연립다세대', '매매')
yunrip_df = yunrip_df[(yunrip_df['계약날짜'] >= pd.to_datetime('2015-01-01'))
                      #&(yunrip_df['계약날짜기준_건물연식'] <= 5)
                     ]
yunrip_df = yunrip_df.reset_index(drop=True)
yunrip_df['부동산유형'] = '연립다세대'
yunrip_df.drop(columns=['대지권면적(㎡)'], inplace=True)
print(yunrip_df.shape)
yunrip_df.head()

16it [00:01, 10.39it/s]


(318821, 12)


Unnamed: 0,지번주소,도로명,건물(단지)명,전용면적(㎡),거래금액(만원),층,건축년도,계약날짜기준_건물연식,계약날짜,단가(만원/㎡),건물연식,부동산유형
0,서울특별시 강남구 개포동 1216-4,개포로26길 16,(1216-4),33.77,24800,4,2012.0,3.0,2015-03-26,734.379627,9.0,연립다세대
1,서울특별시 강남구 개포동 1216-4,개포로26길 16,(1216-4),35.87,27200,4,2012.0,3.0,2015-06-23,758.293839,9.0,연립다세대
2,서울특별시 강남구 개포동 1216-4,개포로26길 16,(1216-4),29.12,22200,4,2012.0,3.0,2015-07-20,762.362637,9.0,연립다세대
3,서울특별시 강남구 개포동 1216-4,개포로26길 16,(1216-4),29.97,22500,3,2012.0,3.0,2015-08-06,750.750751,9.0,연립다세대
4,서울특별시 강남구 개포동 170-18,선릉로12길 17,(170-18),26.6,21000,1,1988.0,27.0,2015-08-21,789.473684,33.0,연립다세대


In [6]:
officetel_df = data_prep('오피스텔', '매매')
officetel_df = officetel_df[(officetel_df['계약날짜'] >= pd.to_datetime('2015-01-01'))
                            #&(officetel_df['계약날짜기준_건물연식'] <= 5)
                           ]
officetel_df = officetel_df.reset_index(drop=True)
officetel_df['부동산유형'] = '오피스텔'
print(officetel_df.shape)
officetel_df.head()

16it [00:00, 41.12it/s]


(82013, 12)


Unnamed: 0,지번주소,도로명,건물(단지)명,전용면적(㎡),거래금액(만원),층,건축년도,계약날짜기준_건물연식,계약날짜,단가(만원/㎡),건물연식,부동산유형
0,서울특별시 강남구 개포동 13-3,개포로 623,대청타워,43.24,22000,14,1997.0,18.0,2015-01-08,508.788159,24.0,오피스텔
1,서울특별시 강남구 개포동 13-3,개포로 623,대청타워,32.44,15800,21,1997.0,18.0,2015-01-12,487.053021,24.0,오피스텔
2,서울특별시 강남구 개포동 13-3,개포로 623,대청타워,32.44,16000,10,1997.0,18.0,2015-01-19,493.218249,24.0,오피스텔
3,서울특별시 강남구 개포동 13-3,개포로 623,대청타워,32.44,15400,21,1997.0,18.0,2015-01-26,474.722565,24.0,오피스텔
4,서울특별시 강남구 개포동 13-3,개포로 623,대청타워,31.91,16000,26,1997.0,18.0,2015-01-28,501.410216,24.0,오피스텔


In [7]:
apart_df = data_prep('아파트', '매매')
apart_df = apart_df[(apart_df['계약날짜'] >= pd.to_datetime('2015-01-01'))
                    #&(apart_df['계약날짜기준_건물연식'] <= 5)
                   ]
apart_df = apart_df.reset_index(drop=True)
apart_df['부동산유형'] = '아파트'
print(apart_df.shape)
apart_df.head()

16it [00:02,  7.90it/s]


(596180, 12)


Unnamed: 0,지번주소,도로명,건물(단지)명,전용면적(㎡),거래금액(만원),층,건축년도,계약날짜기준_건물연식,계약날짜,단가(만원/㎡),건물연식,부동산유형
0,서울특별시 강남구 개포동 655-2,언주로 103,개포2차현대아파트(220),77.75,60000,5,1988,27,2015-01-11,771.70418,33,아파트
1,서울특별시 강남구 개포동 655-2,언주로 103,개포2차현대아파트(220),77.75,65000,5,1988,27,2015-03-03,836.012862,33,아파트
2,서울특별시 강남구 개포동 655-2,언주로 103,개포2차현대아파트(220),77.75,62500,2,1988,27,2015-03-07,803.858521,33,아파트
3,서울특별시 강남구 개포동 655-2,언주로 103,개포2차현대아파트(220),77.75,68000,6,1988,27,2015-08-16,874.598071,33,아파트
4,서울특별시 강남구 개포동 658-1,언주로 3,개포6차우성아파트1동~8동,79.97,73000,5,1987,28,2015-01-29,912.842316,34,아파트


In [8]:
concat_df = pd.concat([yunrip_df, officetel_df, apart_df]).sort_values(['지번주소', '건축년도', '계약날짜', '전용면적(㎡)']).reset_index(drop=True)
print(concat_df.shape)
concat_df.head()

(997014, 12)


Unnamed: 0,지번주소,도로명,건물(단지)명,전용면적(㎡),거래금액(만원),층,건축년도,계약날짜기준_건물연식,계약날짜,단가(만원/㎡),건물연식,부동산유형
0,서울특별시 강남구 개포동 1164-0,논현로2길 6,골드캐슬,58.05,57500,2,2017.0,1.0,2018-01-20,990.525409,4.0,연립다세대
1,서울특별시 강남구 개포동 1164-0,논현로2길 6,골드캐슬,58.05,57000,1,2017.0,1.0,2018-06-01,981.912145,4.0,연립다세대
2,서울특별시 강남구 개포동 1164-0,논현로2길 6,골드캐슬,58.05,63000,2,2017.0,2.0,2019-01-19,1085.271318,4.0,연립다세대
3,서울특별시 강남구 개포동 1164-12,논현로2길 34,새롬(1164-12),73.5,42000,3,2000.0,15.0,2015-07-28,571.428571,21.0,아파트
4,서울특별시 강남구 개포동 1164-12,논현로2길 34,새롬(1164-12),73.5,55400,2,2000.0,17.0,2017-07-21,753.741497,21.0,아파트


In [9]:
def find_bulktrade(df):
    idx_list = []
    for addr in tqdm(df['지번주소'].unique()):
        addr_df = df[df['지번주소'] == addr]
        
        for built_year in addr_df['건축년도'].unique():
            built_year_df = addr_df[addr_df['건축년도'] == built_year]

            for date in built_year_df['계약날짜'].unique():
                date_df = built_year_df[built_year_df['계약날짜'] == date]

                if date_df.shape[0] >= 10:
                    [idx_list.append(idx) for idx in date_df.index]
    
    selected_df = df.loc[idx_list,:]
    
    return selected_df

In [27]:
concat_selected_df = find_bulktrade(concat_df)
print(concat_selected_df.shape)
concat_selected_df.head()

 52%|███████████████████████████████████████▋                                    | 42050/80471 [36:47<33:37, 19.05it/s]


KeyboardInterrupt: 

In [None]:
idx_to_drop = []
for addr in tqdm(concat_selected_df['지번주소'].unique()):
    addr_df = concat_selected_df[concat_selected_df['지번주소'] == addr]
    if addr_df['건축년도'].nunique() > 1:
        non_max_df = addr_df[addr_df['건축년도'] != addr_df['건축년도'].max()]
        [idx_to_drop.append(i) for i in non_max_df.index]

In [None]:
len(idx_to_drop)

In [None]:
concat_selected_df.shape

In [None]:
concat_selected_df = concat_selected_df.drop(idx_to_drop).reset_index(drop=True)
print(concat_selected_df.shape)
concat_selected_df.head()

In [16]:
bulktrade_basedir = './국토교통부_실거래가_공개시스템/집값분석/일괄구매/10호이상/'

In [17]:
%%time
concat_selected_df.to_csv(bulktrade_basedir+'아파트_연립다세대_오피스텔_일괄구매_전체.csv')

Wall time: 355 ms


In [22]:
yunrip_officetel_complete_df = pd.read_excel(bulktrade_basedir + '2018년부터/2018년01월01일부터_2021년06월07일까지_SH_LH_거래사례_요약본_빠진애들_채워넣음.xlsx')
yunrip_officetel_complete_df = yunrip_officetel_complete_df.drop(columns=['Unnamed: 16'])
yunrip_officetel_complete_df['지번주소'] = '서울특별시 ' + yunrip_officetel_complete_df['지번주소']
print(yunrip_officetel_complete_df.shape)
yunrip_officetel_complete_df.head()

(829, 16)


Unnamed: 0,지번주소,건물(단지)명,건축년도,계약날짜,부동산유형,소유자,평균 전용면적(㎡),총 전용면적(㎡),전용면적 표준편차(㎡),평균 거래금액(만원),총 거래금액(만원),거래금액 표준편차(만원),최대 층,평균 단가(만원),단가 표준편차(만원),세대 수
0,서울특별시 강남구 개포동 1237-7,한별2,2021,2021-06-01,오피스텔,SH,29.665,118.66,0.39,33565.25,134261,437.5,2,1131.477759,0.128995,4
1,서울특별시 강남구 개포동 1237-7,한별2,2021,2021-06-01,연립다세대,SH,29.946667,449.2,0.037544,38163.333333,572450,44.185755,6,1274.376854,0.358219,15
2,서울특별시 강남구 개포동 1216-7,백년빌,2020,2020-03-18,연립다세대,SH,26.686,400.29,2.367074,30573.333333,458600,2134.65175,5,1147.330195,25.484846,15
3,서울특별시 강남구 개포동 1195-10,동영빌,2020,2020-03-02,연립다세대,SH,28.814667,432.22,4.855684,31463.333333,471950,4139.63709,5,1097.178201,36.299709,15
4,서울특별시 강남구 개포동 1199-7,개포백년빌,2019,2019-12-19,연립다세대,LH,27.905,390.67,3.070186,28857.142857,404000,3536.722578,5,1033.367584,37.609677,14


In [None]:
except_df = concat_selected_df[concat_selected_df['지번주소'].isin(yunrip_officetel_complete_df['지번주소'].unique()) == False]
print(except_df.shape)
except_df.head()

In [None]:
included_df = concat_selected_df[concat_selected_df['지번주소'].isin(yunrip_officetel_complete_df['지번주소'].unique())]
print(included_df.shape)
included_df.head()

In [None]:
count = 0
for addr in included_df['지번주소'].unique():
    addr_df = included_df[included_df['지번주소'] == addr]
    
    if '아파트' in addr_df['부동산유형'].unique():
        print(addr)
        print(addr_df.shape)
        display(addr_df)
        count += 1
    
    if count > 5:
        break

In [25]:
count = 0
for addr in concat_selected_df['지번주소'].unique():
    addr_df = concat_selected_df[concat_selected_df['지번주소'] == addr]
    
    
    if '아파트' in addr_df['부동산유형'].unique():
        print(addr)
        print(addr_df.shape)
        display(addr_df.head(20))

        count += 1
    
    if count > 5:
        break

서울특별시 강남구 논현동 9-2
(13, 13)


Unnamed: 0,지번주소,도로명,건물(단지)명,전용면적(㎡),거래금액(만원),층,건축년도,계약날짜기준_건물연식,계약날짜,단가(만원/㎡),건물연식,부동산유형,소유자
685,서울특별시 강남구 논현동 9-2,도산대로24길 19,논현인텔(9-2),17.22,18773,4,1996.0,23.0,2019-03-28,1090.18583,25.0,아파트,
686,서울특별시 강남구 논현동 9-2,도산대로24길 19,논현인텔(9-2),21.24,23153,2,1996.0,23.0,2019-03-28,1090.065913,25.0,아파트,
687,서울특별시 강남구 논현동 9-2,도산대로24길 19,논현인텔(9-2),21.24,23153,3,1996.0,23.0,2019-03-28,1090.065913,25.0,아파트,
688,서울특별시 강남구 논현동 9-2,도산대로24길 19,논현인텔(9-2),21.84,23803,5,1996.0,23.0,2019-03-28,1089.880952,25.0,아파트,
689,서울특별시 강남구 논현동 9-2,도산대로24길 19,논현인텔(9-2),21.84,23803,3,1996.0,23.0,2019-03-28,1089.880952,25.0,아파트,
690,서울특별시 강남구 논현동 9-2,도산대로24길 19,논현인텔(9-2),21.84,23803,2,1996.0,23.0,2019-03-28,1089.880952,25.0,아파트,
691,서울특별시 강남구 논현동 9-2,도산대로24길 19,논현인텔(9-2),22.05,24033,5,1996.0,23.0,2019-03-28,1089.931973,25.0,아파트,
692,서울특별시 강남구 논현동 9-2,도산대로24길 19,논현인텔(9-2),23.1,25179,5,1996.0,23.0,2019-03-28,1090.0,25.0,아파트,
693,서울특별시 강남구 논현동 9-2,도산대로24길 19,논현인텔(9-2),31.11,33912,5,1996.0,23.0,2019-03-28,1090.067502,25.0,아파트,
694,서울특별시 강남구 논현동 9-2,도산대로24길 19,논현인텔(9-2),33.32,36322,3,1996.0,23.0,2019-03-28,1090.096038,25.0,아파트,


서울특별시 강남구 삼성동 3-1
(56, 13)


Unnamed: 0,지번주소,도로명,건물(단지)명,전용면적(㎡),거래금액(만원),층,건축년도,계약날짜기준_건물연식,계약날짜,단가(만원/㎡),건물연식,부동산유형,소유자
1008,서울특별시 강남구 삼성동 3-1,학동로 408,삼성월드타워,58.776,69000,2,1997.0,23.0,2020-06-19,1173.94855,24.0,아파트,
1009,서울특별시 강남구 삼성동 3-1,학동로 408,삼성월드타워,58.776,67000,2,1997.0,23.0,2020-06-19,1139.921056,24.0,아파트,
1010,서울특별시 강남구 삼성동 3-1,학동로 408,삼성월드타워,58.776,70000,3,1997.0,23.0,2020-06-19,1190.962298,24.0,아파트,
1011,서울특별시 강남구 삼성동 3-1,학동로 408,삼성월드타워,58.776,70000,3,1997.0,23.0,2020-06-19,1190.962298,24.0,아파트,
1012,서울특별시 강남구 삼성동 3-1,학동로 408,삼성월드타워,58.776,71000,4,1997.0,23.0,2020-06-19,1207.976045,24.0,아파트,
1013,서울특별시 강남구 삼성동 3-1,학동로 408,삼성월드타워,58.776,70000,4,1997.0,23.0,2020-06-19,1190.962298,24.0,아파트,
1014,서울특별시 강남구 삼성동 3-1,학동로 408,삼성월드타워,58.776,74000,10,1997.0,23.0,2020-06-19,1259.017286,24.0,아파트,
1015,서울특별시 강남구 삼성동 3-1,학동로 408,삼성월드타워,58.776,74000,10,1997.0,23.0,2020-06-19,1259.017286,24.0,아파트,
1016,서울특별시 강남구 삼성동 3-1,학동로 408,삼성월드타워,58.776,74000,5,1997.0,23.0,2020-06-19,1259.017286,24.0,아파트,
1017,서울특별시 강남구 삼성동 3-1,학동로 408,삼성월드타워,58.776,74000,5,1997.0,23.0,2020-06-19,1259.017286,24.0,아파트,


서울특별시 강남구 역삼동 831-29
(63, 13)


Unnamed: 0,지번주소,도로명,건물(단지)명,전용면적(㎡),거래금액(만원),층,건축년도,계약날짜기준_건물연식,계약날짜,단가(만원/㎡),건물연식,부동산유형,소유자
2555,서울특별시 강남구 역삼동 831-29,역삼로3길 17-4,에레프,14.31,17505,4,2013.0,3.0,2016-10-27,1223.27044,8.0,아파트,
2556,서울특별시 강남구 역삼동 831-29,역삼로3길 17-4,에레프,14.31,17505,9,2013.0,3.0,2016-10-27,1223.27044,8.0,아파트,
2557,서울특별시 강남구 역삼동 831-29,역삼로3길 17-4,에레프,14.31,17505,10,2013.0,3.0,2016-10-27,1223.27044,8.0,아파트,
2558,서울특별시 강남구 역삼동 831-29,역삼로3길 17-4,에레프,14.31,17505,4,2013.0,3.0,2016-10-27,1223.27044,8.0,아파트,
2559,서울특별시 강남구 역삼동 831-29,역삼로3길 17-4,에레프,14.31,17505,4,2013.0,3.0,2016-10-27,1223.27044,8.0,아파트,
2560,서울특별시 강남구 역삼동 831-29,역삼로3길 17-4,에레프,14.31,17505,5,2013.0,3.0,2016-10-27,1223.27044,8.0,아파트,
2561,서울특별시 강남구 역삼동 831-29,역삼로3길 17-4,에레프,14.31,17505,5,2013.0,3.0,2016-10-27,1223.27044,8.0,아파트,
2562,서울특별시 강남구 역삼동 831-29,역삼로3길 17-4,에레프,14.31,17505,5,2013.0,3.0,2016-10-27,1223.27044,8.0,아파트,
2563,서울특별시 강남구 역삼동 831-29,역삼로3길 17-4,에레프,14.31,17505,5,2013.0,3.0,2016-10-27,1223.27044,8.0,아파트,
2564,서울특별시 강남구 역삼동 831-29,역삼로3길 17-4,에레프,14.31,17505,5,2013.0,3.0,2016-10-27,1223.27044,8.0,아파트,


서울특별시 강남구 역삼동 835-33
(13, 13)


Unnamed: 0,지번주소,도로명,건물(단지)명,전용면적(㎡),거래금액(만원),층,건축년도,계약날짜기준_건물연식,계약날짜,단가(만원/㎡),건물연식,부동산유형,소유자
2630,서울특별시 강남구 역삼동 835-33,도곡로11길 10,광평하우스빌,29.28,15132,4,1997.0,20.0,2017-04-29,516.803279,24.0,아파트,
2631,서울특별시 강남구 역삼동 835-33,도곡로11길 10,광평하우스빌,29.28,15132,3,1997.0,20.0,2017-04-29,516.803279,24.0,아파트,
2632,서울특별시 강남구 역삼동 835-33,도곡로11길 10,광평하우스빌,29.28,15132,5,1997.0,20.0,2017-04-29,516.803279,24.0,아파트,
2633,서울특별시 강남구 역삼동 835-33,도곡로11길 10,광평하우스빌,35.68,18421,4,1997.0,20.0,2017-04-29,516.283632,24.0,아파트,
2634,서울특별시 강남구 역삼동 835-33,도곡로11길 10,광평하우스빌,35.68,18421,3,1997.0,20.0,2017-04-29,516.283632,24.0,아파트,
2635,서울특별시 강남구 역삼동 835-33,도곡로11길 10,광평하우스빌,35.68,18421,5,1997.0,20.0,2017-04-29,516.283632,24.0,아파트,
2636,서울특별시 강남구 역삼동 835-33,도곡로11길 10,광평하우스빌,42.13,21711,4,1997.0,20.0,2017-04-29,515.333492,24.0,아파트,
2637,서울특별시 강남구 역삼동 835-33,도곡로11길 10,광평하우스빌,42.13,21711,5,1997.0,20.0,2017-04-29,515.333492,24.0,아파트,
2638,서울특별시 강남구 역삼동 835-33,도곡로11길 10,광평하우스빌,42.13,21711,3,1997.0,20.0,2017-04-29,515.333492,24.0,아파트,
2639,서울특별시 강남구 역삼동 835-33,도곡로11길 10,광평하우스빌,45.3,23520,3,1997.0,20.0,2017-04-29,519.205298,24.0,아파트,


서울특별시 강남구 역삼동 837-31
(30, 13)


Unnamed: 0,지번주소,도로명,건물(단지)명,전용면적(㎡),거래금액(만원),층,건축년도,계약날짜기준_건물연식,계약날짜,단가(만원/㎡),건물연식,부동산유형,소유자
2643,서울특별시 강남구 역삼동 837-31,도곡로1길 22,경민리빙텔,31.62,36755,4,1998.0,22.0,2020-05-29,1162.397217,23.0,아파트,
2644,서울특별시 강남구 역삼동 837-31,도곡로1길 22,경민리빙텔,31.62,36755,4,1998.0,22.0,2020-05-29,1162.397217,23.0,아파트,
2645,서울특별시 강남구 역삼동 837-31,도곡로1길 22,경민리빙텔,31.62,37852,5,1998.0,22.0,2020-05-29,1197.090449,23.0,아파트,
2646,서울특별시 강남구 역삼동 837-31,도곡로1길 22,경민리빙텔,31.62,37852,7,1998.0,22.0,2020-05-29,1197.090449,23.0,아파트,
2647,서울특별시 강남구 역삼동 837-31,도곡로1길 22,경민리빙텔,31.62,37852,8,1998.0,22.0,2020-05-29,1197.090449,23.0,아파트,
2648,서울특별시 강남구 역삼동 837-31,도곡로1길 22,경민리빙텔,31.62,37852,8,1998.0,22.0,2020-05-29,1197.090449,23.0,아파트,
2649,서울특별시 강남구 역삼동 837-31,도곡로1길 22,경민리빙텔,31.62,37852,9,1998.0,22.0,2020-05-29,1197.090449,23.0,아파트,
2650,서울특별시 강남구 역삼동 837-31,도곡로1길 22,경민리빙텔,31.62,37852,5,1998.0,22.0,2020-05-29,1197.090449,23.0,아파트,
2651,서울특별시 강남구 역삼동 837-31,도곡로1길 22,경민리빙텔,31.62,37852,6,1998.0,22.0,2020-05-29,1197.090449,23.0,아파트,
2652,서울특별시 강남구 역삼동 837-31,도곡로1길 22,경민리빙텔,31.62,37852,9,1998.0,22.0,2020-05-29,1197.090449,23.0,아파트,


서울특별시 강동구 길동 343-1
(27, 13)


Unnamed: 0,지번주소,도로명,건물(단지)명,전용면적(㎡),거래금액(만원),층,건축년도,계약날짜기준_건물연식,계약날짜,단가(만원/㎡),건물연식,부동산유형,소유자
2949,서울특별시 강동구 길동 343-1,천중로53길 6,현대웰하임(101동),16.959,13600,11,2012.0,7.0,2019-10-31,801.934076,9.0,아파트,
2950,서울특별시 강동구 길동 343-1,천중로53길 6,현대웰하임(101동),16.959,13600,11,2012.0,7.0,2019-10-31,801.934076,9.0,아파트,
2951,서울특별시 강동구 길동 343-1,천중로53길 6,현대웰하임(101동),16.959,13600,12,2012.0,7.0,2019-10-31,801.934076,9.0,아파트,
2952,서울특별시 강동구 길동 343-1,천중로53길 6,현대웰하임(101동),16.959,13900,12,2012.0,7.0,2019-10-31,819.623799,9.0,아파트,
2953,서울특별시 강동구 길동 343-1,천중로53길 6,현대웰하임(101동),16.959,13900,12,2012.0,7.0,2019-10-31,819.623799,9.0,아파트,
2954,서울특별시 강동구 길동 343-1,천중로53길 6,현대웰하임(101동),16.959,13600,13,2012.0,7.0,2019-10-31,801.934076,9.0,아파트,
2955,서울특별시 강동구 길동 343-1,천중로53길 6,현대웰하임(101동),16.959,13900,15,2012.0,7.0,2019-10-31,819.623799,9.0,아파트,
2956,서울특별시 강동구 길동 343-1,천중로53길 6,현대웰하임(101동),16.959,13600,13,2012.0,7.0,2019-10-31,801.934076,9.0,아파트,
2957,서울특별시 강동구 길동 343-1,천중로53길 6,현대웰하임(101동),16.959,13600,14,2012.0,7.0,2019-10-31,801.934076,9.0,아파트,
2958,서울특별시 강동구 길동 343-1,천중로53길 6,현대웰하임(101동),16.959,13600,15,2012.0,7.0,2019-10-31,801.934076,9.0,아파트,


In [18]:
concat_selected_df['소유자'] = np.nan

In [None]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from webdriver_manager.chrome import ChromeDriverManager

In [None]:
driver = webdriver.Chrome(ChromeDriverManager().install())

In [None]:
driver.get("http://www.iros.go.kr/PMainJ.jsp")

In [None]:
driver.get("http://www.iros.go.kr/frontservlet?cmd=RISUWelcomeViewC")

In [None]:
def get_owner_name(df):
    df = df.copy()
    
    count = 0
    for addr in tqdm(df['지번주소'].unique()):
        owner_name = np.nan
        addr = addr.replace('-0', '')
        
        addr_df = df[df['지번주소'] == addr]
        driver.switch_to.default_content()
        
        
        driver.switch_to.frame("inputFrame")
        
        elem_search = driver.find_element_by_xpath('html/body/form[1]/div[24]/div/div/div/fieldset/div/table/tbody/tr[5]/td/span/input')
        elem_search.clear()
        elem_search.send_keys(addr)
        elem_search.send_keys(Keys.RETURN)
        time.sleep(2)
        
        try:
            driver.switch_to.alert.accept()
        except:
            pass
        
        driver.switch_to.default_content()
        driver.switch_to.frame('resultFrame')
        driver.switch_to.frame('frmOuterModal')
        
        try:
            driver.find_element_by_xpath('/html/body/div[2]/div[2]/table/tbody/tr[3]/td[6]/button').click()
        except:
            continue
        
        time.sleep(0.5)
        
        elem_result = driver.find_element_by_xpath('/html/body/div[2]/div[2]/table/tbody/tr/td[3]')
        
        owner_name = elem_result.text
        
        driver.switch_to.default_content()
        
        df.loc[addr_df.index, '소유자'] = owner_name
        
        count += 1
    
    return df

In [None]:
prac_df = get_owner_name(concat_selected_df)

In [None]:
concat_selected_df = prac_df.copy()

In [None]:
driver.close()

In [None]:
concat_selected_df.reset_index(drop=True, inplace=True)

In [None]:
concat_selected_df.to_csv(bulktrade_basedir+'연립다세대_오피스텔_일괄구매_전체_소유자_포함.csv', index=False)

In [None]:
yunrip_selected_df.head()

In [None]:
yunrip_selected_df['소유자'].value_counts()

In [None]:
inv_df = yunrip_selected_df[
    yunrip_selected_df['소유자'] == '에***~'
]
print(inv_df.shape)
inv_df.head()

In [None]:
count = 0
for addr in inv_df['지번주소'].unique():
    addr_df = inv_df[inv_df['지번주소'] == addr]
    
    print(addr_df.shape)
    display(addr_df)
    
    count += 1
    if count > 5:
        break

In [None]:
print(yunrip_selected_df.shape)
yunrip_selected_df = yunrip_selected_df[
    (yunrip_selected_df['소유자'] == '서***~')
    |(yunrip_selected_df['소유자'] == '한***~')
    |(yunrip_selected_df['소유자'] == '에***~')
]
print(yunrip_selected_df.shape)
yunrip_selected_df.head()

In [None]:
officetel_selected_df['소유자'].value_counts()

In [None]:
inv_df = officetel_selected_df[
    officetel_selected_df['소유자'] == '중***~'
]
print(inv_df.shape)
inv_df.head()

In [None]:
count = 0
for addr in inv_df['지번주소'].unique():
    addr_df = inv_df[inv_df['지번주소'] == addr]
    
    print(addr_df.shape)
    display(addr_df)
    
    count += 1
    if count > 5:
        break

In [None]:
officetel_selected_df = officetel_selected_df[
    (officetel_selected_df['소유자'] == '서***~')
    |(officetel_selected_df['소유자'] == '한***~')
]
print(officetel_selected_df.shape)
officetel_selected_df.head()

In [None]:
count = 0
for addr in officetel_selected_df['지번주소'].unique():
    addr_df = officetel_selected_df[officetel_selected_df['지번주소'] == addr]
    
    print(addr_df.shape)
    display(addr_df)
    
    count += 1
    if count > 5:
        break

In [None]:
yunrip_selected_df['지번주소'].nunique()

In [None]:
yunrip_selected_df.head()

In [None]:
yunrip_selected_df['구'] = yunrip_selected_df['지번주소'].apply(lambda x: x.split(' ')[1])
yunrip_selected_df['동'] = yunrip_selected_df['지번주소'].apply(lambda x: x.split(' ')[2])

In [None]:
yunrip_selected_df.head()

In [None]:
yunrip_selected_df['구'].value_counts()

In [None]:
def mean_prices_per_size(df):
    df = df.copy()
    
    df['크기별_평균단가'] = df[['지번주소', '건축년도', '계약날짜', '전용면적(㎡)', '단가(만원/㎡)']].groupby(['지번주소', '건축년도', '계약날짜', '전용면적(㎡)']).transform('mean')['단가(만원/㎡)']
    df['크기별_평균총액'] = df[['지번주소', '건축년도', '계약날짜', '전용면적(㎡)', '거래금액(만원)']].groupby(['지번주소', '건축년도', '계약날짜', '전용면적(㎡)']).transform('mean')['거래금액(만원)']
    df = df[['지번주소', '건축년도', '건물연식', '계약날짜', '계약날짜기준_건물연식', '전용면적(㎡)', '크기별_평균총액', '크기별_평균단가']]
    df = df.drop_duplicates().reset_index(drop=True)
    
    return df

In [None]:
yunrip_short_df = mean_prices_per_size(yunrip_selected_df)
print(yunrip_short_df.shape)
yunrip_short_df.head()