In [1]:
import pandas as pd
import numpy as np
import os
# Let pandas print up to 500 rows / 500 columns before truncating a displayed frame.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib
# Draw minus signs with an ASCII hyphen instead of the Unicode minus glyph.
matplotlib.rcParams['axes.unicode_minus'] = False
# Turn off pandas' SettingWithCopyWarning for the whole notebook.
pd.options.mode.chained_assignment = None  # default='warn'
# Plot with the 'Malgun Gothic' font family
# (presumably so Korean labels render -- confirm the font is installed).
plt.rcParams['font.family'] = 'Malgun Gothic'
# Default matplotlib font size.
font = {'size': 16}
matplotlib.rc('font', **font)

In [2]:
def day_modifier(x):
    """Left-pad a day-of-month value to two characters.

    Args:
        x: The day, typically a number or a numeric string.

    Returns:
        ``x`` itself when ``pd.isna(x)`` is true.  Otherwise ``str(x)``,
        prefixed with ``'0'`` when that string is exactly one character long.
    """
    if pd.isna(x):
        return x

    day_text = str(x)
    # Only single-character values are padded; everything else passes through.
    return '0' + day_text if len(day_text) == 1 else day_text

In [3]:
def landnum_modifier(x):
    """Make sure a lot number carries a '-' separated sub-number.

    Args:
        x: The lot number, typically a string such as ``'1216-4'`` or ``'140'``.

    Returns:
        ``x`` itself when ``pd.isna(x)`` is true.  Otherwise ``str(x)``, with
        ``'-0'`` appended when the string contains no ``'-'`` at all.
    """
    if pd.isna(x):
        return x

    lot_number = str(x)
    if '-' not in lot_number:
        # No sub-number present: use 0 as the sub-number.
        return lot_number + '-0'
    return lot_number

In [4]:
def yunrip_data_prep(basedir='./국토교통부_실거래가_공개시스템/연립다세대/매매/', min_year=2015):
    """Load and clean the row-house / multi-household (연립다세대) sale records.

    Every ``*.csv`` file in ``basedir`` whose name contains ``'('`` is read
    (EUC-KR encoded, column names on row index 15), the files are stacked and
    the following columns are derived:

    * ``건물연식``: contract year minus ``건축년도``.
    * ``계약날짜``: datetime built from ``계약년월`` and ``계약일``.
    * ``거래금액(만원)``: thousands separators removed, converted to ``int``.
    * ``전용면적단가(만원/㎡)``: ``거래금액(만원)`` divided by ``전용면적(㎡)``.
    * ``지번주소``: ``시군구`` + ``' '`` + normalised ``번지``.
    * ``년``: year of ``계약날짜``.

    Args:
        basedir: Directory holding the exported CSV files.
        min_year: Rows whose contract year is below this value are dropped.

    Returns:
        A DataFrame with ``지번주소`` as the first column, followed by every
        remaining column except the raw address / date helper columns.

    Raises:
        ValueError: If ``basedir`` contains no matching CSV file.
    """
    # os.listdir returns names in arbitrary order; sort so that the row order
    # of the result is the same on every run and platform.
    filenames = sorted(f for f in os.listdir(basedir) if f.endswith('.csv') and '(' in f)
    if not filenames:
        raise ValueError('No matching CSV files found in ' + repr(basedir))

    dfs_list = []
    for f in tqdm(filenames):
        df = pd.read_csv(os.path.join(basedir, f), encoding='euc-kr', header=15)
        # This column exists only in some files; drop it so all frames share one schema.
        if '해제사유발생일' in df.columns.tolist():
            df = df.drop(columns=['해제사유발생일'])

        dfs_list.append(df)

    concat_df = pd.concat(dfs_list).reset_index(drop=True)

    concat_df['번지'] = concat_df['번지'].apply(landnum_modifier)

    concat_df['계약년월'] = concat_df['계약년월'].apply(str)
    concat_df['계약일'] = concat_df['계약일'].apply(str)

    # 계약년월 is text whose first four characters are the year and last two the month.
    contract_year = concat_df['계약년월'].apply(lambda x: x[:4])
    contract_month = concat_df['계약년월'].apply(lambda x: x[-2:])

    concat_df['건물연식'] = contract_year.apply(int) - concat_df['건축년도']

    concat_df['계약일'] = concat_df['계약일'].apply(day_modifier)

    concat_df['계약날짜'] = pd.to_datetime(
        contract_year + '-' + contract_month + '-' + concat_df['계약일'],
        format='%Y-%m-%d',
    )

    # str() lets the conversion also accept prices that were parsed as numbers already.
    concat_df['거래금액(만원)'] = concat_df['거래금액(만원)'].apply(lambda x: int(str(x).replace(',', '')))
    concat_df['전용면적단가(만원/㎡)'] = concat_df['거래금액(만원)'] / concat_df['전용면적(㎡)']

    concat_df['지번주소'] = concat_df['시군구'] + ' ' + concat_df['번지']

    concat_df['년'] = concat_df['계약날짜'].dt.year

    # '지번주소' is listed here only so that it can be re-inserted as the first column below.
    cols_to_drop = ['시군구', '번지', '본번', '부번', '지번주소', '계약년월', '계약일', '도로명']

    concat_df = concat_df[concat_df['년'] >= min_year]

    return concat_df[['지번주소'] + [col for col in concat_df.columns if col not in cols_to_drop]]

In [5]:
def officetel_data_prep(basedir='./국토교통부_실거래가_공개시스템/오피스텔/매매/', min_year=2015):
    """Load and clean the officetel (오피스텔) sale records.

    Every ``*.csv`` file in ``basedir`` whose name contains ``'('`` is read
    (EUC-KR encoded, column names on row index 15), the files are stacked and
    the following columns are derived:

    * ``건물연식``: contract year minus ``건축년도``.
    * ``계약날짜``: datetime built from ``계약년월`` and ``계약일``.
    * ``거래금액(만원)``: thousands separators removed, converted to ``int``.
    * ``전용면적단가(만원/㎡)``: ``거래금액(만원)`` divided by ``전용면적(㎡)``.
    * ``지번주소``: ``시군구`` + ``' '`` + normalised ``번지``.
    * ``년``: year of ``계약날짜``.

    Args:
        basedir: Directory holding the exported CSV files.
        min_year: Rows whose contract year is below this value are dropped.

    Returns:
        A DataFrame with ``지번주소`` as the first column, followed by every
        remaining column except the raw address / date helper columns.

    Raises:
        ValueError: If ``basedir`` contains no matching CSV file.
    """
    # os.listdir returns names in arbitrary order; sort so that the row order
    # of the result is the same on every run and platform.
    filenames = sorted(f for f in os.listdir(basedir) if f.endswith('.csv') and '(' in f)
    if not filenames:
        raise ValueError('No matching CSV files found in ' + repr(basedir))

    dfs_list = []
    for f in tqdm(filenames):
        df = pd.read_csv(os.path.join(basedir, f), encoding='euc-kr', header=15)
        # This column exists only in some files; drop it so all frames share one schema.
        if '해제사유발생일' in df.columns.tolist():
            df = df.drop(columns=['해제사유발생일'])

        dfs_list.append(df)

    concat_df = pd.concat(dfs_list).reset_index(drop=True)

    concat_df['번지'] = concat_df['번지'].apply(landnum_modifier)

    concat_df['계약년월'] = concat_df['계약년월'].apply(str)
    concat_df['계약일'] = concat_df['계약일'].apply(str)

    # 계약년월 is text whose first four characters are the year and last two the month.
    contract_year = concat_df['계약년월'].apply(lambda x: x[:4])
    contract_month = concat_df['계약년월'].apply(lambda x: x[-2:])

    concat_df['건물연식'] = contract_year.apply(int) - concat_df['건축년도']

    concat_df['계약일'] = concat_df['계약일'].apply(day_modifier)

    concat_df['계약날짜'] = pd.to_datetime(
        contract_year + '-' + contract_month + '-' + concat_df['계약일'],
        format='%Y-%m-%d',
    )

    # str() lets the conversion also accept prices that were parsed as numbers already.
    concat_df['거래금액(만원)'] = concat_df['거래금액(만원)'].apply(lambda x: int(str(x).replace(',', '')))
    concat_df['전용면적단가(만원/㎡)'] = concat_df['거래금액(만원)'] / concat_df['전용면적(㎡)']

    concat_df['지번주소'] = concat_df['시군구'] + ' ' + concat_df['번지']

    concat_df['년'] = concat_df['계약날짜'].dt.year

    # '지번주소' is listed here only so that it can be re-inserted as the first column below.
    cols_to_drop = ['시군구', '번지', '본번', '부번', '지번주소', '계약년월', '계약일', '도로명']

    concat_df = concat_df[concat_df['년'] >= min_year]

    return concat_df[['지번주소'] + [col for col in concat_df.columns if col not in cols_to_drop]]

In [6]:
# Build the cleaned row-house (연립다세대) sale table, then show its shape and first rows.
yunrip_df = yunrip_data_prep()
print(yunrip_df.shape)
yunrip_df.head()

16it [00:01, 11.01it/s]


(329098, 11)


Unnamed: 0,지번주소,건물명,전용면적(㎡),대지권면적(㎡),거래금액(만원),층,건축년도,건물연식,계약날짜,전용면적단가(만원/㎡),년
358389,서울특별시 강남구 개포동 1216-4,(1216-4),33.77,21.67,24800,4,2012.0,3.0,2015-03-26,734.379627,2015
358390,서울특별시 강남구 개포동 1216-4,(1216-4),35.87,23.02,27200,4,2012.0,3.0,2015-06-23,758.293839,2015
358391,서울특별시 강남구 개포동 1216-4,(1216-4),29.12,18.68,22200,4,2012.0,3.0,2015-07-20,762.362637,2015
358392,서울특별시 강남구 개포동 1216-4,(1216-4),29.97,19.23,22500,3,2012.0,3.0,2015-08-06,750.750751,2015
358393,서울특별시 강남구 개포동 170-18,(170-18),26.6,21.53,21000,1,1988.0,27.0,2015-08-21,789.473684,2015


In [7]:
# Build the cleaned officetel (오피스텔) sale table, then show its shape and first rows.
officetel_df = officetel_data_prep()
print(officetel_df.shape)
officetel_df.head()

16it [00:00, 44.19it/s]


(77955, 10)


Unnamed: 0,지번주소,단지명,전용면적(㎡),거래금액(만원),층,건축년도,건물연식,계약날짜,전용면적단가(만원/㎡),년
100156,서울특별시 강남구 개포동 13-3,대청타워,43.24,22000,14,1997.0,18.0,2015-01-08,508.788159,2015
100157,서울특별시 강남구 개포동 13-3,대청타워,32.44,15800,21,1997.0,18.0,2015-01-12,487.053021,2015
100158,서울특별시 강남구 개포동 13-3,대청타워,32.44,16000,10,1997.0,18.0,2015-01-19,493.218249,2015
100159,서울특별시 강남구 개포동 13-3,대청타워,32.44,15400,21,1997.0,18.0,2015-01-26,474.722565,2015
100160,서울특별시 강남구 개포동 13-3,대청타워,31.91,16000,26,1997.0,18.0,2015-01-28,501.410216,2015


In [8]:
# Load the pre-processed land specification table
# (one row per 지번주소 and 년, judging by the preview below).
land_specs_df = pd.read_csv('./prepped_data/land_specs_ver_4.csv')
print(land_specs_df.shape)
land_specs_df.head()

(8706295, 11)


Unnamed: 0,지번주소,년,지목명,토지면적,용도지역명1,용도지역명2,토지이동상황,지형높이,지형형상,도로접면,공시지가
0,서울특별시 강남구 개포동 100-0,2013,전,876.0,자연녹지지역,지정되지않음,전,완경사,부정형,맹지,325000.0
1,서울특별시 강남구 개포동 100-0,2014,전,876.0,자연녹지지역,지정되지않음,전,완경사,부정형,맹지,330000.0
2,서울특별시 강남구 개포동 100-0,2015,전,876.0,자연녹지지역,지정되지않음,전,완경사,부정형,맹지,335000.0
3,서울특별시 강남구 개포동 100-0,2016,전,876.0,자연녹지지역,지정되지않음,전,완경사,부정형,맹지,345000.0
4,서울특별시 강남구 개포동 100-0,2017,전,876.0,자연녹지지역,지정되지않음,전,완경사,부정형,맹지,355000.0


In [9]:
# Frequency of each value in the 도로접면 column.
land_specs_df['도로접면'].value_counts()

세로한면(가)    2688474
지정되지않음     1289408
세로한면(불)    1008484
세로각지(가)     726127
소로한면        627975
소로각지        503622
광대소각        428623
중로각지        393928
중로한면        298912
광대로한면       278183
맹지          185238
광대세각        171918
세로각지(불)     105268
Name: 도로접면, dtype: int64

In [10]:
# Attach the land attributes of the same address and year to each row-house trade.
# merge() defaults to an inner join, so trades without a matching land row are dropped.
yunrip_merge_df = yunrip_df.merge(land_specs_df, on=['지번주소', '년'])
print(yunrip_merge_df.shape)
yunrip_merge_df.head()

(326789, 20)


Unnamed: 0,지번주소,건물명,전용면적(㎡),대지권면적(㎡),거래금액(만원),층,건축년도,건물연식,계약날짜,전용면적단가(만원/㎡),년,지목명,토지면적,용도지역명1,용도지역명2,토지이동상황,지형높이,지형형상,도로접면,공시지가
0,서울특별시 강남구 개포동 1216-4,(1216-4),33.77,21.67,24800,4,2012.0,3.0,2015-03-26,734.379627,2015,대,257.0,제2종일반주거지역,지정되지않음,다세대,평지,정방형,세로각지(가),3470000.0
1,서울특별시 강남구 개포동 1216-4,(1216-4),35.87,23.02,27200,4,2012.0,3.0,2015-06-23,758.293839,2015,대,257.0,제2종일반주거지역,지정되지않음,다세대,평지,정방형,세로각지(가),3470000.0
2,서울특별시 강남구 개포동 1216-4,(1216-4),29.12,18.68,22200,4,2012.0,3.0,2015-07-20,762.362637,2015,대,257.0,제2종일반주거지역,지정되지않음,다세대,평지,정방형,세로각지(가),3470000.0
3,서울특별시 강남구 개포동 1216-4,(1216-4),29.97,19.23,22500,3,2012.0,3.0,2015-08-06,750.750751,2015,대,257.0,제2종일반주거지역,지정되지않음,다세대,평지,정방형,세로각지(가),3470000.0
4,서울특별시 강남구 개포동 170-18,(170-18),26.6,21.53,21000,1,1988.0,27.0,2015-08-21,789.473684,2015,대,130.3,제2종일반주거지역,지정되지않음,다세대,평지,세로장방,세로한면(가),4370000.0


In [11]:
# Attach the land attributes of the same address and year to each officetel trade.
# merge() defaults to an inner join, so trades without a matching land row are dropped.
officetel_merge_df = officetel_df.merge(land_specs_df, on=['지번주소', '년'])
print(officetel_merge_df.shape)
officetel_merge_df.head()

(75617, 19)


Unnamed: 0,지번주소,단지명,전용면적(㎡),거래금액(만원),층,건축년도,건물연식,계약날짜,전용면적단가(만원/㎡),년,지목명,토지면적,용도지역명1,용도지역명2,토지이동상황,지형높이,지형형상,도로접면,공시지가
0,서울특별시 강남구 개포동 13-3,대청타워,43.24,22000,14,1997.0,18.0,2015-01-08,508.788159,2015,대,4188.5,일반상업지역,지정되지않음,업무용,평지,세로장방,광대소각,12670000.0
1,서울특별시 강남구 개포동 13-3,대청타워,32.44,15800,21,1997.0,18.0,2015-01-12,487.053021,2015,대,4188.5,일반상업지역,지정되지않음,업무용,평지,세로장방,광대소각,12670000.0
2,서울특별시 강남구 개포동 13-3,대청타워,32.44,16000,10,1997.0,18.0,2015-01-19,493.218249,2015,대,4188.5,일반상업지역,지정되지않음,업무용,평지,세로장방,광대소각,12670000.0
3,서울특별시 강남구 개포동 13-3,대청타워,32.44,15400,21,1997.0,18.0,2015-01-26,474.722565,2015,대,4188.5,일반상업지역,지정되지않음,업무용,평지,세로장방,광대소각,12670000.0
4,서울특별시 강남구 개포동 13-3,대청타워,31.91,16000,26,1997.0,18.0,2015-01-28,501.410216,2015,대,4188.5,일반상업지역,지정되지않음,업무용,평지,세로장방,광대소각,12670000.0


In [12]:
# Load the appraiser's summary workbook (two header rows -> MultiIndex columns),
# discard rows without a ('소재지', '소재지') value, order the rows by
# district / neighbourhood / lot number and renumber the index from 0.
reference_df = (
    pd.read_excel('./감정평가사_자료/이승준_총괄표.xlsx', header=[0, 1])
    .dropna(subset=[('소재지', '소재지')])
    .sort_values([('소재지', '지역'), ('소재지', '소재지'), ('소재지', '지번')])
    .reset_index(drop=True)
)
print(reference_df.shape)
reference_df.head()

(131, 39)


Unnamed: 0_level_0,소재지,소재지,소재지,토지내용,토지내용,토지내용,토지내용,토지내용,토지내용,건물내용,건물내용,건물내용,건물내용,건물내용,건물내용,건물내용,건물내용,건물내용,수입/비용,수입/비용,수입/비용,수입/비용,수입/비용,수입/비용,평가단가(원/전유㎡),평가단가(원/전유㎡),매출원가(원/전유㎡),매출원가(원/전유㎡),사업진행일자,사업진행일자,사업진행일자,사업진행일자,건축비\n(원/평),원가비율,원가비율,원가비율,원가비율,원가비율,비고
Unnamed: 0_level_1,지역,소재지,지번,용도지역,도로너비,토지면적(㎡),토지매매금액,토지매매단가\n(원/㎡),공시지가와격차\n(개공/매매),건물용도,공급면적(㎡),전유면적(㎡),전용율,용적율\n(%),건폐율\n(%),구분건물호,건물동수,건물층수,의뢰인,기준시점,전체평가금액,투입비용합계,순이익,순이익율,전유면적,공급면적,전유면적기준,공급면적기준,토지계약일자,허가일자,착공일자,사용승인일,Unnamed: 32_level_1,토지원가비율,건축원가비율,부대비용,금융비용,합계,Unnamed: 38_level_1
0,강남구,개포동,1195-10,2종일주,6미터,265.7,2970000000.0,11178020.0,0.433,다세대주택,519.54,432.22,0.832,195.54,59.62,15개호,1개동,지상5층,SH공사,2020.09.23,4719500000.0,4167814000.0,551685800.0,0.117,10919208.0,9083997.0,9642807.0,8372395.0,2019.02.26,2019.06.18,2019.06.18,2020.01.13,5500000.0,0.7563,0.2139,0.006,0.0239,1.0,
1,강남구,개포동,1199-7,2종일주,4미터,248.0,2550000000.0,10282258.0,0.365,다세대주택,463.84,390.67,0.842,187.04,59.61,14개호,1개동,지상5층,LH공사,2019.11.29,4040000000.0,3593249000.0,446751100.0,0.111,10341209.0,8709524.0,9197658.0,8074624.0,2018.08.21,2018.12.31,2019.01.07,2019.06.04,5400000.0,0.752,0.2175,0.0061,0.0244,1.0,
2,강남구,개포동,1216-7,2종일주,8미터,258.1,2931000000.0,11356064.0,0.472,도시행생활주택,489.46,400.29,0.818,199.62,59.89,15개호,1개동,지상5층,SH공사,2019.11.29,4586000000.0,4054965000.0,531034800.0,0.116,11456694.0,9369509.0,10130070.0,8312283.0,2019.01.24,2019.06.17,2019.07.23,2020.02.25,5400000.0,0.7667,0.2034,0.0057,0.0242,1.0,
3,강남구,역삼동,707-9,일반상업,8미터,363.5,9500000000.0,26134801.0,0.58,도시행생활주택,1695.85,1289.62,0.76,199.62,59.89,15개호,1개동,지상5층,LH공사,2019.11.29,22199830000.0,13988110000.0,8211721000.0,0.37,17214239.0,11159450.0,10846690.0,16602582.0,2019.08.14,2019.07.15,2020.01.03,2020.10.07,5800000.0,0.7189,0.2568,0.0072,0.017,1.0,
4,강남구,역삼동,751-6,2종일주,6미터,391.8,5900000000.0,15058703.0,0.441,도시행생활주택,658.88,537.65,0.816,168.17,46.35,20개호,1개동,지상6층,LH공사,2019.11.29,9810500000.0,7604511000.0,2205989000.0,0.225,18247001.0,14889660.0,14143980.0,12107552.0,2019.01.04,2019.03.05,2019.04.20,2019.11.29,5500000.0,0.8249,0.1486,0.0042,0.0223,1.0,


In [13]:
# Normalise lot numbers with landnum_modifier (appends '-0' when there is no '-'),
# the same normalisation the trade tables apply to 번지.
reference_df[('소재지', '지번')] = reference_df[('소재지', '지번')].apply(landnum_modifier)

In [14]:
# Build the full lot address: '서울특별시 ' + district + ' ' + neighbourhood + ' ' + lot number.
reference_df['지번주소'] = '서울특별시 ' + reference_df[('소재지', '지역')] + ' ' + reference_df[('소재지', '소재지')]\
+ ' ' + reference_df[('소재지', '지번')]

In [15]:
def date_modifier(x):
    """Normalise a date-like value to dot-separated text.

    Args:
        x: A string or datetime-like value.

    Returns:
        ``x`` itself when ``pd.isna(x)`` is true.  Otherwise ``str(x)`` with
        every ``'-'`` replaced by ``'.'``; if the resulting text contains no
        ``'.'`` at all, ``np.nan`` is returned instead.
    """
    if pd.isna(x):
        return x

    dotted = str(x).replace('-', '.')
    # Text without any '.' separator cannot be split into date parts.
    return dotted if '.' in dotted else np.nan

In [16]:
# Cleaned copy of the ('수입/비용', '기준시점') column: dot-separated text, or NaN when unusable.
reference_df['기준시점2'] = reference_df[('수입/비용', '기준시점')].apply(date_modifier)

In [17]:
def get_year(x):
    """Extract the year from dot-separated date text.

    Args:
        x: Text such as ``'2020.09.23'``, or a missing value.

    Returns:
        ``x`` itself when ``pd.isna(x)`` is true; otherwise the part before
        the first ``'.'`` converted to ``int``.
    """
    if pd.isna(x):
        return x

    year_text, _, _ = x.partition('.')
    return int(year_text)

In [18]:
# Year of the cleaned base date (stays NaN where 기준시점2 is missing).
reference_df['년'] = reference_df['기준시점2'].apply(get_year)

In [19]:
# Rows with no derivable year are given 2020; the second line shows how many NaN remain.
reference_df['년'] = reference_df['년'].fillna(2020)
reference_df['년'].isna().sum()

0

In [20]:
def get_bd_type(x):
    """Map a building-use description to one of two building types.

    Args:
        x: Building-use text, or a missing value.

    Returns:
        ``x`` itself when ``pd.isna(x)`` is true; ``'오피스텔'`` when the text
        contains ``'오피'``; ``'다세대'`` for every other text.
    """
    if pd.isna(x):
        return x

    return '오피스텔' if '오피' in x else '다세대'

In [21]:
# Classify each appraisal as '오피스텔' or '다세대' from its ('건물내용', '건물용도') text.
reference_df['건물유형'] = reference_df[('건물내용', '건물용도')].apply(get_bd_type)

In [22]:
# Create the result columns as NaN placeholders. For each period (last 3 years,
# last 2 years, same year) and each statistic (median, mean) there is one value
# column followed by its '_평가단가와의격차' companion column.
for period in ['최근3년', '최근2년', '당해']:
    for stat in ['50분위', '평균']:
        reference_df[period + stat] = np.nan
        reference_df[period + stat + '_평가단가와의격차'] = np.nan

In [23]:
# Preview the appraisal table with the newly added columns.
reference_df.head()

Unnamed: 0_level_0,소재지,소재지,소재지,토지내용,토지내용,토지내용,토지내용,토지내용,토지내용,건물내용,건물내용,건물내용,건물내용,건물내용,건물내용,건물내용,건물내용,건물내용,수입/비용,수입/비용,수입/비용,수입/비용,수입/비용,수입/비용,평가단가(원/전유㎡),평가단가(원/전유㎡),매출원가(원/전유㎡),매출원가(원/전유㎡),사업진행일자,사업진행일자,사업진행일자,사업진행일자,건축비\n(원/평),원가비율,원가비율,원가비율,원가비율,원가비율,비고,지번주소,기준시점2,년,건물유형,최근3년50분위,최근3년50분위_평가단가와의격차,최근3년평균,최근3년평균_평가단가와의격차,최근2년50분위,최근2년50분위_평가단가와의격차,최근2년평균,최근2년평균_평가단가와의격차,당해50분위,당해50분위_평가단가와의격차,당해평균,당해평균_평가단가와의격차
Unnamed: 0_level_1,지역,소재지,지번,용도지역,도로너비,토지면적(㎡),토지매매금액,토지매매단가\n(원/㎡),공시지가와격차\n(개공/매매),건물용도,공급면적(㎡),전유면적(㎡),전용율,용적율\n(%),건폐율\n(%),구분건물호,건물동수,건물층수,의뢰인,기준시점,전체평가금액,투입비용합계,순이익,순이익율,전유면적,공급면적,전유면적기준,공급면적기준,토지계약일자,허가일자,착공일자,사용승인일,Unnamed: 32_level_1,토지원가비율,건축원가비율,부대비용,금융비용,합계,Unnamed: 38_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1
0,강남구,개포동,1195-10,2종일주,6미터,265.7,2970000000.0,11178020.0,0.433,다세대주택,519.54,432.22,0.832,195.54,59.62,15개호,1개동,지상5층,SH공사,2020.09.23,4719500000.0,4167814000.0,551685800.0,0.117,10919208.0,9083997.0,9642807.0,8372395.0,2019.02.26,2019.06.18,2019.06.18,2020.01.13,5500000.0,0.7563,0.2139,0.006,0.0239,1.0,,서울특별시 강남구 개포동 1195-10,2020.09.23,2020.0,다세대,,,,,,,,,,,,
1,강남구,개포동,1199-7,2종일주,4미터,248.0,2550000000.0,10282258.0,0.365,다세대주택,463.84,390.67,0.842,187.04,59.61,14개호,1개동,지상5층,LH공사,2019.11.29,4040000000.0,3593249000.0,446751100.0,0.111,10341209.0,8709524.0,9197658.0,8074624.0,2018.08.21,2018.12.31,2019.01.07,2019.06.04,5400000.0,0.752,0.2175,0.0061,0.0244,1.0,,서울특별시 강남구 개포동 1199-7,2019.11.29,2019.0,다세대,,,,,,,,,,,,
2,강남구,개포동,1216-7,2종일주,8미터,258.1,2931000000.0,11356064.0,0.472,도시행생활주택,489.46,400.29,0.818,199.62,59.89,15개호,1개동,지상5층,SH공사,2019.11.29,4586000000.0,4054965000.0,531034800.0,0.116,11456694.0,9369509.0,10130070.0,8312283.0,2019.01.24,2019.06.17,2019.07.23,2020.02.25,5400000.0,0.7667,0.2034,0.0057,0.0242,1.0,,서울특별시 강남구 개포동 1216-7,2019.11.29,2019.0,다세대,,,,,,,,,,,,
3,강남구,역삼동,707-9,일반상업,8미터,363.5,9500000000.0,26134801.0,0.58,도시행생활주택,1695.85,1289.62,0.76,199.62,59.89,15개호,1개동,지상5층,LH공사,2019.11.29,22199830000.0,13988110000.0,8211721000.0,0.37,17214239.0,11159450.0,10846690.0,16602582.0,2019.08.14,2019.07.15,2020.01.03,2020.10.07,5800000.0,0.7189,0.2568,0.0072,0.017,1.0,,서울특별시 강남구 역삼동 707-9,2019.11.29,2019.0,다세대,,,,,,,,,,,,
4,강남구,역삼동,751-6,2종일주,6미터,391.8,5900000000.0,15058703.0,0.441,도시행생활주택,658.88,537.65,0.816,168.17,46.35,20개호,1개동,지상6층,LH공사,2019.11.29,9810500000.0,7604511000.0,2205989000.0,0.225,18247001.0,14889660.0,14143980.0,12107552.0,2019.01.04,2019.03.05,2019.04.20,2019.11.29,5500000.0,0.8249,0.1486,0.0042,0.0223,1.0,,서울특별시 강남구 역삼동 751-6,2019.11.29,2019.0,다세대,,,,,,,,,,,,


In [24]:
# Number of missing values per column.
reference_df.isna().sum()

소재지                지역                       0
                   소재지                      0
                   지번                       0
토지내용               용도지역                     0
                   도로너비                     0
                   토지면적(㎡)                  0
                   토지매매금액                   0
                   토지매매단가\n(원/㎡)            0
                   공시지가와격차\n(개공/매매)         0
건물내용               건물용도                     0
                   공급면적(㎡)                  0
                   전유면적(㎡)                  0
                   전용율                      0
                   용적율\n(%)                 0
                   건폐율\n(%)                 0
                   구분건물호                    0
                   건물동수                     0
                   건물층수                     0
수입/비용              의뢰인                      0
                   기준시점                     0
                   전체평가금액                   0
                   투입비용합계         

In [25]:
def _as_scalar(value):
    """Return the only element of a one-element Series; pass anything else through.

    With MultiIndex columns, ``df.loc[i, 'top_level_name']`` can come back as a
    one-element Series instead of a scalar.  Unwrapping explicitly avoids relying
    on positional ``series[0]`` / ``int(series)``, both of which are deprecated.
    """
    return value.iloc[0] if isinstance(value, pd.Series) else value


# Only trades of buildings at most this many years old are used as comparables.
max_building_age = 5
unit_price_col = '전용면적단가(만원/㎡)'

# For every appraisal row, collect comparable trades (same district + neighbourhood,
# same zoning prefix, young buildings) and store median / mean unit prices for
# three year windows ending at the appraisal year.
for i in tqdm(range(reference_df.shape[0])):
    gu = reference_df.loc[i, ('소재지', '지역')].replace(' ', '')
    dong = reference_df.loc[i, ('소재지', '소재지')].replace(' ', '')

    # Only the first three characters of the zoning text are used for matching.
    land_purpose = reference_df.loc[i, ('토지내용', '용도지역')].replace(' ', '')[:3]
    year = int(_as_scalar(reference_df.loc[i, '년']))
    bd_type = _as_scalar(reference_df.loc[i, '건물유형'])

    # Officetel appraisals are compared with officetel trades, everything else
    # with row-house / multi-household trades.
    source_df = officetel_merge_df if bd_type == '오피스텔' else yunrip_merge_df

    # regex=False: the address and zoning text are literal substrings, not patterns.
    # na=False: rows with a missing address / zoning value simply do not match.
    target_df = source_df[
        source_df['지번주소'].str.contains(gu + ' ' + dong, regex=False, na=False)
        & source_df['용도지역명1'].str.contains(land_purpose, regex=False, na=False)
        & (source_df['건물연식'] <= max_building_age)
    ]

    # Windows: [year-2, year], [year-1, year] and exactly `year`.
    target_year_minus_two_df = target_df[(target_df['년'] <= year) & (target_df['년'] >= year - 2)]
    target_year_minus_one_df = target_df[(target_df['년'] <= year) & (target_df['년'] >= year - 1)]
    target_year_df = target_df[target_df['년'] == year]

    # An empty window yields NaN for both statistics.
    reference_df.loc[i, '최근3년50분위'] = target_year_minus_two_df[unit_price_col].median()
    reference_df.loc[i, '최근3년평균'] = target_year_minus_two_df[unit_price_col].mean()
    reference_df.loc[i, '최근2년50분위'] = target_year_minus_one_df[unit_price_col].median()
    reference_df.loc[i, '최근2년평균'] = target_year_minus_one_df[unit_price_col].mean()
    reference_df.loc[i, '당해50분위'] = target_year_df[unit_price_col].median()
    reference_df.loc[i, '당해평균'] = target_year_df[unit_price_col].mean()

100%|████████████████████████████████████████████████████████████████████████████████| 131/131 [00:22<00:00,  5.89it/s]


In [26]:
# Preview the appraisal table after the market statistics have been filled in.
reference_df.head()

Unnamed: 0_level_0,소재지,소재지,소재지,토지내용,토지내용,토지내용,토지내용,토지내용,토지내용,건물내용,건물내용,건물내용,건물내용,건물내용,건물내용,건물내용,건물내용,건물내용,수입/비용,수입/비용,수입/비용,수입/비용,수입/비용,수입/비용,평가단가(원/전유㎡),평가단가(원/전유㎡),매출원가(원/전유㎡),매출원가(원/전유㎡),사업진행일자,사업진행일자,사업진행일자,사업진행일자,건축비\n(원/평),원가비율,원가비율,원가비율,원가비율,원가비율,비고,지번주소,기준시점2,년,건물유형,최근3년50분위,최근3년50분위_평가단가와의격차,최근3년평균,최근3년평균_평가단가와의격차,최근2년50분위,최근2년50분위_평가단가와의격차,최근2년평균,최근2년평균_평가단가와의격차,당해50분위,당해50분위_평가단가와의격차,당해평균,당해평균_평가단가와의격차
Unnamed: 0_level_1,지역,소재지,지번,용도지역,도로너비,토지면적(㎡),토지매매금액,토지매매단가\n(원/㎡),공시지가와격차\n(개공/매매),건물용도,공급면적(㎡),전유면적(㎡),전용율,용적율\n(%),건폐율\n(%),구분건물호,건물동수,건물층수,의뢰인,기준시점,전체평가금액,투입비용합계,순이익,순이익율,전유면적,공급면적,전유면적기준,공급면적기준,토지계약일자,허가일자,착공일자,사용승인일,Unnamed: 32_level_1,토지원가비율,건축원가비율,부대비용,금융비용,합계,Unnamed: 38_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1
0,강남구,개포동,1195-10,2종일주,6미터,265.7,2970000000.0,11178020.0,0.433,다세대주택,519.54,432.22,0.832,195.54,59.62,15개호,1개동,지상5층,SH공사,2020.09.23,4719500000.0,4167814000.0,551685800.0,0.117,10919208.0,9083997.0,9642807.0,8372395.0,2019.02.26,2019.06.18,2019.06.18,2020.01.13,5500000.0,0.7563,0.2139,0.006,0.0239,1.0,,서울특별시 강남구 개포동 1195-10,2020.09.23,2020.0,다세대,1079.676961,,1065.218106,,1114.612052,,1115.573296,,1146.667354,,1138.21815,
1,강남구,개포동,1199-7,2종일주,4미터,248.0,2550000000.0,10282258.0,0.365,다세대주택,463.84,390.67,0.842,187.04,59.61,14개호,1개동,지상5층,LH공사,2019.11.29,4040000000.0,3593249000.0,446751100.0,0.111,10341209.0,8709524.0,9197658.0,8074624.0,2018.08.21,2018.12.31,2019.01.07,2019.06.04,5400000.0,0.752,0.2175,0.0061,0.0244,1.0,,서울특별시 강남구 개포동 1199-7,2019.11.29,2019.0,다세대,988.660278,,986.346819,,1005.36193,,1007.860927,,1060.92935,,1071.290025,
2,강남구,개포동,1216-7,2종일주,8미터,258.1,2931000000.0,11356064.0,0.472,도시행생활주택,489.46,400.29,0.818,199.62,59.89,15개호,1개동,지상5층,SH공사,2019.11.29,4586000000.0,4054965000.0,531034800.0,0.116,11456694.0,9369509.0,10130070.0,8312283.0,2019.01.24,2019.06.17,2019.07.23,2020.02.25,5400000.0,0.7667,0.2034,0.0057,0.0242,1.0,,서울특별시 강남구 개포동 1216-7,2019.11.29,2019.0,다세대,988.660278,,986.346819,,1005.36193,,1007.860927,,1060.92935,,1071.290025,
3,강남구,역삼동,707-9,일반상업,8미터,363.5,9500000000.0,26134801.0,0.58,도시행생활주택,1695.85,1289.62,0.76,199.62,59.89,15개호,1개동,지상5층,LH공사,2019.11.29,22199830000.0,13988110000.0,8211721000.0,0.37,17214239.0,11159450.0,10846690.0,16602582.0,2019.08.14,2019.07.15,2020.01.03,2020.10.07,5800000.0,0.7189,0.2568,0.0072,0.017,1.0,,서울특별시 강남구 역삼동 707-9,2019.11.29,2019.0,다세대,,,,,,,,,,,,
4,강남구,역삼동,751-6,2종일주,6미터,391.8,5900000000.0,15058703.0,0.441,도시행생활주택,658.88,537.65,0.816,168.17,46.35,20개호,1개동,지상6층,LH공사,2019.11.29,9810500000.0,7604511000.0,2205989000.0,0.225,18247001.0,14889660.0,14143980.0,12107552.0,2019.01.04,2019.03.05,2019.04.20,2019.11.29,5500000.0,0.8249,0.1486,0.0042,0.0223,1.0,,서울특별시 강남구 역삼동 751-6,2019.11.29,2019.0,다세대,1227.524205,,1245.563652,,1261.127596,,1308.884122,,1420.128276,,1438.316844,


In [27]:
# Flatten the two-level column labels into single strings.
# Columns that came from the workbook have two non-empty levels and become
# 'top_sub'. Columns added in this notebook have an empty second level and keep
# just their top-level name. Deciding per column (rather than by a fixed column
# count) keeps this correct if the workbook gains or loses columns, and labels
# that are already plain strings pass through so the cell can be re-run.
cols_list = []
for col in reference_df.columns:
    if isinstance(col, tuple):
        top, sub = col
        cols_list.append(top if sub == '' else top + '_' + sub)
    else:
        cols_list.append(col)

In [28]:
# Replace the column labels of reference_df with the flattened names in cols_list.
reference_df.columns = cols_list

In [29]:
# Columns kept for the comparison table: identification, zoning / road info,
# the appraised unit price and the market statistics with their companion columns.
cols = ['지번주소', '기준시점2', '건물내용_건물용도', '건물유형', '토지내용_용도지역', '토지내용_도로너비',
        '평가단가(원/전유㎡) _전유면적', '최근3년50분위', '최근3년50분위_평가단가와의격차', '최근3년평균',
        '최근3년평균_평가단가와의격차', '최근2년50분위', '최근2년50분위_평가단가와의격차', '최근2년평균',
        '최근2년평균_평가단가와의격차', '당해50분위', '당해50분위_평가단가와의격차', '당해평균', '당해평균_평가단가와의격차'
       ]
# Explicit copy: later cells assign into selected_df, which must be an
# independent frame rather than a selection tied to reference_df.
selected_df = reference_df[cols].copy()

In [30]:
# Convert the appraised unit price from won to 10,000-won units so it is
# comparable with the trade unit prices. The value is always derived from the
# untouched reference_df column, so re-running this cell does not divide twice.
selected_df['평가단가(원/전유㎡) _전유면적'] = reference_df['평가단가(원/전유㎡) _전유면적'] / 10000

In [31]:
# For every market statistic, store market value / appraised unit price in the
# companion column named '<statistic>_평가단가와의격차' (the stored value is a ratio).
selected_cols = ['최근3년50분위', '최근3년평균', '최근2년50분위', '최근2년평균', '당해50분위', '당해평균']
appraised_unit_price = selected_df['평가단가(원/전유㎡) _전유면적']
for col in selected_cols:
    selected_df[col+'_평가단가와의격차'] = selected_df[col].div(appraised_unit_price)

In [32]:
# Preview the comparison table.
selected_df.head()

Unnamed: 0,지번주소,기준시점2,건물내용_건물용도,건물유형,토지내용_용도지역,토지내용_도로너비,평가단가(원/전유㎡) _전유면적,최근3년50분위,최근3년50분위_평가단가와의격차,최근3년평균,최근3년평균_평가단가와의격차,최근2년50분위,최근2년50분위_평가단가와의격차,최근2년평균,최근2년평균_평가단가와의격차,당해50분위,당해50분위_평가단가와의격차,당해평균,당해평균_평가단가와의격차
0,서울특별시 강남구 개포동 1195-10,2020.09.23,다세대주택,다세대,2종일주,6미터,1091.9208,1079.676961,0.988787,1065.218106,0.975545,1114.612052,1.020781,1115.573296,1.021661,1146.667354,1.050138,1138.21815,1.0424
1,서울특별시 강남구 개포동 1199-7,2019.11.29,다세대주택,다세대,2종일주,4미터,1034.1209,988.660278,0.956039,986.346819,0.953802,1005.36193,0.97219,1007.860927,0.974606,1060.92935,1.025924,1071.290025,1.035943
2,서울특별시 강남구 개포동 1216-7,2019.11.29,도시행생활주택,다세대,2종일주,8미터,1145.6694,988.660278,0.862954,986.346819,0.860935,1005.36193,0.877532,1007.860927,0.879714,1060.92935,0.926034,1071.290025,0.935078
3,서울특별시 강남구 역삼동 707-9,2019.11.29,도시행생활주택,다세대,일반상업,8미터,1721.4239,,,,,,,,,,,,
4,서울특별시 강남구 역삼동 751-6,2019.11.29,도시행생활주택,다세대,2종일주,6미터,1824.7001,1227.524205,0.672727,1245.563652,0.682613,1261.127596,0.691142,1308.884122,0.717315,1420.128276,0.77828,1438.316844,0.788248


In [33]:
# Display the comparison table (row count shown is limited by display.max_rows).
selected_df

Unnamed: 0,지번주소,기준시점2,건물내용_건물용도,건물유형,토지내용_용도지역,토지내용_도로너비,평가단가(원/전유㎡) _전유면적,최근3년50분위,최근3년50분위_평가단가와의격차,최근3년평균,최근3년평균_평가단가와의격차,최근2년50분위,최근2년50분위_평가단가와의격차,최근2년평균,최근2년평균_평가단가와의격차,당해50분위,당해50분위_평가단가와의격차,당해평균,당해평균_평가단가와의격차
0,서울특별시 강남구 개포동 1195-10,2020.09.23,다세대주택,다세대,2종일주,6미터,1091.9208,1079.676961,0.988787,1065.218106,0.975545,1114.612052,1.020781,1115.573296,1.021661,1146.667354,1.050138,1138.21815,1.0424
1,서울특별시 강남구 개포동 1199-7,2019.11.29,다세대주택,다세대,2종일주,4미터,1034.1209,988.660278,0.956039,986.346819,0.953802,1005.36193,0.97219,1007.860927,0.974606,1060.92935,1.025924,1071.290025,1.035943
2,서울특별시 강남구 개포동 1216-7,2019.11.29,도시행생활주택,다세대,2종일주,8미터,1145.6694,988.660278,0.862954,986.346819,0.860935,1005.36193,0.877532,1007.860927,0.879714,1060.92935,0.926034,1071.290025,0.935078
3,서울특별시 강남구 역삼동 707-9,2019.11.29,도시행생활주택,다세대,일반상업,8미터,1721.4239,,,,,,,,,,,,
4,서울특별시 강남구 역삼동 751-6,2019.11.29,도시행생활주택,다세대,2종일주,6미터,1824.7001,1227.524205,0.672727,1245.563652,0.682613,1261.127596,0.691142,1308.884122,0.717315,1420.128276,0.77828,1438.316844,0.788248
5,서울특별시 강남구 역삼동 778-15,2019.11.29,도생-다세대,다세대,2종일주,6미터 및\n 4미터,1656.9493,1227.524205,0.740834,1245.563652,0.751721,1261.127596,0.761114,1308.884122,0.789936,1420.128276,0.857074,1438.316844,0.868051
6,서울특별시 강남구 역삼동 778-3,2019.11.29,도생-원룸,다세대,2종일주,4미터,1188.423,1227.524205,1.032902,1245.563652,1.048081,1261.127596,1.061177,1308.884122,1.101362,1420.128276,1.194969,1438.316844,1.210273
7,서울특별시 강동구 고덕동 292-5,2019.06.14,다세대주택,다세대,2종일반주거,4미터,989.5452,,,,,,,,,,,,
8,서울특별시 강동구 고덕동 292-6,2019.06.14,다세대주택,다세대,2종일주,4미터,988.1178,,,,,,,,,,,,
9,서울특별시 강동구 길동 140-0,2019.06.14,단지형다세대,다세대,3종일주,6미터,750.4237,882.549216,1.176068,857.707494,1.142964,892.991711,1.189983,967.756881,1.289614,904.295403,1.205046,1026.342379,1.367684


In [34]:
# Keep only appraisals whose building-use text contains '다세' or '오피',
# and that have a usable base date.
building_use = selected_df['건물내용_건물용도']
mentions_multi_household = building_use.str.contains('다세')
mentions_officetel = building_use.str.contains('오피')
selected2_df = selected_df[mentions_multi_household | mentions_officetel].dropna(subset=['기준시점2'])
print(selected2_df.shape)
selected2_df

(52, 19)


Unnamed: 0,지번주소,기준시점2,건물내용_건물용도,건물유형,토지내용_용도지역,토지내용_도로너비,평가단가(원/전유㎡) _전유면적,최근3년50분위,최근3년50분위_평가단가와의격차,최근3년평균,최근3년평균_평가단가와의격차,최근2년50분위,최근2년50분위_평가단가와의격차,최근2년평균,최근2년평균_평가단가와의격차,당해50분위,당해50분위_평가단가와의격차,당해평균,당해평균_평가단가와의격차
0,서울특별시 강남구 개포동 1195-10,2020.09.23,다세대주택,다세대,2종일주,6미터,1091.9208,1079.676961,0.988787,1065.218106,0.975545,1114.612052,1.020781,1115.573296,1.021661,1146.667354,1.050138,1138.21815,1.0424
1,서울특별시 강남구 개포동 1199-7,2019.11.29,다세대주택,다세대,2종일주,4미터,1034.1209,988.660278,0.956039,986.346819,0.953802,1005.36193,0.97219,1007.860927,0.974606,1060.92935,1.025924,1071.290025,1.035943
5,서울특별시 강남구 역삼동 778-15,2019.11.29,도생-다세대,다세대,2종일주,6미터 및\n 4미터,1656.9493,1227.524205,0.740834,1245.563652,0.751721,1261.127596,0.761114,1308.884122,0.789936,1420.128276,0.857074,1438.316844,0.868051
7,서울특별시 강동구 고덕동 292-5,2019.06.14,다세대주택,다세대,2종일반주거,4미터,989.5452,,,,,,,,,,,,
8,서울특별시 강동구 고덕동 292-6,2019.06.14,다세대주택,다세대,2종일주,4미터,988.1178,,,,,,,,,,,,
9,서울특별시 강동구 길동 140-0,2019.06.14,단지형다세대,다세대,3종일주,6미터,750.4237,882.549216,1.176068,857.707494,1.142964,892.991711,1.189983,967.756881,1.289614,904.295403,1.205046,1026.342379,1.367684
13,서울특별시 강동구 길동 96-4,2019.06.14,다세대-도시형주택,다세대,3종일주,4미터,746.9423,882.549216,1.181549,857.707494,1.148291,892.991711,1.19553,967.756881,1.295625,904.295403,1.210663,1026.342379,1.374058
14,서울특별시 강동구 명일동 350-1외,2020.11.11,도생(단지형다세대),다세대,3종일주,8미터,713.2919,703.605981,0.986421,696.136712,0.975949,722.416009,1.012792,709.635601,0.994874,729.400749,1.022584,714.925793,1.002291
16,서울특별시 강동구 암사동 433-69외,2020.10.23,다세대주택,다세대,3종일주,4미터,711.012,999.983951,1.406423,941.888515,1.324715,950.297751,1.336542,865.062565,1.216664,779.827379,1.096785,779.827379,1.096785
17,서울특별시 강동구 암사동 454-26,2019.04.01,다세대주택,다세대,2종일주,4미터,609.4015,670.774088,1.10071,708.792854,1.163097,686.684362,1.126818,730.540084,1.198783,686.537173,1.126576,729.806237,1.197579
