In [1]:
# 2021/11/08 업데이트 내용:
# 주소 나열 방법을 지번주소 대신 시군구, 본번, 부번으로 나열하여 조금 더 보기 편하게 바뀜.

In [2]:
import pandas as pd
import numpy as np
import os
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib
matplotlib.rcParams['axes.unicode_minus'] = False
pd.options.mode.chained_assignment = None  # default='warn'
plt.rcParams['font.family'] = 'Malgun Gothic'
import datetime

In [3]:
# Analysis parameters read by the filtering, plotting and export cells below.
target_gu = '관악구' # district (gu) name
target_dong = '신림' # neighbourhood (dong) name, written without the trailing '동'
#target_purpose = '준주거' # zoning type: no need to change (currently disabled)
target_building_age = 5 # maximum building age in years: no need to change
target_period = 36 # contract-date search window: no need to change (only printed in plot titles in the visible cells)
target_starting_year = 2018 # first contract year to search: no need to change. NOTE(review): not passed to data_prep in the visible cells

In [4]:
# Today's local date as a midnight pandas Timestamp.
date_today = pd.Timestamp(datetime.date.today())

In [5]:
# Compact YYYYMMDD stamp of today's date; it is embedded in the output folder names.
date_today_record = date_today.strftime('%Y%m%d')
date_today_record

'20211124'

In [6]:
def day_modifier(x):
    """Left-pad a one-character day value with '0'.

    Missing values (per ``pd.isna``) are returned untouched. Anything else is
    converted with ``str`` and gets a leading '0' only when that text is
    exactly one character long.
    """
    if pd.isna(x):
        return x

    text = str(x)
    return '0' + text if len(text) == 1 else text

In [7]:
def landnum_modifier(x):
    """Normalise a lot number to the 'main-sub' form.

    Missing values are returned as-is. Otherwise the value is converted to a
    string, every '외' and space character is removed, and '-0' is appended
    when the result contains no '-'.
    """
    if pd.isna(x):
        return x

    cleaned = str(x).replace('외', '').replace(' ', '')
    return cleaned if '-' in cleaned else cleaned + '-0'

In [8]:
def data_prep(bdtype, tradetype, starting_year=2018):
    """Load and normalise the downloaded real-transaction CSV files.

    Reads every ``*.csv`` file whose name contains '(' from
    ``./국토교통부_실거래가_공개시스템/{bdtype}/{tradetype}/``, skips files whose
    year (the four characters after '실거래가_' in the file name) is earlier
    than ``starting_year``, concatenates them and derives the address,
    contract-date, unit-price and building-age columns.

    Parameters
    ----------
    bdtype : str
        Building type; names the sub-directory (아파트, 연립다세대 or 오피스텔).
    tradetype : str
        '매매' or '전월세'. For '전월세' only the '전세' rows are kept, the
        deposit column is used as the transaction amount and the monthly-rent
        column is dropped.
    starting_year : int, default 2018
        Files from earlier years are ignored.

    Returns
    -------
    pandas.DataFrame
        One row per transaction, with '지번주소' and '도로명' as the first
        columns. Rows without an address or with a negative '층' are removed.

    Raises
    ------
    ValueError
        If ``tradetype`` is not one of the two accepted values, or if no
        input file is left to load.

    Notes
    -----
    Rows that carry a '해제사유발생일' value are not filtered out here.
    """
    if tradetype not in ('매매', '전월세'):
        raise ValueError('두번째 변수는 매매 또는 전월세만 입력 가능')

    basedir = './국토교통부_실거래가_공개시스템/{}/{}/'.format(bdtype, tradetype)
    # Sorted so the row order of the result does not depend on os.listdir order.
    filenames = sorted(f for f in os.listdir(basedir) if f.endswith('.csv') and '(' in f)

    dfs_list = []
    for f in tqdm(filenames):
        file_year = int(f.split('실거래가_')[-1][:4])
        if file_year < starting_year:
            continue

        # EUC-KR is tried first; fall back to UTF-8 only on a decoding failure
        # so that unrelated errors (and Ctrl-C) are no longer swallowed.
        try:
            df = pd.read_csv(basedir + f, encoding='euc-kr', header=15)
        except UnicodeDecodeError:
            df = pd.read_csv(basedir + f, encoding='utf-8', header=15)

        dfs_list.append(df)

    if not dfs_list:
        # pd.concat would raise an opaque "No objects to concatenate" here.
        raise ValueError('{} 에서 {}년 이후의 실거래가 csv 파일을 찾을 수 없음'.format(basedir, starting_year))

    concat_df = pd.concat(dfs_list).reset_index(drop=True)

    if tradetype == '전월세':
        concat_df = concat_df.rename(columns={'전월세구분':'거래구분', '보증금(만원)':'거래금액(만원)'})
        concat_df = concat_df.drop(columns=['월세(만원)'])
        # .copy() so the column assignments below act on an independent frame.
        concat_df = concat_df[concat_df['거래구분'] == '전세'].copy()
    else:
        concat_df['거래구분'] = '매매'

    concat_df['번지'] = concat_df['번지'].apply(landnum_modifier)

    # Go through the nullable integer dtype first so the text has no '.0' suffix.
    concat_df['계약년월'] = concat_df['계약년월'].astype('Int64').apply(str)
    concat_df['계약일'] = concat_df['계약일'].astype('Int64').apply(str)

    contract_year = concat_df['계약년월'].str[:4]
    concat_df['계약날짜기준_건물연식'] = contract_year.astype('int64') - concat_df['건축년도']

    concat_df['계약일'] = concat_df['계약일'].apply(day_modifier)
    concat_df['계약날짜'] = pd.to_datetime(
        contract_year + '-' + concat_df['계약년월'].str[-2:] + '-' + concat_df['계약일'],
        format='%Y-%m-%d',
    )

    # Amounts are usually text with thousands separators ('18,500'), but a file
    # without any separator is parsed as integers; going through str handles both.
    concat_df['거래금액(만원)'] = (
        concat_df['거래금액(만원)'].astype(str).str.replace(',', '', regex=False).astype('int64')
    )
    concat_df['단가(만원/㎡)'] = concat_df['거래금액(만원)'] / concat_df['전용면적(㎡)']

    concat_df['지번주소'] = concat_df['시군구'] + ' ' + concat_df['번지']

    # Put the two address columns first and drop the raw contract year-month/day.
    leading_cols = ['지번주소', '도로명']
    cols_to_drop = leading_cols + ['계약년월', '계약일']
    concat_df = concat_df[leading_cols + [col for col in concat_df.columns if col not in cols_to_drop]]

    concat_df['건물연식'] = datetime.datetime.now().year - concat_df['건축년도']

    concat_df = concat_df.dropna(subset=['지번주소'])
    concat_df = concat_df[concat_df['층'] >= 0].reset_index(drop=True)

    return concat_df

In [9]:
# Load the officetel sale transactions and tag every row with its property type.
officetel_df = data_prep('오피스텔', '매매').reset_index(drop=True)
officetel_df['부동산유형'] = '오피스텔'
print(officetel_df.shape)
officetel_df.head()

16it [00:00, 132.20it/s]


(54590, 20)


Unnamed: 0,지번주소,도로명,시군구,번지,본번,부번,단지명,전용면적(㎡),거래금액(만원),층,건축년도,해제사유발생일,거래유형,중개사소재지,거래구분,계약날짜기준_건물연식,계약날짜,단가(만원/㎡),건물연식,부동산유형
0,서울특별시 강남구 개포동 13-3,개포로 623,서울특별시 강남구 개포동,13-3,13,3,대청타워,31.91,18500,20,1997.0,,-,-,매매,21.0,2018-01-02,579.755563,24.0,오피스텔
1,서울특별시 강남구 개포동 13-3,개포로 623,서울특별시 강남구 개포동,13-3,13,3,대청타워,31.91,19900,14,1997.0,,-,-,매매,21.0,2018-01-04,623.628956,24.0,오피스텔
2,서울특별시 강남구 개포동 13-3,개포로 623,서울특별시 강남구 개포동,13-3,13,3,대청타워,32.44,20000,26,1997.0,,-,-,매매,21.0,2018-01-08,616.522811,24.0,오피스텔
3,서울특별시 강남구 개포동 13-3,개포로 623,서울특별시 강남구 개포동,13-3,13,3,대청타워,32.44,20000,26,1997.0,,-,-,매매,21.0,2018-01-18,616.522811,24.0,오피스텔
4,서울특별시 강남구 개포동 13-3,개포로 623,서울특별시 강남구 개포동,13-3,13,3,대청타워,32.44,21000,22,1997.0,,-,-,매매,21.0,2018-01-20,647.348952,24.0,오피스텔


In [10]:
# Keep only the rows whose lot address contains '<target_gu> <target_dong>', then
# order them by location, build year, floor area and contract date.
# NOTE: this cell rebinds and then deletes officetel_df, so running it a second
# time without re-running the loading cell raises NameError.
officetel_df = officetel_df[officetel_df['지번주소'].str.contains(target_gu + ' ' + target_dong)]
house_df = pd.concat([officetel_df]).sort_values(['시군구', '본번', '부번', '건축년도', '전용면적(㎡)', '계약날짜']).reset_index(drop=True)
del officetel_df
print(house_df.shape)
house_df.head()

(506, 20)


Unnamed: 0,지번주소,도로명,시군구,번지,본번,부번,단지명,전용면적(㎡),거래금액(만원),층,건축년도,해제사유발생일,거래유형,중개사소재지,거래구분,계약날짜기준_건물연식,계약날짜,단가(만원/㎡),건물연식,부동산유형
0,서울특별시 관악구 신림동 94-208,쑥고개로 9,서울특별시 관악구 신림동,94-208,94,208,엘리젠트오피스텔,24.0,12000,8,2004.0,,-,-,매매,14.0,2018-01-06,500.0,17.0,오피스텔
1,서울특별시 관악구 신림동 94-208,쑥고개로 9,서울특별시 관악구 신림동,94-208,94,208,엘리젠트오피스텔,24.0,11500,8,2004.0,,-,-,매매,14.0,2018-04-26,479.166667,17.0,오피스텔
2,서울특별시 관악구 신림동 94-208,쑥고개로 9,서울특별시 관악구 신림동,94-208,94,208,엘리젠트오피스텔,24.0,10800,6,2004.0,,-,-,매매,14.0,2018-11-12,450.0,17.0,오피스텔
3,서울특별시 관악구 신림동 94-208,쑥고개로 9,서울특별시 관악구 신림동,94-208,94,208,엘리젠트오피스텔,24.0,11600,6,2004.0,,-,-,매매,16.0,2020-12-17,483.333333,17.0,오피스텔
4,서울특별시 관악구 신림동 94-208,쑥고개로 9,서울특별시 관악구 신림동,94-208,94,208,엘리젠트오피스텔,24.0,11600,6,2004.0,,-,-,매매,16.0,2020-12-17,483.333333,17.0,오피스텔


In [11]:
house_df = house_df.sort_values(['단가(만원/㎡)'], ascending=False).reset_index(drop=True)
print(house_df.shape)
house_df.head()

(506, 20)


Unnamed: 0,지번주소,도로명,시군구,번지,본번,부번,단지명,전용면적(㎡),거래금액(만원),층,건축년도,해제사유발생일,거래유형,중개사소재지,거래구분,계약날짜기준_건물연식,계약날짜,단가(만원/㎡),건물연식,부동산유형
0,서울특별시 관악구 신림동 1474-15,남부순환로 1466,서울특별시 관악구 신림동,1474-15,1474,15,몽삐에뜨골드,15.61,25000,12,2020.0,,-,-,매매,1.0,2021-06-22,1601.537476,1.0,오피스텔
1,서울특별시 관악구 신림동 1474-15,남부순환로 1466,서울특별시 관악구 신림동,1474-15,1474,15,몽삐에뜨골드,15.61,25000,13,2020.0,,-,-,매매,1.0,2021-08-13,1601.537476,1.0,오피스텔
2,서울특별시 관악구 신림동 1474-15,남부순환로 1466,서울특별시 관악구 신림동,1474-15,1474,15,몽삐에뜨골드,15.61,25000,13,2020.0,20210819.0,-,-,매매,1.0,2021-08-13,1601.537476,1.0,오피스텔
3,서울특별시 관악구 신림동 1474-15,남부순환로 1466,서울특별시 관악구 신림동,1474-15,1474,15,몽삐에뜨골드,15.61,24700,11,2020.0,,-,-,매매,1.0,2021-06-16,1582.319026,1.0,오피스텔
4,서울특별시 관악구 신림동 1474-15,남부순환로 1466,서울특별시 관악구 신림동,1474-15,1474,15,몽삐에뜨골드,15.61,23800,10,2020.0,,-,-,매매,1.0,2021-06-10,1524.663677,1.0,오피스텔


In [24]:
# house_df is sorted by unit price (descending) at this point, so head() takes
# the most expensive transactions per square metre.
# NOTE(review): the variable name (and the exported 'top_100.xlsx') say 100, but
# only 50 rows are taken here — confirm which count is intended.
top_100_df = house_df.head(50)
print(top_100_df.shape)
top_100_df.tail()

(50, 20)


Unnamed: 0,지번주소,도로명,시군구,번지,본번,부번,단지명,전용면적(㎡),거래금액(만원),층,건축년도,해제사유발생일,거래유형,중개사소재지,거래구분,계약날짜기준_건물연식,계약날짜,단가(만원/㎡),건물연식,부동산유형
45,서울특별시 관악구 신림동 1474-15,남부순환로 1466,서울특별시 관악구 신림동,1474-15,1474,15,몽삐에뜨골드,27.57,29100,4,2020.0,,-,-,매매,1.0,2021-09-28,1055.495103,1.0,오피스텔
46,서울특별시 관악구 신림동 530-1,난곡로 331,서울특별시 관악구 신림동,530-1,530,1,신대방 노블루체,28.5,29500,4,2019.0,,-,-,매매,1.0,2020-07-09,1035.087719,2.0,오피스텔
47,서울특별시 관악구 신림동 1474-15,남부순환로 1466,서울특별시 관악구 신림동,1474-15,1474,15,몽삐에뜨골드,27.57,28400,3,2020.0,,-,-,매매,1.0,2021-09-13,1030.105187,1.0,오피스텔
48,서울특별시 관악구 신림동 1474-15,남부순환로 1466,서울특별시 관악구 신림동,1474-15,1474,15,몽삐에뜨골드,24.18,24700,3,2020.0,,-,-,매매,0.0,2020-12-02,1021.505376,1.0,오피스텔
49,서울특별시 관악구 신림동 530-1,난곡로 331,서울특별시 관악구 신림동,530-1,530,1,신대방 노블루체,28.5,28800,5,2019.0,,-,-,매매,2.0,2021-09-04,1010.526316,2.0,오피스텔


In [25]:
top_100_df = top_100_df.sort_values(['시군구', '본번', '부번', '건축년도', '전용면적(㎡)', '계약날짜']).reset_index(drop=True)
print(top_100_df.shape)
top_100_df.head()

(50, 20)


Unnamed: 0,지번주소,도로명,시군구,번지,본번,부번,단지명,전용면적(㎡),거래금액(만원),층,건축년도,해제사유발생일,거래유형,중개사소재지,거래구분,계약날짜기준_건물연식,계약날짜,단가(만원/㎡),건물연식,부동산유형
0,서울특별시 관악구 신림동 529-15,난곡로66가길 19,서울특별시 관악구 신림동,529-15,529,15,.,17.08,19500,4,2020.0,,-,-,매매,1.0,2021-06-10,1141.686183,1.0,오피스텔
1,서울특별시 관악구 신림동 530-1,난곡로 331,서울특별시 관악구 신림동,530-1,530,1,신대방 노블루체,28.5,29500,4,2019.0,,-,-,매매,1.0,2020-07-09,1035.087719,2.0,오피스텔
2,서울특별시 관악구 신림동 530-1,난곡로 331,서울특별시 관악구 신림동,530-1,530,1,신대방 노블루체,28.5,28800,5,2019.0,,-,-,매매,2.0,2021-09-04,1010.526316,2.0,오피스텔
3,서울특별시 관악구 신림동 1437-19,신림동5가길 13,서울특별시 관악구 신림동,1437-19,1437,19,태양오피스텔,16.45,20900,3,2021.0,,-,-,매매,0.0,2021-05-05,1270.516717,0.0,오피스텔
4,서울특별시 관악구 신림동 1437-19,신림동5가길 13,서울특별시 관악구 신림동,1437-19,1437,19,태양오피스텔,16.45,20900,3,2021.0,,-,-,매매,0.0,2021-08-17,1270.516717,0.0,오피스텔


In [26]:
top_100_df = top_100_df.sort_values(['시군구', '본번', '부번', '건축년도', '부동산유형', '거래구분', '계약날짜', '전용면적(㎡)']).reset_index(drop=True)

In [27]:
cols_to_include = [
    '시군구', '번지', '본번', '부번', '단지명', '전용면적(㎡)', '계약날짜', '거래금액(만원)', '단가(만원/㎡)',
    '층', '건축년도'
]

In [28]:
export_df = top_100_df[cols_to_include]

In [29]:
# The output folder name embeds today's date, so it does not exist on the first
# run of a day; to_excel does not create missing directories, so create it here.
top_export_dir = './국토교통부_실거래가_공개시스템/집값분석/{}_{}_오피스텔_{}/'.format(target_gu, target_dong, date_today_record)
os.makedirs(top_export_dir, exist_ok=True)
export_df.to_excel(top_export_dir + 'top_100.xlsx', index=False)

In [None]:
'''land_specs_df = pd.read_csv('./prepped_data/land_specs_ver_4.csv')
print(land_specs_df.shape)
land_specs_df.head()'''

In [None]:
'''last_df = land_specs_df.drop_duplicates(subset=['지번주소'], keep='last').drop(columns=['년', '공시지가']).reset_index(drop=True)
print(last_df.shape)
last_df.head()'''

In [None]:
'''house_merge_df = house_df.merge(last_df, on=['지번주소']).reset_index(drop=True)
print(house_merge_df.shape)'''

In [None]:
house_df['구'] = house_df['지번주소'].apply(lambda x: x.split(' ')[1])
house_df['동'] = house_df['지번주소'].apply(lambda x: x.split(' ')[2])
#house_merge_df['구'] = house_merge_df['지번주소'].apply(lambda x: x.split(' ')[1])
#house_merge_df['동'] = house_merge_df['지번주소'].apply(lambda x: x.split(' ')[2])

In [None]:
house_df['일괄계약'] = 'N'

In [None]:
#house_df = house_df.merge(last_df, on=['지번주소']).reset_index(drop=True)

In [None]:
'''print(house_df.shape)
house_df.head()'''

In [None]:
def identify_bulk_contract(df, min_count=5):
    """Flag transactions that look like one bulk contract.

    Rows sharing the same '지번주소', '건축년도' and '계약날짜' are counted; when a
    combination occurs ``min_count`` times or more, '일괄계약' is set to 'Y' on
    all of its rows. Every other row keeps the value it already had.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the '지번주소', '건축년도' and '계약날짜' columns.
    min_count : int, default 5
        Smallest group size that is treated as a bulk contract.

    Returns
    -------
    pandas.DataFrame
        A modified copy; ``df`` itself is left untouched.
    """
    df = df.copy()
    key_cols = ['지번주소', '건축년도', '계약날짜']

    # One grouped count replaces three nested filter passes over the frame.
    group_sizes = df.groupby(key_cols).size()
    bulk_keys = group_sizes[group_sizes >= min_count].index
    if len(bulk_keys) == 0:
        return df

    # Rows with a missing key are dropped by groupby and therefore never match.
    # The mask is positional, so a non-unique index cannot flag unrelated rows.
    is_bulk = pd.MultiIndex.from_frame(df[key_cols]).isin(bulk_keys)
    df.loc[is_bulk, '일괄계약'] = 'Y'

    return df

In [None]:
# Target-area transactions contracted within the last 6 months on buildings that
# are at most target_building_age years old.
# NOTE: the '1_12' in the variable name is stale; the window used is 6 months,
# which matches the '계약최근6개월' export file name.
target_1_12_df = house_df[
    (house_df['지번주소'].str.contains(target_gu + ' ' + target_dong))
    #&(house_df['동'] == target_dong)
    &(house_df['계약날짜'] >= (date_today - pd.DateOffset(months=6)))
    &(house_df['건물연식'] <= target_building_age)
    #&(house_merge_df['용도지역명1'].str.contains(target_purpose))
]
print(target_1_12_df.shape)
target_1_12_df.head()

In [None]:
# Target-area transactions contracted within the last 12 months on buildings that
# are at most target_building_age years old.
# NOTE: the '2_24' in the variable name is stale; the window used is 12 months,
# which matches the '계약최근12개월' export file name.
target_2_24_df = house_df[
    (house_df['지번주소'].str.contains(target_gu + ' ' + target_dong))
    #&(house_df['동'] == target_dong)
    &(house_df['계약날짜'] >= (date_today - pd.DateOffset(months=12)))
    &(house_df['건물연식'] <= target_building_age)
    #&(house_merge_df['용도지역명1'].str.contains(target_purpose))
]
print(target_2_24_df.shape)
target_2_24_df.head()

In [None]:
# Target-area transactions contracted within the last 24 months on buildings that
# are at most target_building_age years old.
# NOTE: the '3_36' in the variable name is stale; the window used is 24 months,
# which matches the '계약최근24개월' export file name.
target_3_36_df = house_df[
    (house_df['지번주소'].str.contains(target_gu + ' ' + target_dong))
    #&(house_df['동'] == target_dong)
    &(house_df['계약날짜'] >= (date_today - pd.DateOffset(months=24)))
    &(house_df['건물연식'] <= target_building_age)
    #&(house_merge_df['용도지역명1'].str.contains(target_purpose))
]
print(target_3_36_df.shape)
target_3_36_df.head()

In [None]:
# Target-area transactions contracted within the last 36 months on buildings that
# are at most target_building_age years old.
# NOTE: the '5_60' in the variable name is stale; the window used is 36 months,
# which matches the '계약최근36개월' export file name.
target_5_60_df = house_df[
    (house_df['지번주소'].str.contains(target_gu + ' ' + target_dong))
    #&(house_df['동'] == target_dong)
    &(house_df['계약날짜'] >= (date_today - pd.DateOffset(months=36)))
    &(house_df['건물연식'] <= target_building_age)
    #&(house_merge_df['용도지역명1'].str.contains(target_purpose))
]
print(target_5_60_df.shape)
target_5_60_df.head()

In [None]:
target_1_12_df = identify_bulk_contract(target_1_12_df)
print(target_1_12_df.shape)

In [None]:
target_2_24_df = identify_bulk_contract(target_2_24_df)
print(target_2_24_df.shape)

In [None]:
target_3_36_df = identify_bulk_contract(target_3_36_df)
print(target_3_36_df.shape)

In [None]:
target_5_60_df = identify_bulk_contract(target_5_60_df)
print(target_5_60_df.shape)

In [None]:
# Load the transaction price index workbook and flatten its two header rows.
# NOTE(review): the workbook picked here is the '연립다세대' index although the
# transactions loaded in this notebook are '오피스텔' — confirm this is intended.
sales_idx_basedir = './한국부동산원/실거래가격지수/'
filenames = [f for f in os.listdir(sales_idx_basedir) if '연립다세대' in f]

sales_idx_df = pd.read_excel(sales_idx_basedir + filenames[0], header=[10, 11])

# The first column keeps only its top-level label; the rest become '<top>_<bottom>'.
sales_idx_cols_list = [
    top if pos == 0 else top + '_' + bottom
    for pos, (top, bottom) in enumerate(sales_idx_df.columns)
]

sales_idx_df.columns = sales_idx_cols_list

In [None]:
print(sales_idx_df.shape)
sales_idx_df.head()

In [None]:
def get_sales_idx(x, idx_df=None):
    """Look up the price index for the month of a contract date.

    Parameters
    ----------
    x : pandas.Timestamp
        Contract date. Missing values (NaT/NaN) are returned unchanged.
    idx_df : pandas.DataFrame, optional
        Index table with one '<YYYY>년 <MM>월_지수' column per month. Defaults
        to the notebook-level ``sales_idx_df``.

    Returns
    -------
    object
        The first-row value of the matching column, or the string '지수없음'
        when the table has no such column (or no rows).
    """
    if pd.isna(x):
        return x

    # Read-only lookup, so the table is used as-is; copying it on every call
    # (once per row under Series.apply) was pure overhead.
    if idx_df is None:
        idx_df = sales_idx_df

    column = '{}년 {:02d}월_지수'.format(x.year, x.month)
    try:
        return idx_df[column].iloc[0]
    except (KeyError, IndexError):
        # KeyError: the month is not in the table; IndexError: the table has no rows.
        return '지수없음'

In [None]:
def get_index_applied_prices(df):
    """Rescale unit prices to the level of the latest price index.

    For every row the index of its contract month is looked up with
    ``get_sales_idx``. Rows that have one get
    ``단가(만원/㎡) / index * latest index`` in '지수적용단가' and the latest index
    date label in '지수적용날짜'; rows without one keep their raw unit price and
    get '지수없음' as the date label.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the '계약날짜' and '단가(만원/㎡)' columns.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with '계약날짜_실거래지수', '지수적용단가' and
        '지수적용날짜' added.
    """
    out = df.copy()

    # NOTE(review): the base index is read from the second-to-last column while
    # the date label comes from the last column's name — presumably both belong
    # to the same (latest) month; confirm against the workbook layout.
    latest_index = sales_idx_df.iloc[0, -2]
    latest_label = sales_idx_df.columns[-1].split('_')[0]

    out['계약날짜_실거래지수'] = out['계약날짜'].apply(get_sales_idx)

    # A boolean mask is used instead of assigning into a filtered slice and
    # copying back by index label, which needed the chained-assignment warning
    # silenced and misbehaves when the index is not unique.
    has_index = out['계약날짜_실거래지수'] != '지수없음'

    out['지수적용단가'] = out['단가(만원/㎡)']
    adjusted = out.loc[has_index, '단가(만원/㎡)'] / out.loc[has_index, '계약날짜_실거래지수'] * latest_index
    out.loc[has_index, '지수적용단가'] = adjusted.astype('float')
    out['지수적용단가'] = out['지수적용단가'].astype('float')

    out['지수적용날짜'] = '지수없음'
    out.loc[has_index, '지수적용날짜'] = latest_label

    return out

In [None]:
target_1_12_df = get_index_applied_prices(target_1_12_df)
print(target_1_12_df.shape)
target_1_12_df.head()

In [None]:
target_2_24_df = get_index_applied_prices(target_2_24_df)
print(target_2_24_df.shape)
target_2_24_df.head()

In [None]:
target_3_36_df = get_index_applied_prices(target_3_36_df)
print(target_3_36_df.shape)
target_3_36_df.head()

In [None]:
target_5_60_df = get_index_applied_prices(target_5_60_df)
print(target_5_60_df.shape)
target_5_60_df.head()

In [None]:
# Columns written to the per-period Excel exports.
# The officetel frame has '단지명' (not '건물명') and no '대지권면적(㎡)' column, so
# selecting those names raised KeyError; the list now matches the one used for
# the top-N export.
cols_to_include = [
    '시군구', '번지', '본번', '부번', '단지명', '전용면적(㎡)', '계약날짜', '거래금액(만원)', '단가(만원/㎡)',
    '층', '건축년도'
]

In [None]:
# This is the first write into the dated '..._연립다세대_<date>' folder, so create
# it here; to_excel does not create missing directories.
# NOTE(review): the folder is labelled '연립다세대' although the data loaded in this
# notebook is '오피스텔' (the top-N export uses an '오피스텔' folder) — confirm.
target_1_12_df = target_1_12_df.sort_values(['시군구', '본번', '부번', '건축년도', '부동산유형', '거래구분', '계약날짜', '전용면적(㎡)']).reset_index(drop=True)
target_1_12_df1 = target_1_12_df[cols_to_include]
export_dir = './국토교통부_실거래가_공개시스템/집값분석/{}_{}_연립다세대_{}/'.format(target_gu, target_dong, date_today_record)
os.makedirs(export_dir, exist_ok=True)
target_1_12_df1.to_excel(export_dir + '실거래가_건물연식{}년_계약최근6개월.xlsx'.format(target_building_age), index=False)

In [None]:
target_2_24_df = target_2_24_df.sort_values(['시군구', '본번', '부번', '건축년도', '부동산유형', '거래구분', '계약날짜', '전용면적(㎡)']).reset_index(drop=True)
target_2_24_df1 = target_2_24_df[cols_to_include]
target_2_24_df1.to_excel('./국토교통부_실거래가_공개시스템/집값분석/{}_{}_연립다세대_{}/실거래가_건물연식{}년_계약최근12개월.xlsx'.format(target_gu, target_dong, date_today_record, target_building_age), index=False)

In [None]:
target_3_36_df = target_3_36_df.sort_values(['시군구', '본번', '부번', '건축년도', '부동산유형', '거래구분', '계약날짜', '전용면적(㎡)']).reset_index(drop=True)
target_3_36_df1 = target_3_36_df[cols_to_include]
target_3_36_df1.to_excel('./국토교통부_실거래가_공개시스템/집값분석/{}_{}_연립다세대_{}/실거래가_건물연식{}년_계약최근24개월.xlsx'.format(target_gu, target_dong, date_today_record, target_building_age), index=False)

In [None]:
target_5_60_df = target_5_60_df.sort_values(['시군구', '본번', '부번', '건축년도', '부동산유형', '거래구분', '계약날짜', '전용면적(㎡)']).reset_index(drop=True)
target_5_60_df1 = target_5_60_df[cols_to_include]
target_5_60_df1.to_excel('./국토교통부_실거래가_공개시스템/집값분석/{}_{}_연립다세대_{}/실거래가_건물연식{}년_계약최근36개월.xlsx'.format(target_gu, target_dong, date_today_record, target_building_age), index=False)

In [None]:
# Frame used by the scatter plots and the "last transaction" export below.
# NOTE(review): target_3_36_df is filtered with a 24-month window, while the plot
# titles print target_period (36) as the number of months — confirm which
# window is intended.
target_df = target_3_36_df.copy()

In [None]:
font = {'size': 16}
matplotlib.rc('font', **font)
f, ax = plt.subplots(figsize=(20,10))
plt.title('최근 {}개월 {} {} 건물연식 {}년이하 연립다세대 및 오피스텔 매매 실거래 {}건의 전용면적 단가'.format(target_period, target_gu, target_dong, target_building_age, target_df.shape[0]))
plt.grid(True)
ax.xaxis.update_units(target_df['계약날짜'])
sns.regplot(x=ax.xaxis.convert_units(target_df['계약날짜']), y=target_df['단가(만원/㎡)'])
plt.savefig('./국토교통부_실거래가_공개시스템/집값분석/{}_{}_연립다세대_{}/산포도.png'.format(target_gu, target_dong, date_today_record))

In [None]:
target_last_df = target_df.drop_duplicates(subset=['지번주소', '건축년도'], keep='last').reset_index(drop=True)

In [None]:
font = {'size': 16}
matplotlib.rc('font', **font)
f, ax = plt.subplots(figsize=(20,10))
plt.title('최근 {}개월 {} {} 건물연식 {}년이하 연립다세대 및 오피스텔 매매 실거래 {}건의 전용면적 단가'.format(target_period, target_gu, target_dong, target_building_age, target_last_df.shape[0]))
plt.grid(True)
ax.xaxis.update_units(target_last_df['계약날짜'])
sns.regplot(x=ax.xaxis.convert_units(target_last_df['계약날짜']), y=target_last_df['단가(만원/㎡)'])
plt.savefig('./국토교통부_실거래가_공개시스템/집값분석/{}_{}_연립다세대_{}/마지막_산포도.png'.format(target_gu, target_dong, date_today_record))

In [None]:
target_last_df.to_excel('./국토교통부_실거래가_공개시스템/집값분석/{}_{}_연립다세대_{}/마지막_실거래가.xlsx'.format(target_gu, target_dong, date_today_record), index=False)

In [None]:
target_df2 = house_df[
    (house_df['지번주소'].str.contains(target_gu + ' ' + target_dong))
    #&(house_df['동'] == target_dong)
    #&(house_merge_df['계약날짜'] >= (date_today - pd.DateOffset(months=target_period)))
    &(house_df['계약날짜기준_건물연식'] <= target_building_age)
    #&(house_merge_df['용도지역명1'].str.contains(target_purpose))
]
print(target_df2.shape)
target_df2.head()

In [None]:
target_df2 = get_index_applied_prices(target_df2)

In [None]:
font = {'size': 12}
matplotlib.rc('font', **font)
f, ax = plt.subplots(figsize=(12,6))
plt.title('{} {} 계약날짜기준 건물연식 {}년이하 연립다세대 및 오피스텔 매매 실거래 {}건의 전용면적 단가'.format(target_gu, target_dong, target_building_age, target_df2.shape[0]))
plt.grid(True)
sns.histplot(target_df2['단가(만원/㎡)'], kde=True)

In [None]:
# The officetel frame names the building column '단지명' (the rename to
# '건물(단지)명' in data_prep is disabled), so selecting '건물(단지)명' raised
# KeyError. Select the real column and rename it to keep the intended header.
target_df3 = target_df2[['지번주소', '단지명', '건축년도', '층', '전용면적(㎡)', '계약날짜', '단가(만원/㎡)', '부동산유형']]
target_df3 = target_df3.rename(columns={'단지명': '건물(단지)명'})
target_df3 = target_df3.sort_values(['지번주소', '건축년도', '계약날짜']).reset_index(drop=True)
target_df3.to_excel('./국토교통부_실거래가_공개시스템/집값분석/{}_{}_연립다세대_{}/전체_실거래가.xlsx'.format(target_gu, target_dong, date_today_record), index=False)

In [None]:
target_df3_30 = target_df3[
    (target_df3['전용면적(㎡)'] >= 28)
    &(target_df3['전용면적(㎡)'] <= 32)
]

In [None]:
target_df3_30.to_excel('./국토교통부_실거래가_공개시스템/집값분석/{}_{}_연립다세대_{}/전체_실거래가_30.xlsx'.format(target_gu, target_dong, date_today_record), index=False)

In [None]:
concat_df = pd.concat([target_df2])

In [None]:
concat_df = concat_df.sort_values(['지번주소', '건축년도', '전용면적(㎡)', '계약날짜']).reset_index(drop=True)
concat_df = concat_df.drop_duplicates(['지번주소', '건축년도', '전용면적(㎡)'], keep='last').reset_index(drop=True)

In [None]:
# Per (address, build year) mean of the raw and the index-adjusted unit price,
# broadcast back onto every row of the group.
concat_df['단가평균'] = concat_df.groupby(['지번주소', '건축년도'])['단가(만원/㎡)'].transform('mean')
concat_df['지수적용단가평균'] = concat_df.groupby(['지번주소', '건축년도'])['지수적용단가'].transform('mean')

In [None]:
concat_df = concat_df.sort_values(['지번주소', '건축년도', '지수적용날짜']).reset_index(drop=True)
print(concat_df.shape)
concat_df.head()

In [None]:
concat_last_df = concat_df.drop_duplicates(subset=['지번주소', '건축년도'], keep='last')
print(concat_last_df.shape)
concat_last_df.head()

In [None]:
font = {'size': 16}
matplotlib.rc('font', **font)
f, ax = plt.subplots(figsize=(20,10))
plt.title('{} {} 계약날짜기준 건물연식 {}년이하 연립다세대 및 오피스텔 매매 실거래 지번별 {}곳의 전용면적 실거래가격지수 적용후 단가평균'.format(target_gu, target_dong, target_building_age, concat_last_df.shape[0]))
plt.grid(True)
sns.histplot(concat_last_df['지수적용단가'], kde=True)
plt.savefig('./국토교통부_실거래가_공개시스템/집값분석/{}_{}_연립다세대_{}/지번별_분포도.png'.format(target_gu, target_dong, date_today_record))

In [None]:
concat_last_df = concat_last_df.sort_values(['지번주소']).reset_index(drop=True)

In [None]:
# The officetel frame names the building column '단지명', so selecting
# '건물(단지)명' raised KeyError. Select the real column and rename it to the
# intended header. The rename returns a new frame instead of mutating a
# column-subset slice in place.
concat_last_df = concat_last_df[['지번주소', '단지명', '건축년도', '전용면적(㎡)', '층', '계약날짜', '단가(만원/㎡)', '지수적용날짜', '지수적용단가', '부동산유형']]
concat_last_df = concat_last_df.rename(columns={'단지명': '건물(단지)명', '계약날짜': '마지막_계약날짜'})
concat_last_df.head()

In [None]:
concat_last_df.to_excel('./국토교통부_실거래가_공개시스템/집값분석/{}_{}_연립다세대_{}/전체_마지막_실거래가.xlsx'.format(target_gu, target_dong, date_today_record), index=False)