In [1]:
# Core data stack and filesystem access.
import pandas as pd
import numpy as np
import os
# Show up to 500 rows/columns when a DataFrame is displayed.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib
# Draw minus signs with the ASCII hyphen rather than the Unicode minus glyph.
matplotlib.rcParams['axes.unicode_minus'] = False
# Silence pandas' chained-assignment warning for the whole notebook.
pd.options.mode.chained_assignment = None  # default='warn'
# Font used for all plot text; the plot titles in this notebook are Korean.
# NOTE(review): this font must be installed on the machine running the notebook.
plt.rcParams['font.family'] = 'Malgun Gothic'
import datetime
# Register tqdm's progress-aware pandas methods (e.g. progress_apply).
tqdm.tqdm.pandas()

In [2]:
# Analysis parameters. Only the district and dong normally need editing.
target_gu = '중랑구' # district (gu) name
target_dong = '면목' # neighbourhood (dong) name: leave off the trailing '동' character
#target_purpose = '준주거' # zoning type: no need to change
target_building_age = 5 # building age in years: no need to change
target_period = 36 # contract-date search range: no need to change
target_starting_year = 2018 # first contract year to search: no need to change

In [3]:
# Today's local date as a Timestamp at midnight.
date_today = pd.Timestamp.today().normalize()

In [4]:
# Compact YYYYMMDD stamp of today's date.
date_today_record = date_today.strftime('%Y%m%d')
date_today_record

'20220214'

In [5]:
def day_modifier(x):
    """Return a day value as text, left-padded with '0' when it is one character long.

    Missing values (anything ``pd.isna`` reports as missing) are returned unchanged.
    """
    if pd.isna(x) == True:
        return x

    day_text = str(x)
    return '0' + day_text if len(day_text) == 1 else day_text

In [6]:
def landnum_modifier(x):
    """Normalise a lot number to the 'main-sub' form.

    The text is stripped of every '외' character and every space; when the
    result has no '-' separator, '-0' is appended. Missing values are
    returned unchanged.
    """
    if pd.isna(x) == True:
        return x

    lot_text = str(x).replace('외', '').replace(' ', '')
    if '-' in lot_text:
        return lot_text
    return lot_text + '-0'

In [7]:
def _read_trade_csv(path, header_row):
    """Read one exported CSV, trying several encodings with the same header row.

    Args:
        path: Path of the CSV file.
        header_row: Zero-based row index that holds the column names.

    Returns:
        The parsed DataFrame.

    Raises:
        UnicodeDecodeError: If none of the candidate encodings can decode the file.
    """
    last_error = None
    # 'euc-kr' is tried first; 'cp949' and 'utf-8' are fallbacks.
    for encoding in ('euc-kr', 'cp949', 'utf-8'):
        try:
            return pd.read_csv(path, encoding=encoding, header=header_row)
        except UnicodeDecodeError as exc:
            last_error = exc
    raise last_error


def data_prep(starting_year=2018, real_estate_type='토지', trade_type='매매'):
    """Load, merge and clean the real-transaction CSV exports for one property type.

    Files are read from ``./국토교통부_실거래가_공개시스템/<real_estate_type>/`` for
    '토지' and '상업업무용', and from
    ``./국토교통부_실거래가_공개시스템/<real_estate_type>/<trade_type>/`` otherwise.
    The year is taken from the four characters following '실거래가_' in each file
    name; files older than ``starting_year`` are skipped.

    Cleaning steps:
      * rows with a value in '해제사유발생일' are removed and the column dropped;
      * '계약년월' + '계약일' are combined into a datetime column '계약날짜';
      * '거래금액(만원)' is converted to integers (thousands separators removed);
      * '계약면적(㎡)' is renamed to '대지면적(㎡)', blank areas are removed, and
        '대지면적(평)' (= ㎡ * 0.3025) and '단가(만원/평)' are derived;
      * when a '지목' column exists only rows equal to '대' are kept;
      * broker/transaction-type columns and a few building columns are dropped.

    Args:
        starting_year: Earliest file year to include.
        real_estate_type: Sub-directory name of the property type.
        trade_type: Sub-directory name of the trade type; only used when
            ``real_estate_type`` is neither '토지' nor '상업업무용'.

    Returns:
        A DataFrame with the cleaned records. The index comes from the merged
        frame and is not renumbered after filtering.

    Raises:
        ValueError: If no CSV file for ``starting_year`` or later is found.
        UnicodeDecodeError: If a file cannot be decoded with any tried encoding.
    """
    # Function-scope import so this cell does not depend on the deprecated
    # ``tqdm.tqdm_notebook`` alias.
    from tqdm.auto import tqdm as progress_bar

    if real_estate_type in ('토지', '상업업무용'):
        basedir = './국토교통부_실거래가_공개시스템/{}/'.format(real_estate_type)
    else:
        basedir = './국토교통부_실거래가_공개시스템/{}/{}/'.format(real_estate_type, trade_type)

    # Row index of the column-name row: 14 for land files, 15 otherwise.
    header_num = 14 if real_estate_type == '토지' else 15

    # Sorted so the row order of the merged frame is reproducible between runs.
    filenames = sorted(f for f in os.listdir(basedir) if f.endswith('.csv'))

    dfs_list = []
    for filename in progress_bar(filenames, desc=real_estate_type):
        file_year = int(filename.split('실거래가_')[-1][:4])
        if file_year < starting_year:
            continue

        df = _read_trade_csv(os.path.join(basedir, filename), header_num)

        # Unify the building-name column across file layouts.
        if '건물명' in df.columns:
            df = df.rename(columns={'건물명': '건물(단지)명'})
        elif '단지명' in df.columns:
            df = df.rename(columns={'단지명': '건물(단지)명'})

        # Keep only rows without a cancellation date.
        if '해제사유발생일' in df.columns:
            df = df[df['해제사유발생일'].isna()].drop(columns=['해제사유발생일'])

        dfs_list.append(df)

    if not dfs_list:
        raise ValueError(
            'No CSV files for year >= {} found in {}'.format(starting_year, basedir)
        )

    concat_df = pd.concat(dfs_list, ignore_index=True)

    # Build the contract date from YYYYMM and a zero-padded day.
    year_month = concat_df['계약년월'].astype('Int64').astype(str)
    day = concat_df['계약일'].astype('Int64').astype(str).str.zfill(2)
    concat_df['계약날짜'] = pd.to_datetime(
        year_month.str[:4] + '-' + year_month.str[-2:] + '-' + day,
        format='%Y-%m-%d',
    )

    # Prices may arrive as text with thousands separators or already as numbers.
    price_text = (
        concat_df['거래금액(만원)'].astype(str).str.replace(',', '', regex=False).str.strip()
    )
    concat_df['거래금액(만원)'] = pd.to_numeric(price_text).astype('int64')

    concat_df = concat_df.drop(columns=['계약년월', '계약일'])

    # Two file layouts exist for the broker / transaction-type columns.
    if '거래유형' in concat_df.columns:
        broker_columns = ['거래유형', '중개사소재지']
    else:
        broker_columns = ['거래구분', '중개사사무소']
    concat_df = concat_df.drop(columns=broker_columns, errors='ignore')

    if '계약면적(㎡)' in concat_df.columns:
        concat_df = concat_df.rename(columns={'계약면적(㎡)': '대지면적(㎡)'})

    # Remove rows whose area is blank, then convert to a nullable float.
    has_area = concat_df['대지면적(㎡)'].astype(str).str.strip() != ''
    concat_df = concat_df[has_area].copy()
    concat_df['대지면적(㎡)'] = concat_df['대지면적(㎡)'].apply(float).astype('Float64')

    # Area in pyeong and price per pyeong.
    concat_df['대지면적(평)'] = concat_df['대지면적(㎡)'] * 0.3025
    concat_df['단가(만원/평)'] = concat_df['거래금액(만원)'] / concat_df['대지면적(평)']

    if '번지' in concat_df.columns:
        concat_df = concat_df.rename(columns={'번지': '지번'})

    # Keep only rows whose land category is '대'.
    if '지목' in concat_df.columns:
        concat_df = concat_df[concat_df['지목'] == '대'].drop(columns=['지목'])

    # Columns not needed downstream; absent ones are ignored.
    concat_df = concat_df.drop(
        columns=['전용/연면적(㎡)', '층', '도로명', '건축물주용도', '유형'],
        errors='ignore',
    )

    return concat_df

In [8]:
# Load land records from the configured start year and tag the property type.
land_df = data_prep(starting_year=target_starting_year, real_estate_type='토지')
land_df['부동산유형'] = '토지'
print(land_df.shape)
land_df.head()

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  app.launch_new_instance()


0it [00:00, ?it/s]

(14375, 11)


Unnamed: 0,시군구,지번,용도지역,도로조건,대지면적(㎡),거래금액(만원),지분구분,계약날짜,대지면적(평),단가(만원/평),부동산유형
0,서울특별시 동대문구 전농동,1**,제3종일반주거지역,8m미만,13.33,10100,지분,2018-01-02,4.032325,2504.758421,토지
1,서울특별시 동작구 상도동,2**,제2종일반주거지역,-,102.0,12000,,2018-01-02,30.855,388.915897,토지
2,서울특별시 성동구 행당동,2**,제2종일반주거지역,-,20.0,12770,,2018-01-02,6.05,2110.743802,토지
4,서울특별시 은평구 응암동,6**,제3종일반주거지역,12m미만,76.0,48359,,2018-01-02,22.99,2103.479774,토지
5,서울특별시 용산구 이태원동,7*,준주거지역,8m미만,1.72,1000,지분,2018-01-03,0.5203,1921.968095,토지


In [9]:
# Load commercial/office records from the configured start year and tag the property type.
commercial_df = data_prep(starting_year=target_starting_year, real_estate_type='상업업무용')
commercial_df['부동산유형'] = '상업업무용'
print(commercial_df.shape)
commercial_df.head()

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  app.launch_new_instance()


0it [00:00, ?it/s]

(13111, 12)


Unnamed: 0,시군구,지번,용도지역,도로조건,대지면적(㎡),거래금액(만원),지분구분,건축년도,계약날짜,대지면적(평),단가(만원/평),부동산유형
8,서울특별시 강남구 개포동,1***,준주거,8m미만,265.0,435500,,1990.0,2018-02-13,80.1625,5432.714798,상업업무용
9,서울특별시 강남구 개포동,1***,준주거,25m미만,254.8,340000,,1991.0,2018-02-14,77.077,4411.173242,상업업무용
19,서울특별시 강남구 개포동,1***,제2종일반주거,12m미만,228.3,238000,,1991.0,2018-03-09,69.06075,3446.241172,상업업무용
22,서울특별시 강남구 개포동,1***,제2종일반주거,8m미만,265.2,250000,,1993.0,2018-04-03,80.223,3116.313277,상업업무용
23,서울특별시 강남구 개포동,1***,제2종일반주거,25m미만,323.2,612500,,1998.0,2018-04-12,97.768,6264.831029,상업업무용


In [10]:
# Stack both property types into one frame with a fresh 0..n-1 index,
# then release the two source frames.
house_df = pd.concat([land_df, commercial_df], ignore_index=True)
del land_df
del commercial_df

In [11]:
# Second and third space-separated tokens of the address are the district and the dong.
house_df['구'] = house_df['시군구'].map(lambda address: address.split(' ')[1])
house_df['동'] = house_df['시군구'].map(lambda address: address.split(' ')[2])

In [12]:
# Deals in the target district/dong contracted within the last 6 months.
# NOTE: the "1_12" in the variable name does not match the 6-month window;
# the name is kept because later cells refer to it.
# The address text is matched literally (not as a regex); missing addresses never match.
target_1_12_df = house_df[
    house_df['시군구'].str.contains(target_gu + ' ' + target_dong, regex=False, na=False)
    & (house_df['계약날짜'] >= (date_today - pd.DateOffset(months=6)))
]
print(target_1_12_df.shape)
target_1_12_df.head()

(13, 14)


Unnamed: 0,시군구,지번,용도지역,도로조건,대지면적(㎡),거래금액(만원),지분구분,계약날짜,대지면적(평),단가(만원/평),부동산유형,건축년도,구,동
12763,서울특별시 중랑구 면목동,5**,제1종일반주거지역,8m미만,46.0,25000,,2021-08-23,13.915,1796.62235,토지,,중랑구,면목동
12767,서울특별시 중랑구 면목동,5**,제1종일반주거지역,8m미만,387.0,85000,,2021-08-23,117.0675,726.076836,토지,,중랑구,면목동
13335,서울특별시 중랑구 면목동,3**,제2종일반주거지역,8m미만,108.4,87500,,2021-10-20,32.791,2668.415114,토지,,중랑구,면목동
13750,서울특별시 중랑구 면목동,1**,제2종일반주거지역,8m미만,124.0,135036,,2021-11-28,37.51,3600.0,토지,,중랑구,면목동
14116,서울특별시 중랑구 면목동,6**,준주거지역,8m미만,174.9,171500,,2021-12-30,52.90725,3241.52172,토지,,중랑구,면목동


In [13]:
# Deals in the target district/dong contracted within the last 12 months.
# NOTE: the "2_24" in the variable name does not match the 12-month window;
# the name is kept because later cells refer to it.
# The address text is matched literally (not as a regex); missing addresses never match.
target_2_24_df = house_df[
    house_df['시군구'].str.contains(target_gu + ' ' + target_dong, regex=False, na=False)
    & (house_df['계약날짜'] >= (date_today - pd.DateOffset(months=12)))
]
print(target_2_24_df.shape)
target_2_24_df.head()

(28, 14)


Unnamed: 0,시군구,지번,용도지역,도로조건,대지면적(㎡),거래금액(만원),지분구분,계약날짜,대지면적(평),단가(만원/평),부동산유형,건축년도,구,동
10902,서울특별시 중랑구 면목동,6**,제2종일반주거지역,8m미만,93.2,62000,,2021-02-25,28.193,2199.127443,토지,,중랑구,면목동
11099,서울특별시 중랑구 면목동,1**,제3종일반주거지역,25m이상,210.6,168900,,2021-03-18,63.7065,2651.220833,토지,,중랑구,면목동
11104,서울특별시 중랑구 면목동,1**,제3종일반주거지역,-,6.9,5540,,2021-03-18,2.08725,2654.210085,토지,,중랑구,면목동
11402,서울특별시 중랑구 면목동,5**,제2종일반주거지역,8m미만,91.9,51000,,2021-04-14,27.79975,1834.548872,토지,,중랑구,면목동
11495,서울특별시 중랑구 면목동,1**,준주거지역,8m미만,106.8,90000,,2021-04-25,32.307,2785.773981,토지,,중랑구,면목동


In [14]:
# Deals in the target district/dong contracted within the last 24 months.
# NOTE: the "3_36" in the variable name does not match the 24-month window;
# the name is kept because later cells refer to it.
# The address text is matched literally (not as a regex); missing addresses never match.
target_3_36_df = house_df[
    house_df['시군구'].str.contains(target_gu + ' ' + target_dong, regex=False, na=False)
    & (house_df['계약날짜'] >= (date_today - pd.DateOffset(months=24)))
]
print(target_3_36_df.shape)
target_3_36_df.head()

(69, 14)


Unnamed: 0,시군구,지번,용도지역,도로조건,대지면적(㎡),거래금액(만원),지분구분,계약날짜,대지면적(평),단가(만원/평),부동산유형,건축년도,구,동
7028,서울특별시 중랑구 면목동,3**,제2종일반주거지역,25m미만,190.7,132600,,2020-02-14,57.68675,2298.621434,토지,,중랑구,면목동
7228,서울특별시 중랑구 면목동,6**,준주거지역,25m이상,22.9,47000,,2020-03-01,6.92725,6784.799163,토지,,중랑구,면목동
7264,서울특별시 중랑구 면목동,3**,제2종일반주거지역,-,107.8,41352,,2020-03-04,32.6095,1268.09672,토지,,중랑구,면목동
7670,서울특별시 중랑구 면목동,2**,제3종일반주거지역,-,5.0,400,,2020-04-23,1.5125,264.46281,토지,,중랑구,면목동
8075,서울특별시 중랑구 면목동,5**,제2종일반주거지역,8m미만,23.0,7000,,2020-06-09,6.9575,1006.108516,토지,,중랑구,면목동


In [15]:
# Deals in the target district/dong contracted within the last 36 months.
# NOTE: the "5_60" in the variable name does not match the 36-month window;
# the name is kept because later cells refer to it.
# The address text is matched literally (not as a regex); missing addresses never match.
target_5_60_df = house_df[
    house_df['시군구'].str.contains(target_gu + ' ' + target_dong, regex=False, na=False)
    & (house_df['계약날짜'] >= (date_today - pd.DateOffset(months=36)))
]
print(target_5_60_df.shape)
target_5_60_df.head()

(108, 14)


Unnamed: 0,시군구,지번,용도지역,도로조건,대지면적(㎡),거래금액(만원),지분구분,계약날짜,대지면적(평),단가(만원/평),부동산유형,건축년도,구,동
3923,서울특별시 중랑구 면목동,7**,제3종일반주거지역,8m미만,12.0,5000,지분,2019-03-12,3.63,1377.410468,토지,,중랑구,면목동
4128,서울특별시 중랑구 면목동,1**,준주거지역,8m미만,13.84,7000,지분,2019-04-04,4.1866,1672.001147,토지,,중랑구,면목동
5319,서울특별시 중랑구 면목동,6**,제2종일반주거지역,25m미만,47.0,25000,,2019-08-16,14.2175,1758.396343,토지,,중랑구,면목동
5373,서울특별시 중랑구 면목동,6**,제2종일반주거지역,-,9.9,3000,,2019-08-23,2.99475,1001.753068,토지,,중랑구,면목동
5521,서울특별시 중랑구 면목동,1**,제2종일반주거지역,12m미만,135.5,56500,,2019-09-09,40.98875,1378.427007,토지,,중랑구,면목동


In [16]:
# Count deals per zoning label in the 24-month selection. The recorded output shows the
# same zoning written two ways (with and without the '지역' suffix).
target_3_36_df['용도지역'].value_counts()

제3종일반주거      19
제2종일반주거      17
제2종일반주거지역    12
제3종일반주거지역     8
준주거지역         6
제1종일반주거지역     4
준주거           2
자연녹지지역        1
Name: 용도지역, dtype: int64

In [17]:
# Keep rows whose zoning label contains '3종일반'; this matches both the
# '제3종일반주거' and '제3종일반주거지역' spellings.
samjong_df = target_3_36_df.loc[target_3_36_df['용도지역'].str.contains('3종일반')]
print(samjong_df.shape)
samjong_df.head()

(27, 14)


Unnamed: 0,시군구,지번,용도지역,도로조건,대지면적(㎡),거래금액(만원),지분구분,계약날짜,대지면적(평),단가(만원/평),부동산유형,건축년도,구,동
7670,서울특별시 중랑구 면목동,2**,제3종일반주거지역,-,5.0,400,,2020-04-23,1.5125,264.46281,토지,,중랑구,면목동
9586,서울특별시 중랑구 면목동,1**,제3종일반주거지역,8m미만,105.4,55000,,2020-10-27,31.8835,1725.030188,토지,,중랑구,면목동
9635,서울특별시 중랑구 면목동,5**,제3종일반주거지역,25m이상,121.08,91500,지분,2020-10-31,36.6267,2498.177559,토지,,중랑구,면목동
11099,서울특별시 중랑구 면목동,1**,제3종일반주거지역,25m이상,210.6,168900,,2021-03-18,63.7065,2651.220833,토지,,중랑구,면목동
11104,서울특별시 중랑구 면목동,1**,제3종일반주거지역,-,6.9,5540,,2021-03-18,2.08725,2654.210085,토지,,중랑구,면목동


In [18]:
# Newest contracts first.
samjong_df = samjong_df.sort_values(by='계약날짜', ascending=False)

In [19]:
# Display the full selection, newest contract first.
samjong_df

Unnamed: 0,시군구,지번,용도지역,도로조건,대지면적(㎡),거래금액(만원),지분구분,계약날짜,대지면적(평),단가(만원/평),부동산유형,건축년도,구,동
27308,서울특별시 중랑구 면목동,7**,제3종일반주거,25m이상,185.0,224000,,2021-11-11,55.9625,4002.680366,상업업무용,2014.0,중랑구,면목동
27307,서울특별시 중랑구 면목동,1**,제3종일반주거,25m이상,461.4,570000,,2021-10-29,139.5735,4083.869789,상업업무용,1993.0,중랑구,면목동
27305,서울특별시 중랑구 면목동,1**,제3종일반주거,25m이상,338.2,400000,,2021-09-16,102.3055,3909.858219,상업업무용,1983.0,중랑구,면목동
12573,서울특별시 중랑구 면목동,6**,제3종일반주거지역,8m미만,81.0,18000,,2021-08-03,24.5025,734.618916,토지,,중랑구,면목동
12261,서울특별시 중랑구 면목동,1***,제3종일반주거지역,8m미만,72.6,58775,,2021-07-02,21.9615,2676.274389,토지,,중랑구,면목동
12262,서울특별시 중랑구 면목동,1***,제3종일반주거지역,8m미만,58.1,60560,,2021-07-02,17.57525,3445.754683,토지,,중랑구,면목동
11099,서울특별시 중랑구 면목동,1**,제3종일반주거지역,25m이상,210.6,168900,,2021-03-18,63.7065,2651.220833,토지,,중랑구,면목동
11104,서울특별시 중랑구 면목동,1**,제3종일반주거지역,-,6.9,5540,,2021-03-18,2.08725,2654.210085,토지,,중랑구,면목동
27300,서울특별시 중랑구 면목동,6**,제3종일반주거,25m미만,233.0,220000,,2021-02-18,70.4825,3121.342177,상업업무용,1990.0,중랑구,면목동
27298,서울특별시 중랑구 면목동,1**,제3종일반주거,25m이상,401.0,490000,,2021-02-16,121.3025,4039.488057,상업업무용,1976.0,중랑구,면목동


In [None]:
# List what is already in the land-price output directory.
os.listdir('./국토교통부_실거래가_공개시스템/땅값분석/')

In [None]:
# Output directory for the land-price workbook (a notebook-level name, separate from the
# local variable of the same name inside data_prep).
basedir = './국토교통부_실거래가_공개시스템/땅값분석/'

In [None]:
# Write the selection to a workbook named after the configured district and dong
# (target_dong is stored without its trailing '동', so it is appended here).
os.makedirs(basedir, exist_ok=True)
samjong_df.to_excel(basedir + '{} {}동 땅값.xlsx'.format(target_gu, target_dong), index=False)

In [None]:
# Columns written to the exported workbooks in the cells below.
# NOTE(review): several of these names (e.g. '건물(단지)명', '건물연식', '전용면적(㎡)',
# '단가(만원/㎡)') do not appear in the house_df previews earlier in this notebook;
# this cell may be carried over from a different analysis. Confirm before running.
cols_to_include = [
    '시군구', '건물(단지)명', '건물연식', '층', '거래구분', '전용면적(㎡)', '계약날짜', '거래금액(만원)', '단가(만원/㎡)',
    '부동산유형', '일괄계약', '지수적용날짜', '지수적용단가'
]

In [None]:
# Create today's output directory; exist_ok lets the cell be re-run on the same day.
os.makedirs('./국토교통부_실거래가_공개시스템/집값분석/{}_{}_{}/'.format(target_gu, target_dong, date_today_record), exist_ok=True)

In [None]:
# Sort the 6-month selection, keep the export columns and write it to Excel.
# NOTE(review): sort keys such as '본번', '부번', '거래구분' and '전용면적(㎡)' do not appear in
# the house_df previews earlier in this notebook. Confirm the columns exist before running.
target_1_12_df = target_1_12_df.sort_values(['시군구', '본번', '부번', '건축년도', '부동산유형', '거래구분', '계약날짜', '전용면적(㎡)']).reset_index(drop=True)
target_1_12_df1 = target_1_12_df[cols_to_include]
target_1_12_df1.to_excel('./국토교통부_실거래가_공개시스템/집값분석/{}_{}_{}/실거래가_건물연식없음_계약최근6개월.xlsx'.format(target_gu, target_dong, date_today_record), index=False)

In [None]:
# 6-month selection restricted to rows with an exclusive area of at most 30 ㎡.
target_1_12_30_df1 = target_1_12_df1.loc[target_1_12_df1['전용면적(㎡)'] <= 30]
target_1_12_30_df1.to_excel(
    './국토교통부_실거래가_공개시스템/집값분석/{}_{}_{}/실거래가_건물연식{}년_계약최근6개월_30제곱미터미만.xlsx'.format(target_gu, target_dong, date_today_record, target_building_age),
    index=False,
)

In [None]:
# 6-month selection restricted to buildings aged 2 years or less.
target_1_12_bdyr2_df1 = target_1_12_df1.loc[target_1_12_df1['건물연식'] <= 2]
target_1_12_bdyr2_df1.to_excel(
    './국토교통부_실거래가_공개시스템/집값분석/{}_{}_{}/실거래가_건물연식{}년_계약최근6개월.xlsx'.format(target_gu, target_dong, date_today_record, 2),
    index=False,
)

In [None]:
# 6-month selection restricted to buildings aged 5 years or less.
target_1_12_bdyr5_df1 = target_1_12_df1.loc[target_1_12_df1['건물연식'] <= 5]
target_1_12_bdyr5_df1.to_excel(
    './국토교통부_실거래가_공개시스템/집값분석/{}_{}_{}/실거래가_건물연식{}년_계약최근6개월.xlsx'.format(target_gu, target_dong, date_today_record, 5),
    index=False,
)

In [None]:
# Sort the 12-month selection, keep the export columns and write it to Excel.
# The file name follows the same '건물연식없음_계약최근N개월' pattern as the 6/24/36-month exports.
# NOTE(review): sort keys such as '본번', '부번', '거래구분' and '전용면적(㎡)' do not appear in
# the house_df previews earlier in this notebook. Confirm the columns exist before running.
target_2_24_df = target_2_24_df.sort_values(['시군구', '본번', '부번', '건축년도', '부동산유형', '거래구분', '계약날짜', '전용면적(㎡)']).reset_index(drop=True)
target_2_24_df1 = target_2_24_df[cols_to_include]
target_2_24_df1.to_excel('./국토교통부_실거래가_공개시스템/집값분석/{}_{}_{}/실거래가_건물연식없음_계약최근12개월.xlsx'.format(target_gu, target_dong, date_today_record), index=False)

In [None]:
# 12-month selection restricted to rows with an exclusive area of at most 30 ㎡.
# The filter is applied to the 12-month frame so the data matches the file name.
target_2_24_30_df1 = target_2_24_df1[target_2_24_df1['전용면적(㎡)'] <= 30]
target_2_24_30_df1.to_excel('./국토교통부_실거래가_공개시스템/집값분석/{}_{}_{}/실거래가_건물연식{}년_계약최근12개월_30제곱미터미만.xlsx'.format(target_gu, target_dong, date_today_record, target_building_age), index=False)

In [None]:
# 12-month selection restricted to buildings aged 2 years or less.
# The filter is applied to the 12-month frame so the data matches the file name.
target_2_24_bdyr2_df1 = target_2_24_df1[target_2_24_df1['건물연식'] <= 2]
target_2_24_bdyr2_df1.to_excel('./국토교통부_실거래가_공개시스템/집값분석/{}_{}_{}/실거래가_건물연식{}년_계약최근12개월.xlsx'.format(target_gu, target_dong, date_today_record, 2), index=False)

In [None]:
# 12-month selection restricted to buildings aged 5 years or less.
# The filter is applied to the 12-month frame so the data matches the file name.
target_2_24_bdyr5_df1 = target_2_24_df1[target_2_24_df1['건물연식'] <= 5]
target_2_24_bdyr5_df1.to_excel('./국토교통부_실거래가_공개시스템/집값분석/{}_{}_{}/실거래가_건물연식{}년_계약최근12개월.xlsx'.format(target_gu, target_dong, date_today_record, 5), index=False)

In [None]:
# Sort the 24-month selection, keep the export columns and write it to Excel.
# NOTE(review): sort keys such as '본번', '부번', '거래구분' and '전용면적(㎡)' do not appear in
# the house_df previews earlier in this notebook. Confirm the columns exist before running.
target_3_36_df = target_3_36_df.sort_values(['시군구', '본번', '부번', '건축년도', '부동산유형', '거래구분', '계약날짜', '전용면적(㎡)']).reset_index(drop=True)
target_3_36_df1 = target_3_36_df[cols_to_include]
target_3_36_df1.to_excel('./국토교통부_실거래가_공개시스템/집값분석/{}_{}_{}/실거래가_건물연식없음_계약최근24개월.xlsx'.format(target_gu, target_dong, date_today_record), index=False)

In [None]:
# 24-month selection restricted to rows with an exclusive area of at most 30 ㎡.
# The filter is applied to the 24-month frame so the data matches the file name.
target_3_36_30_df1 = target_3_36_df1[target_3_36_df1['전용면적(㎡)'] <= 30]
target_3_36_30_df1.to_excel('./국토교통부_실거래가_공개시스템/집값분석/{}_{}_{}/실거래가_건물연식{}년_계약최근24개월_30제곱미터미만.xlsx'.format(target_gu, target_dong, date_today_record, target_building_age), index=False)

In [None]:
# 24-month selection restricted to buildings aged 2 years or less.
# The filter is applied to the 24-month frame so the data matches the file name.
target_3_36_bdyr2_df1 = target_3_36_df1[target_3_36_df1['건물연식'] <= 2]
target_3_36_bdyr2_df1.to_excel('./국토교통부_실거래가_공개시스템/집값분석/{}_{}_{}/실거래가_건물연식{}년_계약최근24개월.xlsx'.format(target_gu, target_dong, date_today_record, 2), index=False)

In [None]:
# 24-month selection restricted to buildings aged 5 years or less.
# The filter is applied to the 24-month frame so the data matches the file name.
target_3_36_bdyr5_df1 = target_3_36_df1[target_3_36_df1['건물연식'] <= 5]
target_3_36_bdyr5_df1.to_excel('./국토교통부_실거래가_공개시스템/집값분석/{}_{}_{}/실거래가_건물연식{}년_계약최근24개월.xlsx'.format(target_gu, target_dong, date_today_record, 5), index=False)

In [None]:
# Sort the 36-month selection, keep the export columns and write it to Excel.
# NOTE(review): sort keys such as '본번', '부번', '거래구분' and '전용면적(㎡)' do not appear in
# the house_df previews earlier in this notebook. Confirm the columns exist before running.
target_5_60_df = target_5_60_df.sort_values(['시군구', '본번', '부번', '건축년도', '부동산유형', '거래구분', '계약날짜', '전용면적(㎡)']).reset_index(drop=True)
target_5_60_df1 = target_5_60_df[cols_to_include]
target_5_60_df1.to_excel('./국토교통부_실거래가_공개시스템/집값분석/{}_{}_{}/실거래가_건물연식없음_계약최근36개월.xlsx'.format(target_gu, target_dong, date_today_record), index=False)

In [None]:
# 36-month selection restricted to rows with an exclusive area of at most 30 ㎡.
# The filter is applied to the 36-month frame so the data matches the file name.
target_5_60_30_df1 = target_5_60_df1[target_5_60_df1['전용면적(㎡)'] <= 30]
target_5_60_30_df1.to_excel('./국토교통부_실거래가_공개시스템/집값분석/{}_{}_{}/실거래가_건물연식{}년_계약최근36개월_30제곱미터미만.xlsx'.format(target_gu, target_dong, date_today_record, target_building_age), index=False)

In [None]:
# 36-month selection restricted to buildings aged 2 years or less.
# The filter is applied to the 36-month frame so the data matches the file name.
target_5_60_bdyr2_df1 = target_5_60_df1[target_5_60_df1['건물연식'] <= 2]
target_5_60_bdyr2_df1.to_excel('./국토교통부_실거래가_공개시스템/집값분석/{}_{}_{}/실거래가_건물연식{}년_계약최근36개월.xlsx'.format(target_gu, target_dong, date_today_record, 2), index=False)

In [None]:
# 36-month selection restricted to buildings aged 5 years or less.
# The filter is applied to the 36-month frame so the data matches the file name.
target_5_60_bdyr5_df1 = target_5_60_df1[target_5_60_df1['건물연식'] <= 5]
target_5_60_bdyr5_df1.to_excel('./국토교통부_실거래가_공개시스템/집값분석/{}_{}_{}/실거래가_건물연식{}년_계약최근36개월.xlsx'.format(target_gu, target_dong, date_today_record, 5), index=False)

In [None]:
# Working copy used by the plotting cells below.
# NOTE(review): target_3_36_df is built with a 24-month window, while the plot titles print
# target_period (36). Confirm which period is intended.
target_df = target_3_36_df.copy()

In [None]:
# Scatter plot with a fitted regression line of unit price against contract date, saved as PNG.
# NOTE(review): '단가(만원/㎡)' does not appear in the house_df previews earlier in this
# notebook (they show '단가(만원/평)'). Confirm the column before running.
font = {'size': 16}
matplotlib.rc('font', **font)
f, ax = plt.subplots(figsize=(20,10))
plt.title('최근 {}개월 {} {} 건물연식 {}년이하 연립다세대 및 오피스텔 매매 실거래 {}건의 전용면적 단가'.format(target_period, target_gu, target_dong, target_building_age, target_df.shape[0]))
plt.grid(True)
# Register the datetime values with the x axis so they can be converted to plottable numbers.
ax.xaxis.update_units(target_df['계약날짜'])
sns.regplot(x=ax.xaxis.convert_units(target_df['계약날짜']), y=target_df['단가(만원/㎡)'])
plt.savefig('./국토교통부_실거래가_공개시스템/집값분석/{}_{}_{}/산포도.png'.format(target_gu, target_dong, date_today_record))

In [None]:
# Keep only the last row for each (address, construction year) pair.
# NOTE(review): '지번주소' does not appear in the house_df previews earlier in this notebook.
target_last_df = target_df.drop_duplicates(subset=['지번주소', '건축년도'], keep='last').reset_index(drop=True)

In [None]:
# Same scatter/regression plot as the cell for target_df, drawn for the de-duplicated
# frame target_last_df and saved under a different file name.
# NOTE(review): '단가(만원/㎡)' does not appear in the house_df previews earlier in this notebook.
font = {'size': 16}
matplotlib.rc('font', **font)
f, ax = plt.subplots(figsize=(20,10))
plt.title('최근 {}개월 {} {} 건물연식 {}년이하 연립다세대 및 오피스텔 매매 실거래 {}건의 전용면적 단가'.format(target_period, target_gu, target_dong, target_building_age, target_last_df.shape[0]))
plt.grid(True)
# Register the datetime values with the x axis so they can be converted to plottable numbers.
ax.xaxis.update_units(target_last_df['계약날짜'])
sns.regplot(x=ax.xaxis.convert_units(target_last_df['계약날짜']), y=target_last_df['단가(만원/㎡)'])
plt.savefig('./국토교통부_실거래가_공개시스템/집값분석/{}_{}_{}/마지막_산포도.png'.format(target_gu, target_dong, date_today_record))

In [None]:
# Write the de-duplicated frame to today's output directory.
target_last_df.to_excel('./국토교통부_실거래가_공개시스템/집값분석/{}_{}_{}/마지막_실거래가.xlsx'.format(target_gu, target_dong, date_today_record), index=False)

In [None]:
# All deals in the target district/dong, with no date window, whose building age at
# contract time is at most target_building_age.
# NOTE(review): '지번주소' and '계약날짜기준_건물연식' do not appear in the house_df previews
# earlier in this notebook (the lines that would create them in data_prep are commented out).
# Confirm before running.
target_df2 = house_df[
    (house_df['지번주소'].str.contains(target_gu + ' ' + target_dong))
    #&(house_df['동'] == target_dong)
    #&(house_merge_df['계약날짜'] >= (date_today - pd.DateOffset(months=target_period)))
    &(house_df['계약날짜기준_건물연식'] <= target_building_age)
    #&(house_merge_df['용도지역명1'].str.contains(target_purpose))
]
print(target_df2.shape)
target_df2.head()

In [None]:
# NOTE(review): get_index_applied_prices is not defined in any cell visible in this notebook;
# it must be defined or imported before this cell can run. Later cells read
# '지수적용날짜' and '지수적용단가', presumably added by this call. Verify.
target_df2 = get_index_applied_prices(target_df2)

In [None]:
# Histogram with a KDE curve of the unit price for target_df2 (not saved to disk).
font = {'size': 12}
matplotlib.rc('font', **font)
f, ax = plt.subplots(figsize=(12,6))
plt.title('{} {} 계약날짜기준 건물연식 {}년이하 연립다세대 및 오피스텔 매매 실거래 {}건의 전용면적 단가'.format(target_gu, target_dong, target_building_age, target_df2.shape[0]))
plt.grid(True)
sns.histplot(target_df2['단가(만원/㎡)'], kde=True)

In [None]:
# Reduce target_df2 to the reporting columns, order by address, construction year and
# contract date, and export the result.
target_df3 = target_df2[['지번주소', '건물(단지)명', '건축년도', '층', '전용면적(㎡)', '계약날짜', '단가(만원/㎡)', '부동산유형']]
target_df3 = target_df3.sort_values(['지번주소', '건축년도', '계약날짜']).reset_index(drop=True)
target_df3.to_excel('./국토교통부_실거래가_공개시스템/집값분석/{}_{}_{}/전체_실거래가.xlsx'.format(target_gu, target_dong, date_today_record), index=False)

In [None]:
# Rows whose exclusive area lies in the closed range 28-32 ㎡.
target_df3_30 = target_df3[target_df3['전용면적(㎡)'].between(28, 32)]

In [None]:
# Export the 28-32 ㎡ subset.
target_df3_30.to_excel('./국토교통부_실거래가_공개시스템/집값분석/{}_{}_{}/전체_실거래가_30.xlsx'.format(target_gu, target_dong, date_today_record), index=False)

In [None]:
# Concatenating a single frame yields a new frame with the same rows as target_df2.
concat_df = pd.concat([target_df2])

In [None]:
# Order by address, construction year, area and contract date, then keep the last row
# (the latest contract, given that ordering) per address / construction year / area.
concat_df = concat_df.sort_values(['지번주소', '건축년도', '전용면적(㎡)', '계약날짜']).reset_index(drop=True)
concat_df = concat_df.drop_duplicates(['지번주소', '건축년도', '전용면적(㎡)'], keep='last').reset_index(drop=True)

In [None]:
# Per-(address, construction year) mean of the raw and index-adjusted unit prices,
# broadcast back onto every row of the group.
concat_df['단가평균'] = concat_df.groupby(['지번주소', '건축년도'])['단가(만원/㎡)'].transform('mean')
concat_df['지수적용단가평균'] = concat_df.groupby(['지번주소', '건축년도'])['지수적용단가'].transform('mean')

In [None]:
# Re-order by address, construction year and index-application date, then preview.
concat_df = concat_df.sort_values(['지번주소', '건축년도', '지수적용날짜']).reset_index(drop=True)
print(concat_df.shape)
concat_df.head()

In [None]:
# One row per (address, construction year): the last one under the ordering set above.
concat_last_df = concat_df.drop_duplicates(subset=['지번주소', '건축년도'], keep='last')
print(concat_last_df.shape)
concat_last_df.head()

In [None]:
# Histogram with a KDE curve of the index-adjusted unit price, one value per lot, saved as PNG.
# NOTE(review): the title mentions an average ('단가평균'), but the plotted column is
# '지수적용단가', not '지수적용단가평균'. Confirm which one is intended.
font = {'size': 16}
matplotlib.rc('font', **font)
f, ax = plt.subplots(figsize=(20,10))
plt.title('{} {} 계약날짜기준 건물연식 {}년이하 연립다세대 및 오피스텔 매매 실거래 지번별 {}곳의 전용면적 실거래가격지수 적용후 단가평균'.format(target_gu, target_dong, target_building_age, concat_last_df.shape[0]))
plt.grid(True)
sns.histplot(concat_last_df['지수적용단가'], kde=True)
plt.savefig('./국토교통부_실거래가_공개시스템/집값분석/{}_{}_{}/지번별_분포도.png'.format(target_gu, target_dong, date_today_record))

In [None]:
# Order the per-lot rows by address.
concat_last_df = concat_last_df.sort_values(['지번주소']).reset_index(drop=True)

In [None]:
# Keep the reporting columns and label the contract date as the last contract date.
concat_last_df = concat_last_df[
    ['지번주소', '건물(단지)명', '건축년도', '전용면적(㎡)', '층', '계약날짜', '단가(만원/㎡)', '지수적용날짜', '지수적용단가', '부동산유형']
].rename(columns={'계약날짜':'마지막_계약날짜'})
concat_last_df.head()

In [None]:
# Export the per-lot summary.
concat_last_df.to_excel('./국토교통부_실거래가_공개시스템/집값분석/{}_{}_{}/전체_마지막_실거래가.xlsx'.format(target_gu, target_dong, date_today_record), index=False)