In [1]:
# import libraries

import sys
import os
from pathlib import Path
from datetime import datetime
import time

import pandas as pd
import numpy as np
import json
import matplotlib as mlp
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import matplotlib.font_manager as fm

plt.rcParams['figure.figsize'] = (25,15)
plt.rcParams['font.family'] = 'NanumGothic'

import folium
import geopandas as gpd
import fiona
from shapely.geometry import Point, LineString
import pyproj

import warnings
warnings.filterwarnings(action='ignore')

import io
from PIL import Image

import selenium

print(sys.version)

3.7.10 (default, Feb 26 2021, 13:06:18) [MSC v.1916 64 bit (AMD64)]


## 건축물대장

In [2]:
# 건축물대장 데이터 불러오기

building = pd.read_csv('data/4.부산시남구_건축물대장(표제부).csv')
building.head(5)

Unnamed: 0,loc,sgg_cd,emd_cd,land_gbn,bun,ji,reg_cd,reg_fld_cd,address,emd_nm,...,in_mec_area,out_mec_num,out_mec_area,in_sfp_num,in_sfp_area,out_sfp_num,out_sfp_area,us_athr_date,lon,lat
0,부산광역시 남구 대연동 1-4번지,26290,10600,0,"""0001""","""0004""",일반,일반건축물,,,...,0.0,0,0.0,0,0.0,0,0.0,19750211.0,129.101521,35.136334
1,부산광역시 남구 대연동 1-4번지,26290,10600,0,"""0001""","""0004""",일반,일반건축물,,,...,0.0,0,0.0,0,0.0,0,0.0,19850629.0,129.101521,35.136334
2,부산광역시 남구 대연동 10-12번지,26290,10600,0,"""0010""","""0012""",일반,일반건축물,부산광역시 남구 황령대로 484,,...,0.0,0,0.0,0,0.0,0,0.0,19860912.0,129.106861,35.138476
3,부산광역시 남구 대연동 10-2번지,26290,10600,0,"""0010""","""0002""",집합,표제부,부산광역시 남구 황령대로 486,,...,0.0,0,0.0,0,0.0,0,0.0,19850605.0,129.106914,35.138322
4,부산광역시 남구 대연동 1002-10번지,26290,10600,0,"""1002""","""0010""",집합,표제부,부산광역시 남구 유엔로137번길 46,대동레미안VI,...,0.0,0,0.0,10,131.5,2,18.7,20141222.0,129.090568,35.132062


In [3]:
# 각 칼럼별 데이터 확인

building['sgg_cd'].value_counts()

26290    26101
Name: sgg_cd, dtype: int64

In [4]:
building['emd_cd'].value_counts()

10600    8569
10900    6352
10700    4594
11100    3362
11000    2121
10800    1103
Name: emd_cd, dtype: int64

In [5]:
building['mbd_sep_cd'].value_counts()

주건축물     25548
부속건축물      552
Name: mbd_sep_cd, dtype: int64

In [6]:
building['mus_cd_nm'].value_counts()

단독주택          16904
공동주택           3666
제2종근린생활시설      1903
제1종근린생활시설      1591
교육연구시설          527
공장              293
창고시설            241
업무시설            149
종교시설            138
자동차관련시설         117
노유자시설           107
위험물저장및처리시설       99
운수시설             78
숙박시설             76
판매시설             43
교정및군사시설          30
운동시설             28
문화및집회시설          26
의료시설             24
자원순환관련시설         13
동.식물관련시설         10
분뇨.쓰레기처리시설       10
교육연구및복지시설         8
위락시설              5
방송통신시설            3
근린생활시설            2
관광휴게시설            2
Name: mus_cd_nm, dtype: int64

In [7]:
building['etc_us_info'].value_counts() # '주택'이란 단어를 포함한 데이터만 남기는 것을 고려해 볼 필요성 있음

주택                                9008
단독주택                              3436
공동주택                              1528
다세대주택                             1293
근린생활시설                             581
                                  ... 
다세대주택(도시형생활주택-원룸형)및 업무시설(오피스텔)       1
영유아보육시설                              1
교육연구시설(대학교-사택)                       1
교육연구시설및복지지설                          1
산신각                                  1
Name: etc_us_info, Length: 1434, dtype: int64

In [8]:
building['house_num'].value_counts()

0      21157
8        723
4        573
3        496
1        491
       ...  
294        1
379        1
41         1
325        1
192        1
Name: house_num, Length: 199, dtype: int64

In [9]:
building['fam_num'].value_counts()

1      15189
0       7539
2       1387
3        852
4        548
5        193
6        136
7         40
8         38
9         27
12        27
15        16
10        15
14        11
11        11
16         7
13         7
19         6
18         4
23         3
27         2
24         2
48         2
36         2
17         2
51         2
40         2
53         2
31         2
99         1
116        1
84         1
50         1
78         1
37         1
101        1
57         1
33         1
21         1
192        1
22         1
60         1
26         1
149        1
34         1
75         1
30         1
41         1
20         1
25         1
400        1
28         1
86         1
52         1
95         1
70         1
Name: fam_num, dtype: int64

In [10]:
building['reg_fld_cd'].value_counts()

일반건축물    22163
표제부       3938
Name: reg_fld_cd, dtype: int64

In [11]:
# 건축물대장 데이터전처리

# 불필요한 칼럼 제거
new_building = building.drop(columns=['loc', 'sgg_cd', 'land_gbn', 'bun', 'ji', 'emd_nm','address'])

# us_athr_date의 데이터 타입을 날짜 형식으로 바꿔준다
new_building['us_athr_date'] = pd.to_datetime(new_building['us_athr_date'], format='%Y%m%d', errors='coerce')

print(new_building['us_athr_date'].value_counts())
print(new_building['us_athr_date'].isna().value_counts()) # 세분화해서 datetime을 지정하는 방법을 생각해보거나, null값을 제외하고 하는 방법 중 어떤 것이 나은 지 생각해봐야 함

new_building.head(5)

1985-06-29    2606
1974-04-20     224
2003-08-01     171
1985-06-24      62
2018-02-27      58
              ... 
2011-12-20       1
1973-05-02       1
2001-04-13       1
2003-05-22       1
1992-03-10       1
Name: us_athr_date, Length: 8052, dtype: int64
False    21727
True      4374
Name: us_athr_date, dtype: int64


Unnamed: 0,emd_cd,reg_cd,reg_fld_cd,mbd_sep_cd,mus_cd_nm,etc_us_info,house_num,fam_num,grd_flr_num,ugr_flr_num,...,in_mec_area,out_mec_num,out_mec_area,in_sfp_num,in_sfp_area,out_sfp_num,out_sfp_area,us_athr_date,lon,lat
0,10600,일반,일반건축물,주건축물,단독주택,주택,0,1,1,0,...,0.0,0,0.0,0,0.0,0,0.0,1975-02-11,129.101521,35.136334
1,10600,일반,일반건축물,주건축물,단독주택,주택,0,1,1,0,...,0.0,0,0.0,0,0.0,0,0.0,1985-06-29,129.101521,35.136334
2,10600,일반,일반건축물,주건축물,제2종근린생활시설,근린생활시설,0,0,5,1,...,0.0,0,0.0,0,0.0,0,0.0,1986-09-12,129.106861,35.138476
3,10600,집합,표제부,주건축물,제2종근린생활시설,"근린생활시설, 교육연구시설, 업무시설",0,0,5,1,...,0.0,0,0.0,0,0.0,0,0.0,1985-06-05,129.106914,35.138322
4,10600,집합,표제부,주건축물,공동주택,"공동주택,업무시설",10,0,6,0,...,0.0,0,0.0,10,131.5,2,18.7,2014-12-22,129.090568,35.132062


In [12]:
# in_mec_num (옥내기계식 대수), in_mec_area (옥내기계식 면적), out_mec_num (옥외 기계식 대수), out_mec_area (옥외 기계식 면적)
# in_sfp_num (옥내 자주식 대수), in_sfp_area (옥내 자주식 면적), out_sfp_num (옥외 자주식 대수), out_sfp_area (옥외 자주식 면적)
# 를 기준으로 최댓값, 평균 등을 알아본다

new_building.describe(include='all')

Unnamed: 0,emd_cd,reg_cd,reg_fld_cd,mbd_sep_cd,mus_cd_nm,etc_us_info,house_num,fam_num,grd_flr_num,ugr_flr_num,...,in_mec_area,out_mec_num,out_mec_area,in_sfp_num,in_sfp_area,out_sfp_num,out_sfp_area,us_athr_date,lon,lat
count,26101.0,26101,26101,26100,26093,26085,26101.0,26101.0,26101.0,26101.0,...,26101.0,26101.0,26101.0,26101.0,26101.0,26101.0,26101.0,21727,26101.0,26101.0
unique,,2,2,2,27,1434,,,,,...,,,,,,,,8052,,
top,,일반,일반건축물,주건축물,단독주택,주택,,,,,...,,,,,,,,1985-06-29 00:00:00,,
freq,,22163,22163,25548,16904,9008,,,,,...,,,,,,,,2606,,
first,,,,,,,,,,,...,,,,,,,,1945-04-18 00:00:00,,
last,,,,,,,,,,,...,,,,,,,,2021-04-22 00:00:00,,
mean,10795.969503,,,,,,3.619785,1.130263,2.531819,0.192062,...,1.067942,0.018773,0.057186,11.955212,94.793157,3.725681,32.602173,,129.087684,35.129549
std,179.315001,,,,,,18.683339,3.821822,3.50036,0.464947,...,20.016224,0.727743,2.048367,219.325484,1441.106458,44.377475,502.756839,,0.014734,0.010491
min,10600.0,,,,,,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,129.063702,35.091176
25%,10600.0,,,,,,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,129.075154,35.121008


## 주택노후도

In [13]:
# 주택노후도 데이터 불러오기

house_old = gpd.read_file('data/6.부산시_남구_주택노후도.geojson')
house_old.head(5)

Unnamed: 0,emd_cd,emd_nm,spc_grd_cd,spc_grd_info,jibun,co_bd_cd,co_bd_nm,bd_nm,dong,bd_area,mus_cd,mus_cd_nm,bd_h,grd_flr_num,ugr_flr_num,athr_date,us_athr_date,bd_age,geometry
0,2629010600,부산광역시 남구 대연동,1,일반,6-5,2,집합건축물,레이듐오피스텔,,1830.38,14000,업무시설,29.4,9,1,2001-04-10,2002-07-26,20.0,"MULTIPOLYGON (((129.10670 35.13866, 129.10663 ..."
1,2629010600,부산광역시 남구 대연동,1,일반,6-8,2,집합건축물,대박세븐,,919.73,2000,공동주택,17.4,6,0,2012-03-13,2012-07-26,10.0,"MULTIPOLYGON (((129.10617 35.13848, 129.10616 ..."
2,2629010600,부산광역시 남구 대연동,1,일반,6-10,1,일반건축물,,,380.05,3000,제1종근린생활시설,0.0,5,1,1988-05-10,1988-11-17,34.0,"MULTIPOLYGON (((129.10645 35.13865, 129.10638 ..."
3,2629010600,부산광역시 남구 대연동,1,일반,6-27,2,집합건축물,유니온 해오름,유니온 해오름,386.66,2000,공동주택,13.2,5,0,2003-06-23,2004-02-26,18.0,"MULTIPOLYGON (((129.10608 35.13825, 129.10595 ..."
4,2629010600,부산광역시 남구 대연동,1,일반,6-28,2,집합건축물,채송화뜰,,467.205,2000,공동주택,15.8,6,0,2012-05-15,2013-02-28,9.0,"MULTIPOLYGON (((129.10619 35.13835, 129.10612 ..."


In [14]:
house_old['emd_cd'].value_counts() # 10600 = 대연동, 10900 = 문현동, 10700 = 용호동, 11100 = 감만동, 11000 = 우암동, 10800 = 용당동

2629010600    7135
2629010900    4674
2629010700    3810
2629011100    2683
2629011000    1463
2629010800     769
Name: emd_cd, dtype: int64

In [15]:
house_old['emd_nm'].value_counts()

부산광역시 남구 대연동    7135
부산광역시 남구 문현동    4674
부산광역시 남구 용호동    3810
부산광역시 남구 감만동    2683
부산광역시 남구 우암동    1463
부산광역시 남구 용당동     769
Name: emd_nm, dtype: int64

In [16]:
house_old['spc_grd_cd'].value_counts() # 1 = 일반, 2 = 산

1    20384
2      150
Name: spc_grd_cd, dtype: int64

In [17]:
house_old['spc_grd_info'].value_counts()

일반    20384
산       150
Name: spc_grd_info, dtype: int64

In [18]:
house_old['co_bd_cd'].value_counts() #1 = 일반건축물, 2 = 집합건축물

1    17377
2     3157
Name: co_bd_cd, dtype: int64

In [19]:
house_old['co_bd_nm'].value_counts()

일반건축물    17377
집합건축물     3157
Name: co_bd_nm, dtype: int64

In [20]:
house_old['dong'].value_counts()

주건축물제1동    110
에이동         82
비동          79
나동          66
가동          65
          ... 
명성리치빌        1
청명레미안        1
중앙푸르지오       1
성산그린빌        1
방제대응센터       1
Name: dong, Length: 1241, dtype: int64

In [21]:
house_old['mus_cd'].value_counts()

01000    13129
02000     2965
04000     1704
03000     1363
10000      415
17000      159
18000      123
06000      114
14000      103
11000       95
20000       77
15000       72
19000       57
07000       36
08000       34
09000       18
05000       17
13000       15
22000        8
30000        5
Z8000        5
16000        4
21000        4
Z5000        3
24000        2
27000        2
Name: mus_cd, dtype: int64

In [22]:
house_old['mus_cd_nm'].value_counts() # 주택을 제외한 나머지 시설은 제외 (X종 근린생활시설도 '상업지역'이라고 생각하면 제외해도 될 듯) = '01000','02000'만 살린다

단독주택          13129
공동주택           2965
제2종근린생활시설      1704
제1종근린생활시설      1363
교육연구시설          415
공장              159
창고시설            123
종교시설            114
업무시설            103
노유자시설            95
자동차관련시설          77
숙박시설             72
위험물저장및처리시설       57
판매시설             36
운수시설             34
문화및집회시설          20
의료시설             18
운동시설             15
분뇨.쓰레기처리시설        8
교육연구및복지시설         5
위락시설              4
동.식물 관련시설         4
방송통신시설            2
관광휴게시설            2
Name: mus_cd_nm, dtype: int64

In [23]:
# 주택노후도 데이터 전처리

new_house_old = house_old.drop(columns=['spc_grd_info','emd_nm','co_bd_nm','jibun', 'bd_nm', 'dong','mus_cd_nm'])

# mus_cd = 01000','02000'만 남기기
is_house1 = new_house_old['mus_cd'] == '01000'
is_house2 = new_house_old['mus_cd'] == '02000'

new_house_old = new_house_old[(is_house1) | (is_house2)].reset_index(drop=True)

# emd_cd 26290(시군구 코드)을 제외한 동 코드만 남기기
new_house_old['emd_cd'] = new_house_old['emd_cd'].astype(str)
new_house_old['emd_cd'] = new_house_old['emd_cd'].str.strip()  # 앞 뒤 공백을 제거

new_house_old['emd_cd'] =  new_house_old['emd_cd'].str[5:]

# athr_date 및 us_athr_date datetime 형태로 저장하기

new_house_old['athr_date'] = new_house_old['athr_date'].astype('str')
new_house_old['athr_date'] = new_house_old['athr_date'].str.replace('-','')
new_house_old['athr_date'] = pd.to_datetime(new_house_old['athr_date'], format='%Y%m%d', errors='coerce')

new_house_old['us_athr_date'] = new_house_old['us_athr_date'].astype('str')
new_house_old['us_athr_date'] = new_house_old['us_athr_date'].str.replace('-','')
new_house_old['us_athr_date'] = pd.to_datetime(new_house_old['us_athr_date'], format='%Y%m%d', errors='coerce')

print(new_house_old['athr_date'].isna().count())
print(new_house_old['us_athr_date'].isna().count())
new_house_old.head(5)

16094
16094


Unnamed: 0,emd_cd,spc_grd_cd,co_bd_cd,bd_area,mus_cd,bd_h,grd_flr_num,ugr_flr_num,athr_date,us_athr_date,bd_age,geometry
0,10600,1,2,919.73,2000,17.4,6,0,2012-03-13,2012-07-26,10.0,"MULTIPOLYGON (((129.10617 35.13848, 129.10616 ..."
1,10600,1,2,386.66,2000,13.2,5,0,2003-06-23,2004-02-26,18.0,"MULTIPOLYGON (((129.10608 35.13825, 129.10595 ..."
2,10600,1,2,467.205,2000,15.8,6,0,2012-05-15,2013-02-28,9.0,"MULTIPOLYGON (((129.10619 35.13835, 129.10612 ..."
3,10600,1,1,133.6,1000,7.3,2,0,1997-02-17,1997-07-03,25.0,"MULTIPOLYGON (((129.10626 35.13828, 129.10614 ..."
4,10600,1,1,29.75,1000,0.0,1,0,NaT,NaT,,"MULTIPOLYGON (((129.10637 35.13890, 129.10631 ..."


In [24]:
new_house_old.describe()

Unnamed: 0,bd_area,bd_h,grd_flr_num,ugr_flr_num,bd_age
count,16094.0,16094.0,16094.0,16094.0,14554.0
mean,525.583725,4.074221,2.583137,0.148503,36.153016
std,2614.317221,10.866165,3.478175,0.377971,24.369451
min,0.0,0.0,0.0,0.0,4.0
25%,59.0625,0.0,1.0,0.0,31.0
50%,119.52,0.0,2.0,0.0,37.0
75%,193.0425,6.8,2.0,0.0,44.0
max,103795.1908,141.79,47.0,6.0,2012.0


## 도로명주소(건물)

In [25]:
# 도로명주소(건물) 데이터 불러오기

addr_building = gpd.read_file('data/5.부산시남구_도로명주소(건물).geojson')
addr_building.head(5)

Unnamed: 0,BDTYP_CD,BULD_NM,BULD_NM_DC,BULD_SE_CD,BUL_MAN_NO,EMD_CD,GRO_FLO_CO,LNBR_MNNM,LNBR_SLNO,UND_FLO_CO,geometry
0,11102,엔터모텔,,0,28791,109,0,807,5,0,"MULTIPOLYGON (((129.06709 35.13758, 129.06721 ..."
1,15001,동명주유소,,0,28790,109,0,807,4,0,"MULTIPOLYGON (((129.06721 35.13761, 129.06728 ..."
2,1001,,,0,11855,109,0,375,2,0,"MULTIPOLYGON (((129.06846 35.13766, 129.06848 ..."
3,4999,미래덴탈,,0,28788,109,0,807,2,0,"MULTIPOLYGON (((129.06738 35.13775, 129.06742 ..."
4,4999,신화빌딩,,0,28688,109,6,376,4,1,"MULTIPOLYGON (((129.06824 35.13779, 129.06826 ..."


In [26]:
addr_building['EMD_CD'].value_counts()

106    8847
109    7443
107    4465
111    4021
110    2901
108    1181
Name: EMD_CD, dtype: int64

In [27]:
addr_building['BULD_NM_DC'].value_counts()

A동         90
B동         86
101동       64
102동       60
1동         59
           ..
298         1
245/246     1
243/244     1
251         1
평화교회        1
Name: BULD_NM_DC, Length: 729, dtype: int64

In [28]:
# 도로명주소(건물) 데이터 전처리

# 불필요한 칼럼 정리
#addr_building = addr_building.drop(columns = ['BUL_MAN_NO', 'BULD_NM', 'BULD_NM_DC'])

# 지번 주소 합치기 (다른 데이터도 지번주소가 필요할 지 생각해봐야 함) 
#addr_building

# 지번 vs 번지: http://www.neonet.co.kr/novo-rebank/view/community/CommunityDetail.neo?board_gbn=A&id=668968

In [29]:
#addr_building['LNBR_MNNM']
#addr_building['LNBR_SLNO']

#JIBUN

## 공시지가

In [30]:
# 공시지가 데이터 불러오기

house_prc = gpd.read_file('data/10.부산시남구_공시지가.geojson')
house_prc.head(5)

Unnamed: 0,index,emd_cd,emd_nm,reg_cd,reg_nm,jibun,jimok,year,month,lnd_prc,stndrd_yn,jimok_cd,jimok_nm,lnd_area,date,geometry
0,2629010700105620003,2629010700,부산광역시 남구 용호동,1,일반,562-3,562-3대,2020.0,1.0,1017000,0.0,,,0.0,2021-05-18,"MULTIPOLYGON (((129.11309 35.10997, 129.11292 ..."
1,2629010700105620004,2629010700,부산광역시 남구 용호동,1,일반,562-4,562-4대,2020.0,1.0,960000,0.0,,,0.0,2021-05-18,"MULTIPOLYGON (((129.11326 35.11000, 129.11309 ..."
2,2629010700105620002,2629010700,부산광역시 남구 용호동,1,일반,562-2,562-2대,2020.0,1.0,1160000,0.0,,,0.0,2021-05-18,"MULTIPOLYGON (((129.11307 35.11004, 129.11290 ..."
3,2629011000101240000,2629011000,부산광역시 남구 우암동,1,일반,124,124종,2020.0,1.0,726200,0.0,,,0.0,2021-05-18,"MULTIPOLYGON (((129.07232 35.12728, 129.07236 ..."
4,2629011000101240033,2629011000,부산광역시 남구 우암동,1,일반,124-33,124-33 종,,,0,,,,0.0,2021-05-18,"MULTIPOLYGON (((129.07201 35.12733, 129.07201 ..."


In [31]:
house_prc.reg_cd.value_counts()

1    44218
2     1549
6        9
4        1
Name: reg_cd, dtype: int64

In [32]:
house_prc.reg_nm.value_counts()

일반            44218
산              1549
블럭지번(롯트세분)        9
가지번(부번세분)         1
Name: reg_nm, dtype: int64

In [33]:
house_prc.year.value_counts()

2020    45447
Name: year, dtype: int64

In [34]:
house_prc.month.value_counts()

01    45128
07      319
Name: month, dtype: int64

In [35]:
house_prc.date.value_counts()

2021-05-18    45777
Name: date, dtype: int64