In [1]:
import multiprocess as mp
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import math
from tqdm import tqdm
import requests
from urllib.parse import urlparse
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
import datetime
from sklearn.preprocessing import quantile_transform
pd.options.mode.chained_assignment = None  # default='warn'
import gc

In [2]:
def bon_bun_allocator(x):
    """Return the integer that precedes the first '-' in a lot-number string.

    x is a string such as '344-233' or '100'; when there is no '-', the
    whole string is converted.
    """
    leading_part, _, _ = x.partition('-')
    return int(leading_part)

def bu_bun_allocator(x):
    """Return the integer that follows the '-' in a lot-number string.

    A string without any '-' yields 0. A string with more than one '-'
    prints 'error' and yields None.
    """
    pieces = x.split('-')
    piece_count = len(pieces)

    if piece_count == 1:
        # No sub-number present.
        return 0
    if piece_count == 2:
        return int(pieces[1])

    # Anything with two or more dashes is reported, not parsed.
    print('error')
    return None

In [3]:
def land_plans_full_address_allocator(df):
    """Add a '지번주소' column to df (in place) and return df.

    The new column is '<법정동명> <a>-<b>', where a and b are obtained by
    running bon_bun_allocator and bu_bun_allocator over the '지번' column
    and converting each result to str.
    """
    lot_numbers = df['지번']
    main_text = lot_numbers.apply(bon_bun_allocator).apply(str)
    sub_text = lot_numbers.apply(bu_bun_allocator).apply(str)

    df['지번주소'] = df['법정동명'] + ' ' + main_text + '-' + sub_text
    return df

In [4]:
def land_plan_unstack(df):
    """Pivot the long table so that each 용도지역지구명 value becomes a column.

    df is indexed by (지번주소, 년, 용도지역지구명); the innermost level is
    then moved into the columns and the remaining index levels are turned
    back into ordinary columns. The result has two-level column labels.
    """
    key_columns = ['지번주소', '년', '용도지역지구명']
    indexed = df.set_index(key_columns)
    # level=-1 is the last index level, i.e. 용도지역지구명.
    wide = indexed.unstack(level=-1)
    return wide.reset_index()

In [5]:
def san_addr_modifier(x):
    """Insert '산' directly in front of the last space-separated token of an address.

    Parameters
    ----------
    x : str or missing value
        Address text such as '서울특별시 강남구 개포동 100-0'.

    Returns
    -------
    str or the original missing value
        The address with '산' prefixed to its final token, e.g.
        '서울특별시 강남구 개포동 산100-0'. Missing values (anything
        ``pd.isna`` reports as missing) are returned unchanged.

    The split is made on the last space rather than at a fixed token
    position, so addresses whose leading part has more or fewer than three
    words keep every word and still get the prefix on the final token.
    """
    # Missing addresses pass straight through.
    if pd.isna(x):
        return x

    leading_text, separator, last_token = x.rpartition(' ')
    if not separator:
        # No space at all: the whole string is the token to prefix.
        return '산' + last_token
    return leading_text + ' 산' + last_token

In [6]:
def create_land_plans_df(basedir='./토지이용계획정보/'):
    """Load every land-use-plan CSV in ``basedir`` and pivot it to one row per (지번주소, 년).

    Steps:
      1. Read each ``*.csv`` (EUC-KR encoded), keep six columns, concatenate,
         and drop rows with any missing value as well as exact duplicates.
      2. Derive the integer column 년 from the text before the first '-' in
         등록일자.
      3. Build 지번주소 with land_plans_full_address_allocator; for rows whose
         대장구분명 equals '임야대장', rewrite it with san_addr_modifier.
      4. Keep the first 저촉여부 per (지번주소, 용도지역지구명, 년) and unstack so
         that each 용도지역지구명 becomes its own column.

    Parameters
    ----------
    basedir : str, optional
        Directory to scan for CSV files. Defaults to the path this notebook
        originally hard-coded.

    Returns
    -------
    pandas.DataFrame
        Columns 지번주소 and 년, followed by one column per 용도지역지구명 holding
        the 저촉여부 value (NaN where no record exists).

    NOTE(review): every ``*.csv`` found in ``basedir`` is read, so derived
    CSVs saved into the same directory would be picked up as inputs too —
    confirm the directory holds raw inputs only before re-running.
    """
    wanted_cols = ['법정동명', '대장구분명', '지번', '저촉여부', '용도지역지구명', '등록일자']

    # os.listdir returns names in arbitrary order. Sorting makes the
    # keep='first' de-duplication further down reproducible between runs.
    filenames = sorted(f for f in os.listdir(basedir) if f.endswith('.csv'))

    frames = [
        pd.read_csv(os.path.join(basedir, name), encoding='euc-kr')[wanted_cols]
        for name in filenames
    ]
    df = pd.concat(frames).dropna().drop_duplicates().reset_index(drop=True)

    # Text before the first '-' of 등록일자, as an integer.
    df['년'] = df['등록일자'].str.split('-').str[0].astype('int')

    df = land_plans_full_address_allocator(df)

    # Rewrite the address of '임야대장' rows with a single masked assignment
    # instead of editing a filtered copy and writing it back.
    is_san = df['대장구분명'] == '임야대장'
    df.loc[is_san, '지번주소'] = df.loc[is_san, '지번주소'].apply(san_addr_modifier)

    df = df[['지번주소', '저촉여부', '용도지역지구명', '년']]

    # unstack needs unique (지번주소, 년, 용도지역지구명) keys; keep the first record.
    df = df.drop_duplicates(
        subset=['지번주소', '용도지역지구명', '년'], keep='first'
    ).reset_index(drop=True)

    df = land_plan_unstack(df)

    # Columns are now two-level tuples. The two leading key columns carry
    # their name in level 0; every pivoted column carries it in level 1.
    df.columns = [
        label[0] if position < 2 else label[1]
        for position, label in enumerate(df.columns)
    ]

    return df

In [7]:
%%time
# Build the wide land-use table from the raw CSVs; the trailing expression
# displays its (rows, columns) shape.
land_plans_df = create_land_plans_df()
land_plans_df.shape

Wall time: 2min 12s


(4528062, 350)

In [8]:
# Order rows by address and then by year, so each address's rows are
# contiguous and its last row is the one with the highest year.
land_plans_df = land_plans_df.sort_values(['지번주소', '년']).reset_index(drop=True)

In [9]:
# Distinct addresses, in order of first appearance in land_plans_df.
unique_addrs = land_plans_df['지번주소'].unique()

In [10]:
# Number of distinct addresses.
len(unique_addrs)

983564

In [13]:
# First half of the distinct addresses (positions 0 up to, but not
# including, int(len / 2)), collected into a plain list.
first_half = list(unique_addrs[:int(len(unique_addrs) / 2)])

In [14]:
# Peek at the first five addresses of this half.
first_half[:5]

['서울특별시 강남구 개포동 100-0',
 '서울특별시 강남구 개포동 101-0',
 '서울특별시 강남구 개포동 102-0',
 '서울특별시 강남구 개포동 103-0',
 '서울특별시 강남구 개포동 104-1']

In [15]:
# Rows of land_plans_df whose address is in first_half; the trailing
# expression displays the subset's shape.
first_half_df = land_plans_df[land_plans_df['지번주소'].isin(first_half)]
first_half_df.shape

(2100736, 350)

In [21]:
# Second half of the distinct addresses (position int(len / 2) through the
# end), collected into a plain list.
second_half = list(unique_addrs[int(len(unique_addrs) / 2):int(len(unique_addrs))])

In [22]:
# Peek at the first five addresses of this half.
second_half[:5]

['서울특별시 서대문구 연희동 344-233',
 '서울특별시 서대문구 연희동 344-24',
 '서울특별시 서대문구 연희동 344-25',
 '서울특별시 서대문구 연희동 344-26',
 '서울특별시 서대문구 연희동 344-27']

In [23]:
# Rows of land_plans_df whose address is in second_half; the trailing
# expression displays the subset's shape.
second_half_df = land_plans_df[land_plans_df['지번주소'].isin(second_half)]
second_half_df.shape

(2427326, 350)

In [24]:
def create_complete_land_plans_df_iteratively(df):
    """Collapse df to one row per 지번주소, carrying earlier values forward.

    For every address, missing cells are forward-filled in row order and
    only the final row is kept. Each output cell therefore holds the last
    non-missing value recorded for that address and column (or stays
    missing if no row had a value). Addresses with a single row are kept
    as they are.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a '지번주소' column. Rows of one address are used in the
        order they appear, so sort beforehand if "latest" should mean the
        highest year.

    Returns
    -------
    pandas.DataFrame
        One row per address, in order of each address's first appearance,
        with a fresh 0..n-1 index. An empty input gives an empty frame with
        the same columns. Rows whose 지번주소 is missing are not included
        (groupby drops missing keys).
    """
    # Kept inside the function, as in the original cell — presumably so the
    # names resolve when the function runs in a worker process; confirm
    # before hoisting these to module level.
    import gc
    import pandas as pd

    if df.shape[0] == 0:
        return df.reset_index(drop=True)

    latest_rows = []
    # One grouping pass replaces a full boolean scan of df per address.
    # sort=False keeps addresses in order of first appearance.
    for position, (_, addr_df) in enumerate(df.groupby('지번주소', sort=False)):
        # Periodic collection; the counter now advances for every address.
        if position % 5000 == 0:
            gc.collect()

        if addr_df.shape[0] > 1:
            # Carry earlier values forward, then keep only the final row.
            addr_df = addr_df.ffill().iloc[[-1], :]

        latest_rows.append(addr_df)

    if not latest_rows:
        # Every address key was missing, so there is nothing to keep.
        return df.iloc[:0].reset_index(drop=True)

    return pd.concat(latest_rows).reset_index(drop=True)

In [17]:
def parallelize(df, func, num_processors=6):
    """Apply func to address-wise slices of df in a process pool and merge the results.

    The distinct values of '지번주소' are cut into at most ``num_processors``
    contiguous, near-equal runs. Each worker receives the rows belonging to
    one run, so all rows of an address always go to the same worker. The
    per-slice results are concatenated in slice order and then sorted by
    ('지번주소', '년').

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a '지번주소' column; the result of func must contain
        '지번주소' and '년' for the final sort.
    func : callable
        Takes a DataFrame slice and returns a DataFrame. It has to be
        picklable by the pool and should treat each address independently,
        since slice boundaries fall between addresses.
    num_processors : int, optional
        Upper bound on the number of slices and worker processes.

    Returns
    -------
    pandas.DataFrame
        Concatenated results, sorted by address and year, with a fresh index.
    """
    unique_addrs = df['지번주소'].unique()
    n_addrs = len(unique_addrs)

    # Evenly spaced cut points, so slice sizes differ by at most one address.
    bounds = [n_addrs * i // num_processors for i in range(num_processors + 1)]

    # Skip empty runs (fewer addresses than processes), so func is never
    # handed an empty frame it may not be able to handle.
    data_split = [
        df[df['지번주소'].isin(unique_addrs[start:stop])]
        for start, stop in zip(bounds[:-1], bounds[1:])
        if stop > start
    ]
    if not data_split:
        # No addresses at all: let func decide what to do with the frame.
        data_split = [df]

    # The context manager tears the pool down even when func raises, so
    # worker processes are not left behind.
    with mp.Pool(min(num_processors, len(data_split))) as pool:
        results = pool.map(func, data_split)

    concat_df = pd.concat(results)
    concat_df = concat_df.sort_values(['지번주소', '년']).reset_index(drop=True)

    return concat_df

In [25]:
%%time
# Collapse second_half_df to one row per address, using parallelize's
# default number of worker processes.
# NOTE(review): first_half_df is built earlier but is not passed through
# parallelize in any cell visible here — confirm it is processed elsewhere.
completed_df = parallelize(second_half_df, create_complete_land_plans_df_iteratively)
print(completed_df.shape)
completed_df.head()

(491782, 350)
Wall time: 1h 16min 27s


Unnamed: 0,지번주소,년,(한강)오염행위 제한지역,(한강)폐기물매립시설 설치제한지역,4대문안,가로구역별 최고높이 제한지역,가스공급설비,가스공급시설,가축사육제한구역,개발제한구역,개발진흥지구,개발행위허가제한지역,건축선,건축용도지역기타,건축용도지역미분류,건축허가·착공제한지역,경관광장,경관녹지,경관지구,경관지구기타,고가도로,고도지구,고등학교,고속철도,고압선,공간시설미분류,공공공지,공공도서관,공공문화체육시설미분류,공공시설구역,공공시설용지,공공주택지구,공공지원민간임대주택 공급촉진지구,공공청사,공동구,공설화장시설,공용시설보호지구,공원,공원마을지구(공원집단시설지구),공원문화유산지구,공원자연보존지구,공원자연환경지구,공익용산지,공익임지,공장설립승인지역,공장설립제한지역,공항,공항소음피해예상지역,공항소음피해지역,공항시설보호지구,과밀억제권역,광로1류(폭 70M 이상),광로2류(폭 50M~70M),광로3류(폭 40M~50M),광역계획구역,광역복합환승센터,광장,교육환경보호구역,교차점광장,교통광장,교통운수시설미분류,국가산업단지,국가지정문화재구역,국가하천,국립공원,국민임대주택단지예정지구,국지도로,국토이용기타용도지구,국토이용용도지구기타,군사기지 및 군사시설 보호구역,군사시설 보호구역,궤도,근린공원,근린광장,근린상업지역,기업형임대주택 공급촉진지구,기타공공공지시설,기타공공청사시설,기타공원시설,기타교통시설,기타녹지시설,기타도로시설,기타도시공간시설,기타도시방재시설,기타문화시설,기타방송통신시설,기타보건위생시설,기타사회복지시설,기타수도시설,기타시장시설,기타열공급설비,기타용도지역지구기타,기타용도지역지구미분류,기타용지,기타유통및공급시설,기타유통업무설비,기타자동차정류장,기타전기공급설비,기타주차장시설,기타철도시설,기타폐기물처리시설,기타하수도시설,기타학교시설,기타환경기초시설,노외주차장,녹지,농수산물공판장및농수산물종합유통센터,대공방어협조구역,대공방어협조구역(위탁고도:54-236m),대공방어협조구역(위탁고도:77-257m),대로1류(폭 35M~40M),대로2류(폭 30M~35M),대로3류(폭 25M~30M),대학,도로,도로구역,도서관,도시개발구역,도시개발구역기타,도시고속도로,도시관리계획 입안중,도시기타용도지역지구기타,도시기타용도지역지구미분류,도시기타용도지역지구용도지역지구,도시자연공원,도시자연공원구역,도시지역,도시지역기타,도시철도,등록문화재구역,묘지공원,문화공원,문화시설,문화재,문화재보존영향 검토대상구역,문화재보호구역,문화재보호구역기타,문화지구,미관지구기타,박물관,방송통신시설,방수설비,방재지구,방화지구,배수시설,변전소(전원개발사업구역),변전시설,보건위생시설미분류,보전녹지지역,보전산지,보전임지,보조간선도로,보존지구,보행자전용도로,부설주차장,비오톱1등급,비행안전제1구역(전술),비행안전제2구역(전술),비행안전제2구역(지원),비행안전제3구역(전술),비행안전제3구역(지원),비행안전제4구역(전술),비행안전제4구역(지원),비행안전제5구역(전술),비행안전제5구역(지원),비행안전제6구역(전술),사고지,사방시설,사방지,사업지역기타,사업지역미분류,사회복지시설,산업개발진흥지구,산업기술단지,산업시설구역,상대보호구역,상대정화구역,상수원보호구역,상수원보호기타,생산녹지지역,생태·경관보전지역,생태·경관완충보전구역,생태·경관핵심보전구역,소공원,소로1류(폭 10M~12M),소로2류(폭 8M~10M),소로3류(폭 8M 미만),소하천,소하천구역,소하천예정지,수도공급시설,수질오염방지시설,수평표면구역,시·도 
생태·경관보전지역,시·도야생생물보호구역,시가지경관지구,시가지조성사업지역,시도지정문화재구역,시장,시장정비구역,시장정비구역기타,시험림구역,아파트지구,액화석유가스충전시설,야생생물보호구역,어린이공원,여객자동차터미널,역사도심,역사문화미관지구,역사문화특화경관지구,역사문화환경보존지역,연결녹지,연구개발특구,연구시설,열공급설비,온천공보호구역,온천원보호지구,완충녹지,용도구역기타,용도구역미분류,용도지구취락지구,운동장,원추표면구역,원형보존지,유류저장및송유설비,유수시설,유수지,유원지,유치원,유통단지,유통상업지역,유통업무설비,일단의공업용지조성사업지역,일단의주택단지조성사업지역,일반광장,일반도로,일반미관지구,일반산업단지,일반상업지역,일반주거지역,일반철도,임업용산지,자동차검사시설,자동차운전학원,자동차전용도로,자동차정류장,자연경관지구,자연공원용도지구기타,자연녹지지역,자연재해위험지구,장애물제한표면구역,재개발구역,재개발구역기타,재정비촉진지구,재정비촉진지구기타,재해위험지구기타,재활용시설,저류시설,전기공급설비,전용주거지역,전이표면구역,전통사찰보존구역,절대보호구역,절대정화구역,정비구역,정비구역기타,정비예정구역,제1종일반주거지역,제1종전용주거지역,제1종지구단위계획구역,제2종일반주거지역,제2종전용주거지역,제3종 구역,제3종일반주거지역,제방,제한보호구역,제한보호구역(방공기지 : 1km),제한보호구역(전술항공:5km),제한보호구역(후방지역:500m),조례로정한지역,조망가로미관지구,조망가로특화경관지구,종합운동장,종합의료시설,주간선도로,주거용지,주거환경개선지구,주요시설광장,주차장,주차장기타,주차환경개선지구,준공업지역,준보전산지,준주거지역,중로1류(폭 20M~25M),중로2류(폭 15M~20M),중로3류(폭 12M~15M),중심대광장,중심상업지역,중심지미관지구,중요시설물보존지구,중요시설물보호지구,중요시설물보호지구(공용),중요시설물보호지구(공항),중점경관관리구역,중학교,지구단위계획구역,지역특화발전특구,지원시설구역,지정문화재구역,지하광장,지하도로,진입표면구역,집단취락지구,집산도로,철도,청사,청소년수련시설,체육공원,체육시설,초등학교,최고고도지구,취수시설,침수위험지구,택지개발예정지구,택지개발예정지구기타,토지거래계약에관한허가구역,토지구획정리사업지구기타,토지형질변경규제지역,통제보호구역,통제보호구역(민통선이남:300m),통제보호구역(방공기지:500m),특수도로(보행자전용도로),특정개발진흥지구,특화경관지구,폐기물처리및재활용시설,폐기물처리시설,하수도,하수종말처리시설,하천,하천구역,하천미분류,학교,학교시설보호지구,학교이적지,학교환경위생 정화구역,현상변경허가 대상구역,홍수관리구역,화장장,환경정비구역
0,서울특별시 서대문구 연희동 344-233,2020,,,,,,,포함,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,포함,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,포함,,,,,,,접함,,,,,,,저촉,,,,,포함,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,포함,포함,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,저촉,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,접함,,,,,,,,,,,,,,,,저촉,저촉,,,,저촉,,,,,,저촉,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,저촉,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,접함,,,,,,,
1,서울특별시 서대문구 연희동 344-24,2017,,,,,,,포함,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,포함,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,포함,,,,,,,접함,,,,,,,저촉,,,,,포함,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,포함,포함,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,저촉,,,,,,,,,,,,,,,,포함,포함,,,,저촉,,,,,,저촉,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,저촉,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,서울특별시 서대문구 연희동 344-25,2017,,,,,,,포함,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,포함,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,포함,,,,,,,접함,,,,,,,접함,,,,,포함,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,포함,포함,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,저촉,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,접함,,,,,,,,,,,,,,,,포함,포함,,,,접함,,,,,,포함,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,포함,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,서울특별시 서대문구 연희동 344-26,2020,,,,,,,포함,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,포함,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,포함,,,,,,,접함,,,,,,,,,,,,포함,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,포함,포함,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,저촉,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,포함,포함,,,,,,,,,,포함,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,포함,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,서울특별시 서대문구 연희동 344-27,2017,,,,,,,포함,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,포함,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,포함,,,,,,,저촉,,,,,,,,,,,,포함,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,포함,포함,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,저촉,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,저촉,저촉,,,,,,,,,,포함,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,포함,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [26]:
# Save the second-half result without the index column.
# NOTE(review): this writes into the same directory whose *.csv files
# create_land_plans_df() reads, so a later re-run of that function will try
# to load this file as input too — confirm that is intended.
completed_df.to_csv('./토지이용계획정보/seoul_land_plans_ver_1_second_half.csv', index=False)