# 필요한 모듈 import

In [243]:
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
import geopandas as gpd
import warnings
warnings.filterwarnings(action='ignore')


import requests
from bs4 import BeautifulSoup

# 데이터 불러오기

<b> 지역 특성 </b>
- 임대 시세
- 월 평균 매출액
- 여성인구 비율(유동인구)
- 유동인구, 직장인구, 주거인구
- 가구 수, 소득분위
- 횡단보도 수
- 주차장 수
- 지하철 수
- 집객 시설(은행, 병원, 학교, 버스 정류장 등)
- 공시지가

## 임대 시세
- https://www.bigdata-policing.kr/policy/contents/policy-dataSearch.do?schM=View&dbiIdx=79
- 2014 ~ 2020년 데이터
- null값 없음
- 서울시 전체 행정동은 426개동, 데이터는 총 423개의 행정동
- 전체층, 1층, 1층이외로 구성
- 점포 면적 3.3㎡ 당 월평균 수치

In [2]:
# Load the rental-price table from the local data folder.
rent = pd.read_csv('./data/rent.csv')
# NOTE(review): the preview printed below shows the year (2014) in the index
# position and dong names such as 청운효자동 under the column that was labelled
# '행정동 코드', which suggests the CSV header is one field shorter than the data
# rows so every label is shifted by one — confirm against the raw file.
# This rename only relabels the column that actually holds the names.
rent.rename(columns={'행정동 코드':'행정동명'}, inplace=True)
rent.head(2)

Unnamed: 0,기준 년 코드,기준 분기 코드,행정동명,임대시세 층구분 코드,임대시세 층구분 코드명,보증금 평균,월임대료 평균,환산 임대료 평균,임대건수
2014,4,11110515,청운효자동,0,전체층,2344938,75635,99084,19
2014,4,11110515,청운효자동,1,1층,2902696,88971,117998,11


In [3]:
print(len(rent['행정동명'].unique()))

423


In [4]:
rent['임대시세 층구분 코드명'].unique()

array(['전체층', '1층', '1층외'], dtype=object)

In [280]:
rent.reset_index(inplace=True)

In [283]:
rent.rename(columns={'index':'기준 년'}, inplace=True)

In [284]:
rent.to_csv('./dataset/임대시세.csv', index=False)

## 월 평균 매출액

- https://data.seoul.go.kr/dataList/OA-15572/S/1/datasetView.do
- 2014년부터 2020년 데이터
- 요일/시간대별, 연령/성별 매출 건수와 금액

In [5]:
# Read one estimated-sales CSV per year (2014-2020) and stack them into a single frame
sale_path = './data/서울시 우리마을가게 상권분석서비스(상권-추정매출)_{}.csv'
sale = pd.concat(
    [pd.read_csv(sale_path.format(year), encoding='cp949')
     for year in tqdm(range(2014, 2021))]
)

100%|████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:07<00:00,  1.08s/it]


In [6]:
sale.shape

(1091003, 80)

In [7]:
# 서비스 업종에서 '커피-음료'만 추출
coffee_sale = sale[sale['서비스_업종_코드_명']=='커피-음료']

In [8]:
len(coffee_sale['상권_코드_명'].unique())

1329

In [9]:
print(coffee_sale.shape)
coffee_sale.head(2)

(34201, 80)


Unnamed: 0,기준_년_코드,기준_분기_코드,상권_구분_코드,상권_구분_코드_명,상권_코드,상권_코드_명,서비스_업종_코드,서비스_업종_코드_명,분기당_매출_금액,분기당_매출_건수,...,시간대_건수~24_매출_건수,남성_매출_건수,여성_매출_건수,연령대_10_매출_건수,연령대_20_매출_건수,연령대_30_매출_건수,연령대_40_매출_건수,연령대_50_매출_건수,연령대_60_이상_매출_건수,점포수
6,2014,1,A,골목상권,1000393,갈현로7길,CS100010,커피-음료,3730959.0,435,...,23,106,313,0,53,180,99,37,51,1
21,2014,1,A,골목상권,1000271,동소문로2길,CS100010,커피-음료,41289452.0,5207,...,350,1913,3109,24,2129,1560,701,398,215,3


---

In [10]:
area = gpd.read_file(r'./data/TBGIS_TRDAR_RELM.shp',encoding='CP949')
area.head()

Unnamed: 0,TRDAR_SE_C,TRDAR_SE_1,TRDAR_CD,TRDAR_CD_N,XCNTS_VALU,YDNTS_VALU,SIGNGU_CD,ADSTRD_CD,STDR_YM_CD,geometry
0,R,전통시장,1001453,낙성대시장,196121,442084,11620,11620585,201810,"POLYGON ((196213.760 442152.080, 196186.890 44..."
1,R,전통시장,1001454,봉천제일종합시장,195147,442413,11620,11620595,201810,"POLYGON ((195242.520 442426.730, 195236.250 44..."
2,R,전통시장,1001474,도곡시장,204551,444227,11680,11680650,201810,"POLYGON ((204621.690 444236.130, 204643.430 44..."
3,R,전통시장,1001475,강남개포시장,206065,443310,11680,11680670,201810,"POLYGON ((205983.140 443324.320, 206105.890 44..."
4,R,전통시장,1001412,화곡본동시장,186203,449328,11500,11500590,201810,"POLYGON ((186224.350 449258.550, 186195.020 44..."


In [11]:
# Replace the shapefile's abbreviated column names with the Korean labels
# that the sales table uses for the same fields
area_column_labels = {
    'TRDAR_SE_C': '상권_구분_코드',
    'TRDAR_SE_1': '상권_구분_코드_명',
    'TRDAR_CD': '상권_코드',
    'TRDAR_CD_N': '상권_코드_명',
    'XCNTS_VALU': 'x좌표값',
    'YDNTS_VALU': 'y좌표값',
    'SIGNGU_CD': '시군구_코드',
    'ADSTRD_CD': '행정동_코드',
    'STDR_YM_CD': '기준_년월_코드',
}
area.rename(columns=area_column_labels, inplace=True)
# The district code is used as a merge key in later cells, so store it as int
area['상권_코드'] = area['상권_코드'].astype(int)

In [12]:
code = pd.read_csv('./data/행정동코드.csv', index_col=0)
code.head(2)

Unnamed: 0,통계청행정동코드,행자부행정동코드,시도명,시군구명,행정동명
0,1101053,11110530,서울,종로구,사직동
1,1101054,11110540,서울,종로구,삼청동


In [13]:
# Rename the bilingual header labels (e.g. 'H_DNG_NM(행정동명)') to their Korean-only form.
# NOTE(review): the preview of `code` printed above already shows the Korean-only
# labels, so this rename may be a no-op on the current file — confirm.
code.rename(columns= {'H_SDNG_CD(통계청행정동코드)':'통계청행정동코드', 'H_DNG_CD(행자부행정동코드)':'행자부행정동코드',
                      'DO_NM(시도명)':'시도명', 'CT_NM(시군구명)':'시군구명', 'H_DNG_NM(행정동명)':'행정동명'}, inplace=True)

In [14]:
coffee_sale = pd.merge(coffee_sale, area[['상권_코드', '상권_구분_코드_명', '상권_코드_명', '행정동_코드']], how='left', on=['상권_코드', '상권_구분_코드_명', '상권_코드_명'])

In [15]:
coffee_sale[coffee_sale['행정동_코드'].isnull()]['상권_코드'].unique()

array([1001252], dtype=int64)

In [16]:
coffee_sale[coffee_sale['행정동_코드'].isnull()]['상권_코드_명'].unique()

array(['서울 성북구 미아삼거리역_2'], dtype=object)

In [17]:
area[area['상권_코드'] == 1001252]

Unnamed: 0,상권_구분_코드,상권_구분_코드_명,상권_코드,상권_코드_명,x좌표값,y좌표값,시군구_코드,행정동_코드,기준_년월_코드,geometry
1227,D,발달상권,1001252,서울 강북구 미아삼거리역_2,202544,457073,11290,11290685,201810,"MULTIPOLYGON (((202667.740 456954.570, 202675...."


- 두 데이터를 합친 결과에서 행정동 코드가 null값인 지역은 '상권_코드_명'이 서로 일치하지 않아 병합되지 않았기 때문에 null값으로 처리됐음
- '상권_코드'는 '1001252'로 일치하고 미아삼거리역을 검색했을 때 강북구로 나오기 때문에 coffee_sale의 '상권_코드_명'을 성북구에서 강북구로 변경해주고 행정동 코드(11290685)를 추가해줌

In [18]:
# Rows that still lack a 행정동_코드 did not match `area` on name; give them the name `area` uses
unmatched = coffee_sale['행정동_코드'].isnull()
coffee_sale.loc[unmatched, '상권_코드_명'] = '서울 강북구 미아삼거리역_2'

In [19]:
# Fill the missing 행정동_코드 with the code listed for this district in `area`
unmatched = coffee_sale['행정동_코드'].isnull()
coffee_sale.loc[unmatched, '행정동_코드'] = 11290685

In [20]:
coffee_sale.isnull().sum()

기준_년_코드            0
기준_분기_코드           0
상권_구분_코드           0
상권_구분_코드_명         0
상권_코드              0
                  ..
연령대_40_매출_건수       0
연령대_50_매출_건수       0
연령대_60_이상_매출_건수    0
점포수                0
행정동_코드             0
Length: 81, dtype: int64

In [21]:
coffee_sale.shape

(34201, 81)

In [22]:
coffee_sale['행정동_코드'] = coffee_sale['행정동_코드'].astype('int')

In [23]:
coffee_sale = pd.merge(coffee_sale, code, how='left', left_on='행정동_코드', right_on='행자부행정동코드')

In [24]:
coffee_sale.isnull().sum()

기준_년_코드       0
기준_분기_코드      0
상권_구분_코드      0
상권_구분_코드_명    0
상권_코드         0
             ..
통계청행정동코드      0
행자부행정동코드      0
시도명           0
시군구명          0
행정동명          0
Length: 86, dtype: int64

- 결측치 하나도 없음

In [25]:
coffee_sale.head(2)

Unnamed: 0,기준_년_코드,기준_분기_코드,상권_구분_코드,상권_구분_코드_명,상권_코드,상권_코드_명,서비스_업종_코드,서비스_업종_코드_명,분기당_매출_금액,분기당_매출_건수,...,연령대_40_매출_건수,연령대_50_매출_건수,연령대_60_이상_매출_건수,점포수,행정동_코드,통계청행정동코드,행자부행정동코드,시도명,시군구명,행정동명
0,2014,1,A,골목상권,1000393,갈현로7길,CS100010,커피-음료,3730959.0,435,...,99,37,51,1,11380625,1112074,11380625,서울,은평구,역촌동
1,2014,1,A,골목상권,1000271,동소문로2길,CS100010,커피-음료,41289452.0,5207,...,701,398,215,3,11290555,1108082,11290555,서울,성북구,삼선동


In [287]:
coffee_sale.to_csv('./dataset/월 평균 매출액.csv', index=False)

## 유동인구
- https://www.bigdata-policing.kr/policy/contents/policy-dataSearch.do?schM=View&dbiIdx=91
- 2017년부터 2020년 데이터

In [26]:
# Download the dataset description page so its column names can be scraped
url = "https://www.bigdata-policing.kr/policy/contents/policy-dataSearch.do?schM=View&dbiIdx=91"
# A timeout keeps a stalled server from hanging the kernel, and raise_for_status
# stops here on an HTTP error instead of silently parsing an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

In [27]:
# Collect the text of every <td> cell with class "koname" on the page
column = [cell.text for cell in soup.find_all("td", "koname")]

In [28]:
people = pd.read_csv('./data/길단위 상존인구.csv')

In [289]:
people.to_csv('./dataset/유동인구.csv', index=False)

## 직장인구
- https://data.seoul.go.kr/dataList/OA-15569/S/1/datasetView.do
- 2016년부터 2021년 데이터
- 행정동명이 없어서 매출액 데이터와 똑같은 작업(상권 영역·행정동 코드 병합)을 해줌

In [29]:
def get_data(startNum, endNum, year):
    """Fetch one page of the VwsmTrdarWrcPopltnQq service from the Seoul Open API.

    Parameters
    ----------
    startNum, endNum : int
        First and last row positions of the requested page, placed in the URL path.
    year : int
        Year placed in the last URL segment.

    Returns
    -------
    dict
        The decoded JSON body of the response.

    Raises
    ------
    requests.HTTPError
        If the server answers with an HTTP error status.
    requests.Timeout
        If no response arrives within the timeout.
    """
    import os

    # NOTE(review): the literal key is kept only as a fallback so the notebook
    # still runs as before; set SEOUL_OPENAPI_KEY, then remove and rotate the
    # key that is committed here.
    api_key = os.environ.get('SEOUL_OPENAPI_KEY', '554262655973756d38377443694869')
    url = "http://openapi.seoul.go.kr:8088/{}/json/VwsmTrdarWrcPopltnQq/{}/{}/{}".format(api_key, startNum, endNum, year)
    # Without a timeout a stalled connection would block the kernel indefinitely
    res = requests.get(url, timeout=60)
    res.raise_for_status()
    return res.json()

In [30]:
work_people = []

# Number of rows requested per API call
PAGE_SIZE = 1000

for i in tqdm([2016, 2017, 2018, 2019, 2020, 2021]):
    startNum = 1
    # Page until the year is exhausted. The previous fixed cap (rows 1-5000)
    # silently dropped any rows beyond it and kept requesting pages after the
    # data had ended.
    while True:
        data = get_data(startNum, startNum + PAGE_SIZE - 1, i)
        if 'VwsmTrdarWrcPopltnQq' not in data:
            # No service payload in the response: nothing more to read for this year
            break
        rows = data['VwsmTrdarWrcPopltnQq']['row']
        work_people.extend(rows)
        if len(rows) < PAGE_SIZE:
            # A short page is the last page
            break
        startNum += PAGE_SIZE

100%|████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:04<00:00,  1.20it/s]


In [31]:
work_people[0]

{'STDR_YY_CD': '2016',
 'STDR_QU_CD': '1',
 'TRDAR_SE_CD': 'A',
 'TRDAR_SE_CD_NM': '골목상권',
 'TRDAR_CD': '1000001',
 'TRDAR_CD_NM': '계동길',
 'TOT_WRC_POPLTN_CO': 885.0,
 'ML_WRC_POPLTN_CO': 398.0,
 'FML_WRC_POPLTN_CO': 487.0,
 'AGRDE_10_WRC_POPLTN_CO': 11.0,
 'AGRDE_20_WRC_POPLTN_CO': 225.0,
 'AGRDE_30_WRC_POPLTN_CO': 267.0,
 'AGRDE_40_WRC_POPLTN_CO': 192.0,
 'AGRDE_50_WRC_POPLTN_CO': 130.0,
 'AGRDE_60_ABOVE_WRC_POPLTN_CO': 60.0,
 'MAG_10_WRC_POPLTN_CO': 8.0,
 'MAG_20_WRC_POPLTN_CO': 69.0,
 'MAG_30_WRC_POPLTN_CO': 98.0,
 'MAG_40_WRC_POPLTN_CO': 100.0,
 'MAG_50_WRC_POPLTN_CO': 83.0,
 'MAG_60_ABOVE_WRC_POPLTN_CO': 40.0,
 'FAG_10_WRC_POPLTN_CO': 3.0,
 'FAG_20_WRC_POPLTN_CO': 156.0,
 'FAG_30_WRC_POPLTN_CO': 169.0,
 'FAG_40_WRC_POPLTN_CO': 92.0,
 'FAG_50_WRC_POPLTN_CO': 47.0,
 'FAG_60_ABOVE_WRC_POPLTN_CO': 20.0}

In [32]:
def create_df(each):
    """Turn one workplace-population API record into a one-row DataFrame.

    Parameters
    ----------
    each : dict
        A single record carrying every API field listed in the mapping below.

    Returns
    -------
    pandas.DataFrame
        One row whose columns are the Korean labels, in mapping order.

    Raises
    ------
    KeyError
        If the record lacks any of the mapped fields.
    """
    # API field name -> Korean column label; insertion order fixes the column order
    labels = {
        'STDR_YY_CD': "기준_년_코드",
        'STDR_QU_CD': "기준_분기_코드",
        'TRDAR_SE_CD': "상권_구분_코드",
        'TRDAR_SE_CD_NM': "상권_구분_코드_명",
        'TRDAR_CD': "상권_코드",
        'TRDAR_CD_NM': "상권_코드_명",
        'TOT_WRC_POPLTN_CO': "총_직장_인구_수",
        'ML_WRC_POPLTN_CO': "남성_직장_인구_수",
        'FML_WRC_POPLTN_CO': "여성_직장_인구_수",
        'AGRDE_10_WRC_POPLTN_CO': "연령대_10_직장_인구_수",
        'AGRDE_20_WRC_POPLTN_CO': "연령대_20_직장_인구_수",
        'AGRDE_30_WRC_POPLTN_CO': "연령대_30_직장_인구_수",
        'AGRDE_40_WRC_POPLTN_CO': "연령대_40_직장_인구_수",
        'AGRDE_50_WRC_POPLTN_CO': "연령대_50_직장_인구_수",
        'AGRDE_60_ABOVE_WRC_POPLTN_CO': "연령대_60_이상_직장_인구_수",
        'MAG_10_WRC_POPLTN_CO': "남성연령대_10_직장_인구_수",
        'MAG_20_WRC_POPLTN_CO': "남성연령대_20_직장_인구_수",
        'MAG_30_WRC_POPLTN_CO': "남성연령대_30_직장_인구_수",
        'MAG_40_WRC_POPLTN_CO': "남성연령대_40_직장_인구_수",
        'MAG_50_WRC_POPLTN_CO': "남성연령대_50_직장_인구_수",
        'MAG_60_ABOVE_WRC_POPLTN_CO': "남성연령대_60_이상_직장_인구_수",
        'FAG_10_WRC_POPLTN_CO': "여성연령대_10_직장_인구_수",
        'FAG_20_WRC_POPLTN_CO': "여성연령대_20_직장_인구_수",
        'FAG_30_WRC_POPLTN_CO': "여성연령대_30_직장_인구_수",
        'FAG_40_WRC_POPLTN_CO': "여성연령대_40_직장_인구_수",
        'FAG_50_WRC_POPLTN_CO': "여성연령대_50_직장_인구_수",
        'FAG_60_ABOVE_WRC_POPLTN_CO': "여성연령대_60_이상_직장_인구_수",
    }
    row = {label: [each[field]] for field, label in labels.items()}
    return pd.DataFrame(row, columns=list(labels.values()))

In [33]:
# Convert every fetched record into a one-row frame and stack them
result = pd.concat([create_df(record) for record in tqdm(work_people)])

100%|███████████████████████████████████████████████████████████████████████████| 27950/27950 [00:35<00:00, 784.35it/s]


In [34]:
work_people = result

In [35]:
work_people['상권_코드'] = work_people['상권_코드'].astype('int')

In [36]:
work_people = pd.merge(work_people, area[['상권_코드', '상권_구분_코드_명', '상권_코드_명', '행정동_코드']], how='left', on=['상권_코드', '상권_구분_코드_명', '상권_코드_명'])

In [37]:
work_people[work_people['행정동_코드'].isnull()]['상권_코드_명'].unique()

array(['서울 성북구 미아삼거리역_2'], dtype=object)

In [38]:
# Rows without a 행정동_코드 did not match `area` on name; give them the name `area` uses
unmatched = work_people['행정동_코드'].isnull()
work_people.loc[unmatched, '상권_코드_명'] = '서울 강북구 미아삼거리역_2'

In [39]:
# Fill the missing 행정동_코드 with the code listed for this district in `area`
unmatched = work_people['행정동_코드'].isnull()
work_people.loc[unmatched, '행정동_코드'] = 11290685

In [40]:
work_people['행정동_코드'] = work_people['행정동_코드'].astype('int')

In [41]:
work_people = pd.merge(work_people, code, how='left', left_on='행정동_코드', right_on='행자부행정동코드')

In [290]:
work_people.to_csv('./dataset/직장인구.csv', index=False)

## 생활인구(주거인구)
- https://data.seoul.go.kr/dataList/OA-15568/S/1/datasetView.do

In [296]:
def get_data(startNum, endNum, year):
    """Fetch one page of the VwsmTrdarFlpopQq service from the Seoul Open API.

    Parameters
    ----------
    startNum, endNum : int
        First and last row positions of the requested page, placed in the URL path.
    year : int
        Year placed in the last URL segment.

    Returns
    -------
    dict
        The decoded JSON body of the response.

    Raises
    ------
    requests.HTTPError
        If the server answers with an HTTP error status.
    requests.Timeout
        If no response arrives within the timeout.
    """
    import os

    # NOTE(review): the literal key is kept only as a fallback so the notebook
    # still runs as before; set SEOUL_OPENAPI_KEY, then remove and rotate the
    # key that is committed here.
    api_key = os.environ.get('SEOUL_OPENAPI_KEY', '554262655973756d38377443694869')
    url = "http://openapi.seoul.go.kr:8088/{}/json/VwsmTrdarFlpopQq/{}/{}/{}".format(api_key, startNum, endNum, year)
    # Without a timeout a stalled connection would block the kernel indefinitely
    res = requests.get(url, timeout=60)
    res.raise_for_status()
    return res.json()

In [316]:
live = []

# Number of rows requested per API call
PAGE_SIZE = 1000

for i in tqdm([2016, 2017, 2018, 2019, 2020, 2021]):
    startNum = 1
    # Page until the year is exhausted. The previous fixed cap (rows 1-6000)
    # silently dropped any rows beyond it and kept requesting pages after the
    # data had ended.
    while True:
        data = get_data(startNum, startNum + PAGE_SIZE - 1, i)
        if 'VwsmTrdarFlpopQq' not in data:
            # No service payload in the response: nothing more to read for this year
            break
        rows = data['VwsmTrdarFlpopQq']['row']
        live.extend(rows)
        if len(rows) < PAGE_SIZE:
            # A short page is the last page
            break
        startNum += PAGE_SIZE

100%|████████████████████████████████████████████████████████████████████████████████████| 6/6 [02:33<00:00, 25.58s/it]


In [330]:
def create_df(each):
    """Turn one population API record (FLPOP fields) into a one-row DataFrame.

    The previous column list named "시간대_5_생활인구_수" twice and left out
    "시간대_6_생활인구_수", so the sixth time-zone value never reached the frame.
    The column order is now derived from the field mapping itself, which keeps
    the two from drifting apart.

    Parameters
    ----------
    each : dict
        A single record carrying every API field listed in the mapping below.

    Returns
    -------
    pandas.DataFrame
        One row with 28 uniquely named columns, in mapping order.

    Raises
    ------
    KeyError
        If the record lacks any of the mapped fields.
    """
    # API field name -> Korean column label; insertion order fixes the column order
    labels = {
        'STDR_YY_CD': "기준_년_코드",
        'STDR_QU_CD': "기준_분기_코드",
        'TRDAR_SE_CD': "상권_구분_코드",
        'TRDAR_SE_CD_NM': "상권_구분_코드_명",
        'TRDAR_CD': "상권_코드",
        'TRDAR_CD_NM': "상권_코드_명",
        'TOT_FLPOP_CO': "총_생활인구_수",
        'ML_FLPOP_CO': "남성_생활인구_수",
        'FML_FLPOP_CO': "여성_생활인구_수",
        'AGRDE_10_FLPOP_CO': "연령대_10_생활인구_수",
        'AGRDE_20_FLPOP_CO': "연령대_20_생활인구_수",
        'AGRDE_30_FLPOP_CO': "연령대_30_생활인구_수",
        'AGRDE_40_FLPOP_CO': "연령대_40_생활인구_수",
        'AGRDE_50_FLPOP_CO': "연령대_50_생활인구_수",
        'AGRDE_60_ABOVE_FLPOP_CO': "연령대_60_이상_생활인구_수",
        'TMZON_1_FLPOP_CO': "시간대_1_생활인구_수",
        'TMZON_2_FLPOP_CO': "시간대_2_생활인구_수",
        'TMZON_3_FLPOP_CO': "시간대_3_생활인구_수",
        'TMZON_4_FLPOP_CO': "시간대_4_생활인구_수",
        'TMZON_5_FLPOP_CO': "시간대_5_생활인구_수",
        'TMZON_6_FLPOP_CO': "시간대_6_생활인구_수",
        'MON_FLPOP_CO': "월요일_생활인구_수",
        'TUES_FLPOP_CO': "화요일_생활인구_수",
        'WED_FLPOP_CO': "수요일_생활인구_수",
        'THUR_FLPOP_CO': "목요일_생활인구_수",
        'FRI_FLPOP_CO': "금요일_생활인구_수",
        'SAT_FLPOP_CO': "토요일_생활인구_수",
        'SUN_FLPOP_CO': "일요일_생활인구_수",
    }
    row = {label: [each[field]] for field, label in labels.items()}
    return pd.DataFrame(row, columns=list(labels.values()))

In [331]:
# Convert every fetched record into a one-row frame and stack them
result = pd.concat([create_df(record) for record in tqdm(live)])

100%|███████████████████████████████████████████████████████████████████████████| 32901/32901 [00:48<00:00, 682.23it/s]


In [334]:
live = result

In [341]:
live.reset_index(drop=True, inplace=True)

In [342]:
# Gather every 상권_코드 value that is not an int and does not consist of digits only
idx = [value for value in live['상권_코드'] if type(value) != int and not value.isdigit()]

In [343]:
# Row labels whose 상권_코드 is one of the non-numeric values collected in `idx`.
# A vectorised membership test replaces the per-row .loc lookup against a list;
# the frame's index was reset to 0..n-1 beforehand, so the labels returned here
# are the same integers the loop over range(len(live)) produced.
remove = live.index[live['상권_코드'].isin(idx)].tolist()

In [345]:
live.drop(remove, axis=0, inplace=True)

In [346]:
live.reset_index(drop=True, inplace=True)

In [349]:
live['상권_코드'] = live['상권_코드'].astype('int')
live = pd.merge(live, area[['상권_코드', '상권_구분_코드_명', '상권_코드_명', '행정동_코드']], how='left', on=['상권_코드', '상권_구분_코드_명', '상권_코드_명'])

In [353]:
# Rows without a 행정동_코드 did not match `area` on name; give them the name `area` uses
unmatched = live['행정동_코드'].isnull()
live.loc[unmatched, '상권_코드_명'] = '서울 강북구 미아삼거리역_2'

In [354]:
# Fill the missing 행정동_코드 with the code listed for this district in `area`
unmatched = live['행정동_코드'].isnull()
live.loc[unmatched, '행정동_코드'] = 11290685

In [356]:
live['행정동_코드'] = live['행정동_코드'].astype('int')
live = pd.merge(live, code, how='left', left_on='행정동_코드', right_on='행자부행정동코드')

In [358]:
live.to_csv('./dataset/생활인구.csv', index=False)

## 가구 수, 소득분위
- https://data.seoul.go.kr/dataList/OA-15571/S/1/datasetView.do
- 2016년부터 2021년 데이터
- 행정동명 추가 작업

In [42]:
def get_data(startNum, endNum, year):
    """Fetch one page of the VwsmTrdhlNcmCnsmpQq service from the Seoul Open API.

    Parameters
    ----------
    startNum, endNum : int
        First and last row positions of the requested page, placed in the URL path.
    year : int
        Year placed in the last URL segment.

    Returns
    -------
    dict
        The decoded JSON body of the response.

    Raises
    ------
    requests.HTTPError
        If the server answers with an HTTP error status.
    requests.Timeout
        If no response arrives within the timeout.
    """
    import os

    # NOTE(review): the literal key is kept only as a fallback so the notebook
    # still runs as before; set SEOUL_OPENAPI_KEY, then remove and rotate the
    # key that is committed here.
    api_key = os.environ.get('SEOUL_OPENAPI_KEY', '554262655973756d38377443694869')
    url = "http://openapi.seoul.go.kr:8088/{}/json/VwsmTrdhlNcmCnsmpQq/{}/{}/{}".format(api_key, startNum, endNum, year)
    # Without a timeout a stalled connection would block the kernel indefinitely
    res = requests.get(url, timeout=60)
    res.raise_for_status()
    return res.json()

In [43]:
income = []

# For every year request the same four 1000-row pages (rows 1-4000) and keep
# the rows of each response that carries the service payload.
# NOTE(review): rows beyond 4000 in a year are never requested — confirm that no
# year exceeds that. If the cap is lifted, check the later astype('int') on
# 행정동_코드, which has no null patch in this section.
for i in tqdm([2016, 2017, 2018, 2019, 2020, 2021]):
    for page_start in range(1, 4001, 1000):
        data = get_data(page_start, page_start + 999, i)
        if 'VwsmTrdhlNcmCnsmpQq' in data:
            income.extend(data['VwsmTrdhlNcmCnsmpQq']['row'])

100%|████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:03<00:00,  1.75it/s]


In [44]:
income[0]

{'STDR_YY_CD': '2016',
 'STDR_QU_CD': '1',
 'TRDAR_SE_CD': 'A',
 'TRDAR_SE_CD_NM': '골목상권',
 'TRDAR_CD': '1000001',
 'TRDAR_CD_NM': '계동길',
 'MT_AVRG_INCOME_AMT': 3855285.0,
 'INCOME_SCTN_CD': '08',
 'EXPNDTR_TOTAMT': 3119544685.0,
 'FDSTFFS_EXPNDTR_TOTAMT': 803203962.0,
 'CLTHS_FTWR_EXPNDTR_TOTAMT': 381050033.0,
 'LVSPL_EXPNDTR_TOTAMT': 232647671.0,
 'MCP_EXPNDTR_TOTAMT': 413106715.0,
 'TRNSPORT_EXPNDTR_TOTAMT': 536624114.0,
 'LSR_EXPNDTR_TOTAMT': 118473459.0,
 'CLTUR_EXPNDTR_TOTAMT': 91926217.0,
 'EDC_EXPNDTR_TOTAMT': 408295452.0,
 'PLESR_EXPNDTR_TOTAMT': 134217062.0}

In [45]:
def create_df(each):
    """Turn one income/expenditure API record into a one-row DataFrame.

    Parameters
    ----------
    each : dict
        A single record carrying every API field listed in the mapping below.

    Returns
    -------
    pandas.DataFrame
        One row whose columns are the Korean labels, in mapping order.

    Raises
    ------
    KeyError
        If the record lacks any of the mapped fields.
    """
    # API field name -> Korean column label; insertion order fixes the column order
    labels = {
        'STDR_YY_CD': "기준_년_코드",
        'STDR_QU_CD': "기준_분기_코드",
        'TRDAR_SE_CD': "상권_구분_코드",
        'TRDAR_SE_CD_NM': "상권_구분_코드_명",
        'TRDAR_CD': "상권_코드",
        'TRDAR_CD_NM': "상권_코드_명",
        'MT_AVRG_INCOME_AMT': "월_평균_소득_금액",
        'INCOME_SCTN_CD': "소득_구간_코드",
        'EXPNDTR_TOTAMT': "지출_총금액",
        'FDSTFFS_EXPNDTR_TOTAMT': "식료품_지출_총금액",
        'CLTHS_FTWR_EXPNDTR_TOTAMT': "의류_신발_지출_총금액",
        'LVSPL_EXPNDTR_TOTAMT': "생활용품_지출_총금액",
        'MCP_EXPNDTR_TOTAMT': "의료비_지출_총금액",
        'TRNSPORT_EXPNDTR_TOTAMT': "교통_지출_총금액",
        'LSR_EXPNDTR_TOTAMT': "여가_지출_총금액",
        'CLTUR_EXPNDTR_TOTAMT': "문화_지출_총금액",
        'EDC_EXPNDTR_TOTAMT': "교육_지출_총금액",
        'PLESR_EXPNDTR_TOTAMT': "유흥_지출_총금액",
    }
    row = {label: [each[field]] for field, label in labels.items()}
    return pd.DataFrame(row, columns=list(labels.values()))

In [46]:
# Convert every fetched record into a one-row frame and stack them
result = pd.concat([create_df(record) for record in tqdm(income)])

100%|███████████████████████████████████████████████████████████████████████████| 22020/22020 [00:22<00:00, 961.07it/s]


In [47]:
income = result

In [48]:
income['상권_코드'] = income['상권_코드'].astype('int')
income = pd.merge(income, area[['상권_코드', '상권_구분_코드_명', '상권_코드_명', '행정동_코드']], how='left', on=['상권_코드', '상권_구분_코드_명', '상권_코드_명'])

In [49]:
income['행정동_코드'] = income['행정동_코드'].astype('int')
income = pd.merge(income, code, how='left', left_on='행정동_코드', right_on='행자부행정동코드')

In [291]:
income.to_csv('./dataset/소득분위.csv', index=False)

## 주차장 수
- https://data.seoul.go.kr/dataList/10152/S/2/datasetView.do
- 2016년부터 2020년 데이터

In [50]:
# Flat labels (31 in total) assigned to the columns of each yearly parking workbook.
# NOTE(review): several labels repeat ('계_개소', '계_면수', '시영_개소', '민영_개소', ...),
# so the resulting frame has duplicate column names. Presumably each repeat
# belongs to a different parking category in the workbook's multi-row header —
# confirm, and consider prefixing the labels with that category.
column = ['기간','자치구','동','계_개소','계_면수','공영_개소','공영_면수',
          '민영_개소','민영_면수','계_개소','계_면수','시영_개소','시영_면수',
          '구영_개소','구영_면수','계_개소','계_면수','시영_개소','시영_면수',
          '구영_개소','구영_면수','민영_개소','민영_면수','계_개소','계_면수',
          '일반주택_개소','일반주택_면수','공동주택_개소','공동주택_면수','일반건축물_개소','일반건축물_면수']

In [51]:
# Read each year's workbook (header=3), apply the flat labels, then stack the years
parking_frames = []
for year in tqdm(range(2016, 2021)):
    yearly = pd.read_excel('./data/서울시 주차장 (동별) 통계_{}.xls'.format(year), header=3)
    yearly.columns = column
    parking_frames.append(yearly)
parking = pd.concat(parking_frames).reset_index(drop=True)

100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 23.60it/s]


In [52]:
parking.head(2)

Unnamed: 0,기간,자치구,동,계_개소,계_면수,공영_개소,공영_면수,민영_개소,민영_면수,계_개소.1,...,민영_개소.1,민영_면수.1,계_개소.2,계_면수.1,일반주택_개소,일반주택_면수,공동주택_개소,공동주택_면수,일반건축물_개소,일반건축물_면수
0,2016,종로구,소계,7224,78985,184,6592,7040,72393,151,...,71,2533,6969,69860,2864,5056,1546,19530,2559,45274
1,2016,종로구,사직동,371,12684,17,1543,354,11141,13,...,7,274,347,10867,92,331,68,2428,187,8108


In [68]:
len(parking['동'].unique())

426

In [292]:
parking.to_csv('./dataset/주차장수.csv', index=False)

## 집객 시설(은행, 병원, 학교, 버스 정류장, 지하철 등)
- https://data.seoul.go.kr/dataList/OA-15580/S/1/datasetView.do
- 2016년부터 2021년 데이터
- 행정동명 처리

In [53]:
def get_data(startNum, endNum, year):
    """Fetch one page of the VwsmTrdarFcltyQq service from the Seoul Open API.

    Parameters
    ----------
    startNum, endNum : int
        First and last row positions of the requested page, placed in the URL path.
    year : int
        Year placed in the last URL segment.

    Returns
    -------
    dict
        The decoded JSON body of the response.

    Raises
    ------
    requests.HTTPError
        If the server answers with an HTTP error status.
    requests.Timeout
        If no response arrives within the timeout.
    """
    import os

    # NOTE(review): the literal key is kept only as a fallback so the notebook
    # still runs as before; set SEOUL_OPENAPI_KEY, then remove and rotate the
    # key that is committed here.
    api_key = os.environ.get('SEOUL_OPENAPI_KEY', '554262655973756d38377443694869')
    url = "http://openapi.seoul.go.kr:8088/{}/json/VwsmTrdarFcltyQq/{}/{}/{}".format(api_key, startNum, endNum, year)
    # Without a timeout a stalled connection would block the kernel indefinitely
    res = requests.get(url, timeout=60)
    res.raise_for_status()
    return res.json()

In [54]:
facility = []

# Number of rows requested per API call
PAGE_SIZE = 1000

for i in tqdm([2016, 2017, 2018, 2019, 2020, 2021]):
    startNum = 1
    # Page until the year is exhausted. The previous fixed cap (rows 1-6000)
    # silently dropped any rows beyond it and kept requesting pages after the
    # data had ended.
    while True:
        data = get_data(startNum, startNum + PAGE_SIZE - 1, i)
        if 'VwsmTrdarFcltyQq' not in data:
            # No service payload in the response: nothing more to read for this year
            break
        rows = data['VwsmTrdarFcltyQq']['row']
        facility.extend(rows)
        if len(rows) < PAGE_SIZE:
            # A short page is the last page
            break
        startNum += PAGE_SIZE

100%|████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:04<00:00,  1.27it/s]


In [55]:
def create_df(each):
    """Turn one facility-count API record into a one-row DataFrame.

    Parameters
    ----------
    each : dict
        A single record carrying every API field listed in the mapping below.

    Returns
    -------
    pandas.DataFrame
        One row whose columns are the Korean labels, in mapping order
        (the quarter code comes last).

    Raises
    ------
    KeyError
        If the record lacks any of the mapped fields.
    """
    # API field name -> Korean column label; insertion order fixes the column order
    labels = {
        'STDR_YY_CD': "기준_년_코드",
        'TRDAR_SE_CD': "상권_구분_코드",
        'TRDAR_SE_CD_NM': "상권_구분_코드_명",
        'TRDAR_CD': "상권_코드",
        'TRDAR_CD_NM': "상권_코드_명",
        'VIATR_FCLTY_CO': "집객시설_수",
        'PBLOFC_CO': "관공서_수",
        'BANK_CO': "은행_수",
        'GEHSPT_CO': "종합병원_수",
        'GNRL_HSPTL_CO': "일반_병원_수",
        'PARMACY_CO': "약국_수",
        'KNDRGR_CO': "유치원_수",
        'ELESCH_CO': "초등학교_수",
        'MSKUL_CO': "중학교_수",
        'HGSCHL_CO': "고등학교_수",
        'UNIV_CO': "대학교_수",
        'DRTS_CO': "백화점_수",
        'SUPMK_CO': "슈퍼마켓_수",
        'THEAT_CO': "극장_수",
        'STAYNG_FCLTY_CO': "숙박_시설_수",
        'ARPRT_CO': "공항_수",
        'RLROAD_STATN_CO': "철도_역_수",
        'BUS_TRMINL_CO': "버스_터미널_수",
        'SUBWAY_STATN_CO': "지하철_역_수",
        'BUS_STTN_CO': "버스_정거장_수",
        'STDR_QU_CD': "기준_분기_코드",
    }
    row = {label: [each[field]] for field, label in labels.items()}
    return pd.DataFrame(row, columns=list(labels.values()))

In [56]:
# Convert every fetched record into a one-row frame and stack them
result = pd.concat([create_df(record) for record in tqdm(facility)])

100%|███████████████████████████████████████████████████████████████████████████| 31791/31791 [00:38<00:00, 817.99it/s]


In [57]:
facility = result

In [58]:
facility['상권_코드'] = facility['상권_코드'].astype('int')
facility = pd.merge(facility, area[['상권_코드', '상권_구분_코드_명', '상권_코드_명', '행정동_코드']], how='left', on=['상권_코드', '상권_구분_코드_명', '상권_코드_명'])

In [59]:
facility[facility['행정동_코드'].isnull()]['상권_코드_명'].unique()

array(['서울 성북구 미아삼거리역_2'], dtype=object)

In [60]:
# Rows without a 행정동_코드 did not match `area` on name: first give them the
# name `area` uses, then fill in the code listed for this district in `area`.
# The mask is recomputed before the second assignment, as in the two-step original.
unmatched = facility['행정동_코드'].isnull()
facility.loc[unmatched, '상권_코드_명'] = '서울 강북구 미아삼거리역_2'
unmatched = facility['행정동_코드'].isnull()
facility.loc[unmatched, '행정동_코드'] = 11290685

In [61]:
facility['행정동_코드'] = facility['행정동_코드'].astype('int')
facility = pd.merge(facility, code, how='left', left_on='행정동_코드', right_on='행자부행정동코드')

In [293]:
facility.to_csv('./dataset/집객시설.csv', index=False)

## 공시지가
- https://data.seoul.go.kr/dataList/OA-1180/F/1/datasetView.do
- 법정동 단위 -> 행정동 mapping 작업

In [62]:
# Read one official-land-price CSV per year (2016-2020) and stack them with a fresh index
land = pd.concat(
    [pd.read_csv('./data/공시지가_{}년.csv'.format(year), encoding='cp949', low_memory=False)
     for year in tqdm(range(2016, 2021))]
).reset_index(drop=True)

100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:08<00:00,  1.77s/it]


In [65]:
land.drop(['토지코드', '필지구분코드', '필지구분명', '본번', '부번', '기준년월'], axis=1, inplace=True)

In [136]:
land = land.drop_duplicates()

In [138]:
land.reset_index(drop=True, inplace=True)

In [255]:
convert_code = pd.read_csv('./data/법정동_행정동_맵핑.csv')

In [256]:
convert_code = convert_code[convert_code['ctpv_nm']=='서울특별시']

In [257]:
convert_code.rename(columns={'lgdng_cd': '법정동코드','ctpv_nm': '시도명','ctgg_nm': '시군구명',
                             'adstrd_nm': '행정동명','adstrd_en_nm': '행정동_영문명',
                             'lgdng_nm': '법정동명','adstrd_cd': '행정동코드'}, inplace=True)

In [258]:
convert_code.drop(['법정동코드', '행정동코드', 'admn_inst_cd', '행정동_영문명'], axis=1, inplace=True)

In [261]:
convert_code[convert_code['법정동명'] == '하월곡동']

Unnamed: 0,시도명,시군구명,행정동명,법정동명
342,서울특별시,성북구,길음제2동,하월곡동
344,서울특별시,성북구,월곡제1동,하월곡동
345,서울특별시,성북구,월곡제2동,하월곡동


In [272]:
# Attach the administrative-dong name to each land record via its legal-dong name.
# NOTE(review): the mapping is one-to-many (the lookup above lists three 행정동 for
# 하월곡동), so this left merge repeats such land rows once per match. The key also
# omits 시군구, so same-named 법정동 in different districts would cross-match —
# confirm whether a 시군구 key and a de-duplication rule are needed.
land = pd.merge(land, convert_code[['행정동명', '법정동명']], how='left', on='법정동명')

In [294]:
land.to_csv('./dataset/공시지가.csv', index=False)

## 폐업률

In [71]:
# Read each year's store file, keep only the '커피-음료' rows, then stack the years
closed_frames = []
for year in tqdm(range(2016, 2021)):
    stores = pd.read_csv('./data/서울시_우리마을가게_상권분석서비스(상권-점포)_{}년.csv'.format(year), encoding='cp949', low_memory=False)
    closed_frames.append(stores[stores['서비스_업종_코드_명'] == '커피-음료'])
closed = pd.concat(closed_frames).reset_index(drop=True)

100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:02<00:00,  1.96it/s]


In [73]:
closed

Unnamed: 0,기준_년_코드,기준_분기_코드,상권_구분_코드,상권_구분_코드_명,상권_코드,상권_코드_명,서비스_업종_코드,서비스_업종_코드_명,점포_수,유사_업종_점포_수,개업_율,개업_점포_수,폐업_률,폐업_점포_수,프랜차이즈_점포_수
0,2016,1,R,전통시장,1001284,서울중앙시장,CS100010,커피-음료,2,2,50,1,0,0,0
1,2016,1,D,발달상권,1001096,신도림역,CS100010,커피-음료,12,29,7,2,7,2,17
2,2016,1,D,발달상권,1001138,압구정 로데오거리_1,CS100010,커피-음료,16,22,0,0,9,2,6
3,2016,1,D,발달상권,1001134,서울 영등포구 여의도역_2,CS100010,커피-음료,26,38,8,3,8,3,12
4,2016,1,A,골목상권,1000741,동작대로33길,CS100010,커피-음료,3,3,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28259,2020,4,A,골목상권,1001000,진황도로47길,CS100010,커피-음료,2,3,0,0,0,0,1
28260,2020,4,A,골목상권,1000989,양재대로133길,CS100010,커피-음료,3,4,0,0,0,0,1
28261,2020,4,A,골목상권,1000980,상암로51길,CS100010,커피-음료,16,19,5,1,5,1,3
28262,2020,4,A,골목상권,1001006,천호대로197길,CS100010,커피-음료,2,3,0,0,0,0,1


In [97]:
# Gather every 상권_코드 value that is not an int and does not consist of digits only
idx = [value for value in closed['상권_코드'] if type(value) != int and not value.isdigit()]

In [198]:
# Row labels whose 상권_코드 is one of the non-numeric values collected in `idx`.
# A vectorised membership test replaces the per-row .loc lookup against a list;
# `closed` was built with a reset 0..n-1 index, so the labels returned here are
# the same integers the loop over range(len(closed)) produced.
remove = closed.index[closed['상권_코드'].isin(idx)].tolist()

In [201]:
closed.drop(remove, axis=0, inplace=True)

In [203]:
closed.reset_index(drop=True, inplace=True)

In [205]:
closed['상권_코드'] = closed['상권_코드'].astype(int)

In [211]:
closed = pd.merge(closed, area[['상권_코드', '상권_구분_코드_명', '상권_코드_명', '행정동_코드']], how='left', on=['상권_코드', '상권_구분_코드_명', '상권_코드_명'])

In [212]:
closed[closed['행정동_코드'].isnull()]['상권_코드_명'].unique()

array(['서울 성북구 미아삼거리역_2'], dtype=object)

In [213]:
# Rows without a 행정동_코드 did not match `area` on name; give them the name `area` uses
unmatched = closed['행정동_코드'].isnull()
closed.loc[unmatched, '상권_코드_명'] = '서울 강북구 미아삼거리역_2'

In [214]:
# Fill the missing 행정동_코드 with the code listed for this district in `area`
unmatched = closed['행정동_코드'].isnull()
closed.loc[unmatched, '행정동_코드'] = 11290685

In [218]:
closed['행정동_코드'] = closed['행정동_코드'].astype('int')
closed = pd.merge(closed, code, how='left', left_on='행정동_코드', right_on='행자부행정동코드')

In [219]:
closed.head(2)

Unnamed: 0,기준_년_코드,기준_분기_코드,상권_구분_코드,상권_구분_코드_명,상권_코드,상권_코드_명,서비스_업종_코드,서비스_업종_코드_명,점포_수,유사_업종_점포_수,...,개업_점포_수,폐업_률,폐업_점포_수,프랜차이즈_점포_수,행정동_코드,통계청행정동코드,행자부행정동코드,시도명,시군구명,행정동명
0,2016,1,R,전통시장,1001284,서울중앙시장,CS100010,커피-음료,2,2,...,1,0,0,0,11140670,1102067,11140670,서울,중구,황학동
1,2016,1,D,발달상권,1001096,신도림역,CS100010,커피-음료,12,29,...,2,7,2,17,11530510,1117051,11530510,서울,구로구,신도림동


In [295]:
closed.to_csv('./dataset/폐업률.csv', index=False)