# API 활용한 데이터 저장

## 1. 자치구 단위 서울생활 인구 일별 집계표
* [URL](http://data.seoul.go.kr/dataList/OA-15379/S/1/datasetView.do)
* API로만 가져올 수 있는 데이터이므로 종속변수인 배달주문건수 데이터의 기간(2019-07-17 ~ 2020-09-30)만큼 추출

In [1]:
# 필요 라이브러리 import
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import time
from tqdm.notebook import tqdm

In [2]:
# 요청변수
url = 'http://openapi.seoul.go.kr:8088/72504a745a73756e35326176635a42/xml/SPOP_DAILYSUM_JACHI/1/100/'
date_list = pd.date_range('2019-07-17', '2020-09-30').strftime('%Y%m%d')

In [3]:
## API를 통해 데이터 저장하기

data_list = []
len_data = {}

for date in tqdm(date_list):
    url_fin = url + date # 각 날짜별로 데이터 바꿔가며 url 결합
    result = requests.get(url_fin) 
    bs_obj = BeautifulSoup(result.content, "html.parser")
    row_list = bs_obj.find_all('row')
    data_list.extend(row_list)
    
    len_data[date] = len(row_list)
    
    

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=442.0), HTML(value='')))




In [42]:
## 최종 데이터 저장

columns = ['stdr_de_id', 'signgu_code_se', 'signgu_nm', 'tot_lvpop_co', 'lvpop_co',
          'lngtr_stay_frgnr_co', 'srtpd_stay_frgnr_co', 'dail_mxmm_mvmn_lvpop_co',
          'dail_mumm_lvpop_co', 'day_lvpop_co', 'night_lvpop_co', 'dail_mxmm_mvmn_lvpop_co',
          'su_else_inflow_lvpop_co', 'sam_adstrd_mvmn_lvpop_co', 'signgu_mvmn_lvpop_co']

population = pd.DataFrame(columns=columns)

for i, data in tqdm(enumerate(data_list)):
    for column in columns:
        population.loc[i, column] = data.find(column).text

HBox(children=(HTML(value=''), FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0…




In [49]:
population.columns = ['기준일', '시군구코드', '시군구명', '총생활인구수', '내국인생활인구수',
                     '장기체류외국인인구수', '단기체류외국인인구수', '일최대인구수', '일최소인구수',
                     '주간인구수', '야간인구수', '일최대이동인구수', '서울외유입인구수', '동일자치구행정동간이동인구수',
                     '자치구간이동인구수']

In [50]:
population

Unnamed: 0,기준일,시군구코드,시군구명,총생활인구수,내국인생활인구수,장기체류외국인인구수,단기체류외국인인구수,일최대인구수,일최소인구수,주간인구수,야간인구수,일최대이동인구수,서울외유입인구수,동일자치구행정동간이동인구수,자치구간이동인구수
0,20190717,11000,서울시,11047066.27130,10476088.74490,402694.20510,168283.32140,5922652.24620,10703316.45550,11343471.86140,10835347.99270,5922652.24620,1443767.33920,2007540.66260,2471344.24440
1,20190717,11110,종로구,347646.27100,318656.53290,16232.90500,12756.83300,280741.73630,245279.01400,445801.62500,277535.30380,280741.73630,77103.28080,31032.08440,172606.37110
2,20190717,11140,중구,388954.80030,316872.07720,24617.82990,47464.89320,344379.88000,251009.68960,520164.01020,295233.93610,344379.88000,94382.41020,28238.31000,221759.15980
3,20190717,11170,용산구,320144.28800,290097.76260,20526.32610,9520.19930,189644.15720,283575.15860,349216.90930,299378.13000,189644.15720,51772.66770,42762.69170,95108.79780
4,20190717,11200,성동구,354015.03830,337050.22790,15344.48490,1620.32550,191982.93710,336496.73110,364483.55300,346537.52780,191982.93710,37607.98280,59778.12110,94596.83320
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11149,20200930,11620,관악구,488365.64850,461918.38660,23353.69920,3093.56280,180057.74870,464149.28660,473604.91330,498909.03080,180057.74870,41039.39720,88471.92170,50546.42980
11150,20200930,11650,서초구,475951.32730,461837.74690,10975.27890,3138.30150,209652.60460,433756.04630,485802.58920,468914.71160,209652.60460,54485.90890,71317.11270,83849.58300
11151,20200930,11680,강남구,640241.58850,618245.67270,15505.25050,6490.66530,276530.32180,577252.55750,657149.64600,628164.40460,276530.32180,68407.89380,96160.59410,111961.83390
11152,20200930,11710,송파구,705577.82950,687794.79230,14015.56320,3767.47400,274754.85890,678936.67900,693101.08580,714489.78940,274754.85890,71580.75170,134322.03180,68852.07540


In [51]:
population.to_csv('자치구단위서울생활인구일별집계표.csv',encoding='utf-8')

## 2. 서울시 일별 평균 대기오염도 정보
* [URL](http://data.seoul.go.kr/dataList/OA-2218/S/1/datasetView.do)
* 2019년 이후의 데이터는 API로만 제공하므로 (2020-01-01 ~ 2020-09-30)만큼 추출

In [54]:
## API를 활용해 데이터 불러오기

date_list = pd.date_range('20200101', '20201001').strftime('%Y%m%d')

url = 'http://openapi.seoul.go.kr:8088/4979477a4d716b7237334655784f52/xml/DailyAverageAirQuality/1/100/'

data_list = []

for date in tqdm(date_list):
    url_final = url + date
    result = requests.get(url_final)
    bs_obj = BeautifulSoup(result.content, 'html.parser')
    
    data_list.extend(bs_obj.find_all('row'))
    

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=275.0), HTML(value='')))




In [55]:
columns=['msrdt_de', 'msrste_nm', 'no2', 'o3', 'co', 'so2', 'pm10', 'pm25']

pollution = pd.DataFrame(columns=columns)


for i, data in tqdm(enumerate(data_list)):
    for column in columns:
        pollution.loc[i, column] = data.find(column).text
        

HBox(children=(HTML(value=''), FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0…




In [58]:
pollution.columns = ['측정일시', '측정소명', '이산화질소', '오존', '일산화탄소', '아황산가스', '미세먼지', '초미세먼지']

In [61]:
pollution.to_csv('서울시 일별 평균 대기오염도 정보.csv', encoding='utf-8')