In [46]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

from bs4 import BeautifulSoup

import pandas as pd

In [56]:
# RESOURCE_URI: 에어코리아 uri
RESOURCE_URI = "https://www.airkorea.or.kr/web/pastSearch"
# LOCAL_ADDRESS: 측정소 위치(학교에서 제일 가까운 곳)
LOCAL_ADDRESS = "경기 수원시 영통구 영통로 217번길 12 영통2동 행정복지센터"
# Features of data
FEATURES = ['DATE', 'PM10', 'PM2.5', 'O3', 'NO2', 'CO', 'SO2']
# Output directory
OUT_DIR = '../datasets/'

In [57]:
def date_mapping(year, month):
    return 2022 - year, month

def format_and_save_data(src, year, month):
    soup = BeautifulSoup(src, 'html.parser')
    rows = soup.find('div', id='realTable').find('table', class_='st_1').find('tbody').find_all('tr')
    data = []

    for row in rows:
        cols = row.find_all('td')
        cols = [ele.text.strip() for ele in cols]
        data.append(cols)

    pd.DataFrame(data=data, columns=FEATURES).to_csv(OUT_DIR + '%04d-%02d.csv' % (year, month), index=False)

def search_data(window=None, year=2021, month=1):
    year_index, month_index = date_mapping(year, month)
    window.find_element(By.ID, 'choice_1').click()
    window.find_element(By.XPATH, f'//*[@id="choice_3"]/option[{year_index}]').click()
    window.find_element(By.XPATH, f'//*[@id="choice_4"]/option[{month_index}]').click()
    window.find_element(By.XPATH, '//*[@id="cont_body"]/div[1]/a[1]').click()
    src = window.page_source
    format_and_save_data(src, year, month)

In [58]:
# Open Chrome Browser
win = webdriver.Chrome('/usr/local/bin/chromedriver')
win.get(RESOURCE_URI)

  win = webdriver.Chrome('/usr/local/bin/chromedriver')


In [59]:
# Save current window as main window
main_win = win.current_window_handle

# Click event for address field
win.find_element(By.ID, 's_condition_input3').click()

# If window has changed for pop up event change window from main to new
for handle in win.window_handles:
    if handle != main_win:
        popup = handle
        win.switch_to.window(popup)

print(f'Successfully switched to "{win.current_url}"')

Successfully switched to "https://www.juso.go.kr/addrlink/addrCoordUrl.do"


In [60]:
# Write local address in address field and click search button
win.find_element(By.ID, 'keyword').send_keys(LOCAL_ADDRESS + Keys.ENTER)
win.find_element(By.ID, 'roadAddrTd1').click()
# Write detail address and click button
win.find_element(By.ID, 'rtAddrDetail').send_keys('공학관')
win.find_element(By.CLASS_NAME, 'btn-bl').click()

# Return to main window
win.switch_to.window(main_win)
print(f'Successfully switched to "{win.current_url}"')

# Search with options
win.find_element(By.CLASS_NAME, 'search').click()

Successfully switched to "https://www.airkorea.or.kr/web/pastSearch"


In [61]:
for y in range(2014, 2022):
    for m in range(1, 13):
        try:
            search_data(win, y, m)
            print(f'{y}-{m} data successfully downloaded!')
        except Exception as e:
            print(f'error generated at year: {y} and month: {m}')
            print(e)

win.quit()

2014-1 data successfully downloaded!
2014-2 data successfully downloaded!
2014-3 data successfully downloaded!
2014-4 data successfully downloaded!
2014-5 data successfully downloaded!
2014-6 data successfully downloaded!
2014-7 data successfully downloaded!
2014-8 data successfully downloaded!
2014-9 data successfully downloaded!
2014-10 data successfully downloaded!
2014-11 data successfully downloaded!
2014-12 data successfully downloaded!
2015-1 data successfully downloaded!
2015-2 data successfully downloaded!
2015-3 data successfully downloaded!
2015-4 data successfully downloaded!
2015-5 data successfully downloaded!
2015-6 data successfully downloaded!
2015-7 data successfully downloaded!
2015-8 data successfully downloaded!
2015-9 data successfully downloaded!
2015-10 data successfully downloaded!
2015-11 data successfully downloaded!
2015-12 data successfully downloaded!
2016-1 data successfully downloaded!
2016-2 data successfully downloaded!
2016-3 data successfully downloa