In [46]:
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

from bs4 import BeautifulSoup

import pandas as pd

In [33]:
# RESOURCE_URI: 에어코리아 uri
RESOURCE_URI = "https://www.airkorea.or.kr/web/pastSearch"
# LOCAL_ADDRESS: 측정소 위치(학교에서 제일 가까운 곳)
LOCAL_ADDRESS = "경기 수원시 영통구 영통로 217번길 12 영통2동 행정복지센터"
# Features of data
FEATURES = ['DATE', 'PM10', 'PM2.5', 'O3', 'NO2', 'CO', 'SO2']
# Output directory
OUT_DIR = '../datasets/air_pollution_suwon'

In [78]:
def date_mapping(year, month):
    return 2022 - year, month

def format_and_save_data(src, year, month, save=True):
    soup = BeautifulSoup(src, 'html.parser')
    rows = soup.find('div', id='realTable').find('table', class_='st_1').find('tbody').find_all('tr')
    data = []

    for row in rows:
        cols = row.find_all('td')
        cols = [ele.text.strip() for ele in cols]
        print(cols[0])
        data.append(cols)

    df = pd.DataFrame(data=data, columns=FEATURES)
    print(df.iloc[0])
    if save:
        df.to_csv(OUT_DIR + '%04d-%02d.csv' % (year, month), index=False)
        return df
    else:
        return df

def search_data(window=None, year=2021, month=1, save=True):
    year_index, month_index = date_mapping(year, month)
    window.find_element(By.XPATH, '//*[@id="choice_1"]').click()
    window.find_element(By.XPATH, f'//*[@id="choice_3"]/option[{year_index}]').click()
    window.find_element(By.XPATH, f'//*[@id="choice_4"]/option[{month_index}]').click()
    window.find_element(By.XPATH, '//*[@id="cont_body"]/div[1]/a[1]').click()
    time.sleep(2)
    src = window.page_source
    return format_and_save_data(src, year, month, save)

In [80]:
# Open Chrome Browser
win = webdriver.Chrome('/usr/local/bin/chromedriver')
win.get(RESOURCE_URI)

# Save current window as main window
main_win = win.current_window_handle

# Click event for address field
win.find_element(By.ID, 's_condition_input3').click()

# If window has changed for pop up event change window from main to new
for handle in win.window_handles:
    if handle != main_win:
        popup = handle
        win.switch_to.window(popup)

print(f'Successfully switched to "{win.current_url}"')

# Write local address in address field and click search button
win.find_element(By.ID, 'keyword').send_keys(LOCAL_ADDRESS + Keys.ENTER)
win.find_element(By.ID, 'roadAddrTd1').click()
# Write detail address and click button
win.find_element(By.ID, 'rtAddrDetail').send_keys('공학관')
win.find_element(By.CLASS_NAME, 'btn-bl').click()

# Return to main window
win.switch_to.window(main_win)
print(f'Successfully switched to "{win.current_url}"')

# Search with options
win.find_element(By.CLASS_NAME, 'search').click()

Successfully switched to "https://www.juso.go.kr/addrlink/addrCoordUrl.do"


In [82]:
for y in range(2014, 2022):
    for m in range(1, 13):
        try:
            search_data(win, y, m)
            print(f'{y}-{m} data successfully downloaded!')
        except Exception as e:
            print(f'error generated at year: {y} and month: {m}')
            print(e)

win.quit()

01-01-01
01-01-02
01-01-03
01-01-04
01-01-05
01-01-06
01-01-07
01-01-08
01-01-09
01-01-10
01-01-11
01-01-12
01-01-13
01-01-14
01-01-15
01-01-16
01-01-17
01-01-18
01-01-19
01-01-20
01-01-21
01-01-22
01-01-23
01-01-24
01-02-01
01-02-02
01-02-03
01-02-04
01-02-05
01-02-06
01-02-07
01-02-08
01-02-09
01-02-10
01-02-11
01-02-12
01-02-13
01-02-14
01-02-15
01-02-16
01-02-17
01-02-18
01-02-19
01-02-20
01-02-21
01-02-22
01-02-23
01-02-24
01-03-01
01-03-02
01-03-03
01-03-04
01-03-05
01-03-06
01-03-07
01-03-08
01-03-09
01-03-10
01-03-11
01-03-12
01-03-13
01-03-14
01-03-15
01-03-16
01-03-17
01-03-18
01-03-19
01-03-20
01-03-21
01-03-22
01-03-23
01-03-24
01-04-01
01-04-02
01-04-03
01-04-04
01-04-05
01-04-06
01-04-07
01-04-08
01-04-09
01-04-10
01-04-11
01-04-12
01-04-13
01-04-14
01-04-15
01-04-16
01-04-17
01-04-18
01-04-19
01-04-20
01-04-21
01-04-22
01-04-23
01-04-24
01-05-01
01-05-02
01-05-03
01-05-04
01-05-05
01-05-06
01-05-07
01-05-08
01-05-09
01-05-10
01-05-11
01-05-12
01-05-13
01-05-14
01-05-15
0