# 19대 대선 결과 분석

### * Selenium과 Beautiful Soup을 이용한 데이터 획득 준비

In [36]:
import pandas as pd
import numpy as np

import platform
import matplotlib.pyplot as plt

%matplotlib inline

path = "c:/Windows/Fonts/malgun.ttf"
from matplotlib import font_manager, rc
if platform.system() == 'Darwin':
    rc('font', family='AppleGothic')
elif platform.system() == 'Windows':
    font_name = font_manager.FontProperties(fname=path).get_name()
    rc('font', family=font_name)
else:
    print('Unknown system, sorry!')
    
plt.rcParams['axes.unicode_minus'] = False

In [37]:
from selenium import webdriver
import time

###### * 크롬 드라이버를 통해 주소 접속

In [38]:
driver = webdriver.Chrome('driver/chromedriver.exe')
driver.get("http://info.nec.go.kr")

###### * 프레임 이동

In [39]:
driver.switch_to_default_content() # 상위 프레임으로 돌아가는 명령어
driver.switch_to_frame('main') 

  """Entry point for launching an IPython kernel.
  


###### * 19대 대통령 선거 개표 결과 웹에서 확인하기

In [41]:
# 역대 선거 -> 투/개표 클릭
driver.find_element_by_xpath("""//*[@id="header"]/ul[1]/li[2]/a""").click()
driver.find_element_by_xpath("""//*[@id="presubmu"]/li[3]/a""").click()

In [42]:
# 개표현황 클릭하기
make_xpath = """//*[@id="header"]/div[4]/ul/li[6]/a"""
driver.find_element_by_xpath(make_xpath).click()

In [43]:
# 대통령선거 클릭하기
driver.find_element_by_xpath("""//*[@id="electionType1"]""").click()

In [44]:
# 19대 대통령선거 드롭다운 선택하기
driver.find_element_by_xpath("""//*[@id="electionName"]/option[2]""").click()

In [46]:
# 대통령선거 드롭다운 클릭하기
driver.find_element_by_xpath("""//*[@id="electionCode"]/option[2]""").click()

In [47]:
# 시/도 리스트 확인하기
sido_list_raw = driver.find_element_by_xpath("""//*[@id="cityCode"]""")
sido_list = sido_list_raw.find_elements_by_tag_name("option")
sido_names_values = [option.text for option in sido_list]
sido_names_values = sido_names_values[1:] # 서울특별시부터~~
sido_names_values

['서울특별시',
 '부산광역시',
 '대구광역시',
 '인천광역시',
 '광주광역시',
 '대전광역시',
 '울산광역시',
 '세종특별자치시',
 '경기도',
 '강원도',
 '충청북도',
 '충청남도',
 '전라북도',
 '전라남도',
 '경상북도',
 '경상남도',
 '제주특별자치도']

In [48]:
# 웹에서 서울특별시 선택하기
element = driver.find_element_by_id("cityCode")
element.send_keys(sido_names_values[0]) # 0 : 서울, 1 : 부산...

In [49]:
# 시/군 리스트 확인하기
sigun_list_raw = driver.find_element_by_xpath("""//*[@id="townCode"]""")
sigun_list = sigun_list_raw.find_elements_by_tag_name("option")
sigun_names_values = [option.text for option in sigun_list]
sigun_names_values = sigun_names_values[1:]
sigun_names_values
# 서울특별시의 군 리스트가 출력된다.

['종로구',
 '중구',
 '용산구',
 '성동구',
 '광진구',
 '동대문구',
 '중랑구',
 '성북구',
 '강북구',
 '도봉구',
 '노원구',
 '은평구',
 '서대문구',
 '마포구',
 '양천구',
 '강서구',
 '구로구',
 '금천구',
 '영등포구',
 '동작구',
 '관악구',
 '서초구',
 '강남구',
 '송파구',
 '강동구']

In [50]:
# 종로구 선택
element = driver.find_element_by_id("townCode")
element.send_keys(sigun_names_values[0]) # 0 : 종로구, 1 : 중구...

In [51]:
# 검색 버튼 누르기
driver.find_element_by_xpath("""//*[@id="searchBtn"]""").click()

###### * Beautiful Soup으로 데이터 추출하기

In [52]:
from bs4 import BeautifulSoup

html = driver.page_source
soup = BeautifulSoup(html, 'html.parser')

In [53]:
tmp = soup.find_all('td', 'alignR') # td 태그의 alignR 클래스
tmp[1:5]

[<td class="alignR">102,566</td>,
 <td class="alignR">42,512</td>,
 <td class="alignR">22,325</td>,
 <td class="alignR">22,313</td>]

### * 19대 대선 개표 결과 데이터 획득하기

###### * 광역시도 리스트와 시도 리스트를 얻어서 데이터로 만들기

In [54]:
from tqdm import tqdm_notebook

sido_name_list = []
sigun_name_list = []

for sido_value in tqdm_notebook(sido_names_values):
    element = driver.find_element_by_id("cityCode")
    element.send_keys(sido_value)

    time.sleep(1)
    
    sigun_list_raw = driver.find_element_by_xpath("""//*[@id="townCode"]""")
    sigun_list = sigun_list_raw.find_elements_by_tag_name("option")

    sigun_names_values = [option.text for option in sigun_list]
    sigun_names_values = sigun_names_values[1:]

    for sigun_value in sigun_names_values:
        sido_name_list.append(sido_value)
        sigun_name_list.append(sigun_value)

HBox(children=(IntProgress(value=0, max=17), HTML(value='')))




In [55]:
election_result = pd.DataFrame({'광역시도':sido_name_list, '시군':sigun_name_list})

election_result

Unnamed: 0,광역시도,시군
0,서울특별시,종로구
1,서울특별시,중구
2,서울특별시,용산구
3,서울특별시,성동구
4,서울특별시,광진구
5,서울특별시,동대문구
6,서울특별시,중랑구
7,서울특별시,성북구
8,서울특별시,강북구
9,서울특별시,도봉구


###### * 함수로 득표수 얻는 과정을 만든다. 투표인수, 문재인, 홍준표, 안철수 후보의 득표수 저장

In [56]:
def get_vote_info(n):
    html = driver.page_source
    soup = BeautifulSoup(html, 'lxml') # lxml모듈
    
    tmp = soup.find_all('td', 'alignR')
    tmp_values = [float(tmp_val.get_text().replace(',', '')) for tmp_val in tmp[1:5]]
    # get_text() : 글자만 가져오기
    # replace('찾을값', '바꿀값')
    
    pop[n] = tmp_values[0]
    moon[n] = tmp_values[1]
    hong[n] = tmp_values[2]
    ahn[n] = tmp_values[3]

###### * NaN 처리하기

In [57]:
def fail_procedure(n):
    pop[n] = np.nan
    moon[n] = np.nan
    hong[n] = np.nan
    ahn[n] = np.nan

###### * 최종 결과가 저장될 리스트 생성

In [58]:
pop = [np.nan]*len(election_result)
moon = [np.nan]*len(election_result)
hong = [np.nan]*len(election_result)
ahn = [np.nan]*len(election_result)

len(pop), len(moon), len(hong), len(ahn)

(250, 250, 250, 250)

###### * 각 지역 각 후보자에게 투표한 인원 수 정리

In [59]:
for n in tqdm_notebook(election_result.index):
    try:
        element = driver.find_element_by_id("cityCode")
        element.send_keys(election_result['광역시도'][n])
        
        time.sleep(0.5)
        
        element = driver.find_element_by_id("townCode")
        element.send_keys(election_result['시군'][n])
        
        driver.find_element_by_xpath("""//*[@id="spanSubmit"]/input""").click()
        
        time.sleep(0.5)
        
        get_vote_info(n)
        
    except:
        print('--- Error ---')
        fail_procedure(n)

HBox(children=(IntProgress(value=0, max=250), HTML(value='')))

--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Error ---
--- Er

###### * 위 코드에서 에러가 난 부분만 다시 실행

In [99]:
re_try_index = election_result[election_result['pop'].isnull()].index

for n in tqdm_notebook(re_try_index):
    try:
        element = driver.find_element_by_id("cityCode")
        element.send_keys(election_result['광역시도'][n])
        
        time.sleep(0.5)
        
        element = driver.find_element_by_id("townCode")
        element.send_keys(election_result['시군'][n])
        
        driver.find_element_by_xpath("""//*[@id="spanSubmit"]/input""").click()
        
        time.sleep(0.5)
        
        get_vote_info(n)
        
    except:
        print('--- Error ---')
        fail_procedure(n)

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




###### * 각 지역과 투표 인원 수를 데이터프레임에 저장

In [100]:
election_result['pop'] = pop
election_result['moon'] = moon
election_result['hong'] = hong
election_result['ahn'] = ahn
election_result

Unnamed: 0,광역시도,시군,pop,moon,hong,ahn
0,서울특별시,종로구,102566.0,42512.0,22325.0,22313.0
1,서울특별시,중구,82852.0,34062.0,17901.0,19372.0
2,서울특별시,용산구,148157.0,58081.0,35230.0,32109.0
3,서울특별시,성동구,203175.0,86686.0,40566.0,45674.0
4,서울특별시,광진구,240030.0,105512.0,46368.0,52824.0
5,서울특별시,동대문구,236092.0,98958.0,51631.0,53359.0
6,서울특별시,중랑구,265706.0,111450.0,56545.0,62778.0
7,서울특별시,성북구,295866.0,129263.0,57584.0,66518.0
8,서울특별시,강북구,210614.0,89645.0,42268.0,51669.0
9,서울특별시,도봉구,229233.0,94898.0,47461.0,55600.0


###### * 결과를 csv로 저장

In [102]:
election_result.to_csv('data/05. election_result.csv', encoding='utf-8', sep=',')

### * 각 후보의 득표율과 지역 ID 정리하기

In [2]:
import pandas as pd

election_result = pd.read_csv('data/05. election_result.csv', encoding='utf-8', 
                             index_col=0)
election_result.head()

Unnamed: 0,광역시도,시군,pop,moon,hong,ahn
0,서울특별시,종로구,102566.0,42512.0,22325.0,22313.0
1,서울특별시,중구,82852.0,34062.0,17901.0,19372.0
2,서울특별시,용산구,148157.0,58081.0,35230.0,32109.0
3,서울특별시,성동구,203175.0,86686.0,40566.0,45674.0
4,서울특별시,광진구,240030.0,105512.0,46368.0,52824.0
