# 전체 크롤링 코드

# In [6]:  (notebook cell marker)
import os
from time import sleep

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import ElementNotInteractableException
from selenium.common.exceptions import StaleElementReferenceException
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup

import warnings
warnings.filterwarnings('ignore')

# ---------------------------------------------------------------------------
# Selenium configuration
# ---------------------------------------------------------------------------
chromedriver_path = "C:/chromedriver"

options = webdriver.ChromeOptions()
options.add_argument('headless')
options.add_argument('lang=ko_KR')

# Start the Chrome session that every function below shares through the
# module-level ``driver``.
driver = webdriver.Chrome(
    os.path.join(os.getcwd(), chromedriver_path),
    options=options,
)


def main():
    """Run the Kakao Map crawl for every configured search keyword.

    Opens https://map.kakao.com/ in the module-level ``driver`` and passes
    each keyword in ``place_infos`` to ``search()``. The browser is always
    quit afterwards, including when a search raises, so the session is not
    leaked.
    """
    # Keywords to search for
    place_infos = ['제주도 관광지']

    try:
        driver.implicitly_wait(4)  # wait up to 4 seconds for elements to appear
        driver.get('https://map.kakao.com/')

        for i, place in enumerate(place_infos):
            # Pause for 5 seconds after every four keywords
            if i % 4 == 0 and i != 0:
                sleep(5)
            print("#####", i)
            search(place)
    finally:
        driver.quit()

    print("finish")


def _current_place_items():
    """Return the ``.placelist > .PlaceItem`` nodes of the page currently loaded in ``driver``."""
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    return soup.select('.placelist > .PlaceItem')


def search(place):
    """Search Kakao Map for ``place`` and crawl result pages 1 to 5.

    The keyword is typed into the search box and submitted, the first result
    page is crawled, then the "more" button and the page buttons 2-5 are
    pressed in turn, crawling each page. If a button is missing or cannot be
    interacted with, ``'not found'`` is printed and paging stops. The search
    box is cleared before returning.

    :param place: search keyword typed into the Kakao Map search box
    """
    search_area = driver.find_element(By.XPATH, '//*[@id="search.keyword.query"]')  # search box
    search_area.send_keys(place)
    # Submit the search by sending ENTER to the submit button
    driver.find_element(By.XPATH, '//*[@id="search.keyword.submit"]').send_keys(Keys.ENTER)
    sleep(1)

    # Crawl the places listed on the first result page
    crawling(place, _current_place_items())
    search_area.clear()

    try:
        # The "more" button switches the list to page 2
        driver.find_element(By.XPATH, '//*[@id="info.search.place.more"]').send_keys(Keys.ENTER)
        sleep(1)

        # Pages 2 to 5
        for page_no in range(2, 6):
            page_xpath = '//*[@id="info.search.page.no' + str(page_no) + '"]'
            driver.find_element(By.XPATH, page_xpath).send_keys(Keys.ENTER)
            sleep(1)

            crawling(place, _current_place_items())

    except (NoSuchElementException, ElementNotInteractableException):
        # Fewer result pages than expected: the button is absent or hidden
        print('not found')
    finally:
        search_area.clear()

# def save_to_csv(data, filename="output.csv"):
#     with open(filename, mode="w", encoding="utf-8", newline="") as f:
#         writer = csv.writer(f)
#         writer.writerow(["Place Name", "Address", "Rating"])
#         for row in data:
#             writer.writerow(row)

def crawling(place, place_lists):
    """Open the detail page of every listed place and print its reviews.

    For each item the place name is read from the parsed HTML, an extra text
    field and the detail-page link are located in the live page by position,
    the detail page is opened, and ``extract_review()`` prints the first page
    of reviews. The detail tab is then closed and the driver returns to the
    search window, even if extraction raises.

    :param place: search keyword that produced the list (currently unused)
    :param place_lists: BeautifulSoup ``.PlaceItem`` nodes parsed from the
        result page currently shown in the browser; item ``i`` is assumed to
        correspond to ``li[i + 1]`` of ``#info.search.place.list``
    """
    search_handle = driver.current_window_handle

    for i, item in enumerate(place_lists):
        # NOTE: sponsored entries in the list may shift this index; the
        # original author left that adjustment open.
        name_tag = item.select_one('.head_item > .tit_name > .link_name')
        if name_tag is None:
            # Not a regular place entry; nothing to crawl
            continue
        place_name = name_tag.text

        item_xpath = '//*[@id="info.search.place.list"]/li[' + str(i + 1) + ']'

        # Text of the second <p> under div[5]/div[2] of THIS item (the index
        # used to be hard-coded to li[2]). find_elements returns an empty
        # list instead of raising when the item has no such node.
        matches = driver.find_elements(By.XPATH, item_xpath + '/div[5]/div[2]/p[2]')
        element = matches[0].text if matches else ''

        # Open the detail page; presumably it opens in a new tab -- the code
        # switches to the most recently opened window
        driver.find_element(By.XPATH, item_xpath + '/div[5]/div[4]/a[1]').send_keys(Keys.ENTER)
        driver.switch_to.window(driver.window_handles[-1])
        sleep(1)

        try:
            print('####', place_name + ',' + element)

            # First page of reviews
            extract_review(place_name)
            # TODO: walk the remaining review pages; only the first page is
            # extracted at the moment.
        finally:
            # Never close the search window itself if no new tab was opened
            if driver.current_window_handle != search_handle:
                driver.close()
            driver.switch_to.window(search_handle)


def _star_rating(review):
    """Return the star rating (0-5, one decimal) of one review node, or ``None``.

    The rating is read from the CSS ``width`` percentage in the ``style``
    attribute of the review's ``.inner_star`` element, e.g. ``width:80%;``
    gives 4.0. ``None`` is returned when the node or a parsable width is
    missing.
    """
    star_info = review.find('div', {'class': 'star_info'})
    if star_info is None:
        return None

    rating = star_info.select('.grade_star > .star_rate > .inner_star')
    if not rating:
        return None

    style = rating[0].get('style', '')
    # Accept "width:80%;" as well as "width: 80%" (no trailing semicolon)
    percent = style.replace('width:', '').replace('%', '').replace(';', '').strip()
    try:
        return round(float(percent) / 100.0 * 5, 1)
    except ValueError:
        return None


def extract_review(place_name):
    """Print every review on the detail page currently loaded in ``driver``.

    Each review that has comment text is printed as ``<comment>/<rating>``,
    where the rating is taken from that review's own star element and falls
    back to ``0`` when it cannot be determined.

    :param place_name: name of the place being crawled (currently unused)
    :return: ``True`` if the page contains at least one review list item,
        ``False`` otherwise
    """
    soup = BeautifulSoup(driver.page_source, 'html.parser')

    review_lists = soup.select('.list_evaluation > li')
    if not review_lists:
        print('no review in extract')
        return False

    for review in review_lists:
        comment = review.select('.txt_comment > span')
        if not comment:
            continue

        # Look the rating up inside this review, not in the whole document,
        # so each comment is paired with its own rating.
        star_rating = _star_rating(review)
        rating_text = '0' if star_rating is None else str(star_rating)
        print(comment[0].text + '/' + rating_text)

    return True


if __name__ == "__main__":
    main()

# --- Captured notebook output (run was interrupted manually) ---
# ##### 0
# #### 올레길 7코스(서귀포-월평 올레),
# /5.0
# Soso/5.0
# /5.0
#
#
# KeyboardInterrupt:

## 이름, 위치, 별점, 소요시간