In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup

# 強制等待 (執行期間休息一下)
from time import sleep

# 期待元素出現要透過什麼方式指定，通常與 EC、WebDriverWait 一起使用
from selenium.webdriver.common.by import By

# 面對動態網頁，等待某個元素出現的工具，通常與 expected_conditions 搭配
from selenium.webdriver.support.ui import WebDriverWait

# 搭配 WebDriverWait 使用，對元素狀態的一種期待條件，若條件發生，則等待結束，往下一行執行
from selenium.webdriver.support import expected_conditions as EC

# 整理 json 使用的工具
import json

from selenium.common.exceptions import NoSuchElementException

from selenium.common.exceptions import ElementClickInterceptedException

from selenium.common.exceptions import ElementNotInteractableException

from selenium.common.exceptions import TimeoutException

import pandas as pd
import re
import os
import datetime
import glob

#### 抓取並爬取飯店的資訊

In [2]:
# Chrome launch options; reused by every webdriver.Chrome(...) call in this notebook.
my_options = webdriver.ChromeOptions()

# "--headless" (run without opening a visible browser window) is deliberately
# not in the tuple below; add it to crawl in the background.
for _chrome_flag in (
    "--start-maximized",         # maximize the window
    "--incognito",               # incognito mode
    "--disable-popup-blocking",  # do not block pop-ups
    "--disable-notifications",   # turn off Chrome push notifications
    "--lang=zh-TW",              # Traditional Chinese
):
    my_options.add_argument(_chrome_flag)

In [3]:
# Start the Chrome WebDriver session used by the hotel-list crawl. The value
# returned by ChromeDriverManager().install() is handed to Service.
chrome_service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=chrome_service, options=my_options)

[WDM] - Downloading: 100%|████████████████████████████████████████████████████████| 6.30M/6.30M [00:00<00:00, 30.4MB/s]


In [4]:
# Collected hotels: get_hotel_links() appends one {"name", "link"} dict per hotel.
listData = list()

In [5]:
# Open the search-result page
def visit(url=None, wait_seconds=10):
    """Load a search-result page in the notebook-level `driver`, then pause.

    Args:
        url: Page to open. When None (the default) the search URL that was
            originally hard-coded here is used, so `visit()` behaves as before.
            Paste your own search URL here to crawl a different query.
        wait_seconds: Seconds to sleep after `driver.get` returns (default 10,
            the original fixed wait).
    """
    if url is None:
        url = 'https://www.agoda.com/zh-tw/search?city=17048&locale=zh-tw&ckuid=761122a7-e1de-4e24-93e4-6bb5a28f15c2&prid=0&currency=TWD&correlationId=c7fbe0d9-a9a1-48f6-b89b-271fdd2f2df1&analyticsSessionId=-1678796968739073176&pageTypeId=103&realLanguageId=20&languageId=20&origin=TW&cid=1891473&userId=761122a7-e1de-4e24-93e4-6bb5a28f15c2&whitelabelid=1&loginLvl=0&storefrontId=3&currencyId=28&currencyCode=TWD&htmlLanguage=zh-tw&cultureInfoName=zh-tw&machineName=hk-pc-2f-acm-web-user-5cf55cbd74-nsx76&trafficGroupId=5&sessionId=xfnv12cnm44u00haxzbv3mzc&trafficSubGroupId=122&aid=82361&useFullPageLogin=true&cttp=4&isRealUser=true&mode=production&browserFamily=Chrome&checkIn=2023-08-14&checkOut=2023-08-15&rooms=1&adults=2&children=0&priceCur=TWD&los=1&textToSearch=%E5%9F%BA%E9%9A%86%E5%B8%82&travellerType=1&familyMode=off&productType=-1'

    driver.get(url)

    # Fixed pause after navigation before the caller starts scrolling.
    sleep(wait_seconds)
    
# Scroll the page
def scroll():
    """Scroll the page shown by `driver` downward in fixed steps.

    Each step raises the scroll target by 800 px, smooth-scrolls the window to
    it, sleeps 3 seconds, and reads document.documentElement.scrollHeight. A
    step whose target is at or beyond that height is counted as a miss; the
    loop stops once the miss count exceeds 3.

    To scroll a <div> with its own scrollbar instead of the window, the
    original author's note suggests
    document.querySelector('div').scrollTo({...}).
    """
    step_px = 800       # added to the scroll target on every step; a smaller
                        # step (e.g. 600) can help on sites that hide elements
                        # passed over during a long jump
    allowed_misses = 3  # the loop continues while misses <= this value
    target = 0          # current scroll target, in px from the top
    misses = 0          # steps whose target had already reached the page height

    while True:
        if misses > allowed_misses:
            break

        target += step_px

        # Move the scrollbar down; 'smooth' makes the browser scroll gradually.
        driver.execute_script(f'''
            window.scrollTo({{
                top: {target}, 
                behavior: 'smooth' 
            }});
        ''')

        # (Important) forced wait: if new elements are generated meanwhile,
        # the page height reported below grows.
        sleep(3)

        # Total page height after this step, read through JavaScript.
        page_height = driver.execute_script(
            'return document.documentElement.scrollHeight;'
        )

        # Target at or past the total height means the bottom was reached.
        if target >= page_height:
            misses += 1
            

# Collect hotel links
def get_hotel_links():
    """Parse the page currently shown by `driver` and add its hotels to `listData`.

    Every <a class="PropertyCard__Link"> that has an href contributes one dict
    {"name": <aria-label attribute>, "link": "https://www.agoda.com" + href}.
    A link that is already present in `listData` is not appended again, so
    parsing the same page twice does not create duplicate entries.
    """
    # Parse the current page source with BeautifulSoup.
    soup = BeautifulSoup(driver.page_source, 'lxml')

    # URLs already stored, used to skip exact duplicates.
    seen_links = {item.get("link") for item in listData}

    for anchor in soup.find_all('a', class_="PropertyCard__Link"):
        href = anchor.get('href')
        if href is None:
            # Card without a target URL: nothing to crawl later.
            continue

        hotelUrl = 'https://www.agoda.com' + href
        if hotelUrl in seen_links:
            continue
        seen_links.add(hotelUrl)

        listData.append({
            "name": anchor.get('aria-label'),
            "link": hotelUrl
        })

    
# Next page
def nextPage():
    """Click the "next page" button of the search results.

    Returns:
        bool: True when the button was found and clicked (followed by a
        5-second pause). False when the button did not appear within
        10 seconds, or when locating/clicking it raised any other exception;
        in that second case the exception is printed instead of being
        silently discarded.
    """
    next_selector = 'button.Buttonstyled__ButtonStyled-sc-5gjk6l-0.jndwMN.btn.pagination2__next'

    try:
        # until() returns the element found by the condition, so the button
        # does not have to be looked up a second time.
        next_button = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, next_selector))
        )
        next_button.click()

    except TimeoutException:
        # Button never showed up: treated as "no further page".
        return False

    except Exception as e:
        # Still best-effort (the caller just stops paging), but make the
        # reason visible so an intercepted click is not mistaken for the
        # normal end of the result list.
        print(f'下一頁按鈕點擊失敗，停止翻頁：{e!r}')
        return False

    # Pause after the click before the caller continues.
    sleep(5)
    return True
    
# Save the list as JSON
def saveJson():
    """Write `listData` to ./agoda_基隆市.json as UTF-8 text.

    Non-ASCII characters are written as-is (ensure_ascii=False) and the
    output is indented by 4 spaces.
    """
    with open('./agoda_基隆市.json', "w", encoding='utf-8') as out_file:
        json.dump(listData, out_file, ensure_ascii=False, indent=4)

In [6]:
if __name__ == '__main__':

    # Empty the shared list first, then open the search page.
    listData.clear()
    visit()

    # Scroll through the page, harvest its links, then try to advance;
    # stop as soon as nextPage() returns a falsy value.
    nx = True
    while nx:
        scroll()
        get_hotel_links()
        nx = nextPage()

    # Number of hotels collected.
    print(len(listData))

53


In [7]:
if __name__ == '__main__':

    # Write the collected hotel list to the JSON file.
    saveJson()

#### 開始爬取

In [8]:
# Load the JSON file
def readJson():
    """Read ./agoda_基隆市.json (UTF-8) and return the decoded JSON value.

    The number of entries is printed before returning.
    """
    with open('./agoda_基隆市.json', "r", encoding='utf-8') as in_file:
        hotels = json.load(in_file)

    print(f'總間數:{len(hotels)}')
    return hotels

# Crawl hotel reviews (private helpers first, entry point `getComments` last)
def _first_text(scope, selector, index=0):
    """Return innerText of the `index`-th element under `scope` matching `selector`.

    Returns '' when fewer than index + 1 elements match, so callers never
    index past the end of the result list.
    """
    matches = scope.find_elements(By.CSS_SELECTOR, selector)
    if len(matches) > index:
        return matches[index].get_attribute('innerText')
    return ''


def _all_texts(scope, selector):
    """Return the innerText of every element under `scope` matching `selector`."""
    return [
        match.get_attribute('innerText')
        for match in scope.find_elements(By.CSS_SELECTOR, selector)
    ]


def _safe_file_name(name):
    """Replace characters of `name` that are not wanted in a file name.

    '/', '|', '<', '>' and '"' become '_'; tab characters are removed.
    """
    for unwanted, substitute in (
        ('/', '_'), ('\t', ''), ('|', '_'), ('<', '_'), ('>', '_'), ('"', '_')
    ):
        name = name.replace(unwanted, substitute)
    return name


def _save_comments(listComment, hotelName, outputDir):
    """Write the review rows to <outputDir>/<sanitized hotelName>.csv."""
    os.makedirs(outputDir, exist_ok=True)
    csv_path = os.path.join(outputDir, f"{_safe_file_name(hotelName)}.csv")
    pd.DataFrame(listComment).to_csv(
        csv_path, index=False, encoding='utf_8_sig', escapechar='_'
    )


def _review_row(elm):
    """Extract the per-review columns from one 'div.Review-comment' element.

    Every field falls back to '' when its element is missing. The review
    score is printed as a progress trace.
    """
    score = _first_text(elm, "div.Review-comment-leftScore")
    print(score)

    return {
        "評論分數": score,
        "總評": _first_text(elm, "div.Review-comment-leftScoreText"),
        "評論者": _first_text(elm, "div.Review-comment-reviewer[data-info-type='reviewer-name'] strong"),
        # The nationality is read from the SECOND <span>; index=1 makes the
        # length check match the index actually used.
        "評論者國籍": _first_text(elm, "div.Review-comment-reviewer[data-info-type='reviewer-name'] span", index=1),
        "入住人數": _first_text(elm, "div.Review-comment-reviewer[data-info-type='group-name'] span"),
        "房型": _first_text(elm, "div.Review-comment-reviewer[data-info-type='room-type'] span"),
        "住宿細節": _first_text(elm, "div.Review-comment-reviewer[data-info-type='stay-detail'] span"),
        # Strip the closing curly quote from the title.
        "評論標題": _first_text(elm, "h3.Review-comment-bodyTitle").replace('”', ''),
        "評論副標題": _first_text(elm, "div.Review-comment-bodyText[data-type='positive'] div"),
        "評論內容": _first_text(elm, "p.Review-comment-bodyText[data-type='comment']"),
        "評論日期": _first_text(elm, "span.Review-statusBar-date"),
    }


def _crawl_hotel(driver, index, obj, outputDir):
    """Crawl one hotel page and write its reviews to a CSV file.

    Returns without writing a file when the hotel name never appears, when no
    overall score is found, when the verified-review count is missing, or
    when no review element ever appears.
    """
    print(f"第{index}間開始")

    # Review rows collected for this hotel.
    listComment = []

    driver.get(obj['link'])

    # The hotel name is rendered either in a <p> or in an <h1>; wait for one of them.
    try:
        WebDriverWait(driver, 5).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, 'p.HeaderCerebrum__Name'))
        )
    except TimeoutException:
        try:
            WebDriverWait(driver, 5).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, 'h1.HeaderCerebrum__Name'))
            )
        except TimeoutException:
            # Skip this hotel instead of aborting the crawl of all the others.
            print(obj['link'])
            print(f"第{index}間找不到飯店名稱，略過")
            print('=' * 50)
            return

    # Hotel name: <p> variant preferred, <h1> variant otherwise, '' if neither.
    hotelName = ''
    for name_selector in ('p.HeaderCerebrum__Name', 'h1.HeaderCerebrum__Name'):
        name_elements = driver.find_elements(By.CSS_SELECTOR, name_selector)
        if name_elements:
            hotelName = name_elements[0].get_attribute('innerText')
            break
    print(hotelName)

    # Address
    address = driver.find_element(
        By.CSS_SELECTOR,
        'span.Spanstyled__SpanStyled-sc-16tp9kb-0.gwICfd.kite-js-Span.HeaderCerebrum__Address'
    ).get_attribute('innerText')
    print(f'飯店地址：{address}')

    # All room types, comma separated
    rooms = ','.join(_all_texts(driver, 'span.MasterRoom__HotelName'))

    # Facility section titles
    facilityTitles = _all_texts(
        driver,
        'div.Box-sc-kv6pi1-0.immaYa.AmenitiesFacility div.Box-sc-kv6pi1-0.dtSdUZ h5'
    )

    # One comma-joined string of entries per facility list
    list_facilitys = [
        ','.join(_all_texts(facility_list, 'span.Spanstyled__SpanStyled-sc-16tp9kb-0.gwICfd.kite-js-Span'))
        for facility_list in driver.find_elements(
            By.CSS_SELECTOR,
            'ul.Liststyled__ListStyled-sc-ksl08h-0.iTjiYt'
        )
    ]

    # Nearby places and their distances, paired in page order
    vp = _all_texts(
        driver,
        '[data-element-name="about-hotel-whats-nearby-section"] ul.Liststyled__ListStyled-sc-ksl08h-0.iTjiYt span.Spanstyled__SpanStyled-sc-16tp9kb-0.gwICfd.kite-js-Span'
    )
    distances = _all_texts(
        driver,
        '[data-element-name="about-hotel-whats-nearby-section"] ul.Liststyled__ListStyled-sc-ksl08h-0.iTjiYt span.Spanstyled__SpanStyled-sc-16tp9kb-0.cUindZ.kite-js-Span'
    )
    viewPoints = ','.join(place + ' ' + dist for place, dist in zip(vp, distances))

    # Overall score: two page layouts are tried; none found means no reviews.
    try:
        totalScore = driver.find_element(
            By.CSS_SELECTOR,
            'span.Review__ReviewFormattedScore'
        ).get_attribute('innerText')
    except NoSuchElementException:
        try:
            totalScore = driver.find_element(
                By.CSS_SELECTOR,
                'div.ReviewScore-Number.ReviewScore-Number--line-height'
            ).get_attribute('innerText')
        except NoSuchElementException:
            print('沒有評論')
            print('=' * 50)
            return

    print(f'飯店整體分數:{totalScore}')

    # Overall rating text
    tCM = driver.find_element(
        By.CSS_SELECTOR,
        'div.ReviewScoreText'
    ).get_attribute('innerText')
    print(f'飯店整體評價:{tCM}')

    # Category scores: every second <span> from index 1 up to (not including) 12
    listScore = [
        cell.get_attribute('innerText')
        for cell in driver.find_elements(
            By.CSS_SELECTOR,
            'div.Review-travelerGrade-Cell span'
        )[1:12:2]
    ]

    # Category names
    listName = _all_texts(driver, 'span.Review-travelerGradeCategory')

    print(f'各項目名稱{listName}')
    print(f'各項目分數{listScore}')

    # Verified-review count; a page without it is skipped.
    try:
        checkComment = driver.find_element(
            By.CSS_SELECTOR,
            'span.Review__SummaryContainer--left.Review__SummaryContainer__Text'
        ).get_attribute('innerText')
    except NoSuchElementException:
        return

    # Keep the digits only (raw string: "\D" is not a valid str escape).
    checkComment = re.sub(r"\D", "", checkComment)
    print(f'已驗證評論數:{checkComment}')

    driver.execute_script("window.scrollTo(0, document.body.scrollHeight*0.9);")

    # Hotel-level columns repeated on every review row. Facility titles and
    # category names become extra columns, applied in that order.
    hotelColumns = {
        "地址": address,
        "所有房型": rooms,
        "附近景點": viewPoints,
        "飯店整體分數": totalScore,
        "飯店整體評價": tCM,
        "已驗證評論數": checkComment
    }
    hotelColumns.update(zip(facilityTitles, list_facilitys))
    hotelColumns.update(zip(listName, listScore))

    while True:

        try:
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, 'div.Review-comment'))
            )
        except TimeoutException:
            print(obj['link'])
            if listComment:
                # Reviews from earlier pages were already collected: keep
                # them instead of dropping the whole hotel.
                _save_comments(listComment, obj['name'], outputDir)
                print(f"第{index}間評論載入逾時，已儲存目前取得的 {len(listComment)} 則評論")
            else:
                print(f"第{index}間沒有評論")
            print('=' * 50)
            return

        # Review elements on the current page
        elements = driver.find_elements(
            By.CSS_SELECTOR,
            'div.Review-comment'
        )

        print("=" * 50)

        for elm in elements:
            listComment.append({**_review_row(elm), **hotelColumns})

        sleep(0.5)

        # Dismiss the overlay if one is present (best effort: a failed click
        # is ignored on purpose).
        overlays = driver.find_elements(By.CSS_SELECTOR, "p.BackToSearch-dismissText")
        try:
            if len(overlays) > 0:
                overlays[0].click()
        except (ElementClickInterceptedException, ElementNotInteractableException):
            pass

        # Select the first review tab (best effort as well).
        try:
            WebDriverWait(driver, 5).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, 'span.Review-tab'))
            )
            butt = driver.find_elements(By.CSS_SELECTOR, 'span.Review-tab')
            butt[0].click()
        except (TimeoutException, ElementClickInterceptedException):
            pass

        # Next-page arrow: the SECOND matching icon is the one clicked, so at
        # least two matches are required before indexing it.
        btn = driver.find_elements(By.CSS_SELECTOR, 'i.ficon.ficon-24.ficon-carrouselarrow-right')
        has_next = len(btn) > 1
        if has_next:
            try:
                btn[1].click()
            except ElementClickInterceptedException:
                # An intercepted click is this crawler's signal for "last page".
                has_next = False

        if not has_next:
            _save_comments(listComment, obj['name'], outputDir)

            print(f'第{index}間結束')
            link = obj['link']
            print(f'{link} 完成')
            print("=" * 50)
            return

        sleep(0.5)


def getComments(listResult, outputDir=None):
    """Crawl the reviews of every hotel in `listResult`, one CSV file per hotel.

    Args:
        listResult: Iterable of dicts with a 'link' key (hotel page URL) and a
            'name' key (used for the CSV file name), as returned by readJson().
        outputDir: Folder that receives the CSV files. When None (the default)
            the notebook-level variable `fileName` is used, which keeps the
            original call `getComments(listResult)` working unchanged.

    A dedicated Chrome session is opened for the crawl and is always closed
    again, even when a hotel page raises an unexpected exception.
    """
    if outputDir is None:
        # Resolved up front so a missing `fileName` fails before any crawling.
        outputDir = fileName

    # Separate Chrome WebDriver session for the review crawl.
    driver = webdriver.Chrome(
        options = my_options,
        service = Service(ChromeDriverManager().install())
    )

    try:
        for index, obj in enumerate(listResult):
            _crawl_hotel(driver, index, obj, outputDir)
    finally:
        # Do not leave the browser process running after the crawl.
        driver.quit()
        

In [9]:
if __name__ == '__main__':

    # Start timing the crawl.
    startime = datetime.datetime.now()

    # Output folder under the current working directory. exist_ok=True
    # creates it when missing and does nothing when it is already there,
    # without a separate existence check.
    fileName = 'agodaHotelComments_基隆市'
    path = os.path.join(os.getcwd(), fileName)
    os.makedirs(path, exist_ok=True)

    listResult = readJson()
    getComments(listResult)

    # Stop timing.
    endtime = datetime.datetime.now()

    # Report the elapsed time.
    print("執行時間：", endtime - startime)

總間數:53
第0間開始
粼島旅宿 (Spangle Inn)
飯店地址：基隆, 基隆市, 台灣
飯店整體分數:9.1
飯店整體評價:超棒
各項目名稱['整體狀況及整潔度', '服務', '設施與設備', '位置', 'CP值']
各項目分數['9.5', '9.3', '9.2', '9.2', '9.1']
已驗證評論數:62
10.0
10.0
9.6
10.0
10.0
10.0
10.0
10.0
10.0
10.0
10.0
10.0
10.0
10.0
10.0
10.0
9.2
10.0
10.0
10.0
9.2
8.4
10.0
9.2
8.8
10.0
8.8
8.8
9.2
9.2
9.2
9.2
8.0
8.8
8.0
6.4
7.6
7.2
5.6
6.4
6.0
6.0
6.4
4.8
9.2
10.0
10.0
9.6
9.6
9.2
10.0
10.0
10.0
9.2
9.6
8.4
6.4
6.4
10.0
8.4
7.2
10.0
第0間結束
https://www.agoda.com/zh-tw/spangle-inn_3/hotel/keelung-tw.html?finalPriceView=1&isShowMobileAppPrice=false&cid=1891473&numberOfBedrooms=&familyMode=false&adults=2&children=0&rooms=1&maxRooms=0&checkIn=2023-08-14&isCalendarCallout=false&childAges=&numberOfGuest=0&missingChildAges=false&travellerType=1&showReviewSubmissionEntry=false&currencyCode=TWD&isFreeOccSearch=false&isCityHaveAsq=false&los=1&searchrequestid=35b024d6-697a-4a96-9ddb-102958177a94 完成
第1間開始
Hotel BEGINS 倉箱蜜境文旅 (Hotel BEGINS)
飯店地址：51號 Lane 446, Beining Road, 基隆, 基隆市, 台灣, 202
飯店整體分

8.7
6.7
6.7
10.0
8.7
8.0
9.3
7.3
8.0
8.7
9.7
8.7
10.0
7.3
8.7
10.0
10.0
8.7
9.7
8.3
8.7
8.0
6.0
8.0
9.3
9.3
8.7
7.3
7.7
9.0
7.3
7.0
10.0
10.0
8.3
6.3
9.3
7.7
8.3
10.0
10.0
10.0
10.0
10.0
10.0
10.0
10.0
10.0
10.0
10.0
9.6
10.0
10.0
10.0
10.0
10.0
10.0
9.6
10.0
10.0
10.0
10.0
9.2
10.0
10.0
10.0
10.0
9.6
10.0
10.0
9.6
10.0
10.0
10.0
9.6
9.2
10.0
10.0
9.6
8.8
9.6
9.6
9.2
10.0
9.2
10.0
9.6
9.6
9.2
9.6
10.0
9.6
10.0
9.6
10.0
9.2
10.0
9.6
9.2
9.2
10.0
10.0
9.6
9.2
10.0
8.8
9.2
9.2
9.2
8.4
9.2
8.8
9.2
8.8
8.8
9.2
8.8
9.2
10.0
9.6
8.8
8.8
8.8
9.2
8.8
9.6
9.2
9.2
9.2
9.2
8.8
8.4
8.8
8.8
8.8
8.8
8.8
8.0
8.8
8.4
7.6
8.4
9.2
7.6
8.8
8.8
8.0
8.0
8.0
8.8
8.8
7.6
8.4
7.6
8.0
8.4
8.8
7.6
7.6
7.6
8.0
8.0
8.0
7.2
7.2
7.6
7.2
7.2
7.2
7.2
6.8
6.4
6.4
6.8
6.0
6.0
5.6
5.2
2.8
3.2
8.4
8.4
10.0
8.0
10.0
10.0
5.2
9.2
8.8
9.2
10.0
7.2
6.0
10.0
8.0
7.6
9.6
8.0
10.0
9.2
9.2
10.0
9.2
8.0
7.6
7.6
8.4
8.4
8.0
10.0
9.2
7.2
6.8
8.4
8.4
8.4
8.8
10.0
8.0
10.0
5.2
10.0
8.0
7.2
8.8
9.2
7.6
7.6
10.0
10.0
8.4
6.0
9.2
8.8
8.8

7.2
8.0
9.6
6.8
7.2
7.2
10.0
10.0
9.6
8.4
10.0
6.4
9.5
10.0
8.4
4.0
10.0
10.0
10.0
7.6
10.0
9.6
10.0
10.0
10.0
5.6
10.0
8.8
9.2
9.2
8.8
9.2
10.0
8.8
10.0
10.0
9.6
10.0
9.2
10.0
10.0
10.0
8.8
10.0
4.4
9.2
9.6
9.6
9.2
10.0
6.8
3.6
7.2
4.4
9.6
9.2
8.4
6.4
6.8
5.2
9.6
8.0
8.0
9.6
6.4
8.8
7.2
5.2
5.6
4.8
8.4
8.0
4.4
3.6
9.2
2.0
2.4
6.8
6.8
7.6
6.8
5.6
7.6
5.2
8.8
7.2
5.6
6.8
8.8
6.4
7.6
6.0
8.4
5.2
2.8
5.2
7.6
6.8
6.8
7.6
7.6
3.2
8.8
6.8
6.0
7.2
6.8
8.0
6.4
7.6
6.8
8.0
8.0
5.6
8.8
7.2
3.6
6.4
8.8
7.6
6.4
6.4
8.8
7.2
9.6
6.0
5.2
8.4
6.4
6.4
6.8
10.0
6.8
8.4
8.0
6.0
7.6
8.4
8.8
8.4
6.8
8.8
8.0
9.6
9.2
9.6
8.0
8.4
7.2
6.0
10.0
7.6
6.4
8.0
7.2
8.4
10.0
7.2
6.4
2.0
9.2
7.6
8.4
8.0
7.6
6.0
7.6
8.8
7.2
7.2
4.4
10.0
10.0
5.2
8.0
3.2
3.6
8.0
8.0
8.0
9.0
6.3
6.0
7.0
7.7
5.3
4.7
6.3
8.7
4.7
5.7
7.3
8.0
8.3
8.0
7.3
8.0
7.7
6.0
8.0
8.3
6.3
7.0
7.3
6.3
8.3
9.3
7.3
6.3
7.7
9.7
7.0
5.7
8.0
7.0
9.0
7.7
7.3
8.0
10.0
8.0
9.7
5.7
6.7
8.0
7.3
8.3
8.7
7.3
7.7
7.7
6.0
8.3
8.0
7.3
6.3
9.7
10.0
10.0
10.0
10.0
9.6
9

8.8
8.4
10.0
9.6
10.0
8.4
8.0
9.6
8.4
10.0
9.6
9.2
10.0
10.0
4.8
9.6
10.0
10.0
7.6
10.0
10.0
7.6
9.6
9.2
10.0
8.0
10.0
8.4
8.8
10.0
9.6
10.0
10.0
8.4
9.6
9.2
10.0
9.2
8.4
10.0
10.0
9.6
10.0
10.0
9.2
10.0
9.6
10.0
6.8
6.8
8.8
8.4
10.0
9.2
10.0
5.6
9.2
5.2
10.0
8.4
8.4
6.8
9.2
8.0
8.4
8.8
8.8
10.0
8.8
8.8
7.6
6.0
4.8
7.6
10.0
7.2
10.0
10.0
9.6
8.4
6.0
7.6
9.2
6.4
6.0
9.2
8.0
9.6
6.8
8.0
8.4
8.4
8.8
7.2
7.2
8.8
5.6
10.0
8.8
10.0
10.0
9.6
10.0
9.6
10.0
9.2
10.0
9.6
10.0
10.0
10.0
10.0
10.0
10.0
10.0
10.0
10.0
9.6
10.0
9.6
9.6
10.0
9.6
10.0
10.0
10.0
10.0
9.6
9.6
8.8
10.0
10.0
10.0
10.0
9.2
10.0
9.6
9.2
9.6
9.2
9.6
9.6
8.8
8.8
8.8
8.8
8.8
8.4
9.2
8.4
8.8
8.4
8.0
8.0
7.6
8.4
8.4
8.0
8.0
8.0
7.6
8.0
8.0
7.6
8.0
6.8
7.6
6.8
6.8
7.2
6.8
6.4
5.6
6.8
6.0
6.4
5.2
5.6
5.2
4.4
5.2
5.2
3.6
4.0
3.2
2.0
8.8
8.8
10.0
8.4
7.6
9.6
10.0
6.8
6.0
9.6
8.8
10.0
7.2
8.8
9.2
7.6
10.0
8.4
8.8
8.4
7.6
8.8
10.0
5.2
7.6
10.0
8.8
9.2
10.0
8.0
9.6
9.6
10.0
9.2
9.6
2.0
6.8
8.0
7.6
10.0
8.0
9.2
8.0
8.8
8.8
9.2
9.2
10.0


8.0
8.4
7.2
5.2
10.0
9.6
8.0
10.0
9.2
10.0
10.0
8.4
8.8
8.8
8.8
9.6
10.0
10.0
9.6
9.2
10.0
8.0
5.6
10.0
4.4
4.8
6.8
8.0
7.6
7.6
8.8
6.4
6.8
8.4
5.6
8.4
5.2
8.0
9.2
7.6
7.6
10.0
9.2
4.8
4.8
9.2
6.0
10.0
10.0
7.6
10.0
6.8
6.8
8.8
9.2
6.0
6.4
5.6
4.8
9.6
10.0
9.6
9.2
8.8
10.0
10.0
8.0
10.0
第7間結束
https://www.agoda.com/zh-tw/just-live-inn-keelung/hotel/keelung-tw.html?finalPriceView=1&isShowMobileAppPrice=false&cid=1891473&numberOfBedrooms=&familyMode=false&adults=2&children=0&rooms=1&maxRooms=0&checkIn=2023-08-14&isCalendarCallout=false&childAges=&numberOfGuest=0&missingChildAges=false&travellerType=1&showReviewSubmissionEntry=false&currencyCode=TWD&isFreeOccSearch=false&isCityHaveAsq=false&tspTypes=2&los=1&searchrequestid=35b024d6-697a-4a96-9ddb-102958177a94 完成
第8間開始
基隆阿樂哈大飯店 (Aloha Hotel Keelung)
飯店地址：信二路292之1號, 基隆市中心, 基隆市, 台灣, 20241
飯店整體分數:7.0
飯店整體評價:很好
各項目名稱['整體狀況及整潔度', '設施與設備', '位置', '服務', 'CP值']
各項目分數['6.8', '6.1', '8.2', '7.3', '6.9']
已驗證評論數:99
10.0
10.0
8.8
9.2
9.2
8.0
8.0
7.2
7.6


北都大飯店 (Beidoo Hotel)
飯店地址：信二路319號, 基隆市中心, 基隆市, 台灣, 20241
飯店整體分數:7.8
飯店整體評價:很好
各項目名稱['位置', '服務', '客房舒適度', 'CP值', '整體狀況及整潔度', '設施與設備']
各項目分數['8.8', '8.2', '7.8', '7.6', '7.5', '6.8']
已驗證評論數:338
10.0
10.0
10.0
10.0
10.0
10.0
10.0
10.0
10.0
9.2
9.6
10.0
9.2
10.0
9.2
8.8
9.6
9.6
8.4
9.2
9.2
8.8
8.8
9.2
8.8
8.8
8.8
8.0
8.4
8.4
8.4
8.4
8.4
8.4
8.8
8.8
8.4
8.4
8.0
8.0
8.0
8.0
8.8
7.2
6.8
7.6
7.6
6.8
7.6
7.2
7.6
7.6
6.8
7.2
7.6
7.2
7.2
6.8
7.2
7.2
6.8
6.4
6.4
6.4
6.4
6.4
6.0
4.8
5.6
4.8
4.4
4.0
3.6
3.2
9.2
9.2
8.8
9.6
9.6
9.2
9.6
9.6
9.2
8.4
8.0
8.0
10.0
9.2
8.0
7.6
8.0
10.0
10.0
7.6
9.2
9.6
9.2
6.8
10.0
9.2
8.4
8.8
10.0
10.0
4.4
7.6
10.0
9.6
7.2
10.0
8.0
4.8
7.6
10.0
10.0
8.4
6.0
10.0
6.8
6.8
8.0
7.2
8.0
6.8
7.6
6.4
8.8
5.6
7.2
4.4
3.6
7.6
8.0
6.0
8.4
8.4
7.2
7.6
8.4
5.6
6.8
6.4
8.8
8.4
7.2
8.8
8.0
4.8
8.8
4.4
9.6
10.0
4.8
6.8
6.4
6.8
6.8
6.0
9.6
3.6
8.4
8.0
7.6
7.2
4.8
7.2
6.8
9.2
5.2
6.8
8.0
8.4
4.8
8.4
8.0
6.8
5.6
8.8
4.8
7.6
7.2
6.0
8.4
7.6
7.6
6.4
8.8
8.0
4.8
8.8
8.0
4.4
6.8
6.8
6.0
4.4
6

Hotel BEGINS 倉箱蜜境文旅 (Hotel BEGINS)
飯店地址：51號 Lane 446, Beining Road, 基隆, 基隆市, 台灣, 202
飯店整體分數:9.1
飯店整體評價:超棒
各項目名稱['整體狀況及整潔度', '服務', 'CP值', '位置', '設施與設備']
各項目分數['9.6', '9.4', '9.2', '9.1', '9.0']
已驗證評論數:87
10.0
10.0
10.0
10.0
9.6
10.0
10.0
9.2
9.2
10.0
9.6
10.0
10.0
9.2
9.6
8.8
9.6
10.0
10.0
8.0
9.6
9.2
8.0
8.0
7.2
8.0
10.0
10.0
10.0
10.0
10.0
10.0
9.6
10.0
9.6
6.0
6.4
9.6
10.0
10.0
10.0
8.8
9.2
8.0
10.0
9.2
10.0
10.0
9.2
10.0
9.6
10.0
9.6
9.6
7.6
10.0
10.0
8.8
8.4
8.8
10.0
8.8
9.2
9.6
10.0
8.4
8.8
10.0
6.4
10.0
10.0
10.0
10.0
8.0
9.6
10.0
7.2
10.0
10.0
9.6
9.6
9.6
10.0
10.0
9.2
8.8
10.0
第15間結束
https://www.agoda.com/zh-tw/hotel-begins/hotel/taipei-tw.html?finalPriceView=1&isShowMobileAppPrice=false&cid=1891473&numberOfBedrooms=&familyMode=false&adults=2&children=0&rooms=1&maxRooms=0&checkIn=2023-08-14&isCalendarCallout=false&childAges=&numberOfGuest=0&missingChildAges=false&travellerType=1&showReviewSubmissionEntry=false&currencyCode=TWD&isFreeOccSearch=false&isCityHaveAsq=false&tspTypes=

8.0
8.0
8.0
7.2
8.4
8.4
7.7
8.8
8.7
9.2
5.3
9.3
9.3
9.3
6.3
6.7
8.0
6.0
6.4
6.0
7.3
8.4
8.4
6.3
9.3
8.7
9.7
9.3
7.0
8.0
7.7
8.7
8.7
9.0
3.3
9.3
8.0
5.0
8.7
8.7
8.3
7.3
6.7
8.7
7.3
8.0
9.0
7.3
8.7
9.7
6.7
9.3
7.0
10.0
9.3
7.3
9.0
7.7
7.0
6.0
9.0
7.7
9.0
6.3
10.0
8.3
7.0
7.7
8.0
8.3
8.3
9.3
8.3
9.0
8.7
7.0
7.0
7.7
9.0
8.7
9.3
7.7
6.7
7.7
6.3
8.0
7.3
7.3
8.0
7.3
8.3
6.7
7.3
9.0
8.7
8.7
8.0
6.7
9.7
7.0
9.7
7.3
6.3
7.3
9.7
8.0
6.3
7.0
4.7
9.0
10.0
8.0
8.3
3.7
4.7
7.0
6.3
8.7
6.3
8.0
7.0
7.3
8.3
9.3
8.0
10.0
7.3
8.0
9.0
7.0
8.0
5.3
8.0
9.0
6.0
9.7
7.7
8.3
9.3
6.0
8.7
8.0
7.3
6.3
8.3
7.0
8.0
10.0
7.3
9.3
6.0
9.7
6.7
8.0
8.0
9.3
7.0
8.0
8.7
9.3
10.0
9.7
6.3
8.7
8.0
10.0
9.7
8.7
8.0
8.3
6.3
8.3
7.7
10.0
7.3
8.0
8.3
8.0
4.3
8.0
8.3
9.0
8.7
9.0
8.0
8.7
8.7
8.0
8.0
8.7
6.0
6.0
6.7
10.0
7.3
8.3
7.0
7.3
9.3
8.3
8.7
8.0
9.3
9.7
9.0
7.7
8.3
7.7
10.0
7.0
8.0
7.7
8.0
9.3
9.3
7.0
9.0
8.7
7.7
8.7
6.0
7.3
8.7
7.7
6.7
8.3
7.0
7.0
9.3
7.7
7.7
7.0
7.7
9.7
8.7
9.0
8.7
9.0
7.7
9.0
9.3
8.0
9.3
9.0
9.3
9.2
10.0
9

8.8
8.8
5.2
2.4
3.2
2.4
8.4
6.8
7.2
9.2
8.8
6.4
8.0
7.6
7.3
6.4
5.6
7.6
7.6
2.0
9.2
8.4
8.4
6.8
5.2
10.0
8.4
8.0
4.8
4.8
8.8
9.2
2.0
2.8
2.8
7.2
5.2
6.8
2.4
7.6
2.4
8.4
6.0
6.0
3.2
7.6
6.4
6.0
4.8
6.4
6.4
6.8
6.0
4.4
6.4
4.8
7.3
6.7
6.7
7.3
4.0
7.3
6.0
7.2
6.4
4.4
8.4
5.2
8.4
6.4
2.8
4.8
4.4
6.4
3.2
3.6
6.4
4.0
2.0
8.0
8.4
2.0
8.0
10.0
4.8
8.0
7.2
6.8
10.0
9.2
8.0
8.8
8.0
7.0
8.3
4.8
10.0
2.0
2.0
3.6
3.6
9.6
3.2
8.0
4.8
5.6
8.0
10.0
3.6
7.6
7.6
8.0
8.0
8.7
5.0
9.3
7.0
6.4
6.4
2.4
6.0
6.4
6.0
3.7
5.2
3.2
7.6
4.8
第23間結束
https://www.agoda.com/zh-tw/fuchia-hotel/hotel/keelung-tw.html?finalPriceView=1&isShowMobileAppPrice=false&cid=1891473&numberOfBedrooms=&familyMode=false&adults=2&children=0&rooms=1&maxRooms=0&checkIn=2023-08-14&isCalendarCallout=false&childAges=&numberOfGuest=0&missingChildAges=false&travellerType=1&showReviewSubmissionEntry=false&currencyCode=TWD&isFreeOccSearch=false&isCityHaveAsq=false&tspTypes=9&los=1&searchrequestid=35b024d6-697a-4a96-9ddb-102958177a94 完成
第24間開始
Sea

8.0
7.2
8.8
8.8
7.2
9.6
7.6
8.4
8.8
9.2
9.2
8.4
9.2
8.4
10.0
8.0
10.0
8.8
8.7
4.3
9.7
8.7
6.3
9.7
9.0
6.7
9.0
7.3
8.0
6.0
8.7
6.3
8.0
9.3
9.3
6.0
9.7
8.3
8.7
8.7
6.3
8.3
7.0
7.3
8.3
9.0
7.0
9.0
8.3
9.0
8.3
6.4
9.6
8.4
9.2
2.4
6.4
7.6
3.6
7.3
7.0
9.0
9.3
10.0
3.6
10.0
8.0
6.8
8.0
7.3
10.0
10.0
第25間結束
https://www.agoda.com/zh-tw/harbor-view-hotel/hotel/keelung-tw.html?finalPriceView=1&isShowMobileAppPrice=false&cid=1891473&numberOfBedrooms=&familyMode=false&adults=2&children=0&rooms=1&maxRooms=0&checkIn=2023-08-14&isCalendarCallout=false&childAges=&numberOfGuest=0&missingChildAges=false&travellerType=1&showReviewSubmissionEntry=false&currencyCode=TWD&isFreeOccSearch=false&isCityHaveAsq=false&tspTypes=9,9&los=1&searchrequestid=35b024d6-697a-4a96-9ddb-102958177a94 完成
第26間開始
恆昌商旅 (HengChang Business Hotel)
飯店地址：5F, No. 24, Ren 3rd Road, 基隆市中心, 基隆市, 台灣, 200
飯店整體分數:7.3
飯店整體評價:很好
各項目名稱['整體狀況及整潔度', '設施與設備', '位置', '客房舒適度', '服務', 'CP值']
各項目分數['7.1', '7.0', '8.1', '7.0', '7.9', '7.2']
第27間開始
瑞芳旅人客

9.0
8.0
8.0
9.0
10.0
7.0
10.0
9.0
8.0
8.0
7.0
5.0
10.0
10.0
6.0
5.0
8.0
8.0
8.0
8.0
8.0
9.0
10.0
8.3
7.5
8.0
8.0
8.0
9.0
8.0
9.0
10.0
7.0
9.0
10.0
10.0
10.0
9.0
3.8
10.0
8.0
7.0
9.0
8.0
10.0
9.0
8.0
9.0
8.0
5.0
10.0
6.0
6.0
8.8
9.2
8.0
7.0
8.0
9.0
9.0
7.0
8.0
5.0
3.8
8.0
7.1
9.0
10.0
10.0
8.0
8.0
8.0
8.0
10.0
10.0
6.0
8.0
5.0
10.0
10.0
7.0
https://www.agoda.com/zh-tw/manman-house-ruifang/hotel/all/taipei-tw.html?finalPriceView=1&isShowMobileAppPrice=false&cid=1891473&numberOfBedrooms=&familyMode=false&adults=2&children=0&rooms=1&maxRooms=0&checkIn=2023-08-14&isCalendarCallout=false&childAges=&numberOfGuest=0&missingChildAges=false&travellerType=1&showReviewSubmissionEntry=false&currencyCode=TWD&isFreeOccSearch=false&isCityHaveAsq=false&tspTypes=1&los=1&searchrequestid=35b024d6-697a-4a96-9ddb-102958177a94
第31間沒有評論
第32間開始
永遠是晴天民宿 (Forever Sunny B&B)
飯店地址：瑞芳區, 台北市, 台灣
飯店整體分數:8.7
飯店整體評價:很讚
各項目名稱['整體狀況及整潔度', '設施與設備', '位置', '客房舒適度', '服務', 'CP值']
各項目分數['8.8', '8.6', '8.8', '8.6', '9.1', '8.3'

海華休閒別墅民宿 (Villa High Wharf)
飯店地址：萬里區, 台北市, 台灣
沒有評論
第44間開始
十分幸福民宿 (Very Happy Homestay)
飯店地址：平溪區, 台北市, 台灣
飯店整體分數:8.9
飯店整體評價:很讚
各項目名稱['整體狀況及整潔度', '設施與設備', '位置', '客房舒適度', '服務', 'CP值']
各項目分數['8.8', '8.5', '9.2', '8.8', '9.6', '8.7']
已驗證評論數:152
8.0
10.0
9.0
8.0
10.0
9.0
10.0
10.0
10.0
10.0
8.0
10.0
9.0
10.0
9.0
10.0
8.0
10.0
9.0
10.0
9.0
8.0
8.0
9.0
9.0
9.0
9.0
9.0
9.0
5.0
8.0
10.0
9.0
8.0
9.0
9.0
4.0
10.0
7.0
10.0
10.0
10.0
8.0
9.0
5.0
9.0
10.0
10.0
9.0
8.0
9.0
10.0
8.0
10.0
10.0
8.0
10.0
10.0
8.0
10.0
8.0
6.0
10.0
10.0
8.0
8.0
7.0
8.0
10.0
10.0
8.0
9.0
10.0
8.0
9.0
10.0
10.0
8.3
10.0
9.0
9.0
8.3
9.0
9.0
9.0
9.0
10.0
10.0
10.0
10.0
10.0
9.0
10.0
9.0
10.0
10.0
10.0
7.0
9.0
8.0
8.0
10.0
9.0
8.0
7.0
10.0
9.0
9.0
10.0
10.0
8.0
7.0
9.0
9.0
10.0
5.0
7.0
5.0
10.0
10.0
10.0
10.0
8.0
10.0
10.0
7.0
10.0
10.0
9.0
7.0
10.0
10.0
8.0
7.0
10.0
9.0
10.0
8.0
8.0
10.0
https://www.agoda.com/zh-tw/very-happy-homestay/hotel/all/taipei-tw.html?finalPriceView=1&isShowMobileAppPrice=false&cid=1891473&numberOfBedr

9.6
10.0
10.0
9.2
9.6
10.0
9.2
9.6
10.0
10.0
9.2
9.6
10.0
10.0
9.6
10.0
8.8
10.0
10.0
9.2
9.2
9.2
8.8
10.0
10.0
10.0
9.6
8.8
9.2
8.4
10.0
8.8
10.0
9.2
8.4
8.8
8.4
9.2
9.6
9.6
9.2
8.4
8.4
8.4
8.0
9.2
8.8
8.0
8.4
8.4
8.0
8.4
8.8
8.0
8.4
8.8
9.2
7.6
7.6
8.0
8.0
8.8
7.6
8.0
8.0
7.2
7.6
8.0
8.4
8.4
8.0
7.6
8.0
7.2
7.6
7.6
8.0
7.2
7.6
6.4
7.2
7.6
7.2
6.0
6.4
6.4
6.0
6.0
5.2
6.4
5.6
4.8
5.6
3.6
3.2
10.0
8.4
10.0
8.4
8.4
8.7
8.8
9.2
5.6
9.2
10.0
8.8
7.0
10.0
8.4
8.0
6.4
9.2
8.8
8.8
8.8
9.2
10.0
10.0
8.4
9.6
8.0
8.8
9.2
6.0
10.0
6.4
9.6
10.0
9.2
7.6
10.0
10.0
10.0
10.0
6.8
9.6
8.8
10.0
8.8
5.6
8.8
10.0
10.0
9.2
10.0
10.0
10.0
10.0
10.0
9.6
5.6
8.4
10.0
3.6
8.4
3.6
9.6
9.6
8.4
7.2
9.6
10.0
10.0
7.2
10.0
9.6
10.0
9.6
6.8
6.0
7.2
7.6
5.6
9.2
7.6
8.0
4.0
9.2
6.8
5.2
10.0
7.6
5.2
7.2
5.2
5.2
6.8
3.6
3.6
8.0
5.6
4.4
10.0
6.0
2.0
6.3
8.0
7.7
4.3
7.0
6.3
5.3
6.7
8.7
8.0
7.7
8.7
8.3
7.0
5.0
6.7
7.0
6.7
6.7
4.0
7.3
10.0
10.0
10.0
10.0
9.6
10.0
10.0
10.0
10.0
10.0
10.0
10.0
10.0
10.0
10.0
10.0
9.6
9.6
10.

#### 檢查爬取狀況

In [3]:
# Print the name and list index of every hotel whose name contains the keyword.
listResult = readJson()
for index, obj in enumerate(listResult):
    if '員林鎮的1臥室公寓' not in obj['name']:
        continue
    print(obj['name'], index)

總間數:498


In [14]:
# Report which hotels of the list already have a CSV file: a hotel is printed
# (name and list index) once for every CSV path that contains its name.
listResult = readJson()

# Paths of all CSV files in the output folder.
csv_files = glob.glob('agodaHotelComments_台北市_1000_/*.csv')

for index, obj in enumerate(listResult):
    for csv_path in csv_files:
        if obj['name'] not in csv_path:
            continue
        print(obj['name'], index)

總間數:1127
摩莎曼拉精品旅館 - 台北車站館 (Moshamanla-Main Station) 858
富邦藝旅Folio Hotel Daan Taipei (Folio Hotel Daan Taipei) 877
台北官邸飯店 (Grandee Taipei) 906
台北亞太H帝國 (Taipei AP H Imperial) 1002
新尚旅店(防疫旅館) (Hotel 73 (Quarantine Hotel)) 1016
摩莎曼拉精品旅館 - 台北車站館 (Moshamanla-Main Station) 1021
富邦藝旅Folio Hotel Daan Taipei (Folio Hotel Daan Taipei) 1031
Studio Away From City 1032
珂曼旅館 (Comma Boutique Hotel ) 1037
烏來足立司拉別館 - 梯田背包溫泉旅店  (Sla Ulay Backpacker Hostel) 1038
悠逸行旅 (Uinn Travel Hostel) 1041
萬華區樓中樓 (END舊東京4A) 1046
太空艙旅舍衡陽館 (Space Inn Hengyang Branch) 1051
薆悅精品 (inhouse Boutique) 1070
福容大飯店 - 三鶯 (Fullon Sanyin Hotel) 1080
萬事達旅店西門店 (Wonstar Hotel Ximen II) 1090
台北家美飯店 (Welcome Hotel) 1100
Qiaoyuan Bed and Breakfast 1108
新莊客旅 (Landmark Inn) 1114
台北官邸飯店 (Grandee Taipei) 1124
