In [25]:
import pandas as pd
import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
import re

# Define your hotel data DataFrame here
hotels_df = pd.read_csv("hotels_data.csv")

class HotelCrawler:
    def __init__(self, url):
        self.url = url
        self.options = Options()
        self.options.add_argument('--headless')
        self.options.add_argument('--no-sandbox')
        self.options.add_argument('--disable-dev-shm-usage')
        self.options.add_argument('user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36')

    def fetch_html(self):
        with webdriver.Chrome(options=self.options) as driver:
            driver.get(self.url)
            time.sleep(3)  # Adjust sleep time as necessary
            return driver.page_source

    def parse_html(self, html):
        return BeautifulSoup(html, "html.parser")

    def extract_hotel_name(self, soup):
        raise NotImplementedError("This method should be overridden by subclasses")

    def extract_hotel_data(self, soup):
        raise NotImplementedError("This method should be overridden by subclasses")

    def extract_one_night_rate(self, price):
        price = re.sub(r'[^\d.]', '', price)
        return int(price)

    def crawl(self):
        html = self.fetch_html()
        soup = self.parse_html(html)
        hotel_name = self.extract_hotel_name(soup)
        hotel_data = self.extract_hotel_data(soup)
        return hotel_name, hotel_data

    def extract_hotel_data(self, soup):
        room_table = soup.find_all("div", {"class": "MasterRoom"})
        df = pd.DataFrame(columns=["room_price", "room_name"])
        for room in room_table:
            room_name = room.find(class_="MasterRoom__HotelName").text
            room_price = room.find(class_="ChildRoom__PriceContainer").text
            room_price = self.extract_one_night_rate(room_price)
            df.loc[len(df)] = [room_price, room_name]
        return df

class BookingHotelCrawler(HotelCrawler):
    def extract_hotel_name(self, soup):
        return soup.find("h2", {"class": "pp-header__title"}).text

    # def extract_hotel_data(self, soup):
    #     df_rooms_table = pd.read_html(str(soup))[0]
    #     self.df_rooms_table = df_rooms_table
    #     df = pd.DataFrame(columns=["room_price", "room_name"])
    #     time.sleep(3)
    #     df["room_name"] = [desc[:desc.find("룸") + 1] for desc in df_rooms_table["객실 유형"]]
    #     df["room_price"] = [self.extract_one_night_rate(price.split("현재 요금 ")[1].split()[0]) for price in df_rooms_table["오늘 판매가"]]
    #     return df
    def extract_hotel_data(self, soup):
        df_rooms_table = pd.read_html(str(soup))[0]
        self.df_rooms_table = df_rooms_table
        df = pd.DataFrame(columns=["room_price", "room_name"])
        time.sleep(3)
        df["room_name"] = [desc[:desc.find("룸") + 1] for desc in df_rooms_table["객실 유형"]]
        print("1")
        df["room_price"] = [self.extract_one_night_rate(price.split("현재 요금 ")[1].split()[0]) for price in df_rooms_table["오늘 판매가"]]
        print("2")
        return df

class HotelCrawlerManager:
    def __init__(self, hotel_name, check_in, check_out, adults=2, children=0, rooms=1):
        self.hotel_name = hotel_name
        self.check_in = check_in
        self.check_out = check_out
        self.adults = adults
        self.children = children
        self.rooms = rooms
        self.hotel_id = hotels_df[hotels_df["Hotel Name"] == hotel_name]

    def get_crawler(self, site):
        url = self.generate_url(site)
        if site == "agoda":
            return AgodaHotelCrawler(url)
        elif site == "booking":
            return BookingHotelCrawler(url)
        elif site == "expedia":
            return ExpediaHotelCrawler(url)
        elif site == "trip":
            return TripHotelCrawler(url)
        elif site == "yanolja":
            return YanoljaHotelCrawler(url)
        elif site == "yeogi":
            return YeogiHotelCrawler(url)
        else:
            raise ValueError("Unsupported site")

    def generate_url(self, site):
        if site == "agoda":
            agoda_id = self.hotel_id["Agoda ID"].values[0]
            return f"https://www.agoda.com/ko-kr/{agoda_id}/hotel/seoul-kr.html?checkIn={self.check_in}&los=2"
        elif site == "booking":
            booking_id = self.hotel_id["Booking.com ID"].values[0]
            return f"https://www.booking.com/hotel/kr/{booking_id}.ko.html?checkin={self.check_in}&checkout={self.check_out}&group_adults={self.adults}&group_children={self.children}&no_rooms={self.rooms}"
        elif site == "expedia":
            expedia_id = self.hotel_id["Expedia ID"].values[0]
            return f"https://www.expedia.co.kr/{expedia_id}.Hotel-Information?chkin={self.check_in}&chkout={self.check_out}"
        elif site == "trip":
            trip_id = self.hotel_id["Trip.com ID"].values[0]
            return f"https://kr.trip.com/hotels/detail/?hotelId={trip_id}&checkIn={self.check_in}&checkOut={self.check_out}&adult={self.adults}&children={self.children}"
        elif site == "yanolja":
            yanolja_id = self.hotel_id["Yanolja ID"].values[0]
            return f"https://place-site.yanolja.com/places/{yanolja_id}"
        elif site == "yeogi":
            yeogi_id = self.hotel_id["Yeogi ID"].values[0]
            return f"https://www.yeogi.com/domestic-accommodations/{yeogi_id}?checkIn={self.check_in}&checkOut={self.check_out}&personal={self.adults}"
        else:
            raise ValueError("Unsupported site")

    def crawl(self, site):
        crawler = self.get_crawler(site)
        hotel_name, hotel_data = crawler.crawl()
        return hotel_name, hotel_data


In [26]:
url = "https://www.booking.com/hotel/kr/loisir-seoul-myeongdong.ko.html?aid=304142&label=gen173nr-1FCAsofUIUc2t5cGFyay1teWVvbmdkb25nLTNIF1gEaH2IAQGYARe4AQfIAQzYAQHoAQH4AQOIAgGoAgO4Arazg7IGwAIB0gIkMzNkM2MyZGQtYzZkMy00MmMyLWJkMmYtNDZlMmY3YzY5OGJh2AIF4AIB&sid=f43cc81a1aa33436cef9262f4ee7b75a&all_sr_blocks=126897619_0_2_0_0;checkin=2024-05-14;checkout=2024-05-15;dest_id=-716583;dest_type=city;dist=0;group_adults=2;group_children=0;hapos=1;highlighted_blocks=126897619_0_2_0_0;hpos=1;matching_block_id=126897619_0_2_0_0;no_rooms=1;req_adults=2;req_children=0;room1=A%2CA;sb_price_type=total;sr_order=popularity;sr_pri_blocks=126897619_0_2_0_0__11761062;srepoch=1715526140;srpvid=9fa469bc2c6603a6;type=total;ucfs=1&#hotelTmpl"

In [29]:
booking_com = HotelCrawlerManager("밀리오레 호텔 서울 명동", "2024-05-14", "2024-05-15").crawl("booking")

  df_rooms_table = pd.read_html(str(soup))[0]


IndexError: list index out of range

In [None]:
booking_com.

In [1]:
from crawling2 import AgodaHotelCrawler, BookingHotelCrawler, ExpediaHotelCrawler, TripHotelCrawler, YanoljaHotelCrawler, YeogiHotelCrawler
from hotelsurl import HotelURLManager
import datetime
import pandas as pd
import concurrent.futures
from tqdm import tqdm

def crawl_hotel(crawler, url):
    attempt = 0
    while attempt < 2:
        try:
            crawler_instance = crawler(url)
            return crawler_instance.crawl()
        except Exception as e:
            print(f"오류 발생: {e}. {crawler},{url} 재시도 중...")
            attempt += 1
            if attempt == 2:
                print("재시도 실패. 오류 반환합니다.")
                return None, None

def fetch_and_store_details(hotel_name, url, crawler):
    name, price = crawl_hotel(crawler, url)
    if name and (type(price) is pd.core.frame.DataFrame):
        site = url.split('/')[2]
        return {'Hotel Name': hotel_name, 'Price': price, 'Date': str(datetime.date.today()), 'Site': site}
    return None

def get_lowest_price(df, hotel_name):
    try:
        lowest_price = df[df["Hotel Name"] == hotel_name].sort_values("room_price").iloc[0].room_price
    except IndexError:
        return "No data available"
    return lowest_price

In [2]:

def get_hotel_lowest_price():
    start_time = datetime.datetime.now()  # 처리 시작 시간
    
    # 호텔 데이터 로드
    hotels = pd.read_csv('hotels_data.csv')
    hotel_names = hotels["Hotel Name"].to_list()
    
    # 크롤러 인스턴스 리스트
    crawlers = [AgodaHotelCrawler, BookingHotelCrawler, ExpediaHotelCrawler, TripHotelCrawler, YanoljaHotelCrawler, YeogiHotelCrawler]

    results = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=6) as executor:
        futures = []
        for hotel_name in hotel_names:
            hotel_url = HotelURLManager(hotel_name, str(datetime.date.today()), str(datetime.date.today() + datetime.timedelta(days=1)))
            urls = [hotel_url.agoda_url(), hotel_url.booking_url(), hotel_url.expedia_url(), hotel_url.trip_url(), hotel_url.yanolja_url(), hotel_url.yeogi_url()]
            for url, crawler in zip(urls, crawlers):
                futures.append(executor.submit(fetch_and_store_details, hotel_name, url, crawler))

        for future in tqdm(concurrent.futures.as_completed(futures), total=len(futures), desc="Crawling Hotels"):
            result = future.result()
            if result:
                results.append(result)

    # 결과 데이터 처리
    dataframes = [result['Price'] for result in results if result]
    for df, result in zip(dataframes, results):
        df['Hotel Name'] = result['Hotel Name']
        df['Date'] = result['Date']
        df['Site'] = result['Site']

    all_data = pd.concat(dataframes, ignore_index=True)
    json_data = {name: get_lowest_price(all_data, name) for name in hotel_names}

    # 메타데이터 생성
    end_time = datetime.datetime.now()  # 처리 종료 시간
    metadata = {
        "generatedAt": datetime.datetime.now().isoformat(),
        "processingTime": str(end_time - start_time),
        "count": len(json_data)
    }

    # 최종 JSON 데이터 반환
    return {
        "status": "success",
        "metadata": metadata,
        "data": json_data
    }

# get_hotel_lowest_price 함수 호출 예시
# result = get_hotel_lowest_price()
# print(result)

In [5]:

crawlers = [AgodaHotelCrawler, BookingHotelCrawler, ExpediaHotelCrawler, TripHotelCrawler, YanoljaHotelCrawler, YeogiHotelCrawler]

In [4]:
hotel_names

['호텔 스카이파크 명동 3호점',
 '밀리오레 호텔 서울 명동',
 '소테츠 프레사 인 서울 명동',
 '나인트리 호텔 명동',
 '프린스 호텔 명동',
 '스탠포드 호텔 명동']

In [10]:
# list of hotel names
hotel_names = ['호텔 스카이파크 명동 3호점', '밀리오레 호텔 서울 명동']
sites = ["agoda", "expedia"]
start_date = datetime.date.today()

crawlers = {"agoda":AgodaHotelCrawler, 
            "booking":BookingHotelCrawler,
            "expedia":ExpediaHotelCrawler,
            "trip":TripHotelCrawler,
            "yanolja":YanoljaHotelCrawler,
            "yeogi":YeogiHotelCrawler}
urls = {"agoda": HotelURLManager.agoda_url, 
        "booking": HotelURLManager.booking_url, 
        "expedia": HotelURLManager.expedia_url, 
        "trip": HotelURLManager.trip_url, 
        "yanolja": HotelURLManager.yanolja_url, 
        "yeogi": HotelURLManager.yeogi_url}

results = []

for hotel_name in hotel_names:
    for site in sites:
        hotel_url = HotelURLManager(hotel_name, str(start_date), str(start_date + datetime.timedelta(days=1)))
        url = urls[site](hotel_url)
        crawler = crawlers[site]
        result = fetch_and_store_details(hotel_name, url, crawler)
        results.append(result)

오류 발생: 'NoneType' object has no attribute 'text'. <class 'crawling2.AgodaHotelCrawler'>,https://www.agoda.com/ko-kr/hotel-skypark-myeongdong-iii/hotel/seoul-kr.html?checkIn=2024-05-11&los=2 재시도 중...
오류 발생: 'NoneType' object has no attribute 'text'. <class 'crawling2.AgodaHotelCrawler'>,https://www.agoda.com/ko-kr/hotel-skypark-myeongdong-iii/hotel/seoul-kr.html?checkIn=2024-05-11&los=2 재시도 중...
재시도 실패. 오류 반환합니다.
오류 발생: 'NoneType' object has no attribute 'text'. <class 'crawling2.ExpediaHotelCrawler'>,https://www.expedia.co.kr/Seoul-Hotels-HOTEL-SKYPARK-Myeongdong-III.h4465738.Hotel-Information?chkin=2024-05-11&chkout=2024-05-12 재시도 중...
오류 발생: 'NoneType' object has no attribute 'text'. <class 'crawling2.ExpediaHotelCrawler'>,https://www.expedia.co.kr/Seoul-Hotels-HOTEL-SKYPARK-Myeongdong-III.h4465738.Hotel-Information?chkin=2024-05-11&chkout=2024-05-12 재시도 중...
재시도 실패. 오류 반환합니다.
오류 발생: 'NoneType' object has no attribute 'text'. <class 'crawling2.ExpediaHotelCrawler'>,https://www.expedi

In [11]:
results

[None,
 None,
 {'Hotel Name': '밀리오레 호텔 서울 명동', 'Price': Empty DataFrame
  Columns: [room_price, room_name]
  Index: [], 'Date': '2024-05-11', 'Site': 'www.agoda.com'},
 None]

In [7]:
get_hotel_price(hotel_names, start_date, sites)

Crawling Hotels:  25%|██▌       | 1/4 [00:12<00:38, 12.77s/it]

오류 발생: 'NoneType' object has no attribute 'text'. <class 'crawling2.AgodaHotelCrawler'>,https://www.agoda.com/ko-kr/hotel-skypark-myeongdong-iii/hotel/seoul-kr.html?checkIn=2024-05-11&los=2 재시도 중...
오류 발생: 'NoneType' object has no attribute 'find_all'. <class 'crawling2.ExpediaHotelCrawler'>,https://www.booking.com/hotel/kr/loisir-seoul-myeongdong.ko.html?checkin=2024-05-11&checkout=2024-05-12&group_adults=2&group_children=0&no_rooms=1 재시도 중...
오류 발생: 'NoneType' object has no attribute 'find_all'. <class 'crawling2.ExpediaHotelCrawler'>,https://www.booking.com/hotel/kr/skypark-myeongdong-3.ko.html?checkin=2024-05-11&checkout=2024-05-12&group_adults=2&group_children=0&no_rooms=1 재시도 중...


Crawling Hotels:  50%|█████     | 2/4 [00:26<00:26, 13.37s/it]

오류 발생: 'NoneType' object has no attribute 'text'. <class 'crawling2.AgodaHotelCrawler'>,https://www.agoda.com/ko-kr/hotel-skypark-myeongdong-iii/hotel/seoul-kr.html?checkIn=2024-05-11&los=2 재시도 중...
재시도 실패. 오류 반환합니다.


Crawling Hotels:  75%|███████▌  | 3/4 [00:39<00:12, 12.97s/it]

오류 발생: 'NoneType' object has no attribute 'find_all'. <class 'crawling2.ExpediaHotelCrawler'>,https://www.booking.com/hotel/kr/loisir-seoul-myeongdong.ko.html?checkin=2024-05-11&checkout=2024-05-12&group_adults=2&group_children=0&no_rooms=1 재시도 중...
재시도 실패. 오류 반환합니다.


Crawling Hotels: 100%|██████████| 4/4 [00:39<00:00,  9.89s/it]

오류 발생: 'NoneType' object has no attribute 'find_all'. <class 'crawling2.ExpediaHotelCrawler'>,https://www.booking.com/hotel/kr/skypark-myeongdong-3.ko.html?checkin=2024-05-11&checkout=2024-05-12&group_adults=2&group_children=0&no_rooms=1 재시도 중...
재시도 실패. 오류 반환합니다.





In [36]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
import time
import pandas as pd
import re
import json

class HotelCrawler:
    def __init__(self, url):
        self.url = url
        self.options = Options()
        self.options.add_argument('--headless')
        self.options.add_argument('--no-sandbox')
        self.options.add_argument('--disable-dev-shm-usage')
        self.options.add_argument('user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36')

    def fetch_html(self):
        with webdriver.Chrome(options=self.options) as driver:
            driver.get(self.url)
            time.sleep(3)  # Adjust sleep time as necessary
            return driver.page_source

    def parse_html(self, html):
        return BeautifulSoup(html, "html.parser")

    def extract_hotel_name(self, soup):
        raise NotImplementedError("This method should be overridden by subclasses")

    def extract_hotel_data(self, soup):
        raise NotImplementedError("This method should be overridden by subclasses")
        
    def extract_one_night_rate(self, price):
        price = re.sub(r'[^\d.]', '', price)
        return int(price)

    def crawl(self):
        html = self.fetch_html()
        self.soup = self.parse_html(html)
        hotel_name = self.extract_hotel_name(self.soup)
        hotel_data = self.extract_hotel_data(self.soup)
        return hotel_name, hotel_data

In [37]:
class BookingHotelCrawler(HotelCrawler):
    def extract_hotel_name(self, soup):
        return soup.find("h2", {"class": "pp-header__title"}).text

    def extract_hotel_data(self, soup):
        # rooms_rooms_table = soup.find("div", {"id": "rooms_table"})
        # rooms_table = rooms_rooms_table.find("table")
        df_rooms_table = pd.read_html(str(soup))[0]
        df = pd.DataFrame(columns=["room_price", "room_name"])
        time.sleep(3)
        df["room_name"] = [desc[:desc.find("룸") + 1] for desc in df_rooms_table["객실 유형"]]
        df["room_price"] = [self.extract_one_night_rate(price.split("현재 요금 ")[1].split()[0]) for price in df_rooms_table["오늘 판매가"]]    
        return df

In [38]:
url = "https://www.booking.com/hotel/kr/skypark-myeongdong-3.ko.html?aid=304142&label=gen173nr-1FCAsofUIUc2t5cGFyay1teWVvbmdkb25nLTNIF1gEaH2IAQGYARe4AQfIAQzYAQHoAQH4AQOIAgGoAgO4AvLUgLIGwAIB0gIkMTdkYjZlYzItMTk3Zi00YjY3LWFmM2YtY2E2ZjMyODMwMTM52AIF4AIB&sid=f43cc81a1aa33436cef9262f4ee7b75a&all_sr_blocks=30686014_357521673_0_2_0;checkin=2024-05-12;checkout=2024-05-13;dest_id=-716583;dest_type=city;dist=0;group_adults=2;group_children=0;hapos=1;highlighted_blocks=30686014_357521673_0_2_0;hpos=1;matching_block_id=30686014_357521673_0_2_0;no_rooms=1;req_adults=2;req_children=0;room1=A%2CA;sb_price_type=total;sr_order=popularity;sr_pri_blocks=30686014_357521673_0_2_0__10614240;srepoch=1715481224;srpvid=591b11fffb450079;type=total;ucfs=1&#hotelTmpl"

In [39]:
booking = BookingHotelCrawler(url=url)

In [40]:
booking.crawl()

  df_rooms_table = pd.read_html(str(soup))[0]


('호텔 스카이파크 명동 3호점',
    room_price            room_name
 0      106142                     
 1      132678             스탠다드 더블룸
 2      134521                  트윈룸
 3      158049                  트윈룸
 4      134521                  더블룸
 5      155823                  더블룸
 6      139478  [K-라면 패키지] 스탠다드 더블룸
 7      139478  [K-라면 패키지] 스탠다드 트윈룸)

In [18]:
expdia.soup

<html><head><title>로봇인가요, 아닌가요?</title><meta charset="utf-8"/><meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible"/><meta content="width=device-width, initial-scale=1.0" name="viewport"/><meta content="yes" name="apple-mobile-web-app-capable"/><meta content="yes" name="mobile-web-app-capable"/><meta content="#000000" name="theme-color"/><link href="/favicon.ico" rel="shortcut icon" sizes="16x16 32x32" type="image/x-icon"/><link href="/favicon.ico" rel="icon" sizes="16x16 32x32" type="image/x-icon"/><link href="/favicon.ico" rel="apple-touch-icon"/><style>:root{background-color:#eff3f7;cursor:default;font-family:Centra No2,-apple-system,BlinkMacSystemFont,Segoe UI,Roboto,Helvetica Neue,Arial,sans-serif;-webkit-font-smoothing:antialiased;-webkit-text-size-adjust:100%;text-size-adjust:100%}body{--egds-legacy-background-color:#eff3f7;background-color:var(--egds-surface-container-variant,var(--egds-legacy-background-color));color:var(--egds-on-surface,unset)}:root,a,abbr,address,ar

In [72]:
agoda_my=AgodaHotelCrawler(url=url)

In [73]:
agoda_my.crawl()

('호텔 스카이파크 명동 3호점 (Hotel Skypark Myeongdong III)',
   room_price                                          room_name
 0     134694                 스탠다드 트윈룸 A  (Standard Twin Room A)
 1     134694                스탠다드 더블룸 A (Standard Double Room A)
 2     146000  스탠다드 트윈 (싱글베드 2개) (Standard Twin (2 Single beds))
 3     146000          스탠다드 더블 (침대 1개) (Standard Double (1 Bed))
 4     140467       스타샤인 스탠다드 트윈룸 (Starshine Standard Twin Room)
 5     140467     스타샤인 스탠다드 더블룸 (Starshine Standard Double Room)
 6     163177  스탠다드 트윈룸 (특가 상품) (Standard Twin Room - Special...
 7     163177  스탠다드 더블룸 (특가 상품) (Standard Double Room - Speci...)

In [55]:
agoda_my.room.find(class_="MasterRoom__HotelName").text

'스탠다드 트윈룸 A  (Standard Twin Room A)'

In [44]:
agoda_my.crawl()

NameError: name 'room' is not defined

In [29]:
agoda_my.soup

<html class="chrome no-js" dir="ltr" kite-lang="ko" lang="ko"><head><script async="" src="https://www.googletagmanager.com/gtag/js?id=G-T408Z268D2&amp;l=dataLayer&amp;cx=c" type="text/javascript"></script><script async="" src="https://www.googletagmanager.com/gtm.js?id=GTM-5TXL8JK&amp;gtm_auth=&amp;gtm_preview=&amp;gtm_cookies_win=x"></script><script>
      (function(w,d,s,l,i){w[l]=w[l]||[];
        w[l].push({'gtm.start': new Date().getTime(),event:'gtm.js', });
        var f=d.getElementsByTagName(s)[0],j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';
        j.async=true;j.src='https://www.googletagmanager.com/gtm.js?id='+i+dl+'&gtm_auth=&gtm_preview=&gtm_cookies_win=x';
        f.parentNode.insertBefore(j,f);
      })(window,document,'script','dataLayer','GTM-5TXL8JK');</script>
<meta charset="utf-8"/>
<meta content="width=device-width, initial-scale=1.0" name="viewport"/>
<meta content="origin-when-cross-origin" name="referrer"/>
<title>호텔 스카이파크 명동 3호점 (Hotel Skypark Myeongdong

In [2]:

# import needed libraries
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
import time

import pandas as pd

import smtplib
from email.message import EmailMessage

# import schedule

flight_inputs = {
    'Departure': "LAX", 
    'Destination': "ABQ",
    'Date_leaving': "May 5, 2024",
    'Date_returning': "May 15, 2024"
}

def find_cheapest_flights(flight_info):
    print(flight_info)
    PATH = r'/Users/mateus/Desktop/personalProjects/chromedriver'
    # Create Chromeoptions instance 
    options = webdriver.ChromeOptions() 
    
    # Adding argument to disable the AutomationControlled flag 
    options.add_argument("--disable-blink-features=AutomationControlled") 
    
    # Exclude the collection of enable-automation switches 
    options.add_experimental_option("excludeSwitches", ["enable-automation"]) 
    
    # Turn-off userAutomationExtension 
    options.add_experimental_option("useAutomationExtension", False) 
    
    # Setting the driver path and requesting a page 
    driver = webdriver.Chrome(options=options) 
    
    # Changing the property of the navigator value for webdriver to undefined 
    driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
    
    leaving_from = flight_info['Departure']
    going_to = flight_info['Destination']
    trip_leave_date = flight_info['Date_leaving']
    trip_date_return = flight_info['Date_returning']

    driver.get('https://www.expedia.com/');
    time.sleep(.5) 

    #click on flights
    flight_xpath = '//a[@aria-controls="search_form_product_selector_flights"]'
    flight_element = WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.XPATH, flight_xpath)))
    flight_element.click()
    time.sleep(.5)

    #click on roundtrip
    roundtrip_xpath = '//a[@aria-controls="FlightSearchForm_ROUND_TRIP"]'
    roundtrip_element = WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.XPATH, roundtrip_xpath)))
    time.sleep(.5)

    """ fill out form for departure """
    #click on leaving from
    leaving_from_xpath = '//button[@aria-label="Leaving from"]'
    leaving_from_element = WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.XPATH, leaving_from_xpath)))
    # leaving_from_element.clear()
    leaving_from_element.click()
    print(leaving_from)
    print(leaving_from_element)
    leaving_from_text_xpath = '//input[@id="origin_select"]'
    leaving_from_text_element = WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.XPATH, leaving_from_text_xpath)))
    time.sleep(1)
    #leaving_from_text_element.send_keys(leaving_from)
    for i in range(len(leaving_from)):
        leaving_from_text_element.send_keys(leaving_from[i])
        time.sleep(.05)
    time.sleep(1)
    leaving_from_text_element.send_keys(Keys.DOWN,Keys.RETURN)
    time.sleep(1)

    """" fill out form for destination """
    going_to_xpath = '//button[@aria-label="Going to"]'
    going_to_element = WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.XPATH, going_to_xpath)))
    going_to_element.click()
    time.sleep(1)
    leaving_from_text_xpath = '//input[@id="destination_select"]'
    leaving_from_text_element = WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.XPATH, leaving_from_text_xpath)))
    time.sleep(1)
    for i in range(len(going_to)):
        leaving_from_text_element.send_keys(going_to[i])
        time.sleep(.05)
    time.sleep(1)
    leaving_from_text_element.send_keys(Keys.DOWN, Keys.RETURN)
    time.sleep(1)

    """ get the appropriate dates of the flights """

    """ click on the departing date"""
    date_xpath = '//button[@data-stid="uitk-date-selector-input1-default"]'
    calender_element = WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.XPATH, date_xpath)))
    calender_element.click()
    time.sleep(.5)

    trip_date_xpath = '//div[contains(@aria-label, "{}")]/..'.format(trip_leave_date)

    departing_date_element = ""
    while departing_date_element == "":
        try:
            departing_date_element = WebDriverWait(driver,3).until(EC.presence_of_element_located((By.XPATH, trip_date_xpath)))
            departing_date_element.click() #Click on the departure date
        except TimeoutException:
           departing_date_element= ""
           next_month_xpath = '//button[@data-stid="uitk-calendar-navigation-controls-next-button"]'
           next_month_element = WebDriverWait(driver,2).until(EC.presence_of_element_located((By.XPATH, next_month_xpath)))
           next_month_element.click()
           time.sleep(1)
    
    """click on return date"""
    return_trip_date_xpath = '//div[contains(@aria-label, "{}")]/..'.format(trip_date_return)
    return_date_element = ""
    while return_date_element == "":
        try:
            return_date_element = WebDriverWait(driver,3).until(EC.presence_of_element_located((By.XPATH, return_trip_date_xpath)))
            return_date_element.click() #Click on the return date
        except TimeoutException:
           return_date_element= ""
           next_month_xpath = '//button[@data-stid="uitk-calendar-navigation-controls-next-button"]'
           next_month_element = WebDriverWait(driver,3).until(EC.presence_of_element_located((By.XPATH, next_month_xpath)))
           next_month_element.click()
           time.sleep(1)
    
    #exit out of the calender and return to the home page
    calender_element.send_keys(Keys.ESCAPE)

    """ click on finding flights"""
    search_flights_xpath = '//button[@id="search_button"]'
    search_flights_element = WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.XPATH, search_flights_xpath)))
    search_flights_element.click()
    time.sleep(15)

    """ sort the flights by lowest price"""
    nonstop_flights_xpath = '//input[@name="NUM_OF_STOPS"]'
    nonstop_flights_element = WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.XPATH, nonstop_flights_xpath)))
    if len(driver.find_elements(By.XPATH,nonstop_flights_xpath)) > 0:
        print("found nonstop flights")
        nonstop_flights_element = WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.XPATH, nonstop_flights_xpath)))
        nonstop_flights_element.click()
        time.sleep(1)
    time.sleep(1)

find_cheapest_flights(flight_inputs)

{'Departure': 'LAX', 'Destination': 'ABQ', 'Date_leaving': 'May 5, 2024', 'Date_returning': 'May 15, 2024'}


SessionNotCreatedException: Message: session not created: Chrome failed to start: exited normally.
  (session not created: DevToolsActivePort file doesn't exist)
  (The process started from chrome location /usr/bin/google-chrome is no longer running, so ChromeDriver is assuming that Chrome has crashed.)
Stacktrace:
#0 0x56cf86de5e43 <unknown>
#1 0x56cf86ad44e7 <unknown>
#2 0x56cf86b08a66 <unknown>
#3 0x56cf86b049c0 <unknown>
#4 0x56cf86b4d7f0 <unknown>
#5 0x56cf86b411f3 <unknown>
#6 0x56cf86b1228a <unknown>
#7 0x56cf86b12c5e <unknown>
#8 0x56cf86daa16b <unknown>
#9 0x56cf86dae0bb <unknown>
#10 0x56cf86d96281 <unknown>
#11 0x56cf86daec22 <unknown>
#12 0x56cf86d7b13f <unknown>
#13 0x56cf86dd4f98 <unknown>
#14 0x56cf86dd5170 <unknown>
#15 0x56cf86de4f94 <unknown>
#16 0x7c56cd494ac3 <unknown>


In [49]:
hotels = pd.read_csv('hotels_data.csv')
hotel_names = hotels["Hotel Name"].to_list()
# Usage
hotel_manager = HotelCrawlerManager(hotel_names[4], "2024-05-12", "2024-05-13")
hotel_name, hotel_data = hotel_manager.crawl("yeogi")
print(hotel_name)
print(hotel_data)

호텔 프린스
    room_price                        room_name
0       100000  [반나절 호캉스-숙박불가] 트윈 A (당일 8시간 이용)
1       110000  [반나절 호캉스-숙박불가] 더블 A (당일 8시간 이용)
2       110000  [반나절 호캉스-숙박불가] 트윈 B (당일 8시간 이용)
3       120000                             더블 A
4       120000                             트윈 B
5       120000                             트윈 A
6       120000  [반나절 호캉스-숙박불가] 더블 B (당일 8시간 이용)
7       120000  [반나절 호캉스-숙박불가] 트윈 C (당일 8시간 이용)
8       130000                             트윈 C
9       130000                             더블 B
10      160000                              트리플
11      200000                           디럭스 트윈
12      220000                               온돌
13      240000                      스위트 (남산타워뷰)


In [57]:
hotel_names

['호텔 스카이파크 명동 3호점',
 '밀리오레 호텔 서울 명동',
 '소테츠 프레사 인 서울 명동',
 '나인트리 호텔 명동',
 '프린스 호텔 명동',
 '스탠포드 호텔 명동']

In [6]:
from crawling import HotelCrawlerManager
import pandas as pd
import datetime

hotel_names = ['호텔 스카이파크 명동 3호점',
#  '밀리오레 호텔 서울 명동',
#  '소테츠 프레사 인 서울 명동',
#  '나인트리 호텔 명동',
#  '프린스 호텔 명동',
 '스탠포드 호텔 명동']

sites = ["yeogi", 
        #  "agoda", 
        #  "yanolja", 
         "booking"]

start_date = str(datetime.date.today())
end_date = str(datetime.date.today() + datetime.timedelta(days=1))
# hotel_manager = HotelCrawlerManager('호텔 스카이파크 명동 3호점', "2024-05-12", "2024-05-13")
# hotel_name, hotel_data = hotel_manager.crawl(sites[3])

In [7]:

def get_hotel_price(hotel_names:list, start_date, end_date, sites:list):
    results = []
    for hotel_name in hotel_names:
        hotel_manager = HotelCrawlerManager(hotel_name, start_date, end_date)
        try:
            for site in sites:
                _ , hotel_data = hotel_manager.crawl(site)
                results.append((hotel_name, hotel_data, site))
        except Exception as e:
            print(f"Error occurred while processing hotel {hotel_name} on site {site}: {e}")
    return results

def merge_hotel_data(result_list:list):
    df = pd.DataFrame(columns=["Hotel Name", "Site"])
    for hotel_name, hotel_data, site in result_list:
        hotel_data["Hotel Name"] = hotel_name
        hotel_data["Site"] = site
        df = pd.concat([df, hotel_data], ignore_index=True)
    return df

In [13]:
result

[('호텔 스카이파크 명동 3호점',
     room_price                      room_name       Hotel Name        Date  \
  0      107730                        스탠다드 트윈  호텔 스카이파크 명동 3호점  2024-05-12   
  1      107730  [13시 체크인&K-Ramen 패키지] 스탠다드 트윈  호텔 스카이파크 명동 3호점  2024-05-12   
  2      109395          [K-Ramen 패키지] 스탠다드 트윈  호텔 스카이파크 명동 3호점  2024-05-12   
  3      129690            [조식 2인 패키지] 스탠다드 더블  호텔 스카이파크 명동 3호점  2024-05-12   
  
      Site  
  0  yeogi  
  1  yeogi  
  2  yeogi  
  3  yeogi  ,
  'yeogi'),
 ('호텔 스카이파크 명동 3호점',
     room_price            room_name       Hotel Name        Date     Site
  0      122472             스탠다드 트윈룸  호텔 스카이파크 명동 3호점  2024-05-12  booking
  1      124173                  트윈룸  호텔 스카이파크 명동 3호점  2024-05-12  booking
  2      150676                  트윈룸  호텔 스카이파크 명동 3호점  2024-05-12  booking
  3      129782  [K-라면 패키지] 스탠다드 트윈룸  호텔 스카이파크 명동 3호점  2024-05-12  booking,
  'booking'),
 ('스탠포드 호텔 명동',
  Empty DataFrame
  Columns: [room_price, room_name, Hotel Name, Date, Site]

In [9]:
hotel_lowest_price = price_data.groupby(['Hotel Name', 'Site'])['room_price'].min().reset_index()

hotels = list(hotel_lowest_price['Hotel Name'])
prices = list(hotel_lowest_price['room_price'])
numeric_prices = [p if isinstance(p, int) else None for p in prices]

[('호텔 스카이파크 명동 3호점',
     room_price                      room_name       Hotel Name
  0      107730                        스탠다드 트윈  호텔 스카이파크 명동 3호점
  1      107730  [13시 체크인&K-Ramen 패키지] 스탠다드 트윈  호텔 스카이파크 명동 3호점
  2      109395          [K-Ramen 패키지] 스탠다드 트윈  호텔 스카이파크 명동 3호점
  3      129690            [조식 2인 패키지] 스탠다드 더블  호텔 스카이파크 명동 3호점,
  'yeogi'),
 ('호텔 스카이파크 명동 3호점',
     room_price            room_name
  0      122472             스탠다드 트윈룸
  1      124173                  트윈룸
  2      150676                  트윈룸
  3      129782  [K-라면 패키지] 스탠다드 트윈룸,
  'booking'),
 ('스탠포드 호텔 명동',
  Empty DataFrame
  Columns: [room_price, room_name]
  Index: [],
  'yeogi')]

In [11]:
def merge_hotel_data(result_list:list):
    df = pd.DataFrame(columns=["Hotel Name", "Date", "Site"])
    for hotel_name, hotel_data, site in result_list:
        hotel_data["Hotel Name"] = hotel_name
        hotel_data["Date"] = str(datetime.date.today())
        hotel_data["Site"] = site
        df = pd.concat([df, hotel_data], ignore_index=True)
    return df# merge_hotel_data(result)


In [14]:
price_data = merge_hotel_data(result)

  df = pd.concat([df, hotel_data], ignore_index=True)


In [18]:
price_data

Unnamed: 0,Hotel Name,Date,Site,room_price,room_name
0,호텔 스카이파크 명동 3호점,2024-05-12,yeogi,107730.0,스탠다드 트윈
1,호텔 스카이파크 명동 3호점,2024-05-12,yeogi,107730.0,[13시 체크인&K-Ramen 패키지] 스탠다드 트윈
2,호텔 스카이파크 명동 3호점,2024-05-12,yeogi,109395.0,[K-Ramen 패키지] 스탠다드 트윈
3,호텔 스카이파크 명동 3호점,2024-05-12,yeogi,129690.0,[조식 2인 패키지] 스탠다드 더블
4,호텔 스카이파크 명동 3호점,2024-05-12,booking,122472.0,스탠다드 트윈룸
5,호텔 스카이파크 명동 3호점,2024-05-12,booking,124173.0,트윈룸
6,호텔 스카이파크 명동 3호점,2024-05-12,booking,150676.0,트윈룸
7,호텔 스카이파크 명동 3호점,2024-05-12,booking,129782.0,[K-라면 패키지] 스탠다드 트윈룸


In [15]:
hotel_lowest_price = price_data.groupby(['Hotel Name', 'Site'])['room_price'].min().reset_index()

hotels = list(hotel_lowest_price['Hotel Name'])
prices = list(hotel_lowest_price['room_price'])
numeric_prices = [p if isinstance(p, int) else None for p in prices]

In [17]:
hotel_lowest_price

Unnamed: 0,Hotel Name,Site,room_price
0,호텔 스카이파크 명동 3호점,booking,122472.0
1,호텔 스카이파크 명동 3호점,yeogi,107730.0


In [21]:
type(prices[0])

float

In [126]:
# merge the results result[0][1], result[1][1] etc
df = pd.DataFrame(columns=["Hotel Name", "Price", "Date", "Site"])
for hotel_name, hotel_data in result:
    hotel_data["Hotel Name"] = hotel_name
    hotel_data["Date"] = start_date
    df = pd.concat([df, hotel_data], ignore_index=True)

In [127]:
df

Unnamed: 0,Hotel Name,Price,Date,Site,room_price,room_name
0,호텔 스카이파크 명동 3호점,,2024-05-12,,107730.0,스탠다드 더블
1,호텔 스카이파크 명동 3호점,,2024-05-12,,107730.0,스탠다드 트윈
2,호텔 스카이파크 명동 3호점,,2024-05-12,,107730.0,[13시 체크인&K-Ramen 패키지] 스탠다드 트윈
3,호텔 스카이파크 명동 3호점,,2024-05-12,,109395.0,[K-Ramen 패키지] 스탠다드 트윈
4,호텔 스카이파크 명동 3호점,,2024-05-12,,109395.0,[K-Ramen 패키지] 스탠다드 더블
...,...,...,...,...,...,...
162,스탠포드호텔 명동,,2024-05-12,,255600,"스탠다드 더블2인 조식, 주차불가"
163,스탠포드호텔 명동,,2024-05-12,,352000,패밀리 트윈주차불가
164,스탠포드호텔 명동,,2024-05-12,,385000,패밀리 트윈 엣지주차불가
165,스탠포드호텔 명동,,2024-05-12,,387600,"패밀리 트윈2인 조식, 주차불가"


In [106]:
for hotel in hotel_names:
    hotel_manager = HotelCrawlerManager(hotel, "2024-05-12", "2024-05-13")
    for site in sites:
        hotel_name, hotel_data = hotel_manager.crawl(site)
        print(hotel_name)
        print(hotel_data)


호텔 스카이파크 명동 3호점
   room_price                      room_name
0      107730                        스탠다드 더블
1      107730                        스탠다드 트윈
2      107730  [13시 체크인&K-Ramen 패키지] 스탠다드 더블
3      109395          [K-Ramen 패키지] 스탠다드 트윈
4      109395          [K-Ramen 패키지] 스탠다드 더블
호텔 스카이파크 명동 3호점 (Hotel Skypark Myeongdong III)
  room_price                                          room_name
0     102165                스탠다드 더블룸 A (Standard Double Room A)
1     116649  스탠다드 트윈 (싱글베드 2개) (Standard Twin (2 Single beds))
2     116649          스탠다드 더블 (침대 1개) (Standard Double (1 Bed))
3     118349       스타샤인 스탠다드 트윈룸 (Starshine Standard Twin Room)
4     118349     스타샤인 스탠다드 더블룸 (Starshine Standard Double Room)
5     128106  스탠다드 더블룸 (특가 상품) (Standard Double Room - Speci...
호텔 스카이파크 명동 3호점
   room_price                               room_name
0      110250  스탠다드 더블3객실 한정★K-RAMEN + 얼리 체크인 1PM PKG
1      110250                                 스탠다드 트윈
2      110250                            

  df_rooms_table = pd.read_html(str(soup))[0]


호텔 스카이파크 명동 3호점
   room_price            room_name
0      112674             스탠다드 더블룸
1      114239                  트윈룸
2      138622                  트윈룸
3      114239                  더블룸
4      136670                  더블룸
5      119400  [K-라면 패키지] 스탠다드 더블룸
6      119400  [K-라면 패키지] 스탠다드 트윈룸
호텔 스카이파크 명동 3호점
   room_price                      room_name
0      107730                        스탠다드 트윈
1      107730                        스탠다드 더블
2      107730  [13시 체크인&K-Ramen 패키지] 스탠다드 더블
3      109395          [K-Ramen 패키지] 스탠다드 더블
4      109395          [K-Ramen 패키지] 스탠다드 트윈
호텔 스카이파크 명동 3호점 (Hotel Skypark Myeongdong III)
  room_price                                          room_name
0     100565                스탠다드 더블룸 A (Standard Double Room A)
1     116649  스탠다드 트윈 (싱글베드 2개) (Standard Twin (2 Single beds))
2     116649          스탠다드 더블 (침대 1개) (Standard Double (1 Bed))
3     118349       스타샤인 스탠다드 트윈룸 (Starshine Standard Twin Room)
4     118349     스타샤인 스탠다드 더블룸 (Starshine Standard 

  df_rooms_table = pd.read_html(str(soup))[0]


호텔 스카이파크 명동 3호점
   room_price            room_name
0      111450             스탠다드 더블룸
1      112997                  트윈룸
2      137115                  트윈룸
3      112997                  더블룸
4      135184                  더블룸
5      118102  [K-라면 패키지] 스탠다드 더블룸
6      118102  [K-라면 패키지] 스탠다드 트윈룸
호텔 스카이파크 명동 3호점
   room_price                      room_name
0      107730                        스탠다드 더블
1      107730                        스탠다드 트윈
2      107730  [13시 체크인&K-Ramen 패키지] 스탠다드 더블
3      109395          [K-Ramen 패키지] 스탠다드 트윈
4      109395          [K-Ramen 패키지] 스탠다드 더블
호텔 스카이파크 명동 3호점 (Hotel Skypark Myeongdong III)
  room_price                                          room_name
0     100565                스탠다드 더블룸 A (Standard Double Room A)
1     116649  스탠다드 트윈 (싱글베드 2개) (Standard Twin (2 Single beds))
2     116649          스탠다드 더블 (침대 1개) (Standard Double (1 Bed))
3     118349       스타샤인 스탠다드 트윈룸 (Starshine Standard Twin Room)
4     118349     스타샤인 스탠다드 더블룸 (Starshine Standard 

  df_rooms_table = pd.read_html(str(soup))[0]


호텔 스카이파크 명동 3호점
   room_price            room_name
0      111450             스탠다드 더블룸
1      112997                  트윈룸
2      137115                  트윈룸
3      112997                  더블룸
4      135184                  더블룸
5      118102  [K-라면 패키지] 스탠다드 더블룸
6      118102  [K-라면 패키지] 스탠다드 트윈룸


In [100]:
for hotel_name in hotel_names:
    for site in sites:
        hotel_manager = HotelCrawlerManager(hotel_name, start_date, end_date)
        # try:
        print(site)
        hotel_name, hotel_data = hotel_manager.crawl(site)
        print(hotel_name)
        print(hotel_data)
        # except Exception as e:
        #     print(f"Error occurred while processing hotel {hotel_name} on site {site}: {e}")


yeogi
호텔 스카이파크 명동 3호점
   room_price                      room_name
0      107730                        스탠다드 더블
1      107730                        스탠다드 트윈
2      107730  [13시 체크인&K-Ramen 패키지] 스탠다드 더블
3      109395          [K-Ramen 패키지] 스탠다드 트윈
4      109395          [K-Ramen 패키지] 스탠다드 더블
agoda
호텔 스카이파크 명동 3호점 (Hotel Skypark Myeongdong III)
  room_price                                          room_name
0     102165                스탠다드 더블룸 A (Standard Double Room A)
1     116649  스탠다드 트윈 (싱글베드 2개) (Standard Twin (2 Single beds))
2     116649          스탠다드 더블 (침대 1개) (Standard Double (1 Bed))
3     118349       스타샤인 스탠다드 트윈룸 (Starshine Standard Twin Room)
4     118349     스타샤인 스탠다드 더블룸 (Starshine Standard Double Room)
5     128106  스탠다드 더블룸 (특가 상품) (Standard Double Room - Speci...
yanolja


IndexError: index 0 is out of bounds for axis 0 with size 0

In [None]:
# get the hotel price
def get_price(hotel_name:list, start_date:str, end_date:str, sites:list):
    for hotel in hotel_name:
        
        hotel_manager = HotelCrawlerManager(hotel, start_date, end_date)
        hotel_name, hotel_data = hotel_manager.crawl(sites)
        print(hotel_name)
        print(hotel_data)
        print("\n")


In [None]:

def crawl_hotel(crawler, url):
    attempt = 0
    while attempt < 2:
        try:
            crawler_instance = crawler(url)
            return crawler_instance.crawl()
        except Exception as e:
            print(f"오류 발생: {e}. {crawler},{url} 재시도 중...")
            attempt += 1
            if attempt == 2:
                print("재시도 실패. 오류 반환합니다.")
                return None, None

def fetch_and_store_details(hotel_name, url, crawler):
    name, price = crawl_hotel(crawler, url)
    if name and (type(price) is pd.core.frame.DataFrame):
        site = url.split('/')[2]
        return {'Hotel Name': hotel_name, 'Price': price, 'Date': str(datetime.date.today()), 'Site': site}
    return None

def get_lowest_price(df, hotel_name):
    try:
        lowest_price = df[df["Hotel Name"] == hotel_name].sort_values("room_price").iloc[0].room_price
    except IndexError:
        return "No data available"
    return lowest_price



def get_hotel_lowest_price():
    start_time = datetime.datetime.now()  # 처리 시작 시간
    
    # 호텔 데이터 로드
    hotels = pd.read_csv('hotels_data.csv')
    hotel_names = hotels["Hotel Name"].to_list()
    
    # 크롤러 인스턴스 리스트
    crawlers = [AgodaHotelCrawler, BookingHotelCrawler, ExpediaHotelCrawler, TripHotelCrawler, YanoljaHotelCrawler, YeogiHotelCrawler]

    results = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=6) as executor:
        futures = []
        for hotel_name in hotel_names:
            hotel_url = HotelURLManager(hotel_name, str(datetime.date.today()), str(datetime.date.today() + datetime.timedelta(days=1)))
            urls = [hotel_url.agoda_url(), hotel_url.booking_url(), hotel_url.expedia_url(), hotel_url.trip_url(), hotel_url.yanolja_url(), hotel_url.yeogi_url()]
            for url, crawler in zip(urls, crawlers):
                futures.append(executor.submit(fetch_and_store_details, hotel_name, url, crawler))

        for future in tqdm(concurrent.futures.as_completed(futures), total=len(futures), desc="Crawling Hotels"):
            result = future.result()
            if result:
                results.append(result)

    # 결과 데이터 처리
    dataframes = [result['Price'] for result in results if result]
    for df, result in zip(dataframes, results):
        df['Hotel Name'] = result['Hotel Name']
        df['Date'] = result['Date']
        df['Site'] = result['Site']

    all_data = pd.concat(dataframes, ignore_index=True)
    json_data = {name: get_lowest_price(all_data, name) for name in hotel_names}

    # 메타데이터 생성
    end_time = datetime.datetime.now()  # 처리 종료 시간
    metadata = {
        "generatedAt": datetime.datetime.now().isoformat(),
        "processingTime": str(end_time - start_time),
        "count": len(json_data)
    }

    # 최종 JSON 데이터 반환
    return {
        "status": "success",
        "metadata": metadata,
        "data": json_data
    }

# get_hotel_lowest_price 함수 호출 예시
# result = get_hotel_lowest_price()
# print(result)

In [60]:
import re
import datetime

In [65]:
hotel_names
sites = ["agoda", "expedia", "booking", "trip", "yanolja", "yeogi"]
start_date = str(datetime.date.today())
end_date = str(datetime.date.today() + datetime.timedelta(days=1))


In [62]:
# Collect results
results = []

for hotel_name in hotel_names:
    for site in sites:
        try:
            hotel_manager = HotelCrawlerManager(hotel_name, start_date, end_date)
            print(hotel_manager)
            hotel_name, hotel_data = hotel_manager.crawl(site)
            results.append({
                "hotel_name": hotel_name,
                "site": site,
                "data": hotel_data
            })
        except Exception as e:
            print(f"Error crawling {hotel_name} on {site}: {e}")

# Print results
for result in results:
    print(f"Hotel: {result['hotel_name']}, Site: {result['site']}")
    print(result['data'])
    print("-" * 80)

<crawling.HotelCrawlerManager object at 0x7d3e6fdea830>
<crawling.HotelCrawlerManager object at 0x7d3e6fe6ad40>
Error crawling 호텔 스카이파크 명동 3호점 (Hotel Skypark Myeongdong III) on expedia: index 0 is out of bounds for axis 0 with size 0
<crawling.HotelCrawlerManager object at 0x7d3e7d0dd180>
Error crawling 호텔 스카이파크 명동 3호점 (Hotel Skypark Myeongdong III) on booking: index 0 is out of bounds for axis 0 with size 0
<crawling.HotelCrawlerManager object at 0x7d3e6fdea5c0>
Error crawling 호텔 스카이파크 명동 3호점 (Hotel Skypark Myeongdong III) on trip: index 0 is out of bounds for axis 0 with size 0
<crawling.HotelCrawlerManager object at 0x7d3e6fe6ad40>
Error crawling 호텔 스카이파크 명동 3호점 (Hotel Skypark Myeongdong III) on yanolja: index 0 is out of bounds for axis 0 with size 0
<crawling.HotelCrawlerManager object at 0x7d3e6fdead70>
Error crawling 호텔 스카이파크 명동 3호점 (Hotel Skypark Myeongdong III) on yeogi: index 0 is out of bounds for axis 0 with size 0
<crawling.HotelCrawlerManager object at 0x7d3e6fe6ad40>
<cr

KeyboardInterrupt: 

In [101]:
for hotel_name in hotel_names:
    for site in sites:
        try:
            hotel_manager = HotelCrawlerManager(hotel_name, start_date, end_date)
            print(f"Processing hotel: {hotel_name}, site: {site}")
            try:
                hotel_name, hotel_data = hotel_manager.crawl(site)
                print(hotel_name)
                print(hotel_data)
            except IndexError as e:
                print(f"IndexError occurred while processing hotel {hotel_name} on site {site}: {e}")
            except Exception as e:
                print(f"Error occurred while processing hotel {hotel_name} on site {site}: {e}")
        except Exception as e:
            print(f"Error occurred while initializing HotelCrawlerManager for hotel {hotel_name} on site {site}: {e}")


Processing hotel: 호텔 스카이파크 명동 3호점, site: yeogi
호텔 스카이파크 명동 3호점
   room_price                      room_name
0      107730                        스탠다드 트윈
1      107730                        스탠다드 더블
2      107730  [13시 체크인&K-Ramen 패키지] 스탠다드 더블
3      109395          [K-Ramen 패키지] 스탠다드 더블
4      109395          [K-Ramen 패키지] 스탠다드 트윈
Processing hotel: 호텔 스카이파크 명동 3호점, site: agoda
호텔 스카이파크 명동 3호점 (Hotel Skypark Myeongdong III)
  room_price                                          room_name
0     100565                스탠다드 더블룸 A (Standard Double Room A)
1     116649  스탠다드 트윈 (싱글베드 2개) (Standard Twin (2 Single beds))
2     116649          스탠다드 더블 (침대 1개) (Standard Double (1 Bed))
3     118349       스타샤인 스탠다드 트윈룸 (Starshine Standard Twin Room)
4     118349     스타샤인 스탠다드 더블룸 (Starshine Standard Double Room)
5     128106  스탠다드 더블룸 (특가 상품) (Standard Double Room - Speci...
Processing hotel: 호텔 스카이파크 명동 3호점 (Hotel Skypark Myeongdong III), site: yanolja
IndexError occurred while processing hotel 호텔