In [2]:
import time
import re
import pandas as pd
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup


# --------------------------------------------------
# Create and configure Selenium WebDriver
# --------------------------------------------------
def create_driver():
    """Start a Chrome WebDriver session with its window maximized.

    Returns:
        The newly created ``webdriver.Chrome`` instance. This function does
        not close it; whoever calls it must eventually call ``quit()``.
    """
    browser = webdriver.Chrome()
    browser.maximize_window()
    return browser


# --------------------------------------------------
# Navigate to Starbucks store search page
# --------------------------------------------------
def open_store_search(driver):
    """Load the Starbucks Korea home page and click the store-search link.

    The link is the third entry of the first list inside the ``gnb_nav03``
    navigation item, so the pointer is moved over the parent item first and
    then over the link before clicking.

    Args:
        driver: Selenium WebDriver used to load the page and run the
            pointer actions.
    """
    # CSS paths: the top-level nav item and the link nested underneath it.
    store_menu_css = '#gnb > div > nav > div > ul > li.gnb_nav03'
    find_store_css = (
        store_menu_css
        + ' > div > div > div > ul:nth-child(1) > li:nth-child(3) > a'
    )

    driver.get('https://www.starbucks.co.kr/index.do')
    time.sleep(1)  # fixed pause so the landing page can render

    store_menu = driver.find_element(By.CSS_SELECTOR, store_menu_css)
    find_store_link = driver.find_element(By.CSS_SELECTOR, find_store_css)

    # Queue: hover parent -> hover link -> click, then run the whole chain.
    pointer = ActionChains(driver)
    pointer.move_to_element(store_menu)
    pointer.move_to_element(find_store_link)
    pointer.click()
    pointer.perform()


# --------------------------------------------------
# Select "Seoul" from the region list
# --------------------------------------------------
def select_seoul(driver):
    """Click the first entry of the region list once it is clickable.

    Waits up to 10 seconds for the link to become clickable; Selenium's
    wait raises ``TimeoutException`` if it never does.

    Args:
        driver: Selenium WebDriver currently on the store-search page.
    """
    # Descendant path from the page container down to the first region link.
    seoul_css = ' > '.join((
        '#container',
        'div',
        'form',
        'fieldset',
        'div',
        'section',
        'article.find_store_cont',
        'article',
        'article:nth-child(4)',
        'div.loca_step1',
        'div.loca_step1_cont',
        'ul',
        'li:nth-child(1)',
        'a',
    ))
    seoul_locator = (By.CSS_SELECTOR, seoul_css)

    wait = WebDriverWait(driver, 10)
    seoul_link = wait.until(EC.element_to_be_clickable(seoul_locator))
    seoul_link.click()


# --------------------------------------------------
# Select the first district (구) under Seoul
# --------------------------------------------------
def select_first_gu(driver):
    """Click the first district button and wait for the result list.

    Each wait lasts at most 5 seconds and raises Selenium's
    ``TimeoutException`` on expiry.

    Args:
        driver: Selenium WebDriver on the store-search page after a region
            has been chosen.
    """
    gu_locator = (By.CLASS_NAME, 'set_gugun_cd_btn')
    results_locator = (By.CLASS_NAME, 'quickResultLstCon')
    wait = WebDriverWait(driver, 5)

    # Block until at least one district button exists in the DOM.
    wait.until(EC.presence_of_all_elements_located(gu_locator))

    # Re-query the buttons and click the first one in document order.
    gu_buttons = driver.find_elements(*gu_locator)
    gu_buttons[0].click()

    # Block until the store result container is present.
    wait.until(EC.presence_of_all_elements_located(results_locator))


# --------------------------------------------------
# Extract store data from HTML using BeautifulSoup
# --------------------------------------------------
def parse_store_data(driver):
    """Parse the store list on the current page into a DataFrame.

    Reads ``driver.page_source``, finds the
    ``<ul class="quickSearchResultBoxSidoGugun">`` element and emits one row
    per ``<li>`` found beneath it.

    Args:
        driver: Object exposing the rendered HTML as ``page_source``.

    Returns:
        pandas.DataFrame with columns ``store``, ``addr``, ``lat`` and
        ``lng``. ``lat``/``lng`` hold the raw ``data-lat``/``data-long``
        attribute values, unconverted.

    Raises:
        AttributeError: If the result ``<ul>`` is missing, or an ``<li>``
            has no ``<strong>`` or ``<p>`` child.
        KeyError: If an ``<li>`` lacks ``data-lat`` or ``data-long``.
    """
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    result_list = soup.find('ul', 'quickSearchResultBoxSidoGugun')

    # Trailing "NNNN-NNNN" phone number. Whitespace on either side is part
    # of the match so that padding after the number cannot defeat the `$`
    # anchor and leave the phone number inside the address.
    phone_suffix = re.compile(r'\s*\d{4}-\d{4}\s*$')

    records = []
    for item in result_list.find_all('li'):
        address = phone_suffix.sub('', item.find('p').text).strip()
        records.append({
            'store': item.find('strong').text,
            'addr': address,
            # Coordinates are carried as data attributes on the <li>.
            'lat': item['data-lat'],
            'lng': item['data-long'],
        })

    # Explicit columns keep the schema stable even when no <li> was found.
    return pd.DataFrame(records, columns=['store', 'addr', 'lat', 'lng'])


# --------------------------------------------------
# Orchestrator function
# --------------------------------------------------
def fetch_starbucks_seoul():
    """Run the crawl end to end and return the parsed store table.

    Creates a browser, performs the three navigation steps in order, parses
    the resulting page, and quits the browser whether or not a step raised.

    Returns:
        The DataFrame produced by ``parse_store_data``.
    """
    browser = create_driver()
    try:
        # Navigation steps must run in this order; each takes the driver.
        for navigate in (open_store_search, select_seoul, select_first_gu):
            navigate(browser)
        stores = parse_store_data(browser)
    finally:
        # Shut the browser down on success and on failure alike.
        browser.quit()
    return stores


# --------------------------------------------------
# Run crawler and save results
# --------------------------------------------------
if __name__ == "__main__":
    output_path = 'starbucks_seoul.csv'

    # Crawl first; the CSV is only written if the crawl succeeded.
    starbucks_df = fetch_starbucks_seoul()

    # utf-8-sig prepends a BOM (presumably so spreadsheet tools detect the
    # encoding of the Korean text); the row index is left out of the file.
    starbucks_df.to_csv(output_path, index=False, encoding='utf-8-sig')

    # Message reads: "The data has been saved to starbucks_seoul.csv."
    print("데이터가 starbucks_seoul.csv 파일로 저장되었습니다.")


데이터가 starbucks_seoul.csv 파일로 저장되었습니다.
