# 카카오 쇼핑 크롤링

목표
- 정적 페이지, 동적 페이지 구분하기
- `스크롤`을 활용하여 300개의 상품 정보 추출
- 상품 정보
  1. 상품명
  2. 상세 상품 페이지 링크
  3. 판매가격
  4. 판매처
  5. 배송정보 e.g. 무료배송, 유료배송
  6. 상품 이미지

chrome settings -> preferences -> disable javascript 옵션으로 JavaScript를 끈 뒤 확인해 보니, 이 페이지는 정적 페이지가 아니라 `동적` 페이지이다.


In [None]:
from dataclasses import dataclass
import random
from time import sleep
from typing import ClassVar, Optional

import pandas as pd
from urllib.parse import urljoin
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.remote.webelement import WebElement
from bs4 import BeautifulSoup


# Site origin; the start URL and relative product hrefs are joined onto it with urljoin.
KAKAO_SHOPPING_ROOT = "https://shoppinghow.kakao.com"
# Path of the listing page that the driver opens first.
KAKAO_SHOPPING_PATH = "/siso/p/sale/mall/talkstore"

@dataclass
class DriverController:
  """Convenience wrapper around a Selenium Chrome driver.

  Provides explicit-wait element lookup, clicking and key input, and pauses
  for a random interval after every interaction.

  Attributes:
    driver: The Chrome WebDriver instance that every method operates on.
    start_url: Optional URL that is opened as soon as the instance is built.
  """

  # Class-level constants. They must be ClassVar: as plain annotated
  # attributes they would become defaulted dataclass fields placed before the
  # non-default `driver` field, which dataclass rejects with an error.
  DEFAULT_DELAY: ClassVar[int] = 5       # seconds to wait for an element
  PAGE_LOAD_TIMEOUT: ClassVar[int] = 10  # seconds to wait for the first page load
  RANDOM_SLEEP_MIN: ClassVar[float] = 0.8
  RANDOM_SLEEP_MAX: ClassVar[float] = 1.5
  driver: webdriver.Chrome
  start_url: Optional[str] = None

  def __post_init__(self) -> None:
    """Open `start_url` (if given) and block until the document is loaded.

    Runs automatically right after the dataclass-generated `__init__`.
    """
    if not self.start_url:
      return

    self.driver.get(self.start_url)
    WebDriverWait(self.driver, self.PAGE_LOAD_TIMEOUT).until(
      lambda d: d.execute_script("return document.readyState") == "complete"
    )

  def find_element(
    self,
    selector: str,
    driver_selector: str = By.CSS_SELECTOR,
    delay: float = DEFAULT_DELAY
  ) -> WebElement:
    """Return the first element matching `selector`, waiting up to `delay` seconds.

    Args:
      selector: Locator expression, interpreted according to `driver_selector`.
      driver_selector: Locator strategy (a `By` constant); CSS selector by default.
      delay: Maximum number of seconds to wait for the element to be present.

    Returns:
      The located element, as soon as it is present in the DOM.
    """
    return WebDriverWait(self.driver, delay).until(
      EC.presence_of_element_located((driver_selector, selector))
    )

  def click_element(self, selector: str) -> None:
    """Click the element matching the CSS `selector`, then pause briefly."""
    # Presence in the DOM is not enough for a click: the element may still be
    # hidden or disabled, so wait until Selenium reports it as clickable.
    element = WebDriverWait(self.driver, self.DEFAULT_DELAY).until(
      EC.element_to_be_clickable((By.CSS_SELECTOR, selector))
    )
    element.click()
    self._random_sleep()

  def key_input(self, selector: str, keys: list[str]) -> None:
    """Send `keys`, in order, to the element matching the CSS `selector`, then pause."""
    element = self.find_element(selector)
    element.send_keys(*keys)
    self._random_sleep()

  def _random_sleep(self) -> None:
    """Sleep for a random duration between RANDOM_SLEEP_MIN and RANDOM_SLEEP_MAX.

    Acting at a fixed interval may get the crawler blocked, so the pause
    length is randomized.
    """
    sleep(random.uniform(self.RANDOM_SLEEP_MIN, self.RANDOM_SLEEP_MAX))
  
  
# Start a Chrome session and let the controller open the listing page.
listing_url = urljoin(KAKAO_SHOPPING_ROOT, KAKAO_SHOPPING_PATH)
driver = webdriver.Chrome()
dc = DriverController(
  driver=driver,
  start_url=listing_url,
)


In [33]:
# Keep scrolling until at least 300 product items are present on the page.

items_wrapper_selector = "#SaleSectionProductsWrap > li"
target_item_count = 300
# Give up after this many consecutive scrolls that load no new items, so the
# loop cannot spin forever when the page has fewer items than the target.
max_stalled_rounds = 10

# Counts the items currently in the DOM, then scrolls down to trigger loading
# of the next batch. The selector is passed in as arguments[0].
js_script = """
const itemsLength = document.querySelectorAll(arguments[0]).length;
window.scrollBy(0, 2800);

return itemsLength;
"""

items_length = 0
stalled_rounds = 0

while items_length < target_item_count:
  previous_length = items_length
  items_length = driver.execute_script(js_script, items_wrapper_selector)
  sleep(1)  # give the page time to append newly loaded items

  if items_length > previous_length:
    stalled_rounds = 0
    continue

  stalled_rounds += 1
  if stalled_rounds >= max_stalled_rounds:
    print(f"Stopped scrolling: item count stayed at {items_length} (target {target_item_count}).")
    break


In [None]:
# Parse the fully scrolled page and collect one row per product item.
page_soup = BeautifulSoup(driver.page_source, "html.parser")
items = page_soup.select(items_wrapper_selector)
result = []


def _text_of(parent, selector):
  """Return the stripped text of the first node matching `selector`, or None if absent."""
  node = parent.select_one(selector)
  return None if node is None else node.get_text(strip=True)


def _url_of(parent, selector, attr):
  """Return attribute `attr` of the first match as an absolute URL, or None if absent."""
  node = parent.select_one(selector)
  value = None if node is None else node.get(attr)
  # urljoin resolves relative and protocol-relative ("//host/...") values
  # against the site root and leaves already-absolute URLs untouched.
  return urljoin(KAKAO_SHOPPING_ROOT, value) if value else None


for item in items:
  product_name = _text_of(item, ".info_name")
  product_link = _url_of(item, "a", "href")

  # Keep only the digits of the price text: drop thousands separators and the "원" suffix.
  product_price = _text_of(item, ".info_sales")
  if product_price is not None:
    product_price = product_price.replace(",", "").replace("원", "")

  product_seller = _text_of(item, ".txt_shop")

  # The shipping label is taken from the last child of ".info_etc". Convert it
  # to a plain stripped string so no BeautifulSoup node ends up in the rows.
  product_shipping = None
  etc_node = item.select_one(".info_etc")
  if etc_node is not None and etc_node.contents:
    last_child = etc_node.contents[-1]
    if isinstance(last_child, str):
      product_shipping = str(last_child).strip()
    else:
      product_shipping = last_child.get_text(strip=True)

  product_image_link = _url_of(item, ".wrap_thumb > img", "src")

  result.append([
    product_name,
    product_link,
    product_price,
    product_seller,
    product_shipping,
    product_image_link,
  ])



[['[2+2] 피죤 퓨어 시카 핸드솝 300mlx2개+2개', 'https://shoppinghow.kakao.com/go.daum?ikey=aQw00_.7oAmZKEKD9y_dGdPSTqj8EGyGfwaDQ&val=GT.c0iRowYzPzc4pMu4pOXHiOMuYgqQ5yNCI1Vg9z7xwj8eeJxVbFhNhaPVAUKWo5OM8BW48ciIQ8txB.uIRhuiH_hXRWRgoFIJOXdxss4Fzp486pL.4gflB_ZerlPXuCfeyuOszZUb.MvsN7GZ23UCFPMpzo.v2lShedFKFURSxFv7cIISfFR15UyV_NvXukzn9h4qAg6UXTn-8P_heJ_cpsxYNggHTHGzDC7BXTlTSCTN8LY.CfrDf15EHvgkdbWPECTVRw-pWerLga4ywolYk51g00&shopid=talkstore&gateyn=Y', '14900', '톡딜', '무료배송', 'https://shop3.daumcdn.net/thumb/R250x250.q90/?fname=http%3A%2F%2Fshop3.daumcdn.net%2Fshophow%2Fp%2FX16373603676.jpg%3Fut%3D202511'], ['피톤케어 올인원클린 프리미엄 캡슐세제 9입 30입 건조기시트 모음 / 꽃집향 아기세제 유아세제', 'https://shoppinghow.kakao.com/go.daum?ikey=aQw00_.7oAmZKEKD9y_dGdPSTqj8EGyGfwaDQ&val=CC4g0ETnLV4TkHnfoMDM6vvhgESmir57So_Tu1dSczCfED3bPqjX6pFzyCmYvaHkOqS7hx6tsvLl3SqVh-8lSxxrX9y4m_KXN5WVUrJOdBYHJ1KpsU2EPnOt38uEtgv_PhdPvHg89sxbVd1dTwl6pK9DgKLgH.dEPREYKtcgEseMuPC.dWjTMgBil9S7rpF.m-UV.-L3NJJC_ybelpgqVqCxCT6Y9nJHBFsWogOSj8RK3keuZh2ac4uWxMxVS4XT8KzTqEYJ

In [51]:
import pandas as pd
from pathlib import Path

# Write the collected rows to an Excel file, one column per extracted field.
output_path = "./outputs/test.xlsx"
# to_excel does not create missing directories, so make sure the folder exists.
Path(output_path).parent.mkdir(parents=True, exist_ok=True)

df = pd.DataFrame(result, columns=["상품명", "상품 링크", "가격", "판매처", "배송정보", "상품 이미지"])
df.to_excel(output_path, index=False)


In [52]:
# Crawl finished: end the WebDriver session.
driver.quit()
