In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.select import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import StaleElementReferenceException
from bs4 import BeautifulSoup
import time
import csv

# 크롬 드라이버 자동 업데이트
from webdriver_manager.chrome import ChromeDriverManager

In [2]:
class RetriesExceededError(Exception):
    def __init__(self, message):
        super().__init__(message)

        
def find_element_with_retry(driver_, by, value, waiting_sec=2.5, max_retries=10):
    retries = 0
    while retries < max_retries:
        try:
            element = WebDriverWait(driver_, waiting_sec).until(EC.presence_of_element_located((by, value)))
            return element
        except (NoSuchElementException, StaleElementReferenceException):
            retries += 1
            
    raise RetriesExceededError(f'{value}를 찾을 수 없음\n재시도 횟수 초과: {waiting_sec}초로 {max_retries}회 시도')

In [3]:
# 브라우저 꺼짐 방지
chrome_options = Options()
chrome_options.add_experimental_option("detach", True)

user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36"
chrome_options.add_argument(f'user-agent={user_agent}')

# 불필요한 에러 메세지 없애기
chrome_options.add_experimental_option("excludeSwitches", ["enable-logging"])
driver = webdriver.Chrome(ChromeDriverManager().install(), options=chrome_options)

category = 112747  # CPU
# category = 112751  # 메인보드
# category = 112752  # 램
# category = 112753  # 그래픽카드
# category = 112760  # SSD
# category = 112763  # HDD
# category = 112775  # 케이스
# category = 112777  # 파워
# category = 11236855  # 쿨러
save_file = True  # 파일 저장 여부
waiting_sec = 3

In [4]:
# 크롤링 시작
driver.get(f'https://prod.danawa.com/list/?cate={category}')
driver.implicitly_wait(10)

# 한번에 보기 90개 설정
qnt_element = find_element_with_retry(driver, By.CSS_SELECTOR, '#productListArea > div.prod_list_opts > div.view_opt > div.view_item.view_qnt > select')
Select(qnt_element).select_by_value("90")
# Select(driver.find_element(By.CSS_SELECTOR, '#productListArea > div.prod_list_opts > div.view_opt > div.view_item.view_qnt > select')).select_by_value("90")
time.sleep(waiting_sec)

# 제품 번호 리스트 초기화
p_seq_list = []

while True:
    # bs 초기화
    soup = BeautifulSoup(driver.page_source, 'html.parser')

    page_list = soup.select('div.number_wrap > a')  # 페이지 네비게이션 바

    for i in range(1, len(page_list)+1):
        target_page = find_element_with_retry(driver, By.CSS_SELECTOR, f'#productListArea > div.prod_num_nav > div > div > a:nth-child({i})')
        target_page.click()
        time.sleep(waiting_sec)

        soup = BeautifulSoup(driver.page_source, 'html.parser')
        product_list = soup.select('li.prod_item.prod_layer')  # 제품 리스트
        for product in product_list:
            if 'id' in product.attrs:
                p_seq_list.append(product['id'][11:])  # 제품 번호

    try:
        nav_next = find_element_with_retry(driver, By.CLASS_NAME, 'nav_next')
        nav_next.click()
        time.sleep(waiting_sec)
    except NoSuchElementException as e:
        print('다음 버튼이 없음')
        break
    except Exception as e:
        print(e)
        break
        
driver.quit()
# 크롤링 끝

Message: 



In [5]:
print(len(p_seq_list))

425


In [6]:
# 제품 번호 텍스트로 저장
if save_file:
    with open(f'seq_{category}.csv', 'w') as f:
        writer = csv.writer(f)

        writer.writerow(["seq"])
        for seq in p_seq_list:
            writer.writerow([seq])

In [2]:
a = ['A', 'B', 'C']
b = [1, 2, 3]
for q, w in zip(a, b):
    print(q, w)

A 1
B 2
C 3
