!pip install selenium

In [54]:
import requests
import json

In [55]:
def get_twentyninecm_category(category1_code, timeout=10):
    """Fetch the 29CM category listing for one top-level category.

    Args:
        category1_code: Top-level category code sent as the ``category1_code``
            query parameter (e.g. 268100100).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.exceptions.HTTPError: If the server answers with an error
            status. The error is printed and re-raised so that an error
            payload is never returned as if it were category data.
    """
    url = 'https://cache.29cm.co.kr/item/category/'
    params = {
        'category1_code': category1_code,
        'sortBy': 'VISIT_COUNT',
    }
    response = requests.get(url, params=params, timeout=timeout)

    try:
        response.raise_for_status()
    except requests.exceptions.HTTPError as err:
        print(f"HTTP Error: {err}")
        raise
    print("Request was successful!")

    return response.json()


# Example call: look up the categories under top-level category code 268100100.
get_twentyninecm_category(268100100)

Request was successful!


{'count': 1,
 'next': None,
 'previous': None,
 'results': [{'category': {'category_name': '여성의류',
    'category_code': 268100100,
    'category2': [{'category_name': '상의',
      'category_code': 268103100,
      'category3': [{'category_name': '긴소매 티셔츠',
        'category_code': 268103102,
        'count': 0},
       {'category_name': '스웨트셔츠', 'category_code': 268103104, 'count': 0},
       {'category_name': '블라우스', 'category_code': 268103107, 'count': 0},
       {'category_name': '셔츠', 'category_code': 268103106, 'count': 0},
       {'category_name': '반소매 티셔츠', 'category_code': 268103101, 'count': 0},
       {'category_name': '후디', 'category_code': 268103105, 'count': 0},
       {'category_name': '슬리브리스', 'category_code': 268103103, 'count': 0}],
      'count': 0},
     {'category_name': '아우터',
      'category_code': 268102100,
      'category3': [{'category_name': '재킷',
        'category_code': 268102104,
        'count': 0},
       {'category_name': '점퍼', 'category_code': 268102119

In [56]:
def get_twentyninecm_products_list(categoryLargeCode, categoryMediumCode, categorySmallCode, timeout=10):
    """Fetch every product of one 29CM category from the search API.

    The API needs to be told how many products to return, so a small probe
    request (2 products) is sent first to read ``data.productsTotalCount``;
    a second request then asks for that many products.

    Args:
        categoryLargeCode: Large (top-level) category code.
        categoryMediumCode: Medium category code.
        categorySmallCode: Small category code.
        timeout: Seconds to wait for each request before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        The decoded JSON body of the full listing. When the probe response
        reports no more products than it already requested (including a
        missing or null ``data`` field), the probe response itself is returned
        and no second request is made.

    Raises:
        requests.exceptions.HTTPError: If either request gets an error status.
            The error is printed and re-raised so that an error payload is
            never treated as product data.
    """
    url = 'https://search-api.29cm.co.kr/api/v4/products/category/'
    probe_count = 2
    params = {
        'categoryLargeCode': categoryLargeCode,
        'categoryMediumCode': categoryMediumCode,
        'categorySmallCode': categorySmallCode,
        'count': probe_count,
        'page': 1,
        'sort': 'latest',
        'init': 'T',
        'excludeSoldOut': False,
    }

    def fetch():
        # Send the request with the current params and return the parsed body.
        response = requests.get(url, params=params, timeout=timeout)
        try:
            response.raise_for_status()
        except requests.exceptions.HTTPError as err:
            print(f"HTTP Error: {err}")
            raise
        print("Request was successful!")
        return response.json()

    # Step 1: probe with a tiny page just to learn the total product count.
    first_page = fetch()
    # `data` may be absent or null, so fall back to an empty dict / zero.
    total_count = (first_page.get('data') or {}).get('productsTotalCount') or 0

    # The probe already holds everything; a second request would be redundant
    # (and a `count=0` request would be meaningless).
    if total_count <= probe_count:
        return first_page

    # Step 2: ask for all products in one request.
    params['count'] = total_count
    return fetch()


# Example call: large / medium / small category codes as they appear in the category lookup output above.
get_twentyninecm_products_list(268100100, 268103100, 268103102)

Request was successful!
Request was successful!


{'result': 'SUCCESS',
 'data': {'products': [{'itemNo': 2469332,
    'itemName': 'NU NOSTALGIA LONG SLEEVE',
    'frontBrandNo': 16723,
    'frontBrandNameKor': '글로니',
    'frontBrandNameEng': 'GLOWNY',
    'categoryNames': None,
    'consumerPrice': 72000,
    'sellPrice': 64800,
    'imageUrl': '/item/202403/11eedba99778179291a5efbae40926c1.jpg',
    'visibleBeginTimestamp': '2024-03-11T09:40:00.024000+09:00',
    'visibleEndTimestamp': None,
    'useWithoutVisibleEndDate': 'T',
    'availableBeginTimestamp': '2024-03-06T20:12:20.450217+09:00',
    'availableEndTimestamp': None,
    'useWithoutAvailableEndDate': None,
    'discountRate': 10,
    'couponDiscountRate': 5,
    'isCouponAllowedInOrder': True,
    'heartCount': 1437,
    'reviewCount': 0,
    'reviewAveragePoint': 0.0,
    'isSoldOut': False,
    'lastSalePrice': 61560,
    'lastSalePercent': 15,
    'subjectDescriptions': [],
    'isFreeShipping': True,
    'isNew': True,
    'heartOn': False,
    'saleInfoV2': {'consume

- product
    - main_category
    - sub_category
    - gender: 
    - name: itemName
    - price: consumerPrice
    - quantity: 크롤링
    - brand_name: frontBrandNameKor, frontBrandNameEng
    - size: 크롤링
    - color: 크롤링
    - fee: 
    - image: https://img.29cm.co.kr{imageUrl} (imageUrl이 '/'로 시작함)
    - code:
    - url: https://product.29cm.co.kr/catalog/{itemNo}
    - detail_images: 이미지 url 리스트
    - detail_html: 상품 상세 정보 html

- reviews
    - count: 댓글 개수
    - content: 댓글 내용
    - created_at: 게시일
    - images: 이미지 리스트 (있으면 가져오고 없으면 안 가져오기), 없으면 null
    - point: 평점 (있으면 가져오고 없으면 안 가져오기) int, 없으면 null
    - productOption: 상품 상세 정보 ['color', 'size'] 없으면 null
    - userSize: ['키', '몸무게'] 없으면 null

In [74]:
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

def crawling_twentyninecm_product_info(itemNo):
    """Scrape option values, detail images and detail HTML from a 29CM product page.

    Opens ``https://product.29cm.co.kr/catalog/{itemNo}`` in Chrome, reads each
    option dropdown in turn, clicks the "show more" button of the product
    detail section and collects the ``src`` of every ``<img>`` inside it.
    The collected values are printed and also returned.

    Args:
        itemNo: Product number used to build the catalog URL.

    Returns:
        dict with three keys:
            ``product_info``: option name (lower-cased text of the first
                ``li`` of a dropdown) mapped to the list of option texts;
                ``size`` and ``color`` are pre-seeded with empty lists.
            ``product_images``: list of image URLs found in the detail section.
            ``detail_html``: inner HTML of the detail container.

    Raises:
        selenium.common.exceptions.TimeoutException: If the "show more" button
            does not become visible within 10 seconds.
        selenium.common.exceptions.NoSuchElementException: If the detail
            container cannot be found.
    """
    product_info = {'size': [], 'color': []}
    product_images = []

    url = f'https://product.29cm.co.kr/catalog/{itemNo}'

    # Chrome options
    chrome_options = Options()
    # chrome_options.add_argument('--headless')  # uncomment to run without a browser window
    chrome_options.add_experimental_option('detach', True)

    # Start the WebDriver; try/finally guarantees the browser is closed even
    # when a lookup or wait below fails.
    driver = webdriver.Chrome(options=chrome_options)
    try:
        driver.get(url)

        # Walk the option dropdowns (color, size, ...) until no more are found.
        i = 0
        while True:
            i += 1
            try:
                # Locate the i-th dropdown and open it
                option_selector = f'div.css-129gw94.e1yaqq956 > div > div:nth-child({i}) > div > input'
                option_element = driver.find_element(By.CSS_SELECTOR, option_selector)
                option_element.click()

                # Collect the li items of the opened dropdown list
                ul_selector = 'ul.css-1sxz8vl.e15gsm0h4'
                ul_element = driver.find_element(By.CSS_SELECTOR, ul_selector)
                li_elements = ul_element.find_elements(By.TAG_NAME, 'li')

                # The first item is the option name
                option_name = li_elements[0].text.lower()

                # The remaining items are the option values
                option_values = [li.text for li in li_elements[1:]]
                product_info[option_name] = option_values

                # Close the dropdown again
                option_element.click()
            except (WebDriverException, IndexError):
                # No further dropdown (or it could not be read): stop scanning.
                # Only Selenium failures and an empty list end the loop, so
                # KeyboardInterrupt and programming errors are not swallowed.
                break

        # Maximum time to wait for the page element to appear
        timeout = 10

        # Click the "show more" button of the product detail section
        detail_button_selector = 'button.efgb0b60.css-h7utre.e12h9sp60'
        detail_button_element = WebDriverWait(driver, timeout).until(
            EC.visibility_of_element_located((By.CSS_SELECTOR, detail_button_selector))
        )
        detail_button_element.click()

        # Locate the div that holds the product detail content
        detail_selector = 'div.e1jr1djm0.css-1wvn7e9.e1esfft0'
        detail_element = driver.find_element(By.CSS_SELECTOR, detail_selector)

        # Grab its HTML and parse it with BeautifulSoup
        detail_html = detail_element.get_attribute("innerHTML")
        soup = BeautifulSoup(detail_html, 'html.parser')

        # Keep the src of every img tag that has one
        for img_tag in soup.find_all('img'):
            img_src = img_tag.get('src')
            if img_src:
                product_images.append(img_src)

        print(f'product_info: {product_info}')
        print(f'product_images: {product_images}')
        print(f'detail_html: {detail_html}')

        return {
            'product_info': product_info,
            'product_images': product_images,
            'detail_html': detail_html,
        }
    finally:
        # Always shut the WebDriver down
        driver.quit()


# Example call: scrape the product page of item number 2169397.
crawling_twentyninecm_product_info(2169397)

product_info: {'size': ['11차 다크인디고 0', '11차 다크인디고 1', '11차 다크인디고 1+', '11차 다크인디고 2', '11차 다크인디고 3', '12차 블랙 0', '11차 블랙 1 [품절]', '11차 블랙 1+ [품절]', '11차 블랙 2', '12차 블랙 3'], 'color': []}
product_images: ['https://img.29cm.co.kr/item/202402/11eec6585466a9ecb2384fc61ad7df04.jpeg', 'https://img.29cm.co.kr/item/202402/11eec65859ffec6791eb31035456b8c5.jpeg', 'https://img.29cm.co.kr/item/202402/11eec6585b99515e837743ae6171c788.jpeg', 'https://img.29cm.co.kr/item/202402/11eec6585c4d5ed991eb9d31914f3fbf.jpeg', 'https://img.29cm.co.kr/item/202402/11eec6585d0b08d083778be19ccb8bfa.jpeg', 'https://img.29cm.co.kr/item/202402/11eec6585daca050b2385d56cfcbaa87.jpeg', 'https://img.29cm.co.kr/item/202402/11eec6585e5ed952b2387b8c2941ae7b.jpeg', 'https://img.29cm.co.kr/item/202402/11eec6585f024544b23893cffb04d781.jpeg', 'https://img.29cm.co.kr/item/202402/11eec6585fc60add91eb619251e1d40c.jpeg', 'https://img.29cm.co.kr/item/202402/11eec658606c840f91eb0bc5bede8c40.jpeg', 'https://img.29cm.co.kr/item/202402/11

In [60]:
def get_twentyninecm_reviews_list(itemNo, reviewCount, timeout=10):
    """Fetch the reviews of one 29CM product from the review API.

    A single request is sent for page 0 with a page size of ``reviewCount``.

    Args:
        itemNo: Product number, sent as the ``itemId`` query parameter.
        reviewCount: Number of reviews to request, sent as ``size``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.exceptions.HTTPError: If the server answers with an error
            status. The error is printed and re-raised so that an error
            payload is never returned as if it were review data.
    """
    url = 'https://review-api.29cm.co.kr/api/v4/reviews/'
    params = {
        'itemId': itemNo,
        'page': 0,
        'size': reviewCount,
    }
    response = requests.get(url, params=params, timeout=timeout)

    try:
        response.raise_for_status()
    except requests.exceptions.HTTPError as err:
        print(f"HTTP Error: {err}")
        raise
    print("Request was successful!")

    return response.json()


# Example call: request 354 reviews of item number 1896123 in a single page.
get_twentyninecm_reviews_list(1896123, 354)

Request was successful!


{'result': 'SUCCESS',
 'data': {'count': 354,
  'giftCount': 0,
  'averagePoint': 4.5,
  'results': [{'itemReviewNo': 8263809,
    'optionValue': ['[SIZE]FREE'],
    'userNo': 6709481,
    'userId': 'anais9***',
    'point': 5,
    'contents': '역시나 예쁨~~\n여리여리 여성스러우면서도 캐쥬얼 느낌\n핑크가 신의 한수네요^^\n잘 입을께요',
    'reviewType': 0,
    'registrationType': 'USER',
    'isGift': 'F',
    'isReported': 'F',
    'giftReview': None,
    'userSize': ['168cm', '52kg'],
    'uploadFiles': [{'filename': '72598E47-46E9-4158-8628-3DF573F4345F.jpeg',
      'contentType': 'image/jpeg',
      'url': '/next-product/2024/03/15/76ddc9b08cd54e14a6fa234497011bd6_20240315140845.jpeg',
      'size': 3922011,
      'isDeleted': 'F',
      'insertTimestamp': '2024-03-15 14:10:04',
      'updatedTimestamp': '2024-03-15 14:10:04'}],
    'partnerComment': None,
    'surveyList': [{'surveyType': 'SIZE', 'optionValue': 2}],
    'insertTimestamp': '2024-03-15 14:10:04',
    'orderNo': None,
    'orderSerial': None,
    'admin