In [1]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
import time

# Load the list of content URLs to crawl from the CSV file.
title_df = pd.read_csv("kakao_listly.csv")
# Drop missing cells before casting: astype(str) would otherwise turn NaN into
# the literal string "nan", which is not a navigable URL.
id_list = title_df['url'].dropna().astype(str).tolist()
print(len(id_list))
print(id_list[:5])  # sanity check that the URLs were read correctly

# WebDriver setup: headless Chrome with a fixed viewport.
options = Options()
options.add_argument('--headless')
# Chrome expects the size as "width,height"; the "1920x1080" form is not the
# documented format for this switch.
options.add_argument('--window-size=1920,1080')
options.add_argument('--disable-gpu')
driver = webdriver.Chrome(options=options)

# Accumulates one dict per crawled work.
data = []


def _text_or(node, default):
    """Return the stripped text of *node*, or *default* when nothing was found."""
    return node.get_text(strip=True) if node else default


def _nth_text(nodes, index, default):
    """Return the stripped text of nodes[index], or *default* if the list is too short."""
    return nodes[index].get_text(strip=True) if len(nodes) > index else default


def _load_soup(page_url):
    """Open *page_url* in the shared driver and parse the resulting page source."""
    driver.get(page_url)
    # NOTE(review): a fixed sleep does not guarantee the page has rendered;
    # elements that are missing fall back to the default strings below.
    time.sleep(2)
    return BeautifulSoup(driver.page_source, "html.parser")


try:
    # Visit each URL and collect its metadata.
    for url in id_list[:5]:  # limited to 5 URLs for a test run
        # --- Overview tab ---
        soup = _load_soup(f"{url}?tab_type=overview")

        # Title
        title_text = _text_or(
            soup.find("span", class_="font-large3-bold mb-3pxr text-ellipsis break-all text-el-70 line-clamp-2"),
            "제목 없음",
        )

        # Writer
        writer_text = _text_or(
            soup.find("span", class_="font-small2 mb-6pxr text-ellipsis text-el-70 opacity-70 break-word-anywhere line-clamp-2"),
            "작가 없음",
        )

        # Genre: the second matching span is used (per the original comment,
        # the first one is the "웹툰" label, which is skipped).
        genre_spans = soup.find_all("span", class_="break-all align-middle")
        genre_text = _nth_text(genre_spans, 1, "장르 없음")

        # View count and rating share one class: index 0 is the view count,
        # index 1 is the rating.
        view_rating_spans = soup.find_all("span", class_="text-el-70 opacity-70")
        view_count_text = _nth_text(view_rating_spans, 0, "조회수 없음")
        rating_text = _nth_text(view_rating_spans, 1, "별점 없음")

        # Episode count and comment count share one class: index 0 is the
        # episode count, index 1 is the comment count.
        episode_comment_spans = soup.find_all("span", class_="text-ellipsis break-all line-clamp-1 font-small2-bold text-el-70")
        episode_count_text = _nth_text(episode_comment_spans, 0, "회차수 없음")
        comment_count_text = _nth_text(episode_comment_spans, 1, "댓글수 없음")

        # --- About tab ---
        soup = _load_soup(f"{url}?tab_type=about")

        # Synopsis
        story_text = _text_or(
            soup.find("span", class_="font-small1 mb-8pxr block whitespace-pre-wrap break-words text-el-70"),
            "줄거리 없음",
        )

        # Hashtags, joined into one comma-separated string
        hashtags = [
            tag.get_text(strip=True)
            for tag in soup.find_all("span", class_="font-small2-bold text-ellipsis text-el-70 line-clamp-1")
        ]
        hashtags_text = ', '.join(hashtags) if hashtags else "해시태그 없음"

        # Age rating: find the info row whose first span is the "연령등급"
        # label and take the text of the last span in that row.
        age_rating_text = "연령 등급 없음"
        for div in soup.find_all("div", class_="font-small1 flex w-full pt-6pxr"):
            label_span = div.find("span")
            if label_span and "연령등급" in label_span.get_text():
                age_rating_text = div.find_all("span")[-1].get_text(strip=True)
                break

        # Store the record
        data.append({
            'Title': title_text,
            'Writer': writer_text,
            'Genre': genre_text,
            'View Count': view_count_text,
            'Rating': rating_text,
            'Episode Count': episode_count_text,
            'Comment Count': comment_count_text,
            'Story': story_text,
            'Hashtags': hashtags_text,
            'Age Rating': age_rating_text,
        })

        print(f"현재 작품: {title_text}")
        time.sleep(2)
finally:
    # Always shut the browser down, even if a page raises mid-crawl, so the
    # Chrome process is not left running.
    driver.quit()

# Build the DataFrame and save it as CSV.
# Passing `columns=` to the constructor fixes the column order and, unlike
# indexing with a column list afterwards, does not raise KeyError when `data`
# is empty (the result is then a header-only CSV).
df = pd.DataFrame(data, columns=[
    'Title', 'Writer', 'Genre', 'View Count', 'Rating',
    'Episode Count', 'Comment Count', 'Story', 'Hashtags', 'Age Rating',
])
# utf-8-sig writes a UTF-8 byte-order mark at the start of the file.
df.to_csv('kakao_crawling_all.csv', index=False, encoding='utf-8-sig')


2677
['https://page.kakao.com/content/50866481', 'https://page.kakao.com/content/56976992', 'https://page.kakao.com/content/62477346', 'https://page.kakao.com/content/56271898', 'https://page.kakao.com/content/55566760']
현재 작품: 나 혼자만 레벨업
현재 작품: 접근 불가 레이디
현재 작품: 제목 없음
현재 작품: 무협지 악녀인데 내가 제일 쎄!
현재 작품: 그녀와 야수
