# In [None]:
import pandas as pd
import random
import time
import os
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
from urllib.parse import urlparse, parse_qs

def get_youtube_comments(video_url, max_comments=100, max_scrolls=10):
    """Collect comment texts rendered on a YouTube video page.

    Opens ``video_url`` in a headless Chrome session (``--lang=ko-KR``),
    scrolls to the bottom of the page repeatedly so more comments are
    loaded, then parses the rendered HTML and extracts the text of every
    ``#content-text`` element.

    Args:
        video_url: URL of the video page to open.
        max_comments: Upper bound on the number of comments returned. A
            value of zero or less returns an empty list without starting
            a browser.
        max_scrolls: Maximum number of scroll-to-bottom attempts.

    Returns:
        A list of whitespace-stripped comment strings, at most
        ``max_comments`` long. If anything fails while loading or parsing
        the page, the error is printed and an empty list is returned.

    Note:
        Failures while *creating* the Chrome driver happen before the
        ``try`` block and therefore propagate to the caller.
    """
    # Nothing requested: skip the (expensive) browser launch entirely. This
    # also keeps a negative cap from being interpreted as a negative slice
    # ("all but the last n") further down.
    if max_comments <= 0:
        return []

    options = Options()
    options.add_argument('--headless')
    options.add_argument('--disable-gpu')
    options.add_argument('--lang=ko-KR')
    options.add_argument('--no-sandbox')

    driver = webdriver.Chrome(options=options)
    driver.set_page_load_timeout(20)

    try:
        driver.get(video_url)
        # Fixed pause so the initial page content has time to render.
        time.sleep(3)

        last_height = driver.execute_script("return document.documentElement.scrollHeight")
        for _ in range(max_scrolls):
            driver.execute_script("window.scrollTo(0, document.documentElement.scrollHeight);")
            # Wait for content appended by the scroll before measuring again.
            time.sleep(2)
            new_height = driver.execute_script("return document.documentElement.scrollHeight")
            # Stop when the page no longer grows or enough comments are present.
            if new_height == last_height or len(driver.find_elements(By.ID, 'content-text')) >= max_comments:
                break
            last_height = new_height

        soup = BeautifulSoup(driver.page_source, 'html.parser')
        comment_elems = soup.select('#content-text')
        comments = [c.text.strip() for c in comment_elems[:max_comments]]

    except Exception as e:
        # Deliberate best-effort: one failing video must not abort a batch.
        print(f"❌ 댓글 로딩 중 예외 발생: {video_url}\n→ {e}")
        comments = []

    finally:
        # Always release the browser process, even on failure.
        driver.quit()

    return comments

def extract_video_id(url):
    """Return the video ID contained in a YouTube URL.

    Supported forms, checked in this order:

    1. A ``v`` query parameter (``.../watch?v=<id>&t=10s``).
    2. A short link whose host is ``youtu.be`` (``https://youtu.be/<id>``).
    3. A path of the form ``/shorts/<id>``, ``/embed/<id>``, ``/live/<id>``
       or ``/v/<id>``.
    4. Fallback: whatever follows the last ``"v="`` in the string, which is
       the whole input when it contains no ``"v="`` (e.g. a bare ID).

    Args:
        url: The URL (or bare ID) as a string.

    Returns:
        The extracted video ID string.
    """
    parsed = urlparse(url)

    query_ids = parse_qs(parsed.query).get('v')
    if query_ids:
        return query_ids[0]

    host = (parsed.hostname or '').lower()
    segments = [part for part in parsed.path.split('/') if part]

    # Short links carry the ID as the first path segment.
    if host in ('youtu.be', 'www.youtu.be') and segments:
        return segments[0]

    # Path-based pages carry the ID right after a known prefix segment.
    if len(segments) >= 2 and segments[0] in ('shorts', 'embed', 'live', 'v'):
        return segments[1]

    # Last resort, kept from the original behavior for unusual inputs.
    return url.split("v=")[-1]

if __name__ == '__main__':
    # Script entry point: read the video metadata CSV, scrape comments for
    # every listed video, and write all comments to a single CSV file.
    input_file = 'youtube_metadata_250510_김문수_이재명_대선.csv'
    df = pd.read_csv(input_file)

    # Extract the date/keyword part of the input name for the output name.
    base_filename = os.path.basename(input_file).replace('youtube_metadata_', '').replace('.csv', '')
    # e.g. base_filename = '250510_김문수_이재명_대선'

    # Fixed output schema so the CSV keeps its header even when no comment
    # was collected at all.
    output_columns = ['videoId', 'title', 'channel', 'comment']
    all_data = []
    total_videos = len(df)

    # Count rows with enumerate so the progress display does not depend on
    # the DataFrame's index labels.
    for position, (_, row) in enumerate(df.iterrows(), start=1):
        video_url = row['url']

        # A blank CSV cell is read as NaN (a float); parsing it as a URL
        # would raise outside the try block and lose everything collected
        # so far, so such rows are skipped instead.
        if not isinstance(video_url, str) or not video_url.strip():
            print(f"\n⚠️ {position}/{total_videos} | URL 없음 → 건너뜀")
            continue

        video_id = extract_video_id(video_url)
        print(f"\n🎬 {position}/{total_videos} | {row['title']} | 채널: {row['channel']} | ID: {video_id}")

        try:
            comments = get_youtube_comments(video_url, max_comments=100)
            print(f"🗨️ {len(comments)}개 댓글 수집 완료")

            if comments:
                print("📌 댓글 미리보기:")
                for preview in comments[:3]:
                    print(f"   - {preview}")
            else:
                print("⚠️ 댓글 없음 또는 비활성화 영상")

            # One output row per comment, tagged with its video's metadata.
            for c in comments:
                all_data.append({
                    'videoId': video_id,
                    'title': row['title'],
                    'channel': row['channel'],
                    'comment': c
                })

        except Exception as e:
            # Best-effort batch: report the failure and continue.
            print(f"❌ 오류 발생: {video_id} → {e}")

        # Randomized pause (4-6 s) between videos.
        time.sleep(4 + random.uniform(0, 2))

    df_comments = pd.DataFrame(all_data, columns=output_columns)

    # Preview of the combined result.
    print("\n📝 최종 댓글 통합 미리보기:")
    print(df_comments.head(3))

    # Output file name is derived automatically from the input file name.
    output_file = f'youtube_comments_{base_filename}.csv'
    df_comments.to_csv(output_file, index=False, encoding='utf-8-sig')
    print(f"\n✅ 총 댓글 수집 완료: {len(df_comments)}건 → {output_file}")
