In [None]:
# 삼성

from selenium import webdriver as wb
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup

import requests
from time import sleep
from datetime import datetime, timedelta
import re
import csv


# --------------------------------------------------------------------
# 0. Settings
# --------------------------------------------------------------------

# Site root (prefixed to the relative hrefs found on result cards) and the
# search page for the percent-encoded keyword "삼성".
BASE_URL = "https://www.teamblind.com"
SEARCH_URL = "https://www.teamblind.com/kr/search/%EC%82%BC%EC%84%B1"

# Inclusive date window: posts dated outside of it are not collected.
START_DATE, END_DATE = datetime(2025, 1, 14).date(), datetime(2026, 1, 14).date()

# Upper bound on scroll iterations and the pause (seconds) after each scroll.
MAX_SCROLL, SCROLL_PAUSE = 300, 1.5

# Output file for the collected posts.
CSV_PATH = "blind_samsung_2025_1year_final.csv"

# Headers sent with the plain `requests` calls that fetch post bodies.
HEADERS = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}


# --------------------------------------------------------------------
# 1. 날짜 파싱
# --------------------------------------------------------------------

def parse_date_kor(text: str):
    text = text.replace("작성시간", "").strip(" .")
    now = datetime.now()

    m = re.search(r"\d+", text)
    num = int(m.group()) if m else None

    if "분" in text and num is not None:
        return now - timedelta(minutes=num)
    if "시간" in text and num is not None:
        return now - timedelta(hours=num)
    if "어제" in text:
        return now - timedelta(days=1)
    if "일" in text and "." not in text and num is not None:
        return now - timedelta(days=num)

    for fmt in ("%y.%m.%d", "%Y.%m.%d"):
        try:
            return datetime.strptime(text, fmt)
        except ValueError:
            pass

    if re.fullmatch(r"\d{2}\.\d{2}", text):
        mth, d = map(int, text.split("."))
        y = now.year
        dt = datetime(y, mth, d)
        if dt.date() > now.date():
            dt = datetime(y - 1, mth, d)
        return dt

    return None


# --------------------------------------------------------------------
# 2. Selenium initialisation (kept exactly as in your original approach)
# --------------------------------------------------------------------

# Chrome command-line flags handed to the browser at start-up.
options = Options()
options.add_argument("--disable-gpu")
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")

driver = wb.Chrome(options=options)
# Shared explicit wait: every wait.until(...) below gives up after 10 seconds.
wait = WebDriverWait(driver, 10)

driver.get(SEARCH_URL)
# Fixed pause after navigation - presumably to let the page finish rendering.
sleep(2)


# --------------------------------------------------------------------
# 3. Apply filters (your stable click pattern, unchanged)
# --------------------------------------------------------------------

# Each step waits until the first element matching the XPath is clickable,
# clicks it, then pauses for the given number of seconds. In order:
#   1. the element whose text contains both '전체' and '('
#   2. the '주식·투자' entry
#   3. the sort control (currently showing '최신순' or '추천순')
#   4. the '최신순' entry
FILTER_STEPS = (
    ("//*[contains(text(),'전체') and contains(text(),'(')]", 1),
    ("//*[contains(text(),'주식·투자')]", 2),
    ("//*[contains(text(),'최신순') or contains(text(),'추천순')]", 1),
    ("//*[contains(text(),'최신순')]", 2),
)

for step_xpath, step_pause in FILTER_STEPS:
    clickable = wait.until(EC.element_to_be_clickable((By.XPATH, step_xpath)))
    clickable.click()
    sleep(step_pause)


# --------------------------------------------------------------------
# 4. Collect the result list (scroll until 2025-01-14 is reached)
# --------------------------------------------------------------------

def _first_number(card, selector):
    """Return the first run of digits in the element matched by *selector*.

    Returns the string "0" when the element is missing from *card* or its
    text contains no digits.
    """
    tag = card.select_one(selector)
    if not tag:
        return "0"
    m = re.search(r"\d+", tag.get_text())
    return m.group() if m else "0"


posts = []
seen_urls = set()
oldest_date_seen = None

for i in range(MAX_SCROLL):
    # The whole rendered page is re-parsed on every pass; seen_urls filters
    # out the cards that were already handled in an earlier pass.
    soup = BeautifulSoup(driver.page_source, "html.parser")
    cards = soup.select("div.article-list-pre")

    for card in cards:
        title_tag = card.select_one("div.tit a")
        date_tag = card.select_one("a.past")

        if not title_tag or not date_tag:
            continue

        # An anchor without an href cannot be turned into a URL
        # (BASE_URL + None would raise TypeError), so skip such a card.
        href = title_tag.get("href")
        if not href:
            continue

        url = BASE_URL + href
        if url in seen_urls:
            continue
        seen_urls.add(url)

        dt = parse_date_kor(date_tag.get_text(strip=True))
        if not dt:
            continue

        d = dt.date()
        # Tracked for every parsed card, including ones outside the window,
        # because it drives the stop condition below.
        oldest_date_seen = d if oldest_date_seen is None else min(oldest_date_seen, d)

        if not (START_DATE <= d <= END_DATE):
            continue

        posts.append({
            "title": title_tag.get_text(strip=True),
            "url": url,
            "date": d.isoformat(),
            "views": _first_number(card, "a.pv"),
            "likes": _first_number(card, "span.like"),
            "comments": _first_number(card, "a.cmt"),
            "content": ""
        })

    print(f"[스크롤 {i+1}] 누적 {len(posts)} / 최하단 날짜 {oldest_date_seen}")

    # Stop once a card dated on or before START_DATE has been seen.
    if oldest_date_seen and oldest_date_seen <= START_DATE:
        print("✅ 2025-01-14 도달 → 리스트 수집 종료")
        break

    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    sleep(SCROLL_PAUSE)


# --------------------------------------------------------------------
# 5. Fetch the body text of every post from its detail page
# --------------------------------------------------------------------

# The list was parsed from driver.page_source above and the loop below uses
# plain `requests`, so the browser can be closed before the fetches start
# instead of staying open until they finish.
driver.quit()

for i, post in enumerate(posts, 1):
    # Best effort: a post whose page cannot be fetched keeps an empty body.
    post["content"] = ""
    try:
        r = requests.get(post["url"], headers=HEADERS, timeout=5)
        # Treat HTTP error responses as failures rather than parsing them.
        r.raise_for_status()
    except requests.RequestException as exc:
        # Report the failure instead of swallowing it silently.
        print(f"[본문] 요청 실패: {post['url']} ({exc})")
    else:
        soup = BeautifulSoup(r.text, "html.parser")
        content_tag = soup.select_one("p#contentArea.contents-txt")
        if content_tag:
            text = content_tag.get_text("\n", strip=True)
            post["content"] = re.sub(r"\n{2,}", "\n", text)

    if i % 50 == 0:
        print(f"[본문] {i}/{len(posts)}")


# --------------------------------------------------------------------
# 6. Save to CSV
# --------------------------------------------------------------------

# Column order of the output file; every post dict carries exactly these keys.
CSV_COLUMNS = ["title", "date", "views", "likes", "comments", "content", "url"]

# "utf-8-sig" makes Python prepend a UTF-8 byte-order mark to the file.
with open(CSV_PATH, "w", newline="", encoding="utf-8-sig") as f:
    writer = csv.DictWriter(f, fieldnames=CSV_COLUMNS)
    writer.writeheader()
    for row in posts:
        writer.writerow(row)

print(f"✅ CSV 저장 완료: {CSV_PATH} / 총 {len(posts)}개")


[스크롤 1] 누적 13 / 최하단 날짜 2026-01-13
[스크롤 2] 누적 33 / 최하단 날짜 2026-01-08
[스크롤 3] 누적 53 / 최하단 날짜 2026-01-04
[스크롤 4] 누적 73 / 최하단 날짜 2025-12-28
[스크롤 5] 누적 93 / 최하단 날짜 2025-12-18
[스크롤 6] 누적 113 / 최하단 날짜 2025-12-04
[스크롤 7] 누적 133 / 최하단 날짜 2025-11-27
[스크롤 8] 누적 153 / 최하단 날짜 2025-11-20
[스크롤 9] 누적 173 / 최하단 날짜 2025-11-11
[스크롤 10] 누적 193 / 최하단 날짜 2025-11-04
[스크롤 11] 누적 213 / 최하단 날짜 2025-10-30
[스크롤 12] 누적 233 / 최하단 날짜 2025-10-28
[스크롤 13] 누적 253 / 최하단 날짜 2025-10-25
[스크롤 14] 누적 273 / 최하단 날짜 2025-10-19
[스크롤 15] 누적 293 / 최하단 날짜 2025-10-13
[스크롤 16] 누적 313 / 최하단 날짜 2025-10-02
[스크롤 17] 누적 333 / 최하단 날짜 2025-09-19
[스크롤 18] 누적 353 / 최하단 날짜 2025-09-10
[스크롤 19] 누적 373 / 최하단 날짜 2025-08-27
[스크롤 20] 누적 392 / 최하단 날짜 2025-08-15
[스크롤 21] 누적 412 / 최하단 날짜 2025-08-05
[스크롤 22] 누적 432 / 최하단 날짜 2025-07-29
[스크롤 23] 누적 452 / 최하단 날짜 2025-07-25
[스크롤 24] 누적 472 / 최하단 날짜 2025-07-10
[스크롤 25] 누적 492 / 최하단 날짜 2025-06-29
[스크롤 26] 누적 512 / 최하단 날짜 2025-06-20
[스크롤 27] 누적 512 / 최하단 날짜 2025-06-20
[스크롤 28] 누적 532 / 최하단 날짜 2025-06-14
[스크롤 2

In [41]:
from selenium import webdriver as wb
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

from time import sleep
from datetime import datetime, timedelta
import re, csv, random
from urllib.parse import quote


# ==============================
# 0) Settings
# ==============================
# Site root, search keyword, and the search URL built from the
# percent-encoded keyword.
BASE_URL = "https://www.teamblind.com"
KEYWORD = "하이닉스"
SEARCH_URL = f"{BASE_URL}/kr/search/{quote(KEYWORD)}"

# Inclusive date window: posts dated outside of it are not collected.
START_DATE, END_DATE = datetime(2025, 1, 14).date(), datetime(2026, 1, 14).date()

# Upper bound on scroll iterations and the pause (seconds) after each scroll.
MAX_SCROLL, SCROLL_PAUSE = 400, 1.2

CSV_PATH = f"blind_{KEYWORD}_2025_1year_final.csv"

# Default headers installed on the requests session.
HEADERS = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}

# Bounds (seconds) of the random pause between two detail-page requests.
REQUEST_PAUSE_MIN = 0.12
REQUEST_PAUSE_MAX = 0.3


# ==============================
# 1) 유틸
# ==============================
def parse_date_kor(text: str):
    text = text.replace("작성시간", "").strip(" .")
    now = datetime.now()
    m = re.search(r"\d+", text)
    num = int(m.group()) if m else None

    if "분" in text and num is not None:   return now - timedelta(minutes=num)
    if "시간" in text and num is not None: return now - timedelta(hours=num)
    if "어제" in text:                     return now - timedelta(days=1)
    if "일" in text and "." not in text and num is not None:
        return now - timedelta(days=num)

    for fmt in ("%y.%m.%d", "%Y.%m.%d"):
        try:  return datetime.strptime(text, fmt)
        except ValueError:  pass

    if re.fullmatch(r"\d{2}\.\d{2}", text):
        mth, d = map(int, text.split("."))
        y = now.year
        dt = datetime(y, mth, d)
        if dt.date() > now.date(): dt = datetime(y-1, mth, d)
        return dt
    return None

def build_session():
    """Create a ``requests.Session`` with automatic retries and default headers.

    GET requests are retried up to 3 times (connect/read errors and HTTP
    429/500/502/503/504) with a backoff factor of 0.4. The same adapter is
    mounted for both https:// and http://, and ``HEADERS`` is merged into
    the session headers.

    Returns:
        The configured session.
    """
    retry_policy = Retry(
        total=3,
        connect=3,
        read=3,
        backoff_factor=0.4,
        status_forcelist=[429, 500, 502, 503, 504],
        allowed_methods=["GET"],
    )
    adapter = HTTPAdapter(max_retries=retry_policy, pool_connections=16, pool_maxsize=16)

    session = requests.Session()
    for scheme in ("https://", "http://"):
        session.mount(scheme, adapter)
    session.headers.update(HEADERS)
    return session

def safe_click_js(driver, el):
    """Scroll *el* to the vertical centre of the viewport and click it via JavaScript.

    The click is issued with ``element.click()`` inside the page rather than
    through WebDriver's native click.
    """
    run_js = driver.execute_script
    run_js("arguments[0].scrollIntoView({block:'center'});", el)
    sleep(0.2)  # brief pause between the scroll and the click
    run_js("arguments[0].click();", el)

def select_topic_stock(wait, driver):
    """
    Select the '주식·투자' (stocks & investing) topic in the #m_channel dropdown.

    Three strategies are tried in turn so that at least one takes effect
    whatever the UI looks like: select by option value '1', JS-click the
    option whose text contains '주식·투자', and finally force the value to
    '1' with JavaScript. Each strategy fires a bubbling 'change' event and
    then sleeps for one second. Any exception in the first two strategies
    falls through to the next one.

    NOTE(review): another cell of this notebook selects the same topic with
    option value '2' - confirm which value the site currently uses.
    """
    change_js = "arguments[0].dispatchEvent(new Event('change',{bubbles:true}));"

    dropdown = wait.until(EC.presence_of_element_located((By.ID, "m_channel")))
    driver.execute_script("arguments[0].scrollIntoView({block:'center'});", dropdown)
    sleep(0.2)

    def announce_change():
        # Notify the page's listeners of the new selection, then let it settle.
        driver.execute_script(change_js, dropdown)
        sleep(1)

    # Strategy 1: pick the option by its value attribute.
    try:
        Select(dropdown).select_by_value("1")
        announce_change()
    except Exception:
        pass
    else:
        return

    # Strategy 2: click the first option whose visible text contains the topic name.
    try:
        wanted = next(
            (option for option in Select(dropdown).options if "주식·투자" in option.text),
            None,
        )
        if wanted is not None:
            safe_click_js(driver, wanted)
            announce_change()
            return
    except Exception:
        pass

    # Strategy 3: set the value directly from JavaScript.
    driver.execute_script("""
        const s = arguments[0];
        s.value = '1';
        s.dispatchEvent(new Event('change', {bubbles:true}));
    """, dropdown)
    sleep(1)

def select_sort_latest(wait, driver):
    """
    Switch the result ordering to '최신순' (latest first).

    First looks for a <select> inside a div whose class contains 'sort':
    selects the option with value 'id', or, if that raises, JS-clicks the
    first option whose text contains '최신순'; a bubbling 'change' event is
    then dispatched. If anything in that path raises, falls back to
    treating the control as a button-style dropdown and clicking through it.
    """
    try:
        sort_sel = wait.until(EC.presence_of_element_located((By.XPATH, "//div[contains(@class,'sort')]//select")))
        driver.execute_script("arguments[0].scrollIntoView({block:'center'});", sort_sel)
        sleep(0.2)
        try:
            Select(sort_sel).select_by_value("id")
        except Exception:
            # Value lookup failed: fall back to matching the visible option text.
            for opt in Select(sort_sel).options:
                if "최신순" in opt.text:
                    safe_click_js(driver, opt); break
        # Dispatched whether or not an option was actually found above.
        driver.execute_script("arguments[0].dispatchEvent(new Event('change',{bubbles:true}));", sort_sel)
        sleep(1)
        return
    except Exception:
        pass

    # Rare case: the sort control is a button dropdown rather than a <select>.
    # Open it via whichever label it currently shows, then click '최신순'.
    btn = wait.until(EC.element_to_be_clickable((By.XPATH, "//*[contains(text(),'최신순') or contains(text(),'추천순')]")))
    safe_click_js(driver, btn); sleep(0.5)
    latest = wait.until(EC.element_to_be_clickable((By.XPATH, "//*[contains(text(),'최신순')]")))
    safe_click_js(driver, latest); sleep(1)

def wait_cards(wait):
    """Block until at least one ``div.article-list-pre`` card is present in the DOM."""
    card_locator = (By.CSS_SELECTOR, "div.article-list-pre")
    wait.until(EC.presence_of_element_located(card_locator))


# ==============================
# 2) Start Selenium
# ==============================
# Chrome command-line flags handed to the browser at start-up.
options = Options()
options.add_argument("--disable-gpu")
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")
options.add_argument("--disable-notifications")

driver = wb.Chrome(options=options)
# Shared explicit wait used by the helper functions (15-second timeout).
wait = WebDriverWait(driver, 15)

driver.get(SEARCH_URL)
sleep(2.5)

# 1. Re-run the search through the on-page search box, if one is found
#    within 3 seconds. Best effort: any failure here is ignored.
try:
    box = WebDriverWait(driver, 3).until(
        EC.presence_of_element_located((By.XPATH, "//input[contains(@placeholder,'검색') or @type='search']"))
    )
    box.clear(); box.send_keys(KEYWORD); box.send_keys(Keys.ENTER)
    sleep(2)
except Exception:
    pass

# 2. Topic filter: '주식·투자' (stocks & investing)
select_topic_stock(wait, driver)

# 3. Sort order: '최신순' (latest first)
select_sort_latest(wait, driver)

# Wait for the result cards to appear
wait_cards(wait)

# ==============================
# 3) Scroll and collect the list (back to 2025-01-14)
# ==============================
def _pick_count(card, selector):
    """Return the first run of digits in the element matched by *selector*.

    Returns the string "0" when the element is missing from *card* or its
    text contains no digits.
    """
    t = card.select_one(selector)
    if not t:
        return "0"
    m = re.search(r"\d+", t.get_text())
    return m.group() if m else "0"


posts, seen, oldest = [], set(), None
prev_count, stuck_rounds = 0, 0

# try/finally guarantees the browser is closed even if the loop raises;
# previously an exception left the Chrome process running.
try:
    for i in range(MAX_SCROLL):
        soup = BeautifulSoup(driver.page_source, "html.parser")
        cards = soup.select("div.article-list-pre")

        # Count consecutive passes in which the number of cards did not change.
        if len(cards) == prev_count:
            stuck_rounds += 1
        else:
            prev_count, stuck_rounds = len(cards), 0

        for c in cards:
            tit = c.select_one("div.tit a")
            dat = c.select_one("a.past")
            if not tit or not dat:
                continue

            # An anchor without an href cannot be turned into a URL
            # (BASE_URL + None would raise TypeError), so skip such a card.
            href = tit.get("href")
            if not href:
                continue

            url = BASE_URL + href
            if url in seen:
                continue
            seen.add(url)

            dt = parse_date_kor(dat.get_text(strip=True))
            if not dt:
                continue
            d = dt.date()
            # Tracked for every parsed card, in or out of the window,
            # because it drives the stop condition below.
            oldest = d if oldest is None else min(oldest, d)
            if not (START_DATE <= d <= END_DATE):
                continue

            posts.append({
                "title": tit.get_text(strip=True),
                "date": d.isoformat(),
                "views": _pick_count(c, "a.pv"),
                "likes": _pick_count(c, "span.like"),
                "comments": _pick_count(c, "a.cmt"),
                "url": url,
                "content": ""
            })

        print(f"[스크롤 {i+1}] 누적 {len(posts)} / 최하단 {oldest}")

        if oldest and oldest <= START_DATE:
            print("✅ 2025-01-14 도달, 리스트 수집 종료")
            break
        if stuck_rounds >= 5:
            print("⚠️ 새 카드가 더 안 나와 종료")
            break

        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        sleep(SCROLL_PAUSE)
finally:
    driver.quit()

# ==============================
# 4) Fetch post bodies
# ==============================
session = build_session()
for i, p in enumerate(posts, 1):
    # Best effort: a post whose page cannot be fetched keeps an empty body.
    p["content"] = ""
    try:
        r = session.get(p["url"], timeout=7)
        # Treat HTTP error responses as failures rather than parsing them.
        r.raise_for_status()
    except requests.RequestException as exc:
        # Report the failure instead of swallowing it silently.
        print(f"[본문] 요청 실패: {p['url']} ({exc})")
    else:
        s = BeautifulSoup(r.text, "html.parser")
        # Try the known body containers in order; the first match wins.
        tag = (
            s.select_one("p#contentArea.contents-txt")
            or s.select_one("div.contents")
            or s.select_one("div.article-content")
            or s.select_one("div.view-content")
        )
        if tag:
            p["content"] = tag.get_text("\n", strip=True)
    if i % 50 == 0:
        print(f"[본문] {i}/{len(posts)}")
    # Randomised pause between two requests.
    sleep(random.uniform(REQUEST_PAUSE_MIN, REQUEST_PAUSE_MAX))

# ==============================
# 5) Save to CSV
# ==============================
# Column order of the output file; every post dict carries exactly these keys.
CSV_COLUMNS = ["title", "date", "views", "likes", "comments", "content", "url"]

# "utf-8-sig" makes Python prepend a UTF-8 byte-order mark to the file.
with open(CSV_PATH, "w", newline="", encoding="utf-8-sig") as f:
    writer = csv.DictWriter(f, fieldnames=CSV_COLUMNS)
    writer.writeheader()
    for row in posts:
        writer.writerow(row)

print(f"✅ CSV 저장 완료: {CSV_PATH} / 총 {len(posts)}개")

[스크롤 1] 누적 13 / 최하단 2025-10-28
[스크롤 2] 누적 187 / 최하단 2025-07-17
[스크롤 3] 누적 844 / 최하단 2023-05-07
✅ 2025-01-14 도달, 리스트 수집 종료
[본문] 50/844
[본문] 100/844
[본문] 150/844
[본문] 200/844
[본문] 250/844
[본문] 300/844
[본문] 350/844
[본문] 400/844
[본문] 450/844
[본문] 500/844
[본문] 550/844
[본문] 600/844
[본문] 650/844
[본문] 700/844
[본문] 750/844
[본문] 800/844
✅ CSV 저장 완료: blind_하이닉스_2025_1year_final.csv / 총 844개


In [None]:
# 현대차: 검색 → (토픽: 주식·투자 value=2) → (정렬: 최신순 value=id) → 스크롤(2025.01.14까지)
# → 제목/조회/좋아요/댓글/날짜/URL 수집 → 본문(contentArea) 수집 → CSV 저장

from selenium import webdriver as wb
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import TimeoutException

from bs4 import BeautifulSoup

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

from time import sleep
from datetime import datetime, timedelta
import re
import csv
import random
from urllib.parse import quote


# ==============================
# 0) Settings
# ==============================
# Site root, search keyword, and the search URL built from the
# percent-encoded keyword.
BASE_URL = "https://www.teamblind.com"
KEYWORD = "현대차"
SEARCH_URL = f"{BASE_URL}/kr/search/{quote(KEYWORD)}"

# Inclusive date window: posts dated outside of it are not collected.
START_DATE, END_DATE = datetime(2025, 1, 14).date(), datetime(2026, 1, 14).date()

# Upper bound on scroll iterations and the pause (seconds) after each scroll.
MAX_SCROLL, SCROLL_PAUSE = 800, 3.2
# Seconds to wait for additional cards after a scroll, and the number of
# consecutive passes without new cards after which collection stops.
WAIT_NEW_TIMEOUT, MAX_STUCK = 18, 10

CSV_PATH = f"blind_{KEYWORD}_2025_1year_final.csv"

# Default headers installed on the requests session.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
    "Referer": BASE_URL,
}


# ==============================
# 1) 날짜 파싱
# ==============================
def parse_date_kor(text: str):
    """Turn a Korean post-timestamp label into a naive ``datetime``.

    Checked in order: relative labels containing "분" (minutes), "시간"
    (hours), "어제" (yesterday) or "일" (days), counted back from
    ``datetime.now()``; absolute ``YY.MM.DD`` / ``YYYY.MM.DD`` dates; and
    year-less ``MM.DD`` dates, which take the current year unless that
    would be in the future, in which case the previous year is used.

    Args:
        text: Raw label. The "작성시간" marker and surrounding whitespace
            and dots are stripped first.

    Returns:
        A ``datetime``, or ``None`` when no format matches or the month/day
        pair is not a real calendar date.
    """
    text = text.replace("작성시간", "").strip().strip(" .\n\t")

    now = datetime.now()
    m = re.search(r"\d+", text)
    num = int(m.group()) if m else None

    if "분" in text and num is not None:
        return now - timedelta(minutes=num)
    if "시간" in text and num is not None:
        return now - timedelta(hours=num)
    if "어제" in text:
        return now - timedelta(days=1)
    # Dotted strings are absolute dates, so they must not hit the days branch.
    if "일" in text and "." not in text and num is not None:
        return now - timedelta(days=num)

    for fmt in ("%y.%m.%d", "%Y.%m.%d"):
        try:
            return datetime.strptime(text, fmt)
        except ValueError:
            pass

    if re.fullmatch(r"\d{2}\.\d{2}", text):
        mth, d = map(int, text.split("."))
        # datetime() raises ValueError for impossible dates such as "13.45"
        # or "02.29" in a non-leap year. That error used to propagate and
        # abort the scrape; an unusable year is now skipped instead.
        for year in (now.year, now.year - 1):
            try:
                dt = datetime(year, mth, d)
            except ValueError:
                continue
            if dt.date() <= now.date():
                return dt
        return None

    return None


def fmt_date_only(d):
    """Format *d* as ``YYYY.MM.DD``: the date only, without the "작성시간" label."""
    return f"{d:%Y.%m.%d}"


# ==============================
# 2) requests 세션(본문 수집 안정화)
# ==============================
def build_session():
    """Create a ``requests.Session`` with automatic retries and default headers.

    GET requests are retried up to 4 times (connect/read errors and HTTP
    429/500/502/503/504) with a backoff factor of 0.7. The same adapter is
    mounted for both https:// and http://, and ``HEADERS`` is merged into
    the session headers.

    Returns:
        The configured session.
    """
    retry_policy = Retry(
        total=4,
        connect=4,
        read=4,
        backoff_factor=0.7,
        status_forcelist=[429, 500, 502, 503, 504],
        allowed_methods=["GET"],
    )
    adapter = HTTPAdapter(max_retries=retry_policy, pool_connections=16, pool_maxsize=16)

    session = requests.Session()
    for scheme in ("https://", "http://"):
        session.mount(scheme, adapter)
    session.headers.update(HEADERS)
    return session


def extract_content(html: str) -> str:
    """Extract the post body text from a detail-page HTML document.

    The candidate CSS selectors are tried in order and the first element
    found is used. Its text fragments are stripped and joined with newlines,
    runs of newlines are collapsed to one, and the result is stripped.

    Args:
        html: Full HTML source of the detail page.

    Returns:
        The body text, or "" when none of the selectors matches.
    """
    body_selectors = (
        "p#contentArea.contents-txt",
        "p#contentArea",
        "div.contents",
        "div.article-content",
        "div.view-content",
    )
    document = BeautifulSoup(html, "html.parser")

    for css in body_selectors:
        node = document.select_one(css)
        if node:
            break
    else:
        # No selector matched anything.
        return ""

    joined = node.get_text("\n", strip=True)
    return re.sub(r"\n{2,}", "\n", joined).strip()


# ==============================
# 3) Selenium initialisation
# ==============================
# Chrome command-line flags handed to the browser at start-up.
options = Options()
options.add_argument("--disable-gpu")
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")
options.add_argument("--blink-settings=imagesEnabled=false")  # image loading off (author's note: for stability)

driver = wb.Chrome(options=options)
# Explicit wait with a 15-second timeout.
wait = WebDriverWait(driver, 15)

driver.get(SEARCH_URL)
# Fixed pause after navigation - presumably to let the page finish rendering.
sleep(3)


# ==============================
# 4) 필터 적용: select로 확정 (주식·투자=2, 최신순=id)
# ==============================
def apply_stock_and_latest(driver):
    """Apply the topic and sort filters through their <select> elements and verify them.

    Selects option value "2" in ``select#m_channel`` (the '주식·투자' topic
    according to the original comments) and option value "id" in
    ``div.sort select`` ('최신순', latest first), then reads both selections
    back and prints them.

    Args:
        driver: The Selenium WebDriver showing the search-result page.

    Raises:
        RuntimeError: If, after selecting, the topic value read back is not
            "2" or the sort value read back is not "id".
    """
    topic_locator = (By.ID, "m_channel")
    sort_locator = (By.CSS_SELECTOR, "div.sort select")
    waiter = WebDriverWait(driver, 15)

    # Topic dropdown
    topic_dropdown = waiter.until(EC.presence_of_element_located(topic_locator))
    Select(topic_dropdown).select_by_value("2")
    sleep(1.5)

    # Sort dropdown
    sort_dropdown = waiter.until(EC.presence_of_element_located(sort_locator))
    Select(sort_dropdown).select_by_value("id")
    sleep(2.0)

    # Look both elements up again and read back what is actually selected.
    chosen_topic = Select(driver.find_element(*topic_locator)).first_selected_option
    chosen_sort = Select(driver.find_element(*sort_locator)).first_selected_option
    print("✅ 필터 적용 결과")
    print(f" - 토픽: value={chosen_topic.get_attribute('value')!r}, text={chosen_topic.text.strip()!r}")
    print(f" - 정렬: value={chosen_sort.get_attribute('value')!r}, text={chosen_sort.text.strip()!r}")

    if not chosen_topic.get_attribute("value") == "2":
        raise RuntimeError("토픽 '주식·투자(value=2)' 적용 실패")
    if not chosen_sort.get_attribute("value") == "id":
        raise RuntimeError("정렬 '최신순(value=id)' 적용 실패")


# ==============================
# 5) Scroll + list collection (only newly loaded cards are parsed)
# ==============================
def _pick_count(card, selector):
    """Return the first run of digits in the element matched by *selector*.

    Returns the string "0" when the element is missing from *card* or its
    text contains no digits.
    """
    t = card.select_one(selector)
    if not t:
        return "0"
    m = re.search(r"\d+", t.get_text())
    return m.group() if m else "0"


posts = []
seen_urls = set()

prev_card_count = 0
oldest_date_seen = None
stuck = 0
stop_here = False

# try/finally guarantees the browser is closed even if applying the filters
# or the scroll loop raises; previously an exception left Chrome running.
try:
    apply_stock_and_latest(driver)

    for i in range(MAX_SCROLL):
        soup = BeautifulSoup(driver.page_source, "html.parser")
        cards = soup.select("div.article-list-pre")
        cur_count = len(cards)
        # Only the cards appended since the previous pass are processed.
        new_cards = cards[prev_card_count:]

        print(f"[스크롤 {i+1}] 총 카드: {cur_count} / 새 카드: {len(new_cards)} / 누적(범위내): {len(posts)}")

        if not new_cards:
            stuck += 1
            if stuck >= MAX_STUCK:
                print("⚠️ 새 카드가 더 이상 안 늘어남 → 종료")
                break
        else:
            stuck = 0

        for card in new_cards:
            title_tag = card.select_one("div.tit a")
            date_tag = card.select_one("a.past")

            if not title_tag or not date_tag:
                continue

            # Skip anchors without a usable href. The old `not url` guard
            # never fired: an empty href produced url == BASE_URL, which was
            # then collected as if it were a post.
            href = title_tag.get("href", "")
            if not href:
                continue

            url = href if href.startswith("http") else (BASE_URL + href)
            if url in seen_urls:
                continue
            seen_urls.add(url)

            dt = parse_date_kor(date_tag.get_text(strip=True))
            if not dt:
                continue

            d = dt.date()
            oldest_date_seen = d if oldest_date_seen is None else min(oldest_date_seen, d)

            # Relies on latest-first ordering: once a card older than
            # START_DATE shows up, everything below it is older still.
            if d < START_DATE:
                stop_here = True
                break

            # d >= START_DATE is known here, so only the upper bound remains.
            if d > END_DATE:
                continue

            posts.append({
                "title": title_tag.get_text(strip=True),
                "date": fmt_date_only(d),
                "views": _pick_count(card, "a.pv"),
                "likes": _pick_count(card, "span.like"),
                "comments": _pick_count(card, "a.cmt"),
                "content": "",
                "url": url
            })

        print(f"  → 최하단 날짜(관측): {oldest_date_seen}")

        if stop_here:
            print("✅ 2025.01.14 이하 도달 → 리스트 수집 종료")
            break

        prev_card_count = cur_count

        # Scroll the last card into view, then wait for the card count to grow.
        # One query serves both the count and the scroll target.
        els = driver.find_elements(By.CSS_SELECTOR, "div.article-list-pre")
        prev_cnt = len(els)
        if els:
            driver.execute_script("arguments[0].scrollIntoView({block:'end'});", els[-1])
        else:
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

        sleep(SCROLL_PAUSE)

        try:
            WebDriverWait(driver, WAIT_NEW_TIMEOUT).until(
                lambda drv: len(drv.find_elements(By.CSS_SELECTOR, "div.article-list-pre")) > prev_cnt
            )
        except TimeoutException:
            # No new cards within the timeout; the stuck counter handles it on the next pass.
            pass
finally:
    driver.quit()

print("=" * 60)
print(f"✅ 리스트 수집 완료(범위내): {len(posts)}개")


# ==============================
# 6) Fetch post bodies (requests)
# ==============================
session = build_session()

for i, post in enumerate(posts, 1):
    # Best effort: a post whose page cannot be fetched keeps an empty body.
    post["content"] = ""
    try:
        r = session.get(post["url"], timeout=12)
        # Treat HTTP error responses as failures rather than parsing them.
        r.raise_for_status()
    except requests.RequestException as exc:
        # Report the failure instead of swallowing it silently.
        print(f"[본문] 요청 실패: {post['url']} ({exc})")
    else:
        post["content"] = extract_content(r.text)

    if i % 50 == 0:
        print(f"[본문] {i}/{len(posts)}")

    # Randomised pause between two requests.
    sleep(random.uniform(0.18, 0.45))


# ==============================
# 7) Save to CSV
# ==============================
# Column order of the output file; every post dict carries exactly these keys.
CSV_COLUMNS = ["title", "date", "views", "likes", "comments", "content", "url"]

# "utf-8-sig" makes Python prepend a UTF-8 byte-order mark to the file.
with open(CSV_PATH, "w", newline="", encoding="utf-8-sig") as f:
    writer = csv.DictWriter(f, fieldnames=CSV_COLUMNS)
    writer.writeheader()
    for row in posts:
        writer.writerow(row)

print(f"✅ CSV 저장 완료: {CSV_PATH} / 총 {len(posts)}개")
