In [6]:
import requests
from bs4 import BeautifulSoup as bs
import pandas as pd


# Search-result URL: `stock` board, keyword "삼성" (percent-encoded),
# title+content search, list view, page 1.
url = "https://www.fmkorea.com/search.php?mid=stock&listStyle=list&search_keyword=%EC%82%BC%EC%84%B1&search_target=title_content&page=1"

all_data = []
# Stays None when the request itself fails, so the diagnostics below can tell
# "no response at all" apart from "response received but no table rows".
# Rebinding here also prevents a stale `res` from an earlier cell run being reused.
res = None

try:
    # Bound the wait: without a timeout, requests may block indefinitely.
    res = requests.get(url, timeout=30)
    soup = bs(res.text, "lxml")

    # Collect the stripped text of every <td> of every <tr> on the page;
    # rows without any <td> (e.g. header rows made of <th>) are skipped.
    for r in soup.find_all("tr"):
        cols = r.find_all("td")
        if not cols:
            continue
        all_data.append([c.get_text(strip=True) for c in cols])

except Exception as e:
    # Best-effort cell: report the problem and fall through to the summary below.
    print(f"오류 발생: {e}")

# Convert to a DataFrame. Rows may have different numbers of cells; pandas pads
# the shorter rows so the frame is as wide as the longest row.
if all_data:
    df = pd.DataFrame(all_data)
    print(f"--- 전체 수집 결과 (총 {len(df)}행) ---")
    display(df)  # `display` is provided by the IPython/Jupyter runtime
elif res is not None:
    # A response arrived but contained no table rows: show the start of the
    # body to help diagnose (e.g. a script-only page instead of the listing).
    print("HTML 응답은 성공했으나 표(tr) 데이터를 찾지 못했습니다.")
    print("응답 본문 앞부분 일부:", res.text[:500])

HTML 응답은 성공했으나 표(tr) 데이터를 찾지 못했습니다.
응답 본문 앞부분 일부: <script>
var kyC = "";
var bju = ["elKPw==Myr", "QTSaw==zan", "vjHdw==Hwh", "sEecA==eTY", "bVEbw==GnT", "ptTQQ==hqR", "HqSDQ==UiH", "HjhPw==Azq", "hHGaw==dVJ", "bWQaA==DfJ", "tGLZA==pYK", "IUkZw==JxJ", "ANjQQ==fSx", "jRIDQ==Bqv", "dzFPw==QjP", "PrJcA==tow", "EalaA==vmg", "DOYdw==Rhc", "NVOZA==hCI", "EUlIw==CIM", "EEhaw==Gmx", "JxOdw==jWE", "ZQSdw==UPc", "WVQcw==wTV", "JbwMA==cou", "ZMpaA==fjs", "MkidA==Auk", "zPKeA==TNj", "yknbA==ORH", "ksOeQ==Abx", "KjMQA==QPr", "fdDJQ==smY", "hnhRg==DNi", "Zlz


In [None]:
# Scratch cell: prints the literal string "test".
print("test")

In [3]:
import time
import requests
from bs4 import BeautifulSoup as bs
import pandas as pd

# Pacing / retry settings used by the crawl loop in this cell.
MAX_RETRY = 3  # maximum number of attempts per page
SLEEP_SEC = 2  # pause, in seconds, passed to time.sleep() after each page

# Search endpoint and the fixed query-string parameters sent with every
# request; the crawl loop adds a "page" entry to a copy of this mapping.
BASE_URL = "https://www.fmkorea.com/search.php"
BASE_PARAMS = dict(
    mid="stock",
    category="2997203870",
    search_keyword="삼성전자",
    search_target="title_content",
    listStyle="list",
)

def _parse_count(text: str):
    """Convert a scraped counter such as ``"1,234"`` to ``int``.

    Returns ``None`` for an empty string or for text that is not an integer
    once thousands separators are removed, so a single odd cell cannot abort
    the whole crawl with ``ValueError``.
    """
    if not text:
        return None
    try:
        return int(text.replace(",", ""))
    except ValueError:
        return None


def parse_one_page(html: str):
    """Extract the post rows from one search-result page.

    Args:
        html: Raw HTML of a search-result page.

    Returns:
        A list of dicts, one per post row, with the keys ``"탭"``, ``"제목"``,
        ``"글쓴이"``, ``"날짜"`` (all ``str``) and ``"조회"``, ``"추천"``
        (``int`` or ``None``). The list is empty when the listing table, its
        ``<tbody>``, or any matching row is missing.
    """
    soup = bs(html, "html.parser")
    table = soup.select_one("table.bd_lst.bd_tb_lst.bd_tb")
    tbody = table.select_one("tbody") if table else None
    rows = tbody.select("tr") if tbody else []

    out = []
    for tr in rows:
        td_cate = tr.select_one("td.cate a")
        td_title_a = tr.select_one("td.title a.hx")
        td_author = tr.select_one("td.author a")
        td_time = tr.select_one("td.time")
        tds_mno = tr.select("td.m_no")

        # Skip rows that lack any of the expected cells; the first two
        # `td.m_no` cells are read as the view and vote counters.
        if not (td_cate and td_title_a and td_author and td_time and len(tds_mno) >= 2):
            continue

        out.append({
            "탭": td_cate.get_text(strip=True),
            "제목": td_title_a.get_text(" ", strip=True),
            "글쓴이": td_author.get_text(strip=True),
            "날짜": td_time.get_text(strip=True),
            "조회": _parse_count(tds_mno[0].get_text(strip=True)),
            "추천": _parse_count(tds_mno[1].get_text(strip=True)),
        })
    return out

# Crawl the first search-result pages through one shared HTTP session,
# retrying failed requests with a growing pause, then save everything as CSV.
all_rows = []

with requests.Session() as s:
    for page in range(1, 11):  # test run: pages 1-10 only
        params = {**BASE_PARAMS, "page": page}

        ok = False
        for attempt in range(1, MAX_RETRY + 1):
            try:
                # `timeout` is in seconds.
                r = s.get(BASE_URL, params=params, timeout=200)
                r.raise_for_status()  # raises on 4xx/5xx responses
            except requests.RequestException as e:
                print(f"[FAIL] page={page} attempt={attempt}/{MAX_RETRY} err={e}")
                # Back off (60 s, 120 s, ...) only when another attempt will
                # follow; sleeping after the final failure just wastes time.
                if attempt < MAX_RETRY:
                    wait = 60 * attempt
                    print(f"-> {wait}초 쉬고 재시도")
                    time.sleep(wait)
            else:
                # Parsing sits outside the `try` so only request errors are retried.
                all_rows.extend(parse_one_page(r.text))
                ok = True
                break

        if ok:
            print(f"[OK] page={page} total_rows={len(all_rows)}")
        else:
            print(f"[SKIP] page={page} (재시도 {MAX_RETRY}회 실패)")

        # Pause between pages to keep the request rate low.
        time.sleep(SLEEP_SEC)

df = pd.DataFrame(all_rows)
# NOTE(review): the file name says "1_500" but the loop above only covers
# pages 1-10 -- confirm the intended name before widening the range.
df.to_csv("fmkorea_search_page1_500.csv", index=False, encoding="utf-8")
print("saved:", len(df))


[OK] page=1 total_rows=20
[OK] page=2 total_rows=40
[OK] page=3 total_rows=60
[OK] page=4 total_rows=80
[OK] page=5 total_rows=100
[OK] page=6 total_rows=120
[OK] page=7 total_rows=140
[OK] page=8 total_rows=160
[OK] page=9 total_rows=180
[OK] page=10 total_rows=200
saved: 200


In [None]:
import time
import requests
from bs4 import BeautifulSoup as bs
import pandas as pd

# Search-result URL template: `stock` board, fixed category, keyword "삼성전자"
# (percent-encoded), title+content search, list view; `{}` is the page number.
url_base = "https://www.fmkorea.com/search.php?mid=stock&category=2997203870&search_keyword=%EC%82%BC%EC%84%B1%EC%A0%84%EC%9E%90&search_target=title_content&listStyle=list&page={}"

# Browser-like User-Agent sent with every request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36"
}

# Column-oriented accumulator: one list per output column.
data = {
    "탭": [],
    "제목": [],
    "글쓴이": [],
    "날짜": [],
    "조회": [],
    "추천": [],
}

# requests timeouts are expressed in seconds; the previous value of 2000 would
# have let a single stalled request block for over half an hour.
REQUEST_TIMEOUT_SEC = 30

try:
    for page in range(1, 501):
        url = url_base.format(page)
        r = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT_SEC)
        r.raise_for_status()  # a failed request raises and stops the crawl

        soup = bs(r.text, "lxml")

        # Rows (<tr>) of the post-list table.
        rows = soup.select("table.bd_lst.bd_tb_lst.bd_tb tbody tr")

        for tr in rows:
            cate_a = tr.select_one("td.cate a")
            title_a = tr.select_one("td.title a.hx")
            author_a = tr.select_one("td.author a")
            time_td = tr.select_one("td.time")
            mno_tds = tr.select("td.m_no")

            # Skip rows whose structure differs (e.g. notices or ads).
            if not (cate_a and title_a and author_a and time_td and len(mno_tds) >= 2):
                continue

            views = mno_tds[0].get_text(strip=True)
            votes = mno_tds[1].get_text(strip=True)

            # Build the complete record before touching `data`, so an error
            # while converting a value cannot leave the column lists with
            # different lengths.
            record = {
                "탭": cate_a.get_text(strip=True),
                "제목": title_a.get_text(" ", strip=True),
                "글쓴이": author_a.get_text(strip=True),
                "날짜": time_td.get_text(strip=True),
                "조회": int(views.replace(",", "")) if views else None,
                "추천": int(votes.replace(",", "")) if votes else None,
            }
            for column, value in record.items():
                data[column].append(value)

        print(f"{page}페이지 완료 / 누적 {len(data['제목'])}개")
        time.sleep(1.5)
finally:
    # Runs on normal completion as well as on an error or interrupt, so the
    # rows collected so far are written out instead of being lost; any
    # exception still propagates afterwards.
    df = pd.DataFrame(data)
    df = df.drop_duplicates()
    df.to_csv("fmkorea_page_1_500.csv", index=False, encoding="utf-8")

df.head()
