In [5]:
import sys
import csv
import os
import webbrowser
from transformers import BartForConditionalGeneration, PreTrainedTokenizerFast
from PyQt5.QtWidgets import QApplication, QMainWindow, QListWidget, QLabel, QTextEdit, QPushButton, QVBoxLayout, QHBoxLayout, QWidget, QLineEdit
from PyQt5.QtGui import QFont

# ✅ CSV 파일 경로
CSV_FILE_PATH = r"C:\pthon_basic\webcrolingProject\news_crawler_project\data\naver_news.csv"

# KoBART 모델 경로
MODEL_PATH = "digit82/kobart-summarization"
tokenizer = PreTrainedTokenizerFast.from_pretrained(MODEL_PATH)
model = BartForConditionalGeneration.from_pretrained(MODEL_PATH)

class NewsViewer(QMainWindow):
    def __init__(self):
        super().__init__()

        # 윈도우 설정
        self.setWindowTitle("네이버 경제 뉴스 뷰어")
        self.setGeometry(100, 100, 900, 650)  # 창 크기 조정

        # 메인 레이아웃
        layout = QHBoxLayout()

        # 🔍 왼쪽 검색창 및 뉴스 리스트
        left_layout = QVBoxLayout()

        self.search_input = QLineEdit(self)
        self.search_input.setPlaceholderText("🔍 검색어 입력 후 Enter")
        self.search_input.setFont(QFont("맑은 고딕", 14))
        self.search_input.returnPressed.connect(self.update_news_list)
        left_layout.addWidget(self.search_input)

        # 📌 뉴스 리스트
        self.news_list = QListWidget(self)
        self.news_list.setFont(QFont("맑은 고딕", 14))
        self.news_list.itemClicked.connect(self.show_news)
        left_layout.addWidget(self.news_list)

        # 뉴스 불러오기 버튼
        self.load_button = QPushButton("🔄 뉴스 불러오기", self)
        self.load_button.setFont(QFont("맑은 고딕", 14))
        self.load_button.clicked.connect(self.load_news)
        left_layout.addWidget(self.load_button)

        # 오른쪽 뉴스 제목과 본문 표시 레이아웃
        right_layout = QVBoxLayout()

        # 📰 뉴스 제목
        self.title_label = QLabel("뉴스 제목", self)
        self.title_label.setFont(QFont("맑은 고딕", 16, QFont.Weight.Bold))
        right_layout.addWidget(self.title_label)

        # 📜 뉴스 본문
        self.news_content = QTextEdit(self)
        self.news_content.setFont(QFont("맑은 고딕", 14))
        self.news_content.setReadOnly(True)
        right_layout.addWidget(self.news_content)

        # 요약 버튼 추가
        self.summarize_button = QPushButton("📑 요약하기", self)
        self.summarize_button.setFont(QFont("맑은 고딕", 14))
        self.summarize_button.clicked.connect(self.summarize_news)
        right_layout.addWidget(self.summarize_button)

        # 뉴스 링크 (클릭하면 웹에서 열림)
        self.link_label = QLabel("", self)
        self.link_label.setFont(QFont("맑은 고딕", 14))
        self.link_label.setStyleSheet("color: blue; text-decoration: underline; cursor: pointer;")
        self.link_label.mousePressEvent = self.open_link
        right_layout.addWidget(self.link_label)

        layout.addLayout(left_layout)
        layout.addLayout(right_layout)

        # 중앙 위젯 설정
        central_widget = QWidget()
        central_widget.setLayout(layout)
        self.setCentralWidget(central_widget)

        # 뉴스 데이터 저장할 리스트
        self.news_data = []
        self.load_news()

    def load_news(self):
        """📌 CSV 파일에서 뉴스 데이터 불러오기"""
        self.news_list.clear()
        self.news_data.clear()

        if not os.path.exists(CSV_FILE_PATH):
            self.news_list.addItem("⚠️ 뉴스 파일을 찾을 수 없습니다!")
            return

        try:
            with open(CSV_FILE_PATH, "r", encoding="utf-8") as file:
                reader = csv.reader(file)
                next(reader)  # 첫 번째 줄(헤더) 건너뛰기
                for row in reader:
                    if len(row) < 3:
                        continue
                    title, link, content = row
                    self.news_data.append((title, link, content))
                    self.news_list.addItem(title)  # 뉴스 제목 리스트에 추가
        except Exception as e:
            self.news_list.addItem(f"❌ 파일 읽기 오류: {e}")

    def update_news_list(self):
        """🔍 검색 기능 - 키워드가 포함된 뉴스만 표시"""
        keyword = self.search_input.text().strip().lower()
        self.news_list.clear()
        for title, link, content in self.news_data:
            if keyword in title.lower() or keyword in content.lower():
                self.news_list.addItem(title)

    def show_news(self, item):
        """📜 선택한 뉴스 본문 표시"""
        index = self.news_list.row(item)
        title, link, content = self.news_data[index]

        self.title_label.setText(title)
        self.news_content.setText(content)
        self.link_label.setText(link)

        self.link_label.setStyleSheet("color: blue; text-decoration: underline; cursor: pointer;")
        self.current_link = link

    def open_link(self, event):
        """🌐 웹 브라우저에서 뉴스 링크 열기"""
        if hasattr(self, "current_link"):
            webbrowser.open(self.current_link)

    def summarize_news(self):
        """📑 KoBART 모델을 이용해 뉴스 본문 요약"""
        content = self.news_content.toPlainText()
        inputs = tokenizer.encode("summarize: " + content, return_tensors="pt")

        summary_ids = model.generate(
            inputs,
            max_length=150,
            num_beams=4,
            no_repeat_ngram_size=2,
            early_stopping=True
        )

        summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
        self.news_content.setText(summary)

# 실행 코드
if __name__ == "__main__":
    app = QApplication(sys.argv)
    viewer = NewsViewer()
    viewer.show()
    sys.exit(app.exec_())


You passed along `num_labels=3` with an incompatible id to label map: {'0': 'NEGATIVE', '1': 'POSITIVE'}. The number of labels wil be overwritten to 2.


: 

In [1]:
from transformers import BartForConditionalGeneration, PreTrainedTokenizerFast

MODEL_PATH = "digit82/kobart-summarization"
tokenizer = PreTrainedTokenizerFast.from_pretrained(MODEL_PATH)
model = BartForConditionalGeneration.from_pretrained(MODEL_PATH)
print("✅ 모델 로딩 완료")


You passed along `num_labels=3` with an incompatible id to label map: {'0': 'NEGATIVE', '1': 'POSITIVE'}. The number of labels wil be overwritten to 2.


✅ 모델 로딩 완료


In [2]:
import sys
import csv
import os
import webbrowser
import requests
from bs4 import BeautifulSoup
from transformers import BartForConditionalGeneration, PreTrainedTokenizerFast
from PyQt5.QtWidgets import (
    QApplication, QMainWindow, QListWidget, QLabel, QTextEdit,
    QPushButton, QVBoxLayout, QHBoxLayout, QWidget, QLineEdit
)
from PyQt5.QtGui import QFont

# ✅ 저장 위치 (GUI와 동일한 위치)
CSV_FILE_PATH = r"C:\pthon_basic\webcrolingProject\news_crawler_project\data\naver_news.csv"

# ✅ 요약 모델 로드
MODEL_PATH = "digit82/kobart-summarization"
tokenizer = PreTrainedTokenizerFast.from_pretrained(MODEL_PATH)
model = BartForConditionalGeneration.from_pretrained(MODEL_PATH)

# ✅ 뉴스 크롤링 함수
def crawl_and_save():
    url = "https://news.naver.com/section/101"
    headers = {"User-Agent": "Mozilla/5.0"}

    try:
        response = requests.get(url, headers=headers)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"❌ 요청 오류 발생: {e}")
        return

    soup = BeautifulSoup(response.text, "html.parser")
    news_list = soup.select("div.sa_text a")[:120]

    def get_article_content(url):
        try:
            res = requests.get(url, headers=headers)
            res.raise_for_status()
        except:
            return "❌ 본문을 가져올 수 없습니다."

        soup = BeautifulSoup(res.text, "html.parser")
        selectors = ["div#newsct_article", "div#articleBodyContents", "div#dic_area"]
        for sel in selectors:
            article = soup.select_one(sel)
            if article:
                return article.get_text(strip=True)
        return "❌ 본문을 찾을 수 없습니다."

    news_data = []
    for news in news_list:
        title = news.get_text(strip=True)
        link = news["href"]
        if not link.startswith("http"):
            link = "https://news.naver.com" + link
        if "더보기" in title or len(title) < 5:
            continue
        content = get_article_content(link)
        if "❌ 본문" in content:
            continue
        news_data.append((title, link, content))
        if len(news_data) >= 100:
            break

    with open(CSV_FILE_PATH, "w", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        writer.writerow(["제목", "링크", "본문"])
        writer.writerows(news_data)
    print(f"✅ 최신 뉴스 50건 저장 완료 → {CSV_FILE_PATH}")

# ✅ GUI 클래스
class NewsViewer(QMainWindow):
    def __init__(self):
        super().__init__()
        self.setWindowTitle("네이버 경제 뉴스 뷰어")
        self.setGeometry(100, 100, 900, 650)

        layout = QHBoxLayout()
        left_layout = QVBoxLayout()

        self.search_input = QLineEdit(self)
        self.search_input.setPlaceholderText("🔍 검색어 입력 후 Enter")
        self.search_input.setFont(QFont("맑은 고딕", 14))
        self.search_input.returnPressed.connect(self.update_news_list)
        left_layout.addWidget(self.search_input)

        self.news_list = QListWidget(self)
        self.news_list.setFont(QFont("맑은 고딕", 14))
        self.news_list.itemClicked.connect(self.show_news)
        left_layout.addWidget(self.news_list)

        self.load_button = QPushButton("🔄 뉴스 불러오기", self)
        self.load_button.setFont(QFont("맑은 고딕", 14))
        self.load_button.clicked.connect(self.load_news)
        left_layout.addWidget(self.load_button)

        right_layout = QVBoxLayout()

        self.title_label = QLabel("뉴스 제목", self)
        self.title_label.setFont(QFont("맑은 고딕", 16, QFont.Weight.Bold))
        right_layout.addWidget(self.title_label)

        self.news_content = QTextEdit(self)
        self.news_content.setFont(QFont("맑은 고딕", 14))
        self.news_content.setReadOnly(True)
        right_layout.addWidget(self.news_content)

        self.summarize_button = QPushButton("📑 요약하기", self)
        self.summarize_button.setFont(QFont("맑은 고딕", 14))
        self.summarize_button.clicked.connect(self.summarize_news)
        right_layout.addWidget(self.summarize_button)

        self.link_label = QLabel("", self)
        self.link_label.setFont(QFont("맑은 고딕", 14))
        self.link_label.setStyleSheet("color: blue; text-decoration: underline; cursor: pointer;")
        self.link_label.mousePressEvent = self.open_link
        right_layout.addWidget(self.link_label)

        layout.addLayout(left_layout)
        layout.addLayout(right_layout)

        central_widget = QWidget()
        central_widget.setLayout(layout)
        self.setCentralWidget(central_widget)

        self.news_data = []
        self.load_news()

    def load_news(self):
        # ✅ 뉴스 불러오기 전 최신 뉴스 수집
        crawl_and_save()

        self.news_list.clear()
        self.news_data.clear()

        if not os.path.exists(CSV_FILE_PATH):
            self.news_list.addItem("⚠️ 뉴스 파일을 찾을 수 없습니다!")
            return

        try:
            with open(CSV_FILE_PATH, "r", encoding="utf-8") as file:
                reader = csv.reader(file)
                next(reader)
                for row in reader:
                    if len(row) < 3:
                        continue
                    title, link, content = row
                    self.news_data.append((title, link, content))
                    self.news_list.addItem(title)
        except Exception as e:
            self.news_list.addItem(f"❌ 파일 읽기 오류: {e}")

    def update_news_list(self):
        keyword = self.search_input.text().strip().lower()
        self.news_list.clear()
        for title, link, content in self.news_data:
            if keyword in title.lower() or keyword in content.lower():
                self.news_list.addItem(title)

    def show_news(self, item):
        index = self.news_list.row(item)
        title, link, content = self.news_data[index]
        self.title_label.setText(title)
        self.news_content.setText(content)
        self.link_label.setText(link)
        self.current_link = link

    def open_link(self, event):
        if hasattr(self, "current_link"):
            webbrowser.open(self.current_link)

    def summarize_news(self):
        content = self.news_content.toPlainText().strip()
        if not content:
            return
        inputs = tokenizer.encode("summarize: " + content, return_tensors="pt")
        summary_ids = model.generate(
            inputs,
            max_length=150,
            num_beams=4,
            no_repeat_ngram_size=2,
            early_stopping=True
        )
        summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
        self.news_content.setText(summary)

# ✅ 실행
if __name__ == "__main__":
    app = QApplication(sys.argv)
    viewer = NewsViewer()
    viewer.show()
    sys.exit(app.exec_())


You passed along `num_labels=3` with an incompatible id to label map: {'0': 'NEGATIVE', '1': 'POSITIVE'}. The number of labels wil be overwritten to 2.


✅ 최신 뉴스 50건 저장 완료 → C:\pthon_basic\webcrolingProject\news_crawler_project\data\naver_news.csv


SystemExit: 0

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
