In [3]:
import os
from dotenv import load_dotenv

# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

# Read the settings this notebook needs from the environment.
DB_URL = os.getenv("DB_URL")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Fail fast with a clear message: os.getenv returns None for an unset variable,
# and that would otherwise only surface later, when the engine is created or
# the OpenAI API is called.
_missing_settings = [
    name
    for name, value in (("DB_URL", DB_URL), ("OPENAI_API_KEY", OPENAI_API_KEY))
    if not value
]
if _missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_settings)
    )

import openai
# Configure the module-level API key used by the openai calls further down.
openai.api_key = OPENAI_API_KEY

In [4]:
from sqlalchemy import create_engine, Column, Integer, String, Text, DateTime, func
from sqlalchemy.orm import sessionmaker, declarative_base

# Declarative base class that the ORM entity classes in this notebook inherit from.
Base = declarative_base()

# Engine for the database addressed by DB_URL; SQL statement echoing is off.
engine = create_engine(
    DB_URL,
    echo=False,
    future=True,
)

# Factory producing sessions bound to the engine above.
Session = sessionmaker(bind=engine)

class RssArticleEntity(Base):
    """ORM mapping for the ``rss_articles`` table."""

    __tablename__ = "rss_articles"
    # Integer primary key of the article row.
    article_id = Column(Integer, primary_key=True)
    # Article URL; declared unique, and used as the crawl target in this notebook.
    link = Column(Text, unique=True)
    # Article body text; the test cell in this notebook overwrites it with crawled text.
    content = Column(Text)
    # Article title, limited to 500 characters.
    title = Column(String(500))
    # Publication timestamp (presumably taken from the RSS feed — TODO confirm).
    published_at = Column(DateTime)

class ArticleSummaryEntity(Base):
    """ORM mapping for the ``article_summaries`` table.

    Holds the summary text for an article together with bookkeeping about
    summarization attempts (status, attempt count, last error, timestamps).
    """

    __tablename__ = "article_summaries"
    # Integer primary key of the summary row.
    summary_id = Column(Integer, primary_key=True)
    # Id of the summarized article; unique, so at most one summary row per article.
    # NOTE(review): no ForeignKey to rss_articles.article_id is declared — confirm this is intentional.
    article_id = Column(Integer, unique=True)
    # Generated summary text.
    summary_text = Column(Text)
    # Processing status (required); this notebook writes "SUCCESS" or "FAILED".
    status = Column(String(20), nullable=False)
    # Number of summarization attempts (required, defaults to 0).
    try_count = Column(Integer, nullable=False, default=0)
    # Message of the most recent failure; this notebook clears it on success.
    last_error = Column(Text)
    # Timestamp of the most recent successful summarization.
    last_success_at = Column(DateTime)
    # Name of the model that produced the summary.
    model_name = Column(String(100))
    # Defaults to the SQL now() function when the row is inserted.
    created_at = Column(DateTime, default=func.now())
    # Defaults to now() on insert and is set to now() again on every update.
    updated_at = Column(DateTime, default=func.now(), onupdate=func.now())

In [None]:
import requests
from bs4 import BeautifulSoup

def crawl_article_content(url, timeout=10):
    """Download an article page and return the text of its ``<p>`` elements.

    Args:
        url: Address of the article page to fetch.
        timeout: Seconds to wait for the HTTP request (default 10).

    Returns:
        The stripped text of every ``<p>`` element, joined with newlines.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If no paragraph text could be extracted.
    """
    headers = {"User-Agent": "Mozilla/5.0"}
    resp = requests.get(url, headers=headers, timeout=timeout)
    resp.raise_for_status()

    # Do not parse resp.text: when the server sends "text/html" without a
    # charset, requests decodes it as ISO-8859-1, which garbles non-Latin
    # (e.g. Korean) pages. Hand the raw bytes to BeautifulSoup instead and only
    # force an encoding when the HTTP header declares one explicitly; otherwise
    # BeautifulSoup detects it from the document itself.
    content_type = resp.headers.get("Content-Type", "")
    declared_encoding = resp.encoding if "charset=" in content_type.lower() else None
    soup = BeautifulSoup(resp.content, "html.parser", from_encoding=declared_encoding)

    paragraphs = soup.select("p")
    content = "\n".join(p.get_text(strip=True) for p in paragraphs)
    if not content.strip():
        raise ValueError("본문 추출 실패")
    return content

def summarize_text(text, model="gpt-4.1-mini", temperature=0.3, max_tokens=500):
    """Ask an OpenAI chat model to summarize an article body.

    Args:
        text: Article body to summarize.
        model: Chat model name (default "gpt-4.1-mini").
        temperature: Sampling temperature passed to the API (default 0.3).
        max_tokens: Upper bound on generated tokens (default 500).

    Returns:
        The model's reply with surrounding whitespace removed.

    Raises:
        ValueError: If ``text`` is None or blank, or the model returns no content.
    """
    # Treat None like blank text so callers get the intended ValueError
    # instead of an AttributeError from None.strip().
    if text is None or not text.strip():
        raise ValueError("본문 내용이 비어있음")

    # openai>=1.0 removed ``openai.ChatCompletion`` in favour of the
    # module-level ``openai.chat.completions``; support whichever is installed.
    if hasattr(openai, "chat"):
        create_completion = openai.chat.completions.create
    else:
        create_completion = openai.ChatCompletion.create

    response = create_completion(
        model=model,
        messages=[{"role": "user", "content": f"다음 기사를 요약해줘:\n{text}"}],
        temperature=temperature,
        max_tokens=max_tokens,
    )

    summary = response.choices[0].message.content
    if summary is None:
        # The API can return a message without text content.
        raise ValueError("요약 결과가 비어있음")
    return summary.strip()

In [None]:
# datetime is needed for last_success_at below. Without this import the
# success path raised NameError and every run was recorded as FAILED.
from datetime import datetime


def _get_or_create_summary(session, article_id):
    """Return the summary row for ``article_id``, adding a new one to the session if none exists."""
    summary = session.query(ArticleSummaryEntity).filter_by(article_id=article_id).first()
    if summary is None:
        summary = ArticleSummaryEntity(article_id=article_id, try_count=0)
        session.add(summary)
    return summary


session = Session()

# The outer try/finally guarantees the session is closed even if the initial
# query itself fails.
try:
    # Pick a single article to test with. SQLAlchemy turns `!= None` into
    # an IS NOT NULL condition, so the comparison style is intentional.
    article = (
        session.query(RssArticleEntity)
        .filter(RssArticleEntity.content != None)  # noqa: E711
        .first()
    )

    if article is None:
        # Nothing to process; avoid dereferencing None below.
        print("[WARN] 처리할 기사가 없습니다")
    else:
        # Capture plain values up front so the error handler does not depend
        # on reloading attributes from an object expired by rollback().
        article_id = article.article_id
        article_link = article.link

        try:
            print(f"[INFO] 크롤링 시작: {article_link}")
            content = crawl_article_content(article_link)

            # Overwrite the stored content with the freshly crawled text.
            article.content = content
            session.commit()

            print(f"[INFO] AI 요약 시작: {article_id}")
            summary_text = summarize_text(content)

            # Record the successful summary.
            summary = _get_or_create_summary(session, article_id)
            summary.summary_text = summary_text
            summary.status = "SUCCESS"
            summary.model_name = "gpt-4.1-mini"
            summary.try_count += 1
            summary.last_error = None
            summary.last_success_at = datetime.now()

            session.commit()
            print(f"[SUCCESS] 요약 완료: {article_id}")

        except Exception as e:
            # Discard any half-finished changes, then record the failure.
            session.rollback()
            print(f"[ERROR] 처리 실패: {e}")
            summary = _get_or_create_summary(session, article_id)
            summary.status = "FAILED"
            summary.try_count += 1
            summary.last_error = str(e)
            session.commit()

finally:
    session.close()