In [None]:
# pip install requests beautifulsoup4

Collecting beautifulsoup4
  Downloading beautifulsoup4-4.13.4-py3-none-any.whl.metadata (3.8 kB)
Collecting soupsieve>1.2 (from beautifulsoup4)
  Downloading soupsieve-2.7-py3-none-any.whl.metadata (4.6 kB)
Downloading beautifulsoup4-4.13.4-py3-none-any.whl (187 kB)
Downloading soupsieve-2.7-py3-none-any.whl (36 kB)
Installing collected packages: soupsieve, beautifulsoup4

   -------------------- ------------------- 1/2 [beautifulsoup4]
   -------------------- ------------------- 1/2 [beautifulsoup4]
   -------------------- ------------------- 1/2 [beautifulsoup4]
   -------------------- ------------------- 1/2 [beautifulsoup4]
   ---------------------------------------- 2/2 [beautifulsoup4]

Successfully installed beautifulsoup4-4.13.4 soupsieve-2.7
Note: you may need to restart the kernel to use updated packages.


In [None]:
import os
from urllib.parse import urljoin, urlsplit

import requests
from bs4 import BeautifulSoup

# Site root of the crawled site.
BASE_URL = "https://www.kw.ac.kr"
# Notice list page; a ``pageNum`` query parameter selects the page.
NOTICE_URL = BASE_URL + "/ko/life/notice.jsp"

# Request headers sent with the HTML page requests in this script.
HEADERS = {"User-Agent": "Mozilla/5.0"}

def get_notice_pages(num_pages=3, start_page=1):
    """Build the URLs of consecutive notice list pages.

    Args:
        num_pages: How many list pages to generate. Defaults to 3, which
            matches the previously hard-coded range.
        start_page: Value of ``pageNum`` for the first generated URL.

    Returns:
        A list of ``NOTICE_URL`` URLs with ``pageNum`` running from
        ``start_page`` to ``start_page + num_pages - 1``. Empty when
        ``num_pages`` is zero or negative.
    """
    return [
        f"{NOTICE_URL}?pageNum={page}"
        for page in range(start_page, start_page + num_pages)
    ]

def extract_post_links(page_url, timeout=10):
    """Collect the notice detail-page URLs linked from one list page.

    Args:
        page_url: URL of a notice list page.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        Absolute URLs of every ``ul.board_list li a`` anchor whose ``href``
        contains ``DUID=``, in document order with duplicates removed.
        An empty list when the page cannot be fetched.
    """
    print(f"🌐 리스트 페이지 확인 중: {page_url}")
    try:
        # Without a timeout requests may block forever on a stalled server.
        res = requests.get(page_url, headers=HEADERS, timeout=timeout)
        res.raise_for_status()
    except requests.RequestException as exc:
        # One unreachable list page should not abort the whole crawl.
        print(f"❌ 리스트 페이지 요청 실패: {page_url} ({exc})")
        return []

    soup = BeautifulSoup(res.text, "html.parser")
    links = []
    seen = set()
    for a in soup.select("ul.board_list li a"):
        href = a.get("href")
        if not href or "DUID=" not in href:
            continue
        # Resolve against the fetched page (after redirects), not the site
        # root, so document-relative hrefs such as "?DUID=1" stay correct.
        full_url = urljoin(res.url, href)
        if full_url not in seen:
            seen.add(full_url)
            links.append(full_url)
    return links

def extract_images_from_post(post_url, timeout=10):
    """Collect the image URLs embedded in one notice detail page.

    Args:
        post_url: URL of a notice detail page.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        Absolute http(s) URLs of the ``src`` of every ``div.bbs_view img``
        tag, in document order. An empty list when the page cannot be
        fetched or contains no such images.
    """
    print(f"🔍 게시글 방문: {post_url}")
    try:
        # Without a timeout requests may block forever on a stalled server.
        res = requests.get(post_url, headers=HEADERS, timeout=timeout)
        res.raise_for_status()
    except requests.RequestException as exc:
        # One unreachable post should not abort the whole crawl.
        print(f"❌ 게시글 요청 실패: {post_url} ({exc})")
        return []

    soup = BeautifulSoup(res.text, "html.parser")
    img_tags = soup.select("div.bbs_view img")
    if not img_tags:
        print("⚠️ 이미지 없음")

    # Resolve against the fetched page (after redirects), not the site root,
    # so document-relative src values stay correct.
    img_urls = [urljoin(res.url, img["src"]) for img in img_tags if img.get("src")]
    # Inline "data:" images and other non-HTTP schemes cannot be downloaded.
    return [u for u in img_urls if u.startswith(("http://", "https://"))]

def download_image(url, save_dir="downloaded_posters", timeout=10):
    """Download one image into ``save_dir``.

    The file name is the last component of the URL path; an existing file
    with the same name is overwritten. Failures are reported on stdout
    instead of being raised so that a single bad image does not stop the
    crawl.

    Args:
        url: Absolute URL of the image.
        save_dir: Directory to save into; created when missing.
        timeout: Seconds to wait for the server before giving up.
    """
    os.makedirs(save_dir, exist_ok=True)
    # Use only the path component so query strings and fragments never leak
    # into the file name; fall back to a fixed name when the path ends in "/"
    # (an empty name would make open() target the directory itself).
    filename = os.path.basename(urlsplit(url).path) or "unnamed_image"
    path = os.path.join(save_dir, filename)
    try:
        # The context manager releases the streamed connection even when
        # the body is not fully consumed.
        with requests.get(
            url, headers=HEADERS, stream=True, timeout=timeout
        ) as res:
            if res.status_code != 200:
                print(f"❌ 다운로드 실패: {filename}")
                return
            with open(path, "wb") as f:
                for chunk in res.iter_content(1024):
                    f.write(chunk)
    except requests.RequestException as exc:
        print(f"❌ 다운로드 실패: {filename} ({exc})")
        return
    print(f"✅ 다운로드 완료: {filename}")

def main():
    """Crawl the notice list pages and download every image found.

    Visits each list page, then each post linked from it, and hands every
    image URL found in the post to ``download_image``.
    """
    print("📥 공지사항 이미지 수집 시작")
    for list_page in get_notice_pages():
        for post in extract_post_links(list_page):
            for image in extract_images_from_post(post):
                download_image(image)
    print("🎉 완료")


# Run the crawl only when executed as a script or notebook cell.
if __name__ == "__main__":
    main()


📥 공지사항 이미지 수집 시작
🎉 완료
