In [None]:
from dataclasses import dataclass
from typing import Any, Optional, TypedDict
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Base URL of the site and the path of its search endpoint; joined with
# urljoin() when the request is made.
ROOT = "https://www.riss.kr/"
PATH = "search/Search.do"

# The search keyword is sent under two parameter names ("strQuery" and
# "query"); keeping it in one constant guarantees both always carry the
# same value.
SEARCH_QUERY = "엣지 디바이스"

# Query-string parameters for the search request.
# NOTE(review): the URL recorded later in this notebook shows the two boolean
# values rendered as "True"/"False", while the comparison URL uses lowercase
# "true"/"false" -- confirm the server treats both spellings alike.
param_dict = {
  "isDetailSearch": "N",
  "searchGubun": True,
  "viewYn": "OP",
  "strQuery": SEARCH_QUERY,
  "order": "/DESC",
  "onHanja": False,
  "strSort": "RANK",
  "iStartCount": 0,
  "fsearchMethod": "search",
  "sflag": 1,
  "isFDetailSearch": "N",
  "pageNumber": 1,
  "icate": "re_a_kor",
  "colName": "re_a_kor",
  "pageScale": 10,
  "isTab": "Y",
  "query": SEARCH_QUERY,
}


# Shape of the HTTP headers passed to the fetch helper.  The functional
# TypedDict form is required here: "User-Agent" contains a hyphen, so it
# cannot be written as an attribute name in the class-based form.
HeaderType = TypedDict(
  "HeaderType",
  {
    "User-Agent": str,
    "Referer": str,
  },
)


@dataclass(frozen=True)
class PageResponseReturnType:
  """Immutable pair of an HTTP response and the HTML tree parsed from it."""

  # The response object obtained for the requested page.
  response: requests.Response
  # BeautifulSoup document built from the response body.
  soup: BeautifulSoup


def get_page_response_with_soup(
  url: str,
  *,
  query_params: Optional[dict[Any, Any]] = None,
  header: Optional[HeaderType] = None,
  timeout: Optional[float] = 10.0,
) -> PageResponseReturnType:
  """Fetch *url* with a GET request and parse the body as HTML.

  Args:
    url: Address of the page to request.
    query_params: Optional mapping sent as the URL query string.
    header: Optional HTTP headers to send with the request.
    timeout: Seconds to wait for the server before giving up.  Pass
      ``None`` to wait indefinitely (the behaviour before this parameter
      existed).

  Returns:
    A ``PageResponseReturnType`` holding the raw response and a
    ``BeautifulSoup`` tree built from ``response.text`` with the
    ``"html.parser"`` backend.

  Raises:
    requests.exceptions.Timeout: If the server does not answer within
      *timeout* seconds.

  Note:
    The HTTP status code is not checked here; callers that care can
    inspect ``result.response``.
  """
  # Without an explicit timeout, requests waits forever on a stalled server.
  response = requests.get(url, params=query_params, headers=header, timeout=timeout)
  soup = BeautifulSoup(response.text, "html.parser")

  return PageResponseReturnType(response=response, soup=soup)


# Request the search-result page and keep both the response and parsed HTML.
search_url = urljoin(ROOT, PATH)
paper_list_response = get_page_response_with_soup(search_url, query_params=param_dict)

# URL of the response that was actually received; kept so later requests can
# refer back to the search page.
previous_link = paper_list_response.response.url
paper_list_soup = paper_list_response.soup

# One <li> per search hit inside the result-list container.
paper_list_selector = ".srchResultListW > ul >  li"
paper_list_element = paper_list_soup.select(paper_list_selector)

# Bare expression: shown as the cell output in the notebook.
len(paper_list_element)


10

In [None]:
# Visit the detail page of the first search hit and print its publisher.
for paper_element in paper_list_element[:1]:
  # Look the title anchor up once; it provides both the title and the link.
  title_anchor = paper_element.select_one(".title > a")
  if title_anchor is None:
    # select_one() returns None when nothing matches; skip such an entry
    # instead of crashing with AttributeError.
    print("title link not found")
    continue

  title = title_anchor.get_text(strip=True)
  link = urljoin(ROOT, title_anchor.get("href"))
  print(title, link, sep="\n")
  print("======== visit link =========")

  # The search-result URL is sent as the Referer of the detail request.
  paper_detail_response = get_page_response_with_soup(link, header={
    "User-Agent": "Mozilla/5.0",
    "Referer": previous_link
  })
  paper = paper_detail_response.soup

  # "발행기관" is the label text of the publisher field; the value is read
  # from the element that follows the label.
  press_label = paper.find("span", string="발행기관")
  press = press_label.find_next_sibling() if press_label is not None else None
  if press is None:
    # Either the label or its sibling is missing on this page.
    print("publisher not found")
    continue
  print(press.text)
  

IoT엣지디바이스를 이용한 경량화 클라우드 컴퓨팅환경 구축 및 검증
https://www.riss.kr/search/detail/DetailView.do?p_mat_type=1a0202e37d52c72d&control_no=71d192c1127301b047de9c1710b0298d&keyword=엣지 디바이스
한국산업기술융합학회(구. 산업기술교육훈련학회)


In [5]:
# Two URLs for the same search, compared for exact string equality.
# NOTE(review): s1 looks like the URL built from param_dict (booleans spelled
# "True"/"False", no empty parameters); s2 presumably comes from a browser
# (lowercase booleans, many empty parameters) -- confirm the sources.
s1 = (
  "https://www.riss.kr/search/Search.do"
  "?isDetailSearch=N&searchGubun=True&viewYn=OP"
  "&strQuery=%EC%97%A3%EC%A7%80+%EB%94%94%EB%B0%94%EC%9D%B4%EC%8A%A4"
  "&order=%2FDESC&onHanja=False&strSort=RANK&iStartCount=0"
  "&fsearchMethod=search&sflag=1&isFDetailSearch=N&pageNumber=1"
  "&icate=re_a_kor&colName=re_a_kor&pageScale=10&isTab=Y"
  "&query=%EC%97%A3%EC%A7%80+%EB%94%94%EB%B0%94%EC%9D%B4%EC%8A%A4"
)
s2 = (
  "https://www.riss.kr/search/Search.do"
  "?isDetailSearch=N&searchGubun=true&viewYn=OP&queryText="
  "&strQuery=%EC%97%A3%EC%A7%80+%EB%94%94%EB%B0%94%EC%9D%B4%EC%8A%A4"
  "&exQuery=&exQueryText=&order=%2FDESC&onHanja=false&strSort=RANK"
  "&p_year1=&p_year2=&iStartCount=0&orderBy=&mat_type=&mat_subtype="
  "&fulltext_kind=&t_gubun=&learning_type=&ccl_code=&inside_outside="
  "&fric_yn=&db_type=&image_yn=&gubun=&kdc=&ttsUseYn=&l_sub_code="
  "&fsearchMethod=search&sflag=1&isFDetailSearch=N&pageNumber=1"
  "&resultKeyword=&fsearchSort=&fsearchOrder=&limiterList=&limiterListText="
  "&facetList=&facetListText=&fsearchDB="
  "&icate=re_a_kor&colName=re_a_kor&pageScale=10&isTab=Y"
  "&regnm=&dorg_storage=&language=&language_code=&clickKeyword=&relationKeyword="
  "&query=%EC%97%A3%EC%A7%80+%EB%94%94%EB%B0%94%EC%9D%B4%EC%8A%A4"
)

urls_identical = s1 == s2
print(urls_identical)

False


True

# dataclass vs TypedDict

- TypedDict: dictionary 형태의 문법과 추론을 지원하는 타입 힌트 문법
  - 함수형 문법: HeaderType = TypedDict("HeaderType", {"User-Agent": str, "Referer": str})
    - 이중 dictionary의 타입을 클래스형은 추론 못 하지만 함수형 문법은 추론 가능함.
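  - 클래스형 문법 :
    class HeaderType(TypedDict):
      User-Agent: str
      Referer: str
    - 주의: `User-Agent`처럼 하이픈(-)이 들어간 키는 파이썬 식별자가 될 수 없어 위 클래스형 코드는 SyntaxError가 발생함. 이런 키는 함수형 문법으로만 선언할 수 있음.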

- dataclass
  - 데코레이터 문법: 클래스 정의 위에 @dataclass를 붙여 사용
  - TypedDict와 다르게 PageResponseReturnType(response = ... , soup = ...) 처럼 타입(클래스) 이름을 명시적으로 지정하여 인스턴스를 만들어 사용할 수 있음.
  