In [14]:
from time import sleep
from fake_useragent import UserAgent
import re
import requests

def get_user_agent():
    """Build a Chrome User-Agent string.

    Returns
    -------
    str
        ``str()`` of the ``chrome`` attribute of a newly constructed
        ``UserAgent`` object.
    """
    return str(UserAgent().chrome)


def query_to_regex(query):
    """Compile a query into a case-insensitive regular expression.

    Parameters
    ----------
    query: str or list of str
        A single pattern, or several patterns that are combined into one
        capturing alternation group ``(a|b|...)``. The text is used as a
        regular expression as-is (it is not escaped).

    Returns
    -------
    Pattern object
        Pattern compiled with ``re.IGNORECASE``

    Raises
    ------
    TypeError
        If ``query`` is neither a ``str`` nor a ``list``.
    """
    if isinstance(query, list):
        # Several alternatives: fold them into one grouped alternation.
        source = '({})'.format('|'.join(query))
    elif isinstance(query, str):
        source = query
    else:
        raise TypeError('Invalid query type')
    return re.compile(source, re.IGNORECASE)


class Request(object):
    """Thin wrapper around a ``requests.Session`` for sending HTTP requests.

    The session stores the User-Agent header and cookies between calls.
    Every call made through :meth:`request` is followed by an optional pause
    so that repeated requests are spaced out.

    Attributes
    ---------
    s: Session
        Requests Session
    delay: float
        Seconds to sleep after each request, default 1.
        ``None`` disables the pause.

    """
    def __init__(self):
        self.s = requests.Session()
        self.update_user_agent()
        self.delay = 1

    @staticmethod
    def _extra_headers(referer):
        """Return the headers that apply to a single request only."""
        return {} if referer is None else {'referer': referer}

    def update_user_agent(self, force: bool = False):
        """ Replace the session's User-Agent header with a new one

        Parameters
        ----------
        force: bool
            Accepted for backward compatibility. A new User-Agent is
            generated on every call regardless of this value.
        """
        self.s.headers.update({'user-agent': get_user_agent()})

    def set_proxies(self, proxies: dict = None):
        """ Set proxies

        Parameters
        ----------
        proxies: dict
            Proxies to install on the session. ``None`` leaves the current
            proxies unchanged.
        """
        if proxies is not None:
            import copy
            # Deep copy so later changes to the caller's dict do not leak
            # into the session.
            self.s.proxies = copy.deepcopy(proxies)

    def set_delay(self, second: float = None):
        """ Set delay

        Parameters
        ----------
        second: float
            Seconds to sleep after each request. ``None`` disables the pause.
        """
        self.delay = second

    def request(self,
                url: str,
                method: str = 'GET',
                payload: dict = None,
                referer: str = None,
                stream: bool = False,
                timeout: int = 120):
        """ send http requests

        Parameters
        ----------
        url: str
            URL
        method: str, optional
            GET, OPTIONS, POST, PUT, PATCH or DELETE
        payload: dict, optional
            Request parameters. They are sent as URL query parameters
            (``params=``) for every method, including POST.
        referer: str, optional
            Temporary referer, applied to this request only
        stream: bool, optional
            Stream optional, default False
        timeout: int, optional
            default 120s

        Returns
        -------
        requests.Response
            Response
        """
        # Session.prepare_request() merges the session's headers and cookies
        # into the request, so only the per-request extras are passed here.
        # Writing the referer into self.s.headers instead would make it
        # stick to every later request.
        req = requests.Request(method, url=url, params=payload,
                               headers=self._extra_headers(referer))
        prepped = self.s.prepare_request(req)
        resp = self.s.send(prepped, stream=stream, timeout=timeout)
        if self.delay is not None:
            # Pause after the call to space out repeated requests.
            sleep(self.delay)
        return resp

    def get(self, url: str,
            payload: dict = None,
            referer: str = None,
            stream: bool = False,
            timeout: int = 120):
        """ Request get method

        Parameters
        ----------
        url: str
            URL
        payload: dict, optional
            Request parameters
        referer: str, optional
            Temporary referer
        stream: bool, optional
            Stream optional, default False
        timeout: int, optional
            default 120s

        Returns
        -------
        requests.Response
            Response
        """
        return self.request(url=url, method='GET', payload=payload, referer=referer, stream=stream, timeout=timeout)

    def post(self, url: str,
             payload: dict = None,
             referer: str = None,
             stream: bool = False,
             timeout: int = 120):
        """ Request post method

        Parameters
        ----------
        url: str
            URL
        payload: dict, optional
            Request parameters (sent as URL query parameters, see
            :meth:`request`)
        referer: str, optional
            Temporary referer
        stream: bool, optional
            Stream optional, default False
        timeout: int, optional
            default 120s

        Returns
        -------
        requests.Response
            Response
        """
        return self.request(url=url, method='POST', payload=payload, referer=referer, stream=stream, timeout=timeout)

""" Disclosure report search (Open DART ``list.json``)

 Parameters
 ----------
 corp_code: str, optional
     Unique 8-digit code of the disclosing company; without a corp_code the search period is limited to 3 months
 bgn_de: str, optional
     First filing date to search (YYYYMMDD); falls back to the end date (end_de) when omitted
 end_de: str, optional
     Last filing date to search (YYYYMMDD); falls back to today when omitted
 last_reprt_at: str, optional
     Search final reports only (Y or N), default : N
 pblntf_ty: str, optional
     Disclosure type
 pblntf_detail_ty: str, optional
     Detailed disclosure type
 sort: str, optional
     Sort key, {filing date: date, company name: crp, report name: rpt}
 sort_mth: str, optional
     Ascending (asc) or descending (desc), default : desc
 page_no: int, optional
     Page number (1~n) default : 1
 page_count: int, optional
     Items per page (1~100), API default : 10, default here : 100

 Returns
 -------
 dict
     Response data
 """

import os

import requests

url = 'https://opendart.fss.or.kr/api/list.json'

# The Open DART key is a credential: read it from the environment rather
# than committing it together with the notebook.
api_key = os.environ.get('DART_API_KEY')
if not api_key:
    raise RuntimeError('Set the DART_API_KEY environment variable to your Open DART API key')

corp_code = '00401731'

last_reprt_at = 'Y'

payload = {
    'crtfc_key': api_key,
    'corp_code': corp_code,
    'bgn_de': '20190101',
    'end_de': '20200717',
    'last_reprt_at': last_reprt_at,
    'pblntf_detail_ty': 'A001',
    'sort': 'date',
    'sort_mth': 'desc',
    'page_no': 1,
    'page_count': 100
}
request = Request()
resp = request.get(url=url, payload=payload)
# Fail on an HTTP error status before trying to decode the body as JSON.
resp.raise_for_status()
dataset = resp.json()
print(dataset)

{'status': '000', 'message': '정상', 'page_no': 1, 'page_count': 100, 'total_count': 2, 'total_page': 1, 'list': [{'corp_code': '00401731', 'corp_name': 'LG전자', 'stock_code': '066570', 'corp_cls': 'Y', 'report_nm': '사업보고서 (2019.12)', 'rcept_no': '20200330004430', 'flr_nm': 'LG전자', 'rcept_dt': '20200330', 'rm': '연'}, {'corp_code': '00401731', 'corp_name': 'LG전자', 'stock_code': '066570', 'corp_cls': 'Y', 'report_nm': '사업보고서 (2018.12)', 'rcept_no': '20190401004344', 'flr_nm': 'LG전자', 'rcept_dt': '20190401', 'rm': '연'}]}


In [8]:
# Start with an empty list of reports.
reports = list()

In [12]:
# The report entries sit under the 'list' key of the response; calling
# list() on the dict itself would only yield its keys ('status', 'message', ...).
# A response without a 'list' key gives an empty list.
reports = list(dataset.get('list', []))

In [34]:
# Report viewer page for one filing; rcpNo is the filing's receipt number.
url = (
    'http://dart.fss.or.kr/dsaf001/main.do'
    '?rcpNo=20200330004430'
)

In [35]:
# The viewer URL already carries rcpNo in its query string. The Open DART
# API key is deliberately NOT sent here: this is the public viewer page,
# fetched over plain HTTP, and adding the key would expose the credential.
payload = {}

In [36]:
# New client (fresh session) for fetching the viewer page.
request = Request()
resp = request.get(url, payload=payload)

In [37]:
# Display the response object as the cell output.
resp

<Response [200]>

In [38]:
from bs4 import BeautifulSoup

# Parse the raw response bytes with Python's built-in HTML parser.
data = BeautifulSoup(markup=resp.content, features='html.parser')

In [39]:
# Display the parsed document as the cell output.
data


<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">

<html>
<head>
<title>
LG전자/사업보고서/2020.03.30
</title>
<meta content="IE=EmulateIE8" http-equiv="X-UA-Compatible"/>
<meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
<style type="text/css">
    .x-window-dlg .ext-mb-download {
        background:transparent url(images/download.gif) no-repeat top left;
        height:46px;
    }
</style>
<link charset="utf-8" href="/css/viewer.css" rel="stylesheet" type="text/css"/>
<script src="/js/prototype.js" type="text/javascript"></script>
<script src="/js/jquery/jquery-all.js" type="text/javascript"></script>
<!-- 2014.03.11 ext 3.4 -->
<!--[if lte IE 8]><link rel="stylesheet" type="text/css" href="/js/ext-viewer/resources/css/ext-all-ie8.css" ><![endif]-->
<!--[if (gte IE 9)|!(IE)]><!--><link href="/js/ext-viewer/resources/css/ext-all.css" rel="stylesheet" type="text/css"/><!--<![endif]-->
<script src="/js/ext-viewer/adapter/

In [40]:
# Show the fourth <option> element of the parsed page.
# NOTE(review): the index 3 is hard-coded; in the captured output it is the
# audit-report entry (value "rcpNo=...&dcmNo=...") -- confirm the position is
# stable across filings before relying on it.
data.find_all('option')[3]

<option value="rcpNo=20200330004430&amp;dcmNo=7206120">
									
									2020.03.30 
									
										
										
									
									감사보고서
								</option>