In [23]:
from requests import request
from requests.compat import urljoin, urlparse
from requests.exceptions import HTTPError
from urllib.robotparser import RobotFileParser
from bs4 import BeautifulSoup
from time import sleep
import re
import json

def canfetch(url, agent='*', path='/'):
    """Report whether robots.txt of `url`'s site allows `agent` to fetch `url`'s path.

    The site's /robots.txt is downloaded on every call. Only the path component
    of `url` is checked against the rules. `path` is accepted but not used.
    """
    robots_location = urljoin(url, '/robots.txt')
    rules = RobotFileParser(robots_location)
    rules.read()
    target_path = urlparse(url).path
    return rules.can_fetch(agent, target_path)
    
def download(url, params={}, headers={}, method='GET', limit=3):
    """Send an HTTP request and return the response, retrying on 5xx errors.

    Args:
        url: Target URL.
        params: Request parameters; sent as the query string for GET and as the
            form body for POST. The default dict is never mutated here.
        headers: Extra request headers. The default dict is never mutated here.
        method: HTTP method name (case-insensitive).
        limit: Number of retries left for responses with status >= 500.

    Returns:
        The response object. If the last attempt still ends in an HTTP error,
        its status, reason and headers are printed and that error response is
        returned instead of an exception being raised.
    """
    method = method.upper()
    if canfetch(url) == False:
        # robots.txt disallows this URL; only reported, the request still goes out.
        print('[Error] ' + url)
#     else: # temporarily disabled here: real crawling runs into too many restrictions
    try:
        resp = request(method, url,
               params=params if method=='GET' else {},
               data=params if method=='POST' else {},
               headers=headers)
        resp.raise_for_status()
    except HTTPError as e:
        if limit > 0 and e.response.status_code >= 500:
            print(limit)
            # Only `sleep` is imported (from time import sleep); `time.sleep`
            # raised NameError on the first retry.
            sleep(1) # => random
            resp = download(url, params, headers, method, limit-1)
        else:
            print('[{}] '.format(e.response.status_code) + url)
            print(e.response.status_code)
            print(e.response.reason)
            print(e.response.headers)
    return resp

In [2]:
# Search page of the demo site; fetched and parsed below.
url = 'http://example.webscraping.com/places/default/search'
# Browser-like User-Agent header.
# NOTE(review): defined here but not passed to the download() call in this cell.
headers = {
    'user-agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36'
}

resp = download(url)
dom = BeautifulSoup(resp.text, 'html.parser')

In [3]:
dom.find('div', {'id':'results'})

<div id="results">
</div>

In [4]:
# dom.find('form')
# The #results div in the fetched HTML is empty, so query the JSON search
# endpoint directly (presumably the one the page calls via AJAX).
ajax = '/places/ajax/search.json'
params = {
    'search_term' : 'korea',
    'page_size' : 10,
    'page' : 0
}

# download() defaults to GET, so params are sent as the query string.
resp = download(urljoin(url, ajax), params=params)

# Decode the JSON body; the next cells index its 'records' list.
result = resp.json()

In [5]:
result['records'][1]

{'pretty_link': '<div><a href="/places/default/view/South-Korea-211"><img src="/places/static/images/flags/kr.png" /> South Korea</a></div>',
 'country': 'South Korea',
 'id': 6734659}

In [6]:
# Each record is printed twice: first taken apart with regular expressions, then
# (after the dashed line) with BeautifulSoup, to compare both approaches.
for _ in result['records']:
    print(_['id'])
    print(_['country'])
    print(_['pretty_link'], end = '\n')
    # Regex extraction: link target, image source, and the text after a '>'.
    print(urljoin(url, re.search(r'href="([^"]+)"',_['pretty_link']).group(1)))
    print(urljoin(url, re.search(r'src="([^"]+)"',_['pretty_link']).group(1)))
    print(re.search(r'>\s*([^<]+)<',_['pretty_link']).group(1))
    
    print('-'*10)
    # Parse the same HTML fragment; note this rebinds the notebook-level `dom`.
    dom = BeautifulSoup(_['pretty_link'], 'html.parser')
    print(urljoin(url, dom.a['href']))
    print(urljoin(url, dom.img['src']))
    print(dom.a.text.strip())
    print()

6734613
North Korea
<div><a href="/places/default/view/North-Korea-165"><img src="/places/static/images/flags/kp.png" /> North Korea</a></div>
http://example.webscraping.com/places/default/view/North-Korea-165
http://example.webscraping.com/places/static/images/flags/kp.png
North Korea
----------
http://example.webscraping.com/places/default/view/North-Korea-165
http://example.webscraping.com/places/static/images/flags/kp.png
North Korea

6734659
South Korea
<div><a href="/places/default/view/South-Korea-211"><img src="/places/static/images/flags/kr.png" /> South Korea</a></div>
http://example.webscraping.com/places/default/view/South-Korea-211
http://example.webscraping.com/places/static/images/flags/kr.png
South Korea
----------
http://example.webscraping.com/places/default/view/South-Korea-211
http://example.webscraping.com/places/static/images/flags/kr.png
South Korea



In [7]:
url = 'http://pythonscraping.com/pages/cookies/login.html'
resp = download(url)
dom = BeautifulSoup(resp.text, 'html.parser')
dom.form

<form action="welcome.php" method="post">
Username (use anything!): <input name="username" type="text"/><br/>
Password (try "password"): <input name="password" type="password"/><br/>
<input type="submit" value="Login"/>
</form>

In [8]:
urljoin(url, dom.form['action'])
# params = username=이름&password=비밀번호

'http://pythonscraping.com/pages/cookies/welcome.php'

In [9]:
[{_['name']:'anything' if _['name']=='username' else 'password'} for _ in dom.select('input[name]')]

[{'username': 'anything'}, {'password': 'password'}]

In [10]:
# Build the login payload from the form's named inputs: 'anything' for the
# username field and the literal 'password' for every other named input.
params = {}
for _ in dom.select('input[name]'):
    params[_['name']] = 'anything' if _['name'] == 'username' else 'password'

# method='POST' makes download() send params as the form body.
resp = download(urljoin(url, dom.form['action']), params=params, method='POST')

In [11]:
resp.text

'\n<h2>Welcome to the Website!</h2>\nWhoops! You logged in wrong. Try again with any username, and the password "password"<br><a href="login.html">Log in here</a>'

In [12]:
resp.request.body

'username=anything&password=password'

In [13]:
resp.text

'\n<h2>Welcome to the Website!</h2>\nWhoops! You logged in wrong. Try again with any username, and the password "password"<br><a href="login.html">Log in here</a>'

In [14]:
from requests import Session

session = Session()
# Plain request() keeps no cookies between calls; a Session stores them.
resp = session.request('POST', url = urljoin(url, dom.form['action']), data=params)
resp.text # the cookies set by this response are saved in the session

'\n<h2>Welcome to the Website!</h2>\nWhoops! You logged in wrong. Try again with any username, and the password "password"<br><a href="login.html">Log in here</a>'

In [15]:
resp.cookies.get_dict()

{'loggedin': '1', 'username': 'anything'}

In [16]:
resp = session.request('GET', urljoin(url, dom.form['action']), data=params)
resp.text

'\n<h2>Welcome to the Website!</h2>\nYou have logged in successfully! <br><a href="profile.php">Check out your profile!</a>'

In [17]:
resp = session.request('GET', urljoin(url, dom.form['action']), cookies = session.cookies)
resp.text

'\n<h2>Welcome to the Website!</h2>\nYou have logged in successfully! <br><a href="profile.php">Check out your profile!</a>'

In [18]:
# Copy the session's cookies into a plain dict of name -> value.
cookiedict = dict(session.cookies.items())
cookiedict

{'loggedin': '1', 'username': 'anything'}

In [19]:
resp = session.request('GET', urljoin(url, dom.form['action']), cookies = cookiedict)
resp.text

'\n<h2>Welcome to the Website!</h2>\nYou have logged in successfully! <br><a href="profile.php">Check out your profile!</a>'

#### Naver Login

In [20]:
resp = download('http://nid.naver.com/nidlogin.login?mode=form', headers=headers)
dom = BeautifulSoup(resp.text, 'html.parser')

[Error] http://nid.naver.com/nidlogin.login?mode=form


In [21]:
# Collect the login form's named inputs, keeping any pre-filled value
# (e.g. hidden fields) and using '' where the input has no value attribute.
params = dict()
for _ in dom.form.select('input[name]'):
    # The attribute is 'value'; checking 'values' never matched, so every
    # pre-filled field was replaced by an empty string.
    params[_['name']] = _['value'] if _.has_attr('value') else ''
# Placeholder credentials.
params['id'] = 'test'
params['pw'] = 'test'

In [22]:
# params, dom.form['action'], dom.form['method']
resp = session.request(dom.form['method'], dom.form['action'], data=params)

In [23]:
coodiedict = {
    'gnbFav' : '%7B%22gnbFav%22%3A%5B%5D%7D',
    'NID_SAUTO' : '-1891493465',
    'coach_tooltip' : 'ok',
    'nid_enctp' : '1',
    'nid_buk' : '43O5YWJ7SNCV2',
    'NID_SES' : 'AAABeAgr0Glx2yCbCp+qjOuI+ro1F0g9h8GN/OVScdu3XzFA/o8Sosg8tQm7RsgV3xLkUpmoP9c3eB2Z2BECpqHvQ69V0Pm1uv/DceHH0Iki/99mWxwh12PTo/zWu8IES6aidySdoo2IJgtddR1S8aodlBDbZgrlQZhXWov3gzb/JVAeu9B5L+Ra6AXsKfyp3yKV9b4pzsleg3TLZ4Eo4YXOv0pb1rEWcZzzHRXjE68Tz47n6za9gy8RKLjLOXQ7nKCOZa00j6d0/i8wdIAun3EkUK0prRQGkaEczzrfDH/2ItilduqDcBeEnitmfdve+6dfJ4nFJb8hQsuvAnRtJQC8ewnayL0c6oBdNCccLGL5yU0IKRK8FdxrgfUDT0t5sChiW9MWekCRf+JRz5JIhOmlX91/t8iUWCMnKxjYTCbiXmW6OethS5E6LP+2SX7Aajnsj0+XfapIGTzpmA/ZjkD1WRm7t6jwU22F6KF9dCsFp2gZbE1eIrKUVIETtD8HLY1IKg==',
    'NMUSER' : 'YXKmFAEwaqbsaAUqFxKdbXk5W40N1HtlFxgXaqu9axEqaAtZaxusFxuZKAnmaqns3xndFAKwFoUsHoKma9vsxonOaxRVadUstoRVaqRVaqns1rejL9Us6xRVaqnD16lvpB2RKZl5WLl5MBp0bSloWrdnaAvmKARqp6FTW43CbNvR16lvpB2RKZl5WLl5MBp0bSloWrdnaAvmKAn=',
    'NID_AUT' : 'Uu3mELeSQvuRqLKCvunNeRM/eLeoakeJZgc0tHYAtWpos96czTnjcTs+Daguji1r',
    '_ga_7VKFYR6RV1' : 'GS1.1.1595347015.33.1.1595348957.60',
    'NID_JKL' : 'ETkTt2IWJyid9+q8wvnujZTI2M9fOFyIkTtSB1LgmFc=',
    '_ga' : 'GA1.1.2123753588.1591027971',
    'nid_slevel' : '-1',
    '_gid' : 'GA1.2.283987012.1595347016',
    'NRTK' : 'ag#all_gr#4_ma#2_si#2_en#2_sp#2',
    'nid_inf' : '-600244553',
    'nx_ssl' : '2',
    'ASID' : 'b6dd6ef1000001721be570c10000005f',
    'NNB' : 'VGJQ2CTCLK7V4',
}

url = 'https://mail.naver.com/'
# session.cookies.clear()
# Load the browser cookies defined above into the session. That dict is bound
# to the name `coodiedict`; `cookiedict` is the older dict holding the practice
# site's cookies, so iterating it never sent the Naver cookies.
# NOTE(review): the cookie values above look like hardcoded browser session
# secrets; load them from an untracked file or the environment instead.
for k,v in coodiedict.items():
    session.cookies.set(k, v)
    
resp = session.request('GET', url)

resp.text

'<!DOCTYPE html>\n<html lang="ko">\n<head>\n\t<meta charset="UTF-8">\n\t<meta http-equiv="X-UA-Compatible" content="IE=edge">\n\t<meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, minimum-scale=1.0, user-scalable=no">\n\t<meta property="og:type" content="website">\n\t<meta property="og:title" content="[네이버: 로그인]">\n\t<meta property="og:description" content="안전한 로그인을 위해 주소창의 URL과 자물쇠 마크를 확인하세요!">\n\t<meta property="og:image" content="https://ssl.pstatic.net/sstatic/search/common/og_v3.png">\n\t<meta property="og:image:type" content="image/png">\n\t<meta property="og:image:width" content="1200">\n\t<meta property="og:image:height" content="1200">\t\t\n\t<title>네이버 : 로그인</title>\n\t<link rel="stylesheet" type="text/css" href="https://nid.naver.com/login/css/global/desktop/w_20200707.css">\n</head>\n<body class="">\n<div class="theme_txt" id="theme_txt_message">\n\t<p><strong>안전한 네이버 로그인을 위해 주소창의 URL과 자물쇠 마크를 확인하세요!</strong></p>\n</div>\n<div id="wrap"

In [24]:
# dom.find(text='전체메일').find_parent()
# dom.select_one('#list_for_view')

In [25]:
# Presumably the JSON endpoint behind the mailbox list view; verify in the browser.
mailapi = 'http://mail.naver.com/json/list/'
params = {
    'page' : '1',
    'sortField' : '1',
    'sortType' : '0',
    'folderSN' : '0',
    'type' : '',
    'isUnread' : 'false',
    'u' : 'id'  # placeholder account id
}
# NOTE(review): `params=` puts these values in the URL query string even though
# the method is POST; confirm whether the endpoint expects them in the body (`data=`).
resp = session.request('POST', mailapi, params=params)

In [26]:
# for _ in resp.json()['mailData']:
#     print(_['from'])
#     print(_['subject'])
#     print()

#### 빅데이터 청년인재 게시물 가져오기

In [27]:
url = 'https://lms.sunde41.net/auth/login'
resp = download(url)
dom = BeautifulSoup(resp.text, 'html.parser')

In [28]:
dom.form['action'], dom.form['method']
params = {}
for _ in dom.form.select('input[name]'):
    params[_['name']] = _['value'] if _.has_attr('value') else ''

In [29]:
session.cookies.clear()
session.post(urljoin(url, dom.form['action']), data=params)

<Response [200]>

In [32]:
%%writefile lms.json
{
    "id" : "id",
    "pw" : "pw"
}

Overwriting lms.json


In [31]:
with open('lms.json') as f:
    account = json.load(f)

In [32]:
params['email'] = account['id']
params['password'] = account['pw']

In [33]:
resp = session.post(urljoin(url, dom.form['action']), data=params)

In [34]:
url = 'https://lms.sunde41.net/course/2'
resp = session.get(url)
dom = BeautifulSoup(resp.text, 'html.parser')

In [35]:
# dom.find(text=re.compile('오류')).find_parent().find_parent().find_parent().find_parent().find_parent()
notice = dom.select_one('#notice-table tr:nth-of-type(2) td:nth-of-type(2)')
re.search(r'href="([^"]+)"', notice.decode()).group(1)

'https://us02web.zoom.us/j/82558845438?pwd=UkkzK0NPMFZkaEtTM1l2dCtLeDJidz09'

In [36]:
re.findall(r'\s+(\d+)<', notice.decode())

['82558845438', '053625']

In [37]:
notice.find_previous_sibling().span.text.strip()

re.sub(r'(T.+)', '', notice.find_previous_sibling().span.text.strip())

'2020-07-22'

In [38]:
# End-to-end version of the LMS steps: fetch the login form, log in through the
# shared `session`, open the course page and print the notice details.
url = 'https://lms.sunde41.net/auth/login'
resp = download(url)
dom = BeautifulSoup(resp.text, 'html.parser')

# Bare expression mid-cell: evaluated but not displayed or stored.
dom.form['action'], dom.form['method']
# Start from the form's own named inputs so fields with pre-filled values are kept.
params = {}
for _ in dom.form.select('input[name]'):
    params[_['name']] = _['value'] if _.has_attr('value') else ''
    
# Credentials are read from lms.json (keys 'id' and 'pw').
with open('lms.json') as f:
    account = json.load(f)
    
params['email'] = account['id']
params['password'] = account['pw']

resp = session.post(urljoin(url, dom.form['action']), data=params)

# The session now carries whatever cookies the login response set.
url = 'https://lms.sunde41.net/course/2'
resp = session.get(url)
dom = BeautifulSoup(resp.text, 'html.parser')

# Second cell of the second row in the notice table.
notice = dom.select_one('#notice-table tr:nth-of-type(2) td:nth-of-type(2)')

# The preceding cell's <span> text has everything from the first 'T' onward removed.
date = re.sub(r'(T.+)', '', notice.find_previous_sibling().span.text.strip())
# First href found in the notice cell's HTML.
addr = re.search(r'href="([^"]+)"', notice.decode()).group(1)
# Digit runs preceded by whitespace and directly followed by '<'; the prints
# below use the first as the ID and the second as the password.
user = re.findall(r'\s+(\d+)<', notice.decode())
print('날짜 : {}'.format(date))
print('주소 : {}'.format(addr))
print('아이디 : {0} / 비밀번호 : {1}'.format(user[0], user[1]))

날짜 : 2020-07-22
주소 : https://us02web.zoom.us/j/82558845438?pwd=UkkzK0NPMFZkaEtTM1l2dCtLeDJidz09
아이디 : 82558845438 / 비밀번호 : 053625


### https://pubmed.ncbi.nlm.nih.gov/

In [39]:
url = 'https://pubmed.ncbi.nlm.nih.gov/'
resp = download(url)
dom = BeautifulSoup(resp.text, 'html.parser')

In [40]:
params = dict()

# for _ in dom.form.select('input[name]'):
#     params[_['names']] = _['value'] if _.has_attr['value'] else ''

In [41]:
params['term'] = 'COVID-19'

In [42]:
resp = download(urljoin(url, dom.form['action']),
               params=params, method=dom.form['method'].upper())
dom = BeautifulSoup(resp.text, 'html.parser')

In [43]:
len(dom.select('a.docsum-title'))

10

In [44]:
dom.find(text=re.compile(r'neuroinvasive')).find_parent()

<a class="docsum-title" data-article-id="32104915" data-full-article-url="from_term=COVID-19&amp;from_pos=1" data-ga-action="1" data-ga-category="result_click" data-ga-label="32104915" href="/32104915/" ref="linksrc=docsum_link&amp;article_id=32104915&amp;ordinalpos=1&amp;page=1">
                The neuroinvasive potential of <b>SARS</b>-CoV2 may play a role in the <b>respiratory</b> failure of <b>COVID</b>-<b>19</b> patients.
              </a>

In [45]:
for _ in dom.select('a.docsum-title'):
    print(urljoin(url, _['href']).strip())
    print(_.text.strip())
    print()

https://pubmed.ncbi.nlm.nih.gov/32104915/
The neuroinvasive potential of SARS-CoV2 may play a role in the respiratory failure of COVID-19 patients.

https://pubmed.ncbi.nlm.nih.gov/32347054/
[Strategies for vaccine development of COVID-19].

https://pubmed.ncbi.nlm.nih.gov/32141569/
COVID-19 (Novel Coronavirus 2019) - recent trends.

https://pubmed.ncbi.nlm.nih.gov/32232980/
COVID-19 epidemic: Disease characteristics in children.

https://pubmed.ncbi.nlm.nih.gov/32096567/
Understanding of COVID-19 based on current evidence.

https://pubmed.ncbi.nlm.nih.gov/32134116/
Unique epidemiological and clinical features of the emerging 2019 novel coronavirus pneumonia (COVID-19) implicate special control measures.

https://pubmed.ncbi.nlm.nih.gov/32134278/
Perspectives on monoclonal antibody therapy as potential therapeutic intervention for Coronavirus disease-19 (COVID-19).

https://pubmed.ncbi.nlm.nih.gov/32380453/
What dentists need to know about COVID-19.

https://pubmed.ncbi.nlm.nih.gov/322

In [46]:
url = 'https://pubmed.ncbi.nlm.nih.gov/more/'
params = {
    'term' : '',
    'no_cache' : 'yes',
    'page' : 1,
    'no-cache' : 1595396887846,
    'csrfmiddlewaretoken' : 'rtDB8RItQm6xTR7Oe4QSlpVFlqUbNgb00mRKAvpPJ7tF8nAljd3Ot2sMlmfzlpvt'
}
# Assign the search term. Written with ':' this line was a bare annotation
# that stored nothing, leaving 'term' empty in the payload and the referer.
params['term'] = 'COVID-19'
params['page'] = 2

# The referer is built from the term, so it has to be set after the assignment above.
headers['referer'] = 'https://pubmed.ncbi.nlm.nih.gov/?term='+params['term']
headers['cookie'] = 'pm-csrf=pkvGnIKLv4da9kRagpEvsTJvsfR9nyd2YdJPPmr7oPAioQkHlyRrAwgCsbcxVHxv; pm-sessionid=ifcvy24k5xn44zs9s13eeppmmszibspu; ncbi_sid=6AE6AD10F17A2DA3_23305SID; _ga=GA1.2.6179736.1595395910; _gid=GA1.2.1259136346.1595395910; pm-sid=KcnbvYWncywpgyh8xBWJqA:fc3c2327825122295ca0f27af5d5c8e6; pm-adjnav-sid=yCOSb6cqapQKyTqcLiTUSg:fc3c2327825122295ca0f27af5d5c8e6; ncbi_pinger=N4IgDgTgpgbg+mAFgSwCYgFwgIICYBsA7AAykCM2ZAohQBwAiArKaQMymECc2AQp8a3q0AdGWEBbOLhABfIA; pm-iosp=; _gat_ncbiSg=1; _gat_dap=1'

resp = download(url, params, headers, 'POST')

[Error] https://pubmed.ncbi.nlm.nih.gov/more/


In [47]:
dom = BeautifulSoup(resp.text, 'html.parser')

In [48]:
dom.find(text=re.compile(r'neuroinvasive'))

In [49]:
session.cookies.clear()
resp = session.get('https://pubmed.ncbi.nlm.nih.gov/',
            params = {'term' : 'COVID-19'})

In [50]:
session.cookies.get_dict()

{'ncbi_sid': 'DF2B038CF17E50F3_24140SID',
 'pm-adjnav-sid': '2YxJuEJidomMEfjSsXXYFA:fc3c2327825122295ca0f27af5d5c8e6',
 'pm-sid': '7wRMQtUFdEjU2rPUePTgEg:fc3c2327825122295ca0f27af5d5c8e6',
 'pm-csrf': 'jfBlBqp9u8ar99Ef3BOyRByjaNK4biyatFwr6gktMC8HDkJtkreiX5k7AZqPt1md',
 'pm-sessionid': 'j4vlvrwbzaz7shj7vlmzls4zt9pjzrj6'}

In [51]:
resp.request.url

'https://pubmed.ncbi.nlm.nih.gov/?term=COVID-19'

In [52]:
header_cookie = ';'.join([k+'='+v for k, v in session.cookies.get_dict().items()])
header_referer = resp.request.url

In [53]:
cookie = 'pm-csrf=pkvGnIKLv4da9kRagpEvsTJvsfR9nyd2YdJPPmr7oPAioQkHlyRrAwgCsbcxVHxv; pm-sessionid=ifcvy24k5xn44zs9s13eeppmmszibspu; ncbi_sid=6AE6AD10F17A2DA3_23305SID; _ga=GA1.2.6179736.1595395910; _gid=GA1.2.1259136346.1595395910; pm-sid=KcnbvYWncywpgyh8xBWJqA:fc3c2327825122295ca0f27af5d5c8e6; pm-adjnav-sid=yCOSb6cqapQKyTqcLiTUSg:fc3c2327825122295ca0f27af5d5c8e6; ncbi_pinger=N4IgDgTgpgbg+mAFgSwCYgFwgIICYBsA7AAykCM2ZAohQBwAiArKaQMymECc2AQp8a3q0AdGWEBbOLhABfIA; pm-iosp=; _gat_ncbiSg=1; _gat_dap=1'

In [54]:
params = {
    'term' : 'COVID-19',
    'page' : 1,
    'no-cache' : '1595396887846',
    'csrfmiddlewaretoken' : 'rtDB8RItQm6xTR7Oe4QSlpVFlqUbNgb00mRKAvpPJ7tF8nAljd3Ot2sMlmfzlpvt'
}

resp = session.post('https://pubmed.ncbi.nlm.nih.gov/more/',
                   data = params,
                   headers={'cookie' : cookie,
                            'referer' : header_referer})

In [55]:
resp # Success

<Response [200]>

In [56]:
resp.request.headers

{'User-Agent': 'python-requests/2.22.0', 'Accept-Encoding': 'gzip, deflate', 'Accept': '*/*', 'Connection': 'keep-alive', 'cookie': 'pm-csrf=pkvGnIKLv4da9kRagpEvsTJvsfR9nyd2YdJPPmr7oPAioQkHlyRrAwgCsbcxVHxv; pm-sessionid=ifcvy24k5xn44zs9s13eeppmmszibspu; ncbi_sid=6AE6AD10F17A2DA3_23305SID; _ga=GA1.2.6179736.1595395910; _gid=GA1.2.1259136346.1595395910; pm-sid=KcnbvYWncywpgyh8xBWJqA:fc3c2327825122295ca0f27af5d5c8e6; pm-adjnav-sid=yCOSb6cqapQKyTqcLiTUSg:fc3c2327825122295ca0f27af5d5c8e6; ncbi_pinger=N4IgDgTgpgbg+mAFgSwCYgFwgIICYBsA7AAykCM2ZAohQBwAiArKaQMymECc2AQp8a3q0AdGWEBbOLhABfIA; pm-iosp=; _gat_ncbiSg=1; _gat_dap=1', 'referer': 'https://pubmed.ncbi.nlm.nih.gov/?term=COVID-19', 'Content-Length': '128', 'Content-Type': 'application/x-www-form-urlencoded'}

In [57]:
header_cookie

'ncbi_sid=DF2B038CF17E50F3_24140SID;pm-adjnav-sid=2YxJuEJidomMEfjSsXXYFA:fc3c2327825122295ca0f27af5d5c8e6;pm-sid=7wRMQtUFdEjU2rPUePTgEg:fc3c2327825122295ca0f27af5d5c8e6;pm-csrf=jfBlBqp9u8ar99Ef3BOyRByjaNK4biyatFwr6gktMC8HDkJtkreiX5k7AZqPt1md;pm-sessionid=j4vlvrwbzaz7shj7vlmzls4zt9pjzrj6'

In [58]:
cookie

'pm-csrf=pkvGnIKLv4da9kRagpEvsTJvsfR9nyd2YdJPPmr7oPAioQkHlyRrAwgCsbcxVHxv; pm-sessionid=ifcvy24k5xn44zs9s13eeppmmszibspu; ncbi_sid=6AE6AD10F17A2DA3_23305SID; _ga=GA1.2.6179736.1595395910; _gid=GA1.2.1259136346.1595395910; pm-sid=KcnbvYWncywpgyh8xBWJqA:fc3c2327825122295ca0f27af5d5c8e6; pm-adjnav-sid=yCOSb6cqapQKyTqcLiTUSg:fc3c2327825122295ca0f27af5d5c8e6; ncbi_pinger=N4IgDgTgpgbg+mAFgSwCYgFwgIICYBsA7AAykCM2ZAohQBwAiArKaQMymECc2AQp8a3q0AdGWEBbOLhABfIA; pm-iosp=; _gat_ncbiSg=1; _gat_dap=1'

In [59]:
# Start from a clean cookie jar and load the search page once, so the session
# holds fresh cookies (including 'pm-csrf') before paging.
session.cookies.clear()
resp = session.get('https://pubmed.ncbi.nlm.nih.gov/',
            params = {'term' : 'COVID-19'})
referer = resp.request.url

# The server checks both the Referer header and the CSRF token.

params = {
    'term' : 'COVID-19',
    'page' : 1,
    'no-cache' : '1595396887846',
    'csrfmiddlewaretoken' : session.cookies.get('pm-csrf')
}

for page in range(1,10):
    params['page'] = page
    # Use the `referer` captured in this cell, not `header_referer` left over
    # from an earlier cell, so the cell also runs on a fresh kernel.
    resp = session.post('https://pubmed.ncbi.nlm.nih.gov/more/',
                       data = params,
                       headers={'referer' : referer})
    dom = BeautifulSoup(resp.text, 'html.parser')
    
    for _ in dom.select('a.docsum-title'):
        # Result hrefs are site-absolute paths, so joining against `referer`
        # gives the same URLs without relying on a stale notebook-level `url`.
        print(urljoin(referer, _['href']).strip())
        print(_.text.strip())
        print()

https://pubmed.ncbi.nlm.nih.gov/32104915/
The neuroinvasive potential of SARS-CoV2 may play a role in the respiratory failure of COVID-19 patients.

https://pubmed.ncbi.nlm.nih.gov/32347054/
[Strategies for vaccine development of COVID-19].

https://pubmed.ncbi.nlm.nih.gov/32141569/
COVID-19 (Novel Coronavirus 2019) - recent trends.

https://pubmed.ncbi.nlm.nih.gov/32232980/
COVID-19 epidemic: Disease characteristics in children.

https://pubmed.ncbi.nlm.nih.gov/32096567/
Understanding of COVID-19 based on current evidence.

https://pubmed.ncbi.nlm.nih.gov/32134116/
Unique epidemiological and clinical features of the emerging 2019 novel coronavirus pneumonia (COVID-19) implicate special control measures.

https://pubmed.ncbi.nlm.nih.gov/32134278/
Perspectives on monoclonal antibody therapy as potential therapeutic intervention for Coronavirus disease-19 (COVID-19).

https://pubmed.ncbi.nlm.nih.gov/32380453/
What dentists need to know about COVID-19.

https://pubmed.ncbi.nlm.nih.gov/322

https://pubmed.ncbi.nlm.nih.gov/32267109/
[Coronavirus and COVID-19 : focus on a galopping pandemic].

https://pubmed.ncbi.nlm.nih.gov/32104907/
A systematic review of lopinavir therapy for SARS coronavirus and MERS coronavirus-A possible reference for coronavirus disease-19 treatment option.

https://pubmed.ncbi.nlm.nih.gov/32179150/
Teicoplanin: an alternative drug for the treatment of COVID-19?

https://pubmed.ncbi.nlm.nih.gov/32344226/
COVID-19: Disease, management, treatment, and social impact.

https://pubmed.ncbi.nlm.nih.gov/32340347/
Airborne Transmission Route of COVID-19: Why 2 Meters/6 Feet of Inter-Personal Distance Could Not Be Enough.

https://pubmed.ncbi.nlm.nih.gov/32245396/
Updating the diagnostic criteria of COVID-19 "suspected case" and "confirmed case" is necessary.

https://pubmed.ncbi.nlm.nih.gov/32198986/
Review and Prospect of Pathological Features of Corona Virus Disease.

https://pubmed.ncbi.nlm.nih.gov/32171952/
The SARS-CoV-2 outbreak: What we know.

https:/

### Selenium

In [2]:
from selenium import webdriver

In [13]:
def init():
    """Create and return a new Chrome WebDriver.

    'chromedriver' is the driver executable's name/path; make sure it can be
    found at that location. Every call starts a separate driver instance.
    """
    return webdriver.Chrome('chromedriver')

In [8]:
init().get('http://www.naver.com')

In [14]:
driver = init()
driver.get('http://www.naver.com')
driver.find_element_by_class_name('link_login')

<selenium.webdriver.remote.webelement.WebElement (session="47d1af24691388896a1d093f1b1df85e", element="c0697cf0-9f12-4bd4-b2a3-42fd1c85f9b3")>

In [15]:
driver = init()
driver.get('http://www.naver.com')
driver.find_element_by_class_name('link_login').text

'네이버\n로그인'

In [18]:
# with open('account.json') as f:
#     accout = json.load(f)

In [16]:
'''
/ -> root, . -> 현재위치
/ -> 자식
// -> 자손
[@class='속성']
'''

"\n/ -> root, . -> 현재위치\n/ -> 자식\n// -> 자손\n[@class='속성']\n"

In [31]:
# Drive the Naver login form in a freshly started browser.
driver = init()
driver.get('http://www.naver.com')
# Click the login link on the front page.
driver.find_element_by_class_name('link_login').click()
# Fill the id field (located by XPath) and the password field (located by id);
# 'id' and 'pw' are placeholder credentials.
driver.find_element_by_xpath('//input[@id="id"]').clear()
driver.find_element_by_xpath('//input[@id="id"]').send_keys('id')
driver.find_element_by_id('pw').clear()
driver.find_element_by_id('pw').send_keys('pw')
# driver.find_element_by_css_selector('input[type=submit][id]').get_attribute('id')
# Click the first submit input that has an id attribute.
driver.find_element_by_css_selector('input[type=submit][id]').click()

# # 모두 동일한 결과
# # driver.find_element_by_xpath('//input[@id="id"]').get_attribute('placeholder')
# # driver.find_element_by_xpath('//span/input').get_attribute('placeholder')
# # driver.find_element_by_id('id').get_attribute('placeholder')
# driver.find_element_by_css_selector('#id').get_attribute('placeholder')

# dom = BeautifulSoup(driver.page_source, 'html.parser')