Skip to content

Commit

Permalink
support render of non-loaded websites
Browse files (browse the repository at this point in the history)
Signed-off-by: Kenneth Reitz <me@kennethreitz.org>
  • Loading branch information
kennethreitz committed Feb 28, 2018
1 parent 18da03e commit 2504efb
Showing 1 changed file with 8 additions and 5 deletions.
13 changes: 8 additions & 5 deletions requests_html.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -2,7 +2,7 @@
import asyncio
from urllib.parse import urlparse, urlunparse
from concurrent.futures._base import TimeoutError
from typing import Set, Union, List, MutableMapping
from typing import Set, Union, List, MutableMapping, Optional

import pyppeteer
import requests
Expand Down Expand Up @@ -325,7 +325,7 @@ def __init__(self, *, url: str = DEFAULT_URL, html: _HTML, default_encoding: str
def __repr__(self) -> str:
return "<HTML url={}>".format(repr(self.url))

def render(self, retries: int = 8, script: str = None, scrolldown=False, sleep: int = 0):
def render(self, retries: int = 8, script: str = None, scrolldown=False, sleep: int = 0, reload: bool = True):
"""Reloads the response in Chromium, and replaces HTML content
with an updated version, with JavaScript executed.
Expand Down Expand Up @@ -361,13 +361,16 @@ def render(self, retries: int = 8, script: str = None, scrolldown=False, sleep:
Warning: the first time you run this method, it will download
Chromium into your home directory (``~/.pyppeteer``).
"""
async def _async_render(*, url: str, script: str = None, scrolldown, sleep: int):
async def _async_render(*, url: str, script: str = None, scrolldown, sleep: int, reload: bool = True, content: Optional[str]):
try:
browser = pyppeteer.launch(headless=True)
page = await browser.newPage()

# Load the given page (GET request, obviously.)
await page.goto(url)
if reload:
await page.goto(url)
else:
await page.setContent(content)

result = None
if script:
Expand Down Expand Up @@ -395,7 +398,7 @@ async def _async_render(*, url: str, script: str = None, scrolldown, sleep: int)
for i in range(retries):
if not content:
try:
content, result = loop.run_until_complete(_async_render(url=self.url, script=script, sleep=sleep, scrolldown=scrolldown))
content, result = loop.run_until_complete(_async_render(url=self.url, script=script, sleep=sleep, content=self.html, reload=reload, scrolldown=scrolldown))
except TimeoutError:
pass

Expand Down

0 comments on commit 2504efb

Please sign in to comment.