Skip to content

Commit

Permalink
fix output path and generated from url filename
Browse files (browse the repository at this point in the history)
  • Loading branch information
s3rgeym committed Apr 25, 2020
1 parent e32754d commit eeacbaf
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 17 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/.vscode
/dist
/*.egg-info
/websnapshot/websnapshots
/websnapshots
__pycache__
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ $ pip install websnapshot
$ pipx install websnapshot

# full page snapshot
$ echo 'https://stackoverflow.com/' | websnapshot -f t
$ echo "https://ru.wikipedia.org/wiki/%D0%92%D0%B8%D0%BA%D0%B8" | websnapshot --full_page true

# text file with urls
# file with urls, each on a new line
$ websnapshot -i urls.txt

# help
Expand Down
6 changes: 3 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
[tool.poetry]
name = "websnapshot"
version = "0.1.4"
description = ""
version = "0.1.5"
description = "take snapshot of webpage"
authors = ["Sergey M <tz4678@gmail.com>"]
readme = "README.md"
homepage = ""
homepage = "https://github.com/tz4678/websnapshot"
exclude = [".git", "websnapshot/websnapshots"]

[tool.poetry.dependencies]
Expand Down
23 changes: 12 additions & 11 deletions websnapshot/__init__.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,26 @@
import asyncio
import logging
import pathlib
import re
import sys
from functools import partial
from typing import TextIO, Tuple
from urllib.parse import unquote

import click
from pyppeteer import launch

__version__ = '0.1.4'
__version__ = '0.1.5'

# Символы запрещенные в именах файлов в Linux, Mac и Windows
UNSAFE_CHARACTERS = re.compile(r'[\\/:*?"<>|]+')

log = logging.getLogger(__name__)
click.option = partial(click.option, show_default=True)


def imagename_from_url(url: str) -> str:
return (
url.replace('://', '_').replace('/', '_').replace('.', '_').rstrip('_')
+ '.png'
)
return UNSAFE_CHARACTERS.sub('_', unquote(url)) + '.png'


async def worker(
Expand Down Expand Up @@ -102,15 +103,15 @@ def websnapshot(
logging.basicConfig()
if debug:
log.setLevel(level=logging.DEBUG)
log.info('viewport size: %s', viewport_size)
log.info('full page: %s', full_page)
log.info("viewport size: %s", viewport_size)
log.info("full page: %s", full_page)
urls = asyncio.Queue()
for url in input.read().splitlines():
urls.put_nowait(url)
output_dirname = pathlib.Path(__file__).parent.joinpath(output).resolve()
output_dirname = pathlib.Path(output).expanduser().resolve()
output_dirname.mkdir(parents=True, exist_ok=True)
N = min(urls.qsize(), worker_num)
sem = asyncio.Semaphore(N)
n = min(urls.qsize(), worker_num)
sem = asyncio.Semaphore(n)
tasks = [
asyncio.ensure_future(
worker(
Expand All @@ -122,7 +123,7 @@ def websnapshot(
pageload_timeout=pageload_timeout,
)
)
for _ in range(N)
for _ in range(n)
]
_ = asyncio.get_event_loop().run_until_complete(
asyncio.gather(*tasks, return_exceptions=True)
Expand Down

0 comments on commit eeacbaf

Please sign in to comment.