diff --git a/.idea/workspace.xml b/.idea/workspace.xml index c220b989..3a4180a4 100644 --- a/.idea/workspace.xml +++ b/.idea/workspace.xml @@ -26,10 +26,14 @@ + + + + + - @@ -351,6 +364,6 @@ - + \ No newline at end of file diff --git a/captionfunctions.py b/captionfunctions.py index 64471e21..2ee58597 100644 --- a/captionfunctions.py +++ b/captionfunctions.py @@ -1,3 +1,4 @@ +import contextlib import io import logging import os @@ -6,7 +7,9 @@ import subprocess import sys from PIL import Image -from improcessing import filetostring, imgkitstring, temp_file + +import imgkit +from improcessing import filetostring, temp_file, options # stolen code https://stackoverflow.com/questions/6116978/how-to-replace-multiple-substrings-of-a-string @@ -105,3 +108,10 @@ def jpeg(image, params: list, tosavename=None): def speed(media): pass + + +def imgkitstring(torender, tosavename=None): + if tosavename is None: + tosavename = temp_file("png") + imgkit.from_string(torender, tosavename, options=options) + return tosavename diff --git a/imgkit/__init__.py b/imgkit/__init__.py new file mode 100644 index 00000000..53b227cf --- /dev/null +++ b/imgkit/__init__.py @@ -0,0 +1,13 @@ +# -*- coding: utf-8 -*- +""" +Wkhtmltopdf python wrapper to convert html to image using the webkit rendering engine and qt +""" + +__author__ = 'jarrekk' +__contact__ = 'me@jarrekk.com' +__version__ = '1.0.2' +__homepage__ = 'https://github.com/jarrekk/imgkit' +__license__ = 'MIT' + +from .imgkit import IMGKit +from .api import from_url, from_file, from_string, config diff --git a/imgkit/api.py b/imgkit/api.py new file mode 100644 index 00000000..7366c501 --- /dev/null +++ b/imgkit/api.py @@ -0,0 +1,101 @@ +# -*- coding: utf-8 -*- +from .imgkit import IMGKit +from .config import Config + + +def from_url(url, + output_path, + options=None, + toc=None, + cover=None, + config=None, + cover_first=None): + """ + Convert URL/URLs to IMG file/files + + :param url: URL or list of URLs to be saved + :param output_path: path to output PDF file/files. False means file will be returned as string + :param options: (optional) dict with wkhtmltopdf global and page options, with or w/o '--' + :param toc: (optional) dict with toc-specific wkhtmltopdf options, with or w/o '--' + :param cover: (optional) string with url/filename with a cover html page + :param css: style of input + :param config: (optional) instance of imgkit.config.Config() + :param cover_first: (optional) if True, cover always precedes TOC + :return: True when success + """ + rtn = IMGKit(url, + 'url', + options=options, + toc=toc, cover=cover, + config=config, + cover_first=cover_first) + return rtn.to_img(output_path) + + +def from_file(filename, + output_path, + options=None, + toc=None, + cover=None, + css=None, + config=None, + cover_first=None): + """ + Convert HTML file/files to IMG file/files + + :param filename: path of HTML file or list with paths or file-like object + :param output_path: path to output PDF file/files. False means file will be returned as string + :param options: (optional) dict with wkhtmltopdf global and page options, with or w/o '--' + :param toc: (optional) dict with toc-specific wkhtmltopdf options, with or w/o '--' + :param cover: (optional) string with url/filename with a cover html page + :param css: style of input + :param config: (optional) instance of imgkit.config.Config() + :param cover_first: (optional) if True, cover always precedes TOC + :return: True when success + """ + rtn = IMGKit(filename, + 'file', + options=options, + toc=toc, + cover=cover, + css=css, + config=config, + cover_first=cover_first) + return rtn.to_img(output_path) + + +def from_string(string, + output_path, + options=None, + toc=None, + cover=None, + css=None, + config=None, + cover_first=None): + """ + Convert given string/strings to IMG file + + :param string: + :param output_path: path to output PDF file/files. False means file will be returned as string + :param options: (optional) dict with wkhtmltopdf global and page options, with or w/o '--' + :param toc: (optional) dict with toc-specific wkhtmltopdf options, with or w/o '--' + :param cover: (optional) string with url/filename with a cover html page + :param css: style of input + :param config: (optional) instance of imgkit.config.Config() + :param cover_first: (optional) if True, cover always precedes TOC + :return: True when success + """ + rtn = IMGKit(string, 'string', options=options, toc=toc, cover=cover, css=css, + config=config, cover_first=cover_first) + return rtn.to_img(output_path) + + +def config(**kwargs): + """ + Constructs and returns a :class:`Config` with given options + + :param wkhtmltopdf: path to binary + :param meta_tag_prefix: the prefix for ``pdfkit`` specific meta tags + """ + + return Config(**kwargs) diff --git a/imgkit/config.py b/imgkit/config.py new file mode 100644 index 00000000..22d02593 --- /dev/null +++ b/imgkit/config.py @@ -0,0 +1,36 @@ +# -*- coding: utf-8 -*- +import subprocess +import sys + + +class Config(object): + def __init__(self, wkhtmltoimage='', meta_tag_prefix='imgkit-'): + self.meta_tag_prefix = meta_tag_prefix + + self.wkhtmltoimage = wkhtmltoimage + + self.xvfb = '' + + if not self.wkhtmltoimage: + if sys.platform == 'win32': + self.wkhtmltoimage = subprocess.Popen(['where', 'wkhtmltoimage'], + stdout=subprocess.PIPE, stderr=subprocess.DEVNULL).communicate()[0].strip() + else: + self.wkhtmltoimage = subprocess.Popen(['which', 'wkhtmltoimage'], + stdout=subprocess.PIPE, stderr=subprocess.DEVNULL).communicate()[0].strip() + if not self.xvfb: + if sys.platform == 'win32': + self.xvfb = subprocess.Popen(['where', 'xvfb-run'], + stdout=subprocess.PIPE, stderr=subprocess.DEVNULL).communicate()[0].strip() + else: + self.xvfb = subprocess.Popen(['which', 'xvfb-run'], + stdout=subprocess.PIPE, stderr=subprocess.DEVNULL).communicate()[0].strip() + + try: + with open(self.wkhtmltoimage): + pass + except IOError: + raise IOError('No wkhtmltoimage executable found: "{0}"\n' + 'If this file exists please check that this process can ' + 'read it. Otherwise please install wkhtmltopdf - ' + 'http://wkhtmltopdf.org\n'.format(self.wkhtmltoimage)) diff --git a/imgkit/imgkit.py b/imgkit/imgkit.py new file mode 100644 index 00000000..e7bc30b9 --- /dev/null +++ b/imgkit/imgkit.py @@ -0,0 +1,267 @@ +# -*- coding: utf-8 -*- +import re +import subprocess +import sys +from .source import Source +from .config import Config +import io +import codecs + +# Python 2.x and 3.x support for checking string types +try: + assert basestring +except NameError: + basestring = str + + +class IMGKit(object): + """ + + """ + + class SourceError(Exception): + """Wrong source type for stylesheets""" + + def __init__(self, message): + self.message = message + + def __str__(self): + return self.message + + def __init__(self, url_or_file, source_type, options=None, toc=None, cover=None, + css=None, config=None, cover_first=None): + self.source = Source(url_or_file, source_type) + self.config = Config() if not config else config + try: + self.wkhtmltoimage = self.config.wkhtmltoimage.decode('utf-8') + except AttributeError: + self.wkhtmltoimage = self.config.wkhtmltoimage + + self.xvfb = self.config.xvfb + + self.options = {} + if self.source.isString(): + self.options.update(self._find_options_in_meta(url_or_file)) + + if options: + self.options.update(options) + + self.toc = toc if toc else {} + self.cover = cover + self.cover_first = cover_first + self.css = css + self.stylesheets = [] + + def _gegetate_args(self, options): + """ + Generator of args parts based on options specification. + """ + for optkey, optval in self._normalize_options(options): + yield optkey + + if isinstance(optval, (list, tuple)): + assert len(optval) == 2 and optval[0] and optval[ + 1], 'Option value can only be either a string or a (tuple, list) of 2 items' + yield optval[0] + yield optval[1] + else: + yield optval + + def _command(self, path=None): + """ + Generator of all command parts + :type options: object + :return: + """ + options = self._gegetate_args(self.options) + options = [x for x in options] + # print 'options', options + if self.css: + self._prepend_css(self.css) + + if '--xvfb' in options: + options.remove('--xvfb') + yield self.xvfb + # auto servernum option to prevent failure on concurrent runs + # https://bugs.launchpad.net/ubuntu/+source/xorg-server/+bug/348052 + yield '-a' + + yield self.wkhtmltoimage + + for argpart in options: + if argpart: + yield argpart + + if self.cover and self.cover_first: + yield 'cover' + yield self.cover + + if self.toc: + yield 'toc' + for argpart in self._gegetate_args(self.toc): + if argpart: + yield argpart + + if self.cover and not self.cover_first: + yield 'cover' + yield self.cover + + # If the source is a string then we will pipe it into wkhtmltoimage + # If the source is file-like then we will read from it and pipe it in + if self.source.isString() or self.source.isFileObj(): + yield '-' + else: + if isinstance(self.source.source, basestring): + yield self.source.to_s() + else: + for s in self.source.source: + yield s + + # If output_path evaluates to False append '-' to end of args + # and wkhtmltoimage will pass generated IMG to stdout + if path: + yield path + else: + yield '-' + + def command(self, path=None): + return list(self._command(path)) + + def _normalize_options(self, options): + """ + Generator of 2-tuples (option-key, option-value). + When options spec is a list, generate a 2-tuples per list item. + + :param options: dict {option: value} + + returns: + iterator (option-key, option-value) + - option names lower cased and prepended with + '--' if necessary. Non-empty values cast to str + """ + for key, value in list(options.items()): + if '--' in key: + normalized_key = self._normalize_arg(key) + else: + normalized_key = '--%s' % self._normalize_arg(key) + + if isinstance(value, (list, tuple)): + for opt_val in value: + yield (normalized_key, opt_val) + else: + yield (normalized_key, str(value) if value else value) + + def _normalize_arg(self, arg): + return arg.lower() + + def _style_tag(self, stylesheet): + return "" % stylesheet + + def _prepend_css(self, path): + if self.source.isUrl() or isinstance(self.source.source, list): + raise self.SourceError('CSS files can be added only to a single file or string') + + if not isinstance(path, list): + path = [path] + + css_data = [] + for p in path: + with codecs.open(p, encoding="UTF-8") as f: + css_data.append(f.read()) + css_data = "\n".join(css_data) + + if self.source.isFile(): + with codecs.open(self.source.to_s(), encoding="UTF-8") as f: + inp = f.read() + self.source = Source( + inp.replace('', self._style_tag(css_data) + ''), + 'string') + + elif self.source.isString(): + if '' in self.source.to_s(): + self.source.source = self.source.to_s().replace( + '', self._style_tag(css_data) + '') + else: + self.source.source = self._style_tag(css_data) + self.source.to_s() + + def _find_options_in_meta(self, content): + """Reads 'content' and extracts options encoded in HTML meta tags + + :param content: str or file-like object - contains HTML to parse + + returns: + dict: {config option: value} + """ + if (isinstance(content, io.IOBase) + or content.__class__.__name__ == 'StreamReaderWriter'): + content = content.read() + + found = {} + + for x in re.findall(']*>', content): + if re.search('name=["\']%s' % self.config.meta_tag_prefix, x): + name = re.findall('name=["\']%s([^"\']*)' % + self.config.meta_tag_prefix, x)[0] + found[name] = re.findall('content=["\']([^"\']*)', x)[0] + + return found + + def to_img(self, path=None): + args = self.command(path) + + result = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + + # If the source is a string then we will pipe it into wkhtmltoimage. + # If we want to add custom CSS to file then we read input file to + # string and prepend css to it and then pass it to stdin. + # This is a workaround for a bug in wkhtmltoimage (look closely in README) + if self.source.isString() or (self.source.isFile() and self.css): + string = self.source.to_s().encode('utf-8') + elif self.source.isFileObj(): + string = self.source.source.read().encode('utf-8') + else: + string = None + stdout, stderr = result.communicate(input=string) + stderr = stderr or stdout + try: + stderr = stderr.decode('utf-8') + except UnicodeDecodeError: + stderr = '' + exit_code = result.returncode + + if 'cannot connect to X server' in stderr: + raise IOError('%s\n' + 'You will need to run wkhtmltoimage within a "virtual" X server.\n' + 'Go to the link below for more information\n' + 'http://wkhtmltopdf.org' % stderr) + + if 'Error' in stderr: + raise IOError('wkhtmltoimage reported an error:\n' + stderr) + + if exit_code != 0: + xvfb_error = '' + if 'QXcbConnection' in stderr: + xvfb_error = 'You need to install xvfb(sudo apt-get install xvfb, yum install xorg-x11-server-Xvfb, etc), then add option: {"xvfb": ""}.' + raise IOError("wkhtmltoimage exited with non-zero code {0}. error:\n{1}\n\n{2}".format(exit_code, stderr, xvfb_error)) + + # Since wkhtmltoimage sends its output to stderr we will capture it + # and properly send to stdout + if '--quiet' not in args and 'quiet' not in args: + sys.stdout.write(stderr) + + if not path: + return stdout + else: + try: + with codecs.open(path, mode='rb') as f: + text = f.read(4) + if text == '': + raise IOError('Command failed: %s\n' + 'Check whhtmltoimage output without \'quiet\' ' + 'option' % ' '.join(args)) + return True + except IOError as e: + raise IOError('Command failed: %s\n' + 'Check whhtmltoimage output without \'quiet\' option\n' + '%s ' % (' '.join(args)), e) diff --git a/imgkit/source.py b/imgkit/source.py new file mode 100644 index 00000000..28e96597 --- /dev/null +++ b/imgkit/source.py @@ -0,0 +1,41 @@ +# -*- coding: utf-8 -*- +import os +import io + + +class Source(object): + def __init__(self, url_or_file, type_): + self.source = url_or_file + self.type = type_ + + if self.type == 'file': + self.checkFiles() + + def isUrl(self): + return 'url' in self.type + + def isFile(self, path=None): + # dirty hack to check where file is opened with codecs module + # (because it returns 'instance' type when encoding is specified + if path: + return isinstance(path, io.IOBase) or path.__class__.__name__ == 'StreamReaderWriter' + else: + return 'file' in self.type + + def checkFiles(self): + if isinstance(self.source, list): + for path in self.source: + if not os.path.exists(path): + raise IOError('No such file: %s' % path) + else: + if not hasattr(self.source, 'read') and not os.path.exists(self.source): + raise IOError('No such file: %s' % self.source) + + def isString(self): + return 'string' in self.type + + def isFileObj(self): + return hasattr(self.source, 'read') + + def to_s(self): + return self.source diff --git a/improcessing.py b/improcessing.py index 408c256e..bd294e2b 100644 --- a/improcessing.py +++ b/improcessing.py @@ -7,26 +7,12 @@ import subprocess import sys import discord.ext -import imgkit from PIL import Image from winmagic import magic from multiprocessing import Pool -import functools import captionfunctions import humanize - -def disable_logging(func): - @functools.wraps(func) - def wrapper(*args, **kwargs): - logging.disable(logging.INFO) - result = func(*args, **kwargs) - logging.disable(logging.NOTSET) - return result - - return wrapper - - options = { "enable-local-file-access": None, "format": "png", @@ -82,22 +68,22 @@ async def run_command(*args): # TODO: sanitize this... this means change all st return result -@disable_logging -def imgkitstring(torender, tosavename=None): - if tosavename is None: - name = temp_file("png") - imgkit.from_string(torender, name, options=options) - return name - else: - imgkit.from_string(torender, tosavename, options=options) - return tosavename +def mute(): + devnull = open(os.devnull, 'w') + sys.stdout = devnull + sys.__stdout__ = devnull + sys.stderr = devnull + sys.__stderr__ = devnull + + +# @supress_stdout # https://askubuntu.com/questions/110264/how-to-find-frames-per-second-of-any-video-file def get_frame_rate(filename): logging.info("[improcessing] Getting FPS...") if not os.path.exists(filename): - sys.stderr.write("ERROR: filename %r was not found!" % (filename,)) + logging.error("ERROR: filename %r was not found!" % (filename,)) return -1 out = subprocess.check_output( ["ffprobe", filename, "-v", "0", "-select_streams", "v", "-print_format", "flat", "-show_entries", @@ -121,8 +107,8 @@ async def ffmpegsplit(image): async def splitaudio(video): logging.info("[improcessing] Splitting audio...") name = temp_file("aac") - result = await run_command("ffmpeg", "-i", video, "-vn", "-acodec", "copy", name) - logging.info(result) + result = await run_command("ffmpeg", "-hide_banner", "-i", video, "-vn", "-acodec", "copy", + name) if "Output file #0 does not contain any stream" in result: return False return name @@ -182,7 +168,7 @@ async def handleanimated(image: str, caption, capfunction): capargs = [] for i, frame in enumerate(frames): capargs.append((frame, caption, frame.replace('.png', '_rendered.png'))) - pool = Pool(32) + pool = Pool(1) # , initializer=mute) pool.starmap_async(capfunction, capargs) pool.close() pool.join()