diff --git a/.idea/workspace.xml b/.idea/workspace.xml
index c220b989..3a4180a4 100644
--- a/.idea/workspace.xml
+++ b/.idea/workspace.xml
@@ -26,10 +26,14 @@
+
+
+
+
+
-
@@ -174,7 +178,9 @@
-
+
+
+
1606187690936
@@ -295,7 +301,14 @@
1610512679342
-
+
+ 1610514693809
+
+
+
+ 1610514693809
+
+
@@ -351,6 +364,6 @@
-
+
\ No newline at end of file
diff --git a/captionfunctions.py b/captionfunctions.py
index 64471e21..2ee58597 100644
--- a/captionfunctions.py
+++ b/captionfunctions.py
@@ -1,3 +1,4 @@
+import contextlib
import io
import logging
import os
@@ -6,7 +7,9 @@
import subprocess
import sys
from PIL import Image
-from improcessing import filetostring, imgkitstring, temp_file
+
+import imgkit
+from improcessing import filetostring, temp_file, options
# stolen code https://stackoverflow.com/questions/6116978/how-to-replace-multiple-substrings-of-a-string
@@ -105,3 +108,10 @@ def jpeg(image, params: list, tosavename=None):
def speed(media):
pass
+
+
+def imgkitstring(torender, tosavename=None):
+ if tosavename is None:
+ tosavename = temp_file("png")
+ imgkit.from_string(torender, tosavename, options=options)
+ return tosavename
diff --git a/imgkit/__init__.py b/imgkit/__init__.py
new file mode 100644
index 00000000..53b227cf
--- /dev/null
+++ b/imgkit/__init__.py
@@ -0,0 +1,13 @@
+# -*- coding: utf-8 -*-
+"""
+Python wrapper around wkhtmltoimage that converts HTML to images using the WebKit rendering engine and Qt
+"""
+
+__author__ = 'jarrekk'
+__contact__ = 'me@jarrekk.com'
+__version__ = '1.0.2'
+__homepage__ = 'https://github.com/jarrekk/imgkit'
+__license__ = 'MIT'
+
+from .imgkit import IMGKit
+from .api import from_url, from_file, from_string, config
diff --git a/imgkit/api.py b/imgkit/api.py
new file mode 100644
index 00000000..7366c501
--- /dev/null
+++ b/imgkit/api.py
@@ -0,0 +1,101 @@
+# -*- coding: utf-8 -*-
+from .imgkit import IMGKit
+from .config import Config
+
+
+def from_url(url,
+ output_path,
+ options=None,
+ toc=None,
+ cover=None,
+ config=None,
+ cover_first=None):
+ """
+ Convert URL/URLs to IMG file/files
+
+ :param url: URL or list of URLs to be saved
+ :param output_path: path to the output IMG file. A false value means the image data is returned instead of written to a file
+ :param options: (optional) dict with wkhtmltoimage options, with or w/o '--'
+ :param toc: (optional) dict with toc-specific options passed to wkhtmltoimage, with or w/o '--'
+ :param cover: (optional) string with url/filename with a cover html page
+ Note: unlike from_file and from_string, this function does not accept a css argument
+ :param config: (optional) instance of imgkit.config.Config()
+ :param cover_first: (optional) if True, cover always precedes TOC
+ :return: True on success when output_path is given; otherwise the captured wkhtmltoimage output
+ """
+ rtn = IMGKit(url,
+ 'url',
+ options=options,
+ toc=toc, cover=cover,
+ config=config,
+ cover_first=cover_first)
+ return rtn.to_img(output_path)
+
+
+def from_file(filename,
+ output_path,
+ options=None,
+ toc=None,
+ cover=None,
+ css=None,
+ config=None,
+ cover_first=None):
+ """
+ Convert HTML file/files to IMG file/files
+
+ :param filename: path of HTML file or list with paths or file-like object
+ :param output_path: path to the output IMG file. A false value means the image data is returned instead of written to a file
+ :param options: (optional) dict with wkhtmltoimage options, with or w/o '--'
+ :param toc: (optional) dict with toc-specific options passed to wkhtmltoimage, with or w/o '--'
+ :param cover: (optional) string with url/filename with a cover html page
+ :param css: (optional) path, or list of paths, of CSS files whose contents are added to the input
+ :param config: (optional) instance of imgkit.config.Config()
+ :param cover_first: (optional) if True, cover always precedes TOC
+ :return: True on success when output_path is given; otherwise the captured wkhtmltoimage output
+ """
+ rtn = IMGKit(filename,
+ 'file',
+ options=options,
+ toc=toc,
+ cover=cover,
+ css=css,
+ config=config,
+ cover_first=cover_first)
+ return rtn.to_img(output_path)
+
+
+def from_string(string,
+ output_path,
+ options=None,
+ toc=None,
+ cover=None,
+ css=None,
+ config=None,
+ cover_first=None):
+ """
+ Convert given string/strings to IMG file
+
+ :param string: the string that is piped to wkhtmltoimage as its input
+ :param output_path: path to the output IMG file. A false value means the image data is returned instead of written to a file
+ :param options: (optional) dict with wkhtmltoimage options, with or w/o '--'
+ :param toc: (optional) dict with toc-specific options passed to wkhtmltoimage, with or w/o '--'
+ :param cover: (optional) string with url/filename with a cover html page
+ :param css: (optional) path, or list of paths, of CSS files whose contents are added to the input
+ :param config: (optional) instance of imgkit.config.Config()
+ :param cover_first: (optional) if True, cover always precedes TOC
+ :return: True on success when output_path is given; otherwise the captured wkhtmltoimage output
+ """
+ rtn = IMGKit(string, 'string', options=options, toc=toc, cover=cover, css=css,
+ config=config, cover_first=cover_first)
+ return rtn.to_img(output_path)
+
+
+def config(**kwargs):
+ """
+ Constructs and returns a :class:`Config` with given options
+
+ :param wkhtmltoimage: path to the wkhtmltoimage binary
+ :param meta_tag_prefix: the prefix for ``imgkit`` specific meta tags
+ """
+
+ return Config(**kwargs)
diff --git a/imgkit/config.py b/imgkit/config.py
new file mode 100644
index 00000000..22d02593
--- /dev/null
+++ b/imgkit/config.py
@@ -0,0 +1,36 @@
+# -*- coding: utf-8 -*-
+import subprocess
+import sys
+
+
+class Config(object):
+ def __init__(self, wkhtmltoimage='', meta_tag_prefix='imgkit-'):
+ self.meta_tag_prefix = meta_tag_prefix
+
+ self.wkhtmltoimage = wkhtmltoimage
+
+ self.xvfb = ''
+
+ if not self.wkhtmltoimage:
+ if sys.platform == 'win32':
+ self.wkhtmltoimage = subprocess.Popen(['where', 'wkhtmltoimage'],
+ stdout=subprocess.PIPE, stderr=subprocess.DEVNULL).communicate()[0].strip()
+ else:
+ self.wkhtmltoimage = subprocess.Popen(['which', 'wkhtmltoimage'],
+ stdout=subprocess.PIPE, stderr=subprocess.DEVNULL).communicate()[0].strip()
+ if not self.xvfb:
+ if sys.platform == 'win32':
+ self.xvfb = subprocess.Popen(['where', 'xvfb-run'],
+ stdout=subprocess.PIPE, stderr=subprocess.DEVNULL).communicate()[0].strip()
+ else:
+ self.xvfb = subprocess.Popen(['which', 'xvfb-run'],
+ stdout=subprocess.PIPE, stderr=subprocess.DEVNULL).communicate()[0].strip()
+
+ try:
+ with open(self.wkhtmltoimage):
+ pass
+ except IOError:
+ raise IOError('No wkhtmltoimage executable found: "{0}"\n'
+ 'If this file exists please check that this process can '
+ 'read it. Otherwise please install wkhtmltopdf - '
+ 'http://wkhtmltopdf.org\n'.format(self.wkhtmltoimage))
diff --git a/imgkit/imgkit.py b/imgkit/imgkit.py
new file mode 100644
index 00000000..e7bc30b9
--- /dev/null
+++ b/imgkit/imgkit.py
@@ -0,0 +1,267 @@
+# -*- coding: utf-8 -*-
+import re
+import subprocess
+import sys
+from .source import Source
+from .config import Config
+import io
+import codecs
+
+# Python 2.x and 3.x support for checking string types
+try:
+ assert basestring
+except NameError:
+ basestring = str
+
+
+class IMGKit(object):
+ """
+ Builds the wkhtmltoimage command line for a source and its options, and runs it.
+ """
+
+ class SourceError(Exception):
+ """Wrong source type for stylesheets"""
+
+ def __init__(self, message):
+ self.message = message
+
+ def __str__(self):
+ return self.message
+
+ def __init__(self, url_or_file, source_type, options=None, toc=None, cover=None,
+ css=None, config=None, cover_first=None):
+ self.source = Source(url_or_file, source_type)
+ self.config = Config() if not config else config
+ try:
+ self.wkhtmltoimage = self.config.wkhtmltoimage.decode('utf-8')
+ except AttributeError:
+ self.wkhtmltoimage = self.config.wkhtmltoimage
+
+ self.xvfb = self.config.xvfb
+
+ self.options = {}
+ if self.source.isString():
+ self.options.update(self._find_options_in_meta(url_or_file))
+
+ if options:
+ self.options.update(options)
+
+ self.toc = toc if toc else {}
+ self.cover = cover
+ self.cover_first = cover_first
+ self.css = css
+ self.stylesheets = []
+
+ def _gegetate_args(self, options):
+ """
+ Generator of args parts based on options specification.
+ """
+ for optkey, optval in self._normalize_options(options):
+ yield optkey
+
+ if isinstance(optval, (list, tuple)):
+ assert len(optval) == 2 and optval[0] and optval[
+ 1], 'Option value can only be either a string or a (tuple, list) of 2 items'
+ yield optval[0]
+ yield optval[1]
+ else:
+ yield optval
+
+ def _command(self, path=None):
+ """
+ Generator of all command parts
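+ :param path: output file path; when it is falsy, '-' is emitted so the image goes to stdout
+ :return: iterator over the parts of the command line, in order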
+ """
+ options = self._gegetate_args(self.options)
+ options = [x for x in options]
+ # print 'options', options
+ if self.css:
+ self._prepend_css(self.css)
+
+ if '--xvfb' in options:
+ options.remove('--xvfb')
+ yield self.xvfb
+ # auto servernum option to prevent failure on concurrent runs
+ # https://bugs.launchpad.net/ubuntu/+source/xorg-server/+bug/348052
+ yield '-a'
+
+ yield self.wkhtmltoimage
+
+ for argpart in options:
+ if argpart:
+ yield argpart
+
+ if self.cover and self.cover_first:
+ yield 'cover'
+ yield self.cover
+
+ if self.toc:
+ yield 'toc'
+ for argpart in self._gegetate_args(self.toc):
+ if argpart:
+ yield argpart
+
+ if self.cover and not self.cover_first:
+ yield 'cover'
+ yield self.cover
+
+ # If the source is a string then we will pipe it into wkhtmltoimage
+ # If the source is file-like then we will read from it and pipe it in
+ if self.source.isString() or self.source.isFileObj():
+ yield '-'
+ else:
+ if isinstance(self.source.source, basestring):
+ yield self.source.to_s()
+ else:
+ for s in self.source.source:
+ yield s
+
+ # If output_path evaluates to False append '-' to end of args
+ # and wkhtmltoimage will pass generated IMG to stdout
+ if path:
+ yield path
+ else:
+ yield '-'
+
+ def command(self, path=None):
+ return list(self._command(path))
+
+ def _normalize_options(self, options):
+ """
+ Generator of 2-tuples (option-key, option-value).
+ When an option's value is a list, generate one 2-tuple per list item.
+
+ :param options: dict {option: value}
+
+ returns:
+ iterator (option-key, option-value)
+ - option names lower cased and prepended with
+ '--' if necessary. Non-empty values cast to str
+ """
+ for key, value in list(options.items()):
+ if '--' in key:
+ normalized_key = self._normalize_arg(key)
+ else:
+ normalized_key = '--%s' % self._normalize_arg(key)
+
+ if isinstance(value, (list, tuple)):
+ for opt_val in value:
+ yield (normalized_key, opt_val)
+ else:
+ yield (normalized_key, str(value) if value else value)
+
+ def _normalize_arg(self, arg):
+ return arg.lower()
+
+ def _style_tag(self, stylesheet):
+ return "" % stylesheet
+
+ def _prepend_css(self, path):
+ if self.source.isUrl() or isinstance(self.source.source, list):
+ raise self.SourceError('CSS files can be added only to a single file or string')
+
+ if not isinstance(path, list):
+ path = [path]
+
+ css_data = []
+ for p in path:
+ with codecs.open(p, encoding="UTF-8") as f:
+ css_data.append(f.read())
+ css_data = "\n".join(css_data)
+
+ if self.source.isFile():
+ with codecs.open(self.source.to_s(), encoding="UTF-8") as f:
+ inp = f.read()
+ self.source = Source(
+ inp.replace('', self._style_tag(css_data) + ''),
+ 'string')
+
+ elif self.source.isString():
+ if '' in self.source.to_s():
+ self.source.source = self.source.to_s().replace(
+ '', self._style_tag(css_data) + '')
+ else:
+ self.source.source = self._style_tag(css_data) + self.source.to_s()
+
+ def _find_options_in_meta(self, content):
+ """Reads 'content' and extracts options encoded in HTML meta tags
+
+ :param content: str or file-like object - contains HTML to parse
+
+ returns:
+ dict: {config option: value}
+ """
+ if (isinstance(content, io.IOBase)
+ or content.__class__.__name__ == 'StreamReaderWriter'):
+ content = content.read()
+
+ found = {}
+
+ for x in re.findall(']*>', content):
+ if re.search('name=["\']%s' % self.config.meta_tag_prefix, x):
+ name = re.findall('name=["\']%s([^"\']*)' %
+ self.config.meta_tag_prefix, x)[0]
+ found[name] = re.findall('content=["\']([^"\']*)', x)[0]
+
+ return found
+
+ def to_img(self, path=None):
+ args = self.command(path)
+
+ result = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+
+ # If the source is a string then we will pipe it into wkhtmltoimage.
+ # If we want to add custom CSS to file then we read input file to
+ # string and prepend css to it and then pass it to stdin.
+ # This is a workaround for a bug in wkhtmltoimage (look closely in README)
+ if self.source.isString() or (self.source.isFile() and self.css):
+ string = self.source.to_s().encode('utf-8')
+ elif self.source.isFileObj():
+ string = self.source.source.read().encode('utf-8')
+ else:
+ string = None
+ stdout, stderr = result.communicate(input=string)
+ stderr = stderr or stdout
+ try:
+ stderr = stderr.decode('utf-8')
+ except UnicodeDecodeError:
+ stderr = ''
+ exit_code = result.returncode
+
+ if 'cannot connect to X server' in stderr:
+ raise IOError('%s\n'
+ 'You will need to run wkhtmltoimage within a "virtual" X server.\n'
+ 'Go to the link below for more information\n'
+ 'http://wkhtmltopdf.org' % stderr)
+
+ if 'Error' in stderr:
+ raise IOError('wkhtmltoimage reported an error:\n' + stderr)
+
+ if exit_code != 0:
+ xvfb_error = ''
+ if 'QXcbConnection' in stderr:
+ xvfb_error = 'You need to install xvfb(sudo apt-get install xvfb, yum install xorg-x11-server-Xvfb, etc), then add option: {"xvfb": ""}.'
+ raise IOError("wkhtmltoimage exited with non-zero code {0}. error:\n{1}\n\n{2}".format(exit_code, stderr, xvfb_error))
+
+ # Since wkhtmltoimage sends its output to stderr we will capture it
+ # and properly send to stdout
+ if '--quiet' not in args and 'quiet' not in args:
+ sys.stdout.write(stderr)
+
+ if not path:
+ return stdout
+ else:
+ try:
+ with codecs.open(path, mode='rb') as f:
+ text = f.read(4)
+ if text == '':
+ raise IOError('Command failed: %s\n'
+ 'Check whhtmltoimage output without \'quiet\' '
+ 'option' % ' '.join(args))
+ return True
+ except IOError as e:
+ raise IOError('Command failed: %s\n'
+ 'Check whhtmltoimage output without \'quiet\' option\n'
+ '%s ' % (' '.join(args)), e)
diff --git a/imgkit/source.py b/imgkit/source.py
new file mode 100644
index 00000000..28e96597
--- /dev/null
+++ b/imgkit/source.py
@@ -0,0 +1,41 @@
+# -*- coding: utf-8 -*-
+import os
+import io
+
+
+class Source(object):
+ def __init__(self, url_or_file, type_):
+ self.source = url_or_file
+ self.type = type_
+
+ if self.type == 'file':
+ self.checkFiles()
+
+ def isUrl(self):
+ return 'url' in self.type
+
+ def isFile(self, path=None):
+ # dirty hack to check whether the file was opened with the codecs module
+ # (because it returns 'instance' type when encoding is specified)
+ if path:
+ return isinstance(path, io.IOBase) or path.__class__.__name__ == 'StreamReaderWriter'
+ else:
+ return 'file' in self.type
+
+ def checkFiles(self):
+ if isinstance(self.source, list):
+ for path in self.source:
+ if not os.path.exists(path):
+ raise IOError('No such file: %s' % path)
+ else:
+ if not hasattr(self.source, 'read') and not os.path.exists(self.source):
+ raise IOError('No such file: %s' % self.source)
+
+ def isString(self):
+ return 'string' in self.type
+
+ def isFileObj(self):
+ return hasattr(self.source, 'read')
+
+ def to_s(self):
+ return self.source
diff --git a/improcessing.py b/improcessing.py
index 408c256e..bd294e2b 100644
--- a/improcessing.py
+++ b/improcessing.py
@@ -7,26 +7,12 @@
import subprocess
import sys
import discord.ext
-import imgkit
from PIL import Image
from winmagic import magic
from multiprocessing import Pool
-import functools
import captionfunctions
import humanize
-
-def disable_logging(func):
- @functools.wraps(func)
- def wrapper(*args, **kwargs):
- logging.disable(logging.INFO)
- result = func(*args, **kwargs)
- logging.disable(logging.NOTSET)
- return result
-
- return wrapper
-
-
options = {
"enable-local-file-access": None,
"format": "png",
@@ -82,22 +68,22 @@ async def run_command(*args): # TODO: sanitize this... this means change all st
return result
-@disable_logging
-def imgkitstring(torender, tosavename=None):
- if tosavename is None:
- name = temp_file("png")
- imgkit.from_string(torender, name, options=options)
- return name
- else:
- imgkit.from_string(torender, tosavename, options=options)
- return tosavename
+def mute():
+ devnull = open(os.devnull, 'w')
+ sys.stdout = devnull
+ sys.__stdout__ = devnull
+ sys.stderr = devnull
+ sys.__stderr__ = devnull
+
+
+# @supress_stdout
# https://askubuntu.com/questions/110264/how-to-find-frames-per-second-of-any-video-file
def get_frame_rate(filename):
logging.info("[improcessing] Getting FPS...")
if not os.path.exists(filename):
- sys.stderr.write("ERROR: filename %r was not found!" % (filename,))
+ logging.error("ERROR: filename %r was not found!" % (filename,))
return -1
out = subprocess.check_output(
["ffprobe", filename, "-v", "0", "-select_streams", "v", "-print_format", "flat", "-show_entries",
@@ -121,8 +107,8 @@ async def ffmpegsplit(image):
async def splitaudio(video):
logging.info("[improcessing] Splitting audio...")
name = temp_file("aac")
- result = await run_command("ffmpeg", "-i", video, "-vn", "-acodec", "copy", name)
- logging.info(result)
+ result = await run_command("ffmpeg", "-hide_banner", "-i", video, "-vn", "-acodec", "copy",
+ name)
if "Output file #0 does not contain any stream" in result:
return False
return name
@@ -182,7 +168,7 @@ async def handleanimated(image: str, caption, capfunction):
capargs = []
for i, frame in enumerate(frames):
capargs.append((frame, caption, frame.replace('.png', '_rendered.png')))
- pool = Pool(32)
+ pool = Pool(1) # , initializer=mute)
pool.starmap_async(capfunction, capargs)
pool.close()
pool.join()