-
-
Notifications
You must be signed in to change notification settings - Fork 2.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Feature Request: Parallel Build #1900
Comments
This would require a major rewrite of the build code, presumably using threads. I certainly don't have the time to work on this. That said, PRs are always welcome. |
Thank you for your prompt answer. |
I've tested this, and even when copying very large static files using multiple threads we don't save much time :/
If you have a lot of static files, it might be worthwhile. As for parallelizing rendering, @waylan is 100% correct, and I've tested this. The patch itself is rather ugly, since we need to use the threads as close to the I/O operation as possible; otherwise the GIL slows us down.
diff --git a/mkdocs/__main__.py b/mkdocs/__main__.py
index da803a6..5b8f273 100644
--- a/mkdocs/__main__.py
+++ b/mkdocs/__main__.py
@@ -3,6 +3,8 @@
import os
import sys
import logging
+from concurrent.futures.process import ProcessPoolExecutor
+
import click
# TODO: Remove this check at some point in the future.
@@ -36,6 +38,7 @@ class State:
pass_state = click.make_pass_decorator(State, ensure=True)
clean_help = "Remove old files from the site_dir before building (the default)."
+jobs_help = "Build the documentation in parallel using the provided number of workers (default: None)"
config_help = "Provide a specific MkDocs config"
dev_addr_help = ("IP address and port to serve documentation locally (default: "
"localhost:8000)")
@@ -144,14 +147,15 @@ def serve_command(dev_addr, livereload, **kwargs):
@cli.command(name="build")
@click.option('-c', '--clean/--dirty', is_flag=True, default=True, help=clean_help)
+@click.option('-j', '--jobs', default=None, type=click.INT, help=jobs_help)
@common_config_options
@click.option('-d', '--site-dir', type=click.Path(), help=site_dir_help)
@common_options
-def build_command(clean, **kwargs):
+def build_command(clean, jobs, **kwargs):
"""Build the MkDocs documentation"""
try:
- build.build(config.load_config(**kwargs), dirty=not clean)
+ build.build(config.load_config(**kwargs), dirty=not clean, jobs=jobs)
except exceptions.ConfigurationError as e: # pragma: no cover
# Avoid ugly, unhelpful traceback
raise SystemExit('\n' + str(e))
diff --git a/mkdocs/commands/build.py b/mkdocs/commands/build.py
index d1dd7a5..eda1849 100644
--- a/mkdocs/commands/build.py
+++ b/mkdocs/commands/build.py
@@ -1,3 +1,4 @@
+from concurrent.futures.thread import ThreadPoolExecutor
from datetime import datetime
from calendar import timegm
import logging
@@ -16,6 +17,7 @@ import mkdocs
class DuplicateFilter:
''' Avoid logging duplicate messages. '''
+
def __init__(self):
self.msgs = set()
@@ -222,15 +224,12 @@ def _build_page(page, config, files, nav, env, dirty=False):
utils.write_file(output.encode('utf-8', errors='xmlcharrefreplace'), page.file.abs_dest_path)
else:
log.info("Page skipped: '{}'. Generated empty output.".format(page.file.src_path))
-
- # Deactivate page
- page.active = False
except Exception as e:
log.error("Error building page '{}': {}".format(page.file.src_path, e))
raise
-def build(config, live_server=False, dirty=False):
+def build(config, live_server=False, dirty=False, jobs=None):
""" Perform a full site build. """
from time import time
start = time()
@@ -282,7 +281,11 @@ def build(config, live_server=False, dirty=False):
# with lower precedence get written first so that files with higher precedence can overwrite them.
log.debug("Copying static assets.")
- files.copy_static_files(dirty=dirty)
+ if jobs:
+ with ThreadPoolExecutor(jobs) as pool:
+ files.copy_static_files(dirty=dirty, pool=pool)
+ else:
+ files.copy_static_files(dirty=dirty)
for template in config['theme'].static_templates:
_build_theme_template(template, env, files, config, nav)
diff --git a/mkdocs/structure/files.py b/mkdocs/structure/files.py
index a693fad..fdfa7da 100644
--- a/mkdocs/structure/files.py
+++ b/mkdocs/structure/files.py
@@ -1,6 +1,7 @@
import fnmatch
import os
import logging
+from concurrent.futures import wait
from functools import cmp_to_key
from urllib.parse import quote as urlquote
@@ -35,11 +36,15 @@ class Files:
self._files.append(file)
self.src_paths[file.src_path] = file
- def copy_static_files(self, dirty=False):
+ def copy_static_files(self, dirty=False, pool=None):
""" Copy static files from source to destination. """
- for file in self:
- if not file.is_documentation_page():
- file.copy_file(dirty)
+ if pool:
+ results = [file.copy_file(dirty, pool=pool) for file in self if not file.is_documentation_page()]
+ wait([result for result in results if result])
+ else:
+ for file in self:
+ if not file.is_documentation_page():
+ file.copy_file(dirty)
def documentation_pages(self):
""" Return iterable of all Markdown page file objects. """
@@ -172,13 +177,13 @@ class File:
""" Return url for file relative to other file. """
return utils.get_relative_url(self.url, other.url if isinstance(other, File) else other)
- def copy_file(self, dirty=False):
+ def copy_file(self, dirty=False, pool=None):
""" Copy source file to destination, ensuring parent directories exist. """
if dirty and not self.is_modified():
log.debug("Skip copying unmodified file: '{}'".format(self.src_path))
else:
log.debug("Copying media file: '{}'".format(self.src_path))
- utils.copy_file(self.abs_src_path, self.abs_dest_path)
+ return utils.copy_file(self.abs_src_path, self.abs_dest_path, pool=pool)
def is_modified(self):
if os.path.isfile(self.abs_dest_path):
diff --git a/mkdocs/tests/integration.py b/mkdocs/tests/integration.py
index f4a3f2a..c1f7300 100644
--- a/mkdocs/tests/integration.py
+++ b/mkdocs/tests/integration.py
@@ -35,7 +35,10 @@ TEST_PROJECTS = os.path.abspath(os.path.join(DIR, 'integration'))
help="The output directory to use when building themes",
type=click.Path(file_okay=False, writable=True),
required=True)
-def main(output=None):
+@click.option('--project',
+ help="The project to build",
+ default=None)
+def main(output=None, project=None):
log.propagate = False
stream = logging.StreamHandler()
@@ -47,19 +50,43 @@ def main(output=None):
base_cmd = ['mkdocs', 'build', '-s', '-v', '--site-dir', ]
- log.debug("Building installed themes.")
- for theme in sorted(MKDOCS_THEMES):
- log.debug("Building theme: {}".format(theme))
- project_dir = os.path.dirname(MKDOCS_CONFIG)
- out = os.path.join(output, theme)
- command = base_cmd + [out, '--theme', theme]
- subprocess.check_call(command, cwd=project_dir)
+ if not project:
+ log.debug("Building installed themes.")
+ for theme in sorted(MKDOCS_THEMES):
+ log.debug("Building theme: {}".format(theme))
+ project_dir = os.path.dirname(MKDOCS_CONFIG)
+ out = os.path.join(output, theme)
+ command = base_cmd + [out, '--theme', theme]
+ subprocess.check_call(command, cwd=project_dir)
log.debug("Building test projects.")
- for project in os.listdir(TEST_PROJECTS):
- log.debug("Building test project: {}".format(project))
- project_dir = os.path.join(TEST_PROJECTS, project)
- out = os.path.join(output, project)
+ for test_project in os.listdir(TEST_PROJECTS):
+ if project and test_project != project:
+ continue
+ log.debug("Building test project: {}".format(test_project))
+ project_dir = os.path.join(TEST_PROJECTS, test_project)
+ out = os.path.join(output, test_project)
+ command = base_cmd + [out, ]
+ subprocess.check_call(command, cwd=project_dir)
+
+ base_cmd = ['mkdocs', 'build', '-j8', '-s', '-v', '--site-dir', ]
+
+ if not project:
+ log.debug("Building installed themes again with 2 jobs.")
+ for theme in sorted(MKDOCS_THEMES):
+ log.debug("Building theme with 2 jobs: {}".format(theme))
+ project_dir = os.path.dirname(MKDOCS_CONFIG)
+ out = os.path.join(output, theme)
+ command = base_cmd + [out, '--theme', theme]
+ subprocess.check_call(command, cwd=project_dir)
+
+ log.debug("Building test projects again with 2 jobs.")
+ for test_project in os.listdir(TEST_PROJECTS):
+ if project and test_project != project:
+ continue
+ log.debug("Building test project with 2 jobs: {}".format(test_project))
+ project_dir = os.path.join(TEST_PROJECTS, test_project)
+ out = os.path.join(output, test_project)
command = base_cmd + [out, ]
subprocess.check_call(command, cwd=project_dir)
diff --git a/mkdocs/utils/__init__.py b/mkdocs/utils/__init__.py
index 7b26a7c..4834cfc 100644
--- a/mkdocs/utils/__init__.py
+++ b/mkdocs/utils/__init__.py
@@ -86,7 +86,7 @@ def reduce_list(data_set):
item not in seen and not seen.add(item)]
-def copy_file(source_path, output_path):
+def copy_file(source_path, output_path, pool=None):
"""
Copy source_path to output_path, making sure any parent directories exist.
@@ -97,7 +97,10 @@ def copy_file(source_path, output_path):
os.makedirs(output_dir)
if os.path.isdir(output_path):
output_path = os.path.join(output_path, os.path.basename(source_path))
- shutil.copyfile(source_path, output_path)
+ if pool:
+ return pool.submit(shutil.copyfile, source_path, output_path)
+ else:
+ shutil.copyfile(source_path, output_path)
def write_file(content, output_path):
diff --git a/tox.ini b/tox.ini
index f941918..454c243 100644
--- a/tox.ini
+++ b/tox.ini
@@ -13,7 +13,7 @@ commands=
{envpython} --version
py{35,36,37,38,py3}-{unittests,min-req}: {envbindir}/coverage run --source=mkdocs --omit 'mkdocs/tests/*' -m unittest discover -p '*tests.py' mkdocs
py{35,36,37,38,py3}-{unittests,min-req}: {envbindir}/coverage report --show-missing
- py{35,36,37,38,py3}-integration: {envpython} -m mkdocs.tests.integration --output={envtmpdir}/builds
+ py{35,36,37,38,py3}-integration: {envpython} -m mkdocs.tests.integration --output={envtmpdir}/builds {posargs}
[testenv:flake8]
deps=-rrequirements/test.txt If |
The following patch does the same with asyncio. However, it requires us to switch from click to asyncclick. Before I adjust the tests, let me know if I should proceed with this.
diff --git a/mkdocs/__main__.py b/mkdocs/__main__.py
index da803a6..9122071 100644
--- a/mkdocs/__main__.py
+++ b/mkdocs/__main__.py
@@ -3,7 +3,8 @@
import os
import sys
import logging
-import click
+import asyncclick as click
+click.anyio_backend = "asyncio"
# TODO: Remove this check at some point in the future.
# (also remove flake8's 'ignore E402' comments below)
@@ -147,11 +148,11 @@ def serve_command(dev_addr, livereload, **kwargs):
@common_config_options
@click.option('-d', '--site-dir', type=click.Path(), help=site_dir_help)
@common_options
-def build_command(clean, **kwargs):
+async def build_command(clean, **kwargs):
"""Build the MkDocs documentation"""
try:
- build.build(config.load_config(**kwargs), dirty=not clean)
+ await build.build(config.load_config(**kwargs), dirty=not clean)
except exceptions.ConfigurationError as e: # pragma: no cover
# Avoid ugly, unhelpful traceback
raise SystemExit('\n' + str(e))
diff --git a/mkdocs/commands/build.py b/mkdocs/commands/build.py
index d1dd7a5..88548cb 100644
--- a/mkdocs/commands/build.py
+++ b/mkdocs/commands/build.py
@@ -1,3 +1,4 @@
+import asyncio
from datetime import datetime
from calendar import timegm
import logging
@@ -100,7 +101,7 @@ def _build_template(name, template, files, config, nav):
return output
-def _build_theme_template(template_name, env, files, config, nav):
+async def _build_theme_template(template_name, env, files, config, nav):
""" Build a template using the theme environment. """
log.debug("Building theme template: {}".format(template_name))
@@ -115,7 +116,7 @@ def _build_theme_template(template_name, env, files, config, nav):
if output.strip():
output_path = os.path.join(config['site_dir'], template_name)
- utils.write_file(output.encode('utf-8'), output_path)
+ await utils.awrite_file(output.encode('utf-8'), output_path)
if template_name == 'sitemap.xml':
log.debug("Gzipping template: %s", template_name)
@@ -125,7 +126,7 @@ def _build_theme_template(template_name, env, files, config, nav):
log.info("Template skipped: '{}' generated empty output.".format(template_name))
-def _build_extra_template(template_name, files, config, nav):
+async def _build_extra_template(template_name, files, config, nav):
""" Build user templates which are not part of the theme. """
log.debug("Building extra template: {}".format(template_name))
@@ -145,7 +146,7 @@ def _build_extra_template(template_name, files, config, nav):
output = _build_template(template_name, template, files, config, nav)
if output.strip():
- utils.write_file(output.encode('utf-8'), file.abs_dest_path)
+ await utils.awrite_file(output.encode('utf-8'), file.abs_dest_path)
else:
log.info("Template skipped: '{}' generated empty output.".format(template_name))
@@ -182,7 +183,7 @@ def _populate_page(page, config, files, dirty=False):
raise
-def _build_page(page, config, files, nav, env, dirty=False):
+async def _build_page(page, config, files, nav, env, dirty=False):
""" Pass a Page to theme template and write output to site_dir. """
try:
@@ -219,7 +220,7 @@ def _build_page(page, config, files, nav, env, dirty=False):
# Write the output file.
if output.strip():
- utils.write_file(output.encode('utf-8', errors='xmlcharrefreplace'), page.file.abs_dest_path)
+ await utils.awrite_file(output.encode('utf-8', errors='xmlcharrefreplace'), page.file.abs_dest_path)
else:
log.info("Page skipped: '{}'. Generated empty output.".format(page.file.src_path))
@@ -230,7 +231,7 @@ def _build_page(page, config, files, nav, env, dirty=False):
raise
-def build(config, live_server=False, dirty=False):
+async def build(config, live_server=False, dirty=False):
""" Perform a full site build. """
from time import time
start = time()
@@ -282,17 +283,17 @@ def build(config, live_server=False, dirty=False):
# with lower precedence get written first so that files with higher precedence can overwrite them.
log.debug("Copying static assets.")
- files.copy_static_files(dirty=dirty)
+ await files.copy_static_files(dirty=dirty)
- for template in config['theme'].static_templates:
- _build_theme_template(template, env, files, config, nav)
+ await asyncio.gather(*[_build_theme_template(template, env, files, config, nav)
+ for template in config['theme'].static_templates])
- for template in config['extra_templates']:
- _build_extra_template(template, files, config, nav)
+ await asyncio.gather(*[_build_extra_template(template, files, config, nav)
+ for template in config['extra_templates']])
log.debug("Building markdown pages.")
- for file in files.documentation_pages():
- _build_page(file.page, config, files, nav, env, dirty)
+ await asyncio.gather(*[_build_page(file.page, config, files, nav, env, dirty)
+ for file in files.documentation_pages()])
# Run `post_build` plugin events.
config['plugins'].run_event('post_build', config=config)
diff --git a/mkdocs/structure/files.py b/mkdocs/structure/files.py
index a693fad..571255a 100644
--- a/mkdocs/structure/files.py
+++ b/mkdocs/structure/files.py
@@ -1,3 +1,4 @@
+import asyncio
import fnmatch
import os
import logging
@@ -35,11 +36,14 @@ class Files:
self._files.append(file)
self.src_paths[file.src_path] = file
- def copy_static_files(self, dirty=False):
+ async def copy_static_files(self, dirty=False):
""" Copy static files from source to destination. """
+ awaitables = []
for file in self:
if not file.is_documentation_page():
- file.copy_file(dirty)
+ awaitables.append(file.copy_file(dirty))
+
+ await asyncio.gather(*awaitables)
def documentation_pages(self):
""" Return iterable of all Markdown page file objects. """
@@ -172,13 +176,13 @@ class File:
""" Return url for file relative to other file. """
return utils.get_relative_url(self.url, other.url if isinstance(other, File) else other)
- def copy_file(self, dirty=False):
+ async def copy_file(self, dirty=False):
""" Copy source file to destination, ensuring parent directories exist. """
if dirty and not self.is_modified():
log.debug("Skip copying unmodified file: '{}'".format(self.src_path))
else:
log.debug("Copying media file: '{}'".format(self.src_path))
- utils.copy_file(self.abs_src_path, self.abs_dest_path)
+ await utils.acopy_file(self.abs_src_path, self.abs_dest_path)
def is_modified(self):
if os.path.isfile(self.abs_dest_path):
diff --git a/mkdocs/utils/__init__.py b/mkdocs/utils/__init__.py
index 7b26a7c..fab1dbb 100644
--- a/mkdocs/utils/__init__.py
+++ b/mkdocs/utils/__init__.py
@@ -5,9 +5,10 @@ Nothing in this module should have an knowledge of config or the layout
and structure of the site and pages in the site.
"""
-
import logging
import os
+
+import anyio
import pkg_resources
import shutil
import re
@@ -16,6 +17,8 @@ import fnmatch
import posixpath
from urllib.parse import urlparse
+from anyio import aopen
+
from mkdocs import exceptions
log = logging.getLogger(__name__)
@@ -86,6 +89,20 @@ def reduce_list(data_set):
item not in seen and not seen.add(item)]
+async def acopy_file(source_path, output_path):
+ """
+ Copy source_path to output_path, making sure any parent directories exist.
+
+ The output_path may be a directory.
+ """
+ output_dir = os.path.dirname(output_path)
+ if not os.path.exists(output_dir):
+ os.makedirs(output_dir)
+ if os.path.isdir(output_path):
+ output_path = os.path.join(output_path, os.path.basename(source_path))
+ await anyio.run_in_thread(shutil.copyfile, source_path, output_path)
+
+
def copy_file(source_path, output_path):
"""
Copy source_path to output_path, making sure any parent directories exist.
@@ -100,6 +117,17 @@ def copy_file(source_path, output_path):
shutil.copyfile(source_path, output_path)
+async def awrite_file(content, output_path):
+ """
+ Write content to output_path asynchronously, making sure any parent directories exist.
+ """
+ output_dir = os.path.dirname(output_path)
+ if not os.path.exists(output_dir):
+ os.makedirs(output_dir)
+ async with await aopen(output_path, 'wb') as f:
+ await f.write(content)
+
+
def write_file(content, output_path):
"""
Write content to output_path, making sure any parent directories exist.
diff --git a/requirements/project.txt b/requirements/project.txt
index 846d72c..cede29d 100644
--- a/requirements/project.txt
+++ b/requirements/project.txt
@@ -1,4 +1,4 @@
-click>=7.0
+asyncclick>=7.0
Jinja2>=2.10.3
livereload>=2.6.1
Markdown>=3.2.1 |
Thank you for the detailed investigation. Let's keep things simple and reliable! |
Could this issue be reopened? I think it's a valid feature request. |
I'm not sure. On one hand, yes, performance optimisations are great to have, whether through parallelization or something else. On the other hand, parallelization might be extremely hard to pull off without breaking almost all existing plugins. If someone wants to try and manages to do it, we could certainly review the code. But I can't guarantee it would be merged, or that it's even feasible. |
The GNU `make` utility allows parallel builds with the `-j` argument.
Could MkDocs provide the same feature to speed up the website generation?
Thank you!
The text was updated successfully, but these errors were encountered: