diff --git a/docs2/Makefile b/docs2/Makefile
new file mode 100644
index 0000000000..d0381deff5
--- /dev/null
+++ b/docs2/Makefile
@@ -0,0 +1,161 @@
+# Makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line.
+SPHINXOPTS    = -W  # turn warnings into errors
+SPHINXBUILD   = sphinx-build
+PAPER         =
+BUILDDIR      = _build
+DIAGRAM_BUILD_DIR = _diagrams
+
+# Internal variables.
+PAPEROPT_a4     = -D latex_paper_size=a4
+PAPEROPT_letter = -D latex_paper_size=letter
+ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
+# the i18n builder cannot share the environment and doctrees with the others
+I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
+
+.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext diagrams
+
+help:
+	@echo "Please use \`make <target>' where <target> is one of"
+	@echo "  html        to make standalone HTML files"
+	@echo "  diagrams    to make diagram images"
+	@echo "  dirhtml     to make HTML files named index.html in directories"
+	@echo "  singlehtml  to make a single large HTML file"
+	@echo "  pickle      to make pickle files"
+	@echo "  json        to make JSON files"
+	@echo "  htmlhelp    to make HTML files and a HTML help project"
+	@echo "  qthelp      to make HTML files and a qthelp project"
+	@echo "  devhelp     to make HTML files and a Devhelp project"
+	@echo "  epub        to make an epub"
+	@echo "  latex       to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
+	@echo "  latexpdf    to make LaTeX files and run them through pdflatex"
+	@echo "  text        to make text files"
+	@echo "  man         to make manual pages"
+	@echo "  texinfo     to make Texinfo files"
+	@echo "  info        to make Texinfo files and run them through makeinfo"
+	@echo "  gettext     to make PO message catalogs"
+	@echo "  changes     to make an overview of all changed/added/deprecated items"
+	@echo "  linkcheck   to check all external links for integrity"
+	@echo "  doctest     to run all doctests embedded in the documentation (if enabled)"
+
+clean:
+	rm -rf $(BUILDDIR)/*
+	rm -rf $(DIAGRAM_BUILD_DIR)/*
+
+diagrams:
+	mkdir -p $(DIAGRAM_BUILD_DIR)
+	plantuml diagrams_src/*.dot
+	mv diagrams_src/*.png $(DIAGRAM_BUILD_DIR)/
+
+html:
+	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
+	@echo
+	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
+
+dirhtml:
+	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
+	@echo
+	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
+
+singlehtml:
+	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
+	@echo
+	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
+
+pickle:
+	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
+	@echo
+	@echo "Build finished; now you can process the pickle files."
+
+json:
+	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
+	@echo
+	@echo "Build finished; now you can process the JSON files."
+
+htmlhelp:
+	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
+	@echo
+	@echo "Build finished; now you can run HTML Help Workshop with the" \
+	      ".hhp project file in $(BUILDDIR)/htmlhelp."
+
+qthelp:
+	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
+	@echo
+	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
+	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
+	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/PulpDocs.qhcp"
+	@echo "To view the help file:"
+	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/PulpDocs.qhc"
+
+devhelp:
+	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
+	@echo
+	@echo "Build finished."
+	@echo "To view the help file:"
+	@echo "# mkdir -p $$HOME/.local/share/devhelp/PulpDocs"
+	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/PulpDocs"
+	@echo "# devhelp"
+
+epub:
+	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
+	@echo
+	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
+
+latex:
+	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
+	@echo
+	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
+	@echo "Run \`make' in that directory to run these through (pdf)latex" \
+	      "(use \`make latexpdf' here to do that automatically)."
+
+latexpdf:
+	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
+	@echo "Running LaTeX files through pdflatex..."
+	$(MAKE) -C $(BUILDDIR)/latex all-pdf
+	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
+
+text:
+	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
+	@echo
+	@echo "Build finished. The text files are in $(BUILDDIR)/text."
+
+man:
+	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
+	@echo
+	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
+
+texinfo:
+	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
+	@echo
+	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
+	@echo "Run \`make' in that directory to run these through makeinfo" \
+	      "(use \`make info' here to do that automatically)."
+
+info:
+	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
+	@echo "Running Texinfo files through makeinfo..."
+	make -C $(BUILDDIR)/texinfo info
+	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
+
+gettext:
+	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
+	@echo
+	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
+
+changes:
+	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
+	@echo
+	@echo "The overview file is in $(BUILDDIR)/changes."
+
+linkcheck:
+	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
+	@echo
+	@echo "Link check complete; look for any errors in the above output " \
+	      "or in $(BUILDDIR)/linkcheck/output.txt."
+
+doctest:
+	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
+	@echo "Testing of doctests in the sources finished, look at the " \
+	      "results in $(BUILDDIR)/doctest/output.txt."
diff --git a/docs2/api-reference/content-app.rst b/docs2/api-reference/content-app.rst
new file mode 100644
index 0000000000..c4e64f4184
--- /dev/null
+++ b/docs2/api-reference/content-app.rst
@@ -0,0 +1,74 @@
+.. _content-app-docs:
+
+pulpcore.plugin.content
+=======================
+
+The Content app provides built-in functionality to handle user requests for content, but in some
+cases the default behavior may not work for some content types. For example, Docker content
+requires specific response headers to be present. In these cases the plugin writer should provide
+a custom Handler to the Content App by subclassing `pulpcore.plugin.content.Handler`.
+
+Making a custom Handler is a two-step process:
+
+1. Subclass `pulpcore.plugin.content.Handler` to define your Handler's behavior.
+2. Add the Handler to a route using aiohttp.server's `add_route() `_ interface.
+
+
+Creating your Handler
+---------------------
+
+Import the Handler object through the plugin API and then subclass it. Custom functionality can be
+provided by overriding the various methods of `Handler`, but here is the simplest version:
+
+.. code-block:: python
+
+    from pulpcore.plugin.content import Handler
+
+    class MyHandler(Handler):
+
+        pass
+
+Here is an example of the `Docker custom Handler `_.
+
+
+Registering your Handler
+------------------------
+
+We register the Handler with Pulp's Content App by importing the aiohttp.server 'app' and then
+adding a custom route to it. Here's an example:
+
+.. code-block:: python
+
+    from aiohttp import web
+
+    from pulpcore.content import app
+
+    app.add_routes([web.get(r'/my/custom/{somevar:.+}', MyHandler().stream_content)])
+
+
+Here is an example of `Docker registering some custom routes `_.
+
+
+Restricting which detail Distributions Match
+--------------------------------------------
+
+To restrict which Distribution model types your Handler will serve, set the `distribution_model`
+field to your Model type. This causes the Handler to only search/serve your Distribution types.
+
+.. code-block:: python
+
+    from pulpcore.plugin.content import Handler
+
+    from models import MyDistribution
+
+
+    class MyHandler(Handler):
+
+        distribution_model = MyDistribution
+
+
+pulpcore.plugin.content.Handler
+-------------------------------
+
+.. autoclass:: pulpcore.plugin.content.Handler
diff --git a/docs2/api-reference/download.rst b/docs2/api-reference/download.rst
new file mode 100644
index 0000000000..8ff4b785e3
--- /dev/null
+++ b/docs2/api-reference/download.rst
@@ -0,0 +1,230 @@
+.. _download-docs:
+
+pulpcore.plugin.download
+========================
+
+This module implements downloaders that solve many of the common problems plugin writers have
+while downloading remote data. A high level list of features provided by these downloaders
+includes:
+
+* auto-configuration from remote settings (auth, ssl, proxy)
+* synchronous or parallel downloading
+* digest and size validation computed during download
+* grouping downloads together to return to the user when all files are downloaded
+* customizable download behaviors via subclassing
+
+All classes documented here should be imported directly from the
+``pulpcore.plugin.download`` namespace.
+
+Basic Downloading
+-----------------
+
+The most basic downloading from a url can be done like this:
+
+>>> from pulpcore.plugin.download import HttpDownloader
+>>> downloader = HttpDownloader('http://example.com/')
+>>> result = downloader.fetch()
+
+The example above downloads the data synchronously. The
+:meth:`~pulpcore.plugin.download.HttpDownloader.fetch` call blocks until the data is
+downloaded and the :class:`~pulpcore.plugin.download.DownloadResult` is returned or a fatal
+exception is raised.
+
+Parallel Downloading
+--------------------
+
+Any downloader in the ``pulpcore.plugin.download`` package can be run in parallel with the
+``asyncio`` event loop. Each downloader has a
+:meth:`~pulpcore.plugin.download.BaseDownloader.run` method which returns a coroutine object
+that ``asyncio`` can schedule in parallel.
+Consider this example:
+
+>>> import asyncio
+>>> download_coroutines = [
+>>>     HttpDownloader('http://example.com/').run(),
+>>>     HttpDownloader('http://pulpproject.org/').run(),
+>>> ]
+>>>
+>>> loop = asyncio.get_event_loop()
+>>> done, not_done = loop.run_until_complete(asyncio.wait(download_coroutines))
+>>>
+>>> for task in done:
+>>>     try:
+>>>         task.result()  # This is a DownloadResult
+>>>     except Exception as error:
+>>>         pass  # fatal exceptions are raised by result()
+
+.. _download-result:
+
+Download Results
+----------------
+
+The download result contains all the information about a completed download and is returned from
+the downloader's `run()` method when the download is complete.
+
+.. autoclass:: pulpcore.plugin.download.DownloadResult
+    :no-members:
+
+.. _configuring-from-a-remote:
+
+Configuring from a Remote
+-------------------------
+
+When fetching content during a sync, the remote has settings like SSL certs, SSL validation, basic
+auth credentials, and proxy settings. Downloaders commonly want to use these settings while
+downloading. The Remote's settings can automatically configure a downloader either to download a
+`url` or a :class:`pulpcore.plugin.models.RemoteArtifact` using the
+:meth:`~pulpcore.plugin.models.Remote.get_downloader` call. Here is an example download from a URL:
+
+>>> downloader = my_remote.get_downloader(url='http://example.com')
+>>> downloader.fetch()  # This downloader is configured with the remote's settings
+
+Here is an example of a download configured from a RemoteArtifact, which also configures the
+downloader with digest and size validation:
+
+>>> remote_artifact = RemoteArtifact.objects.get(...)
+>>> downloader = my_remote.get_downloader(remote_artifact=remote_artifact)
+>>> downloader.fetch()  # This downloader has the remote's settings and digest+size validation
+
+The :meth:`~pulpcore.plugin.models.Remote.get_downloader` internally calls the
+`DownloaderFactory`, so it expects a `url` that the `DownloaderFactory` can build a downloader
+for. See the :class:`~pulpcore.plugin.download.DownloaderFactory` for more information on
+supported urls.
+
+.. tip::
+    The :meth:`~pulpcore.plugin.models.Remote.get_downloader` accepts kwargs that can
+    enable size or digest based validation, and allow specifying a file-like object for the data
+    to be written into. See :meth:`~pulpcore.plugin.models.Remote.get_downloader` for more
+    information.
+
+.. note::
+    All :class:`~pulpcore.plugin.download.HttpDownloader` downloaders produced by the same
+    remote instance share an `aiohttp` session, which provides a connection pool, connection
+    reuse and keep-alives shared across all downloaders produced by a single remote.
+
+
+.. _automatic-retry:
+
+Automatic Retry
+---------------
+
+The :class:`~pulpcore.plugin.download.HttpDownloader` will automatically retry 10 times if the
+server responds with one of the following error codes:
+
+* 429 - Too Many Requests
+
+
+.. _exception-handling:
+
+Exception Handling
+------------------
+
+Unrecoverable errors of several types can be raised during downloading. One example is a
+:ref:`validation exception <validation-exceptions>` that is raised if the content downloaded fails
+size or digest validation. There can also be protocol specific errors, such as an
+``aiohttp.ClientResponseError`` being raised when a server responds with a 400+ response such as
+an HTTP 403.
+
+Plugin writers can choose to halt the entire task by letting the exception go uncaught, which
+marks the entire task as failed.
+
+.. note::
+    The :class:`~pulpcore.plugin.download.HttpDownloader` automatically retries in some cases,
+    but if unsuccessful it will raise an exception for any HTTP response code that is 400 or
+    greater.
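+
+As a minimal sketch of handling such a failure yourself (assuming a ``my_remote`` configured as in
+the examples above and the validation exceptions documented in :ref:`validation-exceptions`; the
+URL is illustrative), a task could catch the validation errors rather than letting them fail the
+whole task:
+
+.. code-block:: python
+
+    from pulpcore.exceptions import DigestValidationError, SizeValidationError
+
+    downloader = my_remote.get_downloader(url='http://example.com/file.iso')
+    try:
+        result = downloader.fetch()
+    except (DigestValidationError, SizeValidationError):
+        # The downloaded data failed size or digest validation; skip this
+        # unit instead of allowing the uncaught exception to fail the task.
+        result = None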
+
+.. _custom-download-behavior:
+
+Custom Download Behavior
+------------------------
+
+Custom download behavior is provided by subclassing a downloader and providing a new `run()`
+method. For example, you could catch a specific error code like a 404 and try another mirror if
+your downloader knew of several mirrors. Here is an `example of that
+`_ in
+code.
+
+A custom downloader can be given as the downloader to use for a given protocol using the
+``downloader_overrides`` on the :class:`~pulpcore.plugin.download.DownloaderFactory`.
+Additionally, you can implement the :meth:`~pulpcore.plugin.models.Remote.get_downloader`
+method to specify the ``downloader_overrides`` to the
+:class:`~pulpcore.plugin.download.DownloaderFactory`.
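+
+As a rough sketch of the mirror idea above (the class name, constructor, mirror list, and `run()`
+signature here are illustrative assumptions, not the linked implementation; a real downloader
+would also need to reset any partially written data between attempts):
+
+.. code-block:: python
+
+    import aiohttp
+
+    from pulpcore.plugin.download import HttpDownloader
+
+
+    class MirrorFallbackDownloader(HttpDownloader):
+        """Hypothetical downloader that tries known mirrors when a url 404s."""
+
+        def __init__(self, url, mirrors=None, **kwargs):
+            super().__init__(url, **kwargs)
+            self.mirrors = mirrors or []
+
+        async def run(self, extra_data=None):
+            error = None
+            for url in [self.url] + self.mirrors:
+                self.url = url
+                try:
+                    return await super().run(extra_data=extra_data)
+                except aiohttp.ClientResponseError as exc:
+                    if exc.status != 404:
+                        raise  # only fall back on 404; re-raise anything else
+                    error = exc  # this mirror lacked the file; try the next
+            raise error  # every mirror returned a 404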
+
+.. _adding-new-protocol-support:
+
+Adding New Protocol Support
+---------------------------
+
+To create a new protocol downloader implement a subclass of the
+:class:`~pulpcore.plugin.download.BaseDownloader`. See the docs on
+:class:`~pulpcore.plugin.download.BaseDownloader` for more information on the requirements.
+
+.. _downloader-factory:
+
+Downloader Factory
+------------------
+
+The DownloaderFactory constructs and configures a downloader for any given url. Specifically:
+
+1. Select the appropriate downloader from these supported schemes: `http`, `https` or `file`.
+
+2. Auto-configure the selected downloader with settings from a remote, including auth, ssl, and
+   proxy settings.
+
+The :meth:`~pulpcore.plugin.download.DownloaderFactory.build` method constructs one
+downloader for any given url.
+
+.. note::
+    Any :ref:`HttpDownloader <http-downloader>` objects produced by an instantiated
+    `DownloaderFactory` share an `aiohttp` session, which provides a connection pool, connection
+    reuse and keep-alives shared across all downloaders produced by a single factory.
+
+.. tip::
+    The :meth:`~pulpcore.plugin.download.DownloaderFactory.build` method accepts kwargs that
+    enable size or digest based validation, or the specification of a file-like object for the
+    data to be written into. See :meth:`~pulpcore.plugin.download.DownloaderFactory.build` for
+    more information.
+
+.. autoclass:: pulpcore.plugin.download.DownloaderFactory
+    :members:
+
+.. _http-downloader:
+
+HttpDownloader
+--------------
+
+This downloader is an asyncio-aware parallel downloader which is the default downloader produced
+by the :ref:`downloader-factory` for urls starting with `http://` or `https://`. It also supports
+synchronous downloading using :meth:`~pulpcore.plugin.download.HttpDownloader.fetch`.
+
+.. autoclass:: pulpcore.plugin.download.HttpDownloader
+    :members:
+    :inherited-members: fetch
+
+.. _file-downloader:
+
+FileDownloader
+--------------
+
+This downloader is an asyncio-aware parallel file reader which is the default downloader produced
+by the :ref:`downloader-factory` for urls starting with `file://`.
+
+.. autoclass:: pulpcore.plugin.download.FileDownloader
+    :members:
+    :inherited-members: fetch
+
+.. _base-downloader:
+
+BaseDownloader
+--------------
+
+This is an abstract downloader that is meant for subclassing. All downloaders are expected to be
+descendants of BaseDownloader.
+
+.. autoclass:: pulpcore.plugin.download.BaseDownloader
+    :members:
+
+
+.. _validation-exceptions:
+
+Validation Exceptions
+---------------------
+
+.. autoclass:: pulpcore.exceptions.DigestValidationError
+.. autoclass:: pulpcore.exceptions.SizeValidationError
+.. autoclass:: pulpcore.exceptions.ValidationError
diff --git a/docs2/api-reference/index.rst b/docs2/api-reference/index.rst
new file mode 100644
index 0000000000..5061489a10
--- /dev/null
+++ b/docs2/api-reference/index.rst
@@ -0,0 +1,31 @@
+Plugin API Reference
+--------------------
+
+The Plugin API is versioned separately from the Pulp Core and consists of everything importable
+within the :mod:`pulpcore.plugin` namespace. When writing plugins, care should be taken to only
+import Pulp Core components exposed in this namespace; importing from elsewhere within the Pulp
+Core (e.g. importing directly from ``pulpcore.app``, ``pulpcore.exceptions``, etc.) is
+unsupported, and not protected by the Pulp Plugin API's semantic versioning guarantees.
+
+.. warning::
+
+    Exactly what is versioned in the Plugin API, and how, has yet to be determined.
+    This documentation will be updated to clearly identify what guarantees come with the
+    semantic versioning of the Plugin API in the future. As our initial plugins are under
+    development prior to the release of Pulp 3.0, the Plugin API can be assumed to have
+    semantic major version 0, indicating that it is unstable and still being developed.
+
+.. toctree::
+
+    models
+    serializers
+    storage
+    viewsets
+    tasking
+    download
+    stages
+    profiling
+    content-app
+
+
+.. automodule:: pulpcore.plugin
+    :imported-members:
diff --git a/docs2/api-reference/models.rst b/docs2/api-reference/models.rst
new file mode 100644
index 0000000000..ac32d173aa
--- /dev/null
+++ b/docs2/api-reference/models.rst
@@ -0,0 +1,7 @@
+pulpcore.plugin.models
+======================
+
+All models documented here should be imported directly from the ``pulpcore.plugin.models``
+namespace.
+
+.. automodule:: pulpcore.plugin.models
+    :imported-members:
diff --git a/docs2/api-reference/profiling.rst b/docs2/api-reference/profiling.rst
new file mode 100644
index 0000000000..c1c6708d19
--- /dev/null
+++ b/docs2/api-reference/profiling.rst
@@ -0,0 +1,27 @@
+.. _stages-api-profiling-docs:
+
+Profiling the Stages API Performance
+====================================
+
+Pulp has a performance data collection feature that collects statistics about a Stages API
+pipeline as it runs. The data is recorded to a sqlite3 database in the `/var/lib/pulp/debug`
+folder.
+
+This can be enabled with the `PROFILE_STAGES_API = True` setting in the Pulp settings file. Once
+enabled, each task run writes a sqlite3 database named after the task's UUID into the
+`/var/lib/pulp/debug/` folder.
+
+Summarizing Performance Data
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+`django-admin` includes a command that displays the pipeline along with summary statistics. After
+generating a sqlite3 performance database, use the `stage-profile-summary` command like this::
+
+    $ django-admin stage-profile-summary /var/lib/pulp/debug/2dcaf53a-4b0f-4b42-82ea-d2d68f1786b0
+
+
+Profiling API Machinery
+^^^^^^^^^^^^^^^^^^^^^^^
+
+.. autoclass:: pulpcore.plugin.stages.ProfilingQueue
+
+.. automethod:: pulpcore.plugin.stages.create_profile_db_and_connection
diff --git a/docs2/api-reference/serializers.rst b/docs2/api-reference/serializers.rst
new file mode 100644
index 0000000000..88bf765dda
--- /dev/null
+++ b/docs2/api-reference/serializers.rst
@@ -0,0 +1,8 @@
+pulpcore.plugin.serializers
+===========================
+
+All serializers documented here should be imported directly from the
+``pulpcore.plugin.serializers`` namespace.
+
+.. automodule:: pulpcore.plugin.serializers
+    :imported-members:
diff --git a/docs2/api-reference/stages.rst b/docs2/api-reference/stages.rst
new file mode 100644
index 0000000000..025601a7fc
--- /dev/null
+++ b/docs2/api-reference/stages.rst
@@ -0,0 +1,81 @@
+.. _stages-api-docs:
+
+pulpcore.plugin.stages
+======================
+
+Plugin writers can use the Stages API to create a high-performance, download-and-saving pipeline
+to make writing sync code easier. There are several parts to the API:
+
+1. :ref:`declarative-version` is a generic pipeline useful for most synchronization use cases.
+2. The builtin Stages, including :ref:`artifact-stages`, :ref:`content-stages`, and
+   :ref:`content-association-stages`.
+3. The :ref:`stages-api`, which allows you to build custom stages and pipelines.
+
+
+.. _declarative-version:
+
+DeclarativeVersion
+^^^^^^^^^^^^^^^^^^
+
+.. autoclass:: pulpcore.plugin.stages.DeclarativeVersion
+
+.. autoclass:: pulpcore.plugin.stages.DeclarativeArtifact
+    :no-members:
+
+.. autoclass:: pulpcore.plugin.stages.DeclarativeContent
+    :no-members:
+    :members: get_or_create_future
+
+
+.. _stages-api:
+
+Stages API
+^^^^^^^^^^
+
+.. autofunction:: pulpcore.plugin.stages.create_pipeline
+
+.. autoclass:: pulpcore.plugin.stages.Stage
+    :special-members: __call__
+
+.. autoclass:: pulpcore.plugin.stages.EndStage
+    :special-members: __call__
+
+
+.. _artifact-stages:
+
+Artifact Related Stages
+^^^^^^^^^^^^^^^^^^^^^^^
+
+.. autoclass:: pulpcore.plugin.stages.ArtifactDownloader
+
+.. autoclass:: pulpcore.plugin.stages.ArtifactSaver
+
+.. autoclass:: pulpcore.plugin.stages.RemoteArtifactSaver
+
+.. autoclass:: pulpcore.plugin.stages.QueryExistingArtifacts
+
+
+.. _content-stages:
+
+Content Related Stages
+^^^^^^^^^^^^^^^^^^^^^^
+
+.. autoclass:: pulpcore.plugin.stages.ContentSaver
+    :private-members: _pre_save, _post_save
+
+.. autoclass:: pulpcore.plugin.stages.QueryExistingContents
+
+.. autoclass:: pulpcore.plugin.stages.ResolveContentFutures
+
+
+.. _content-association-stages:
+
+Content Association and Unassociation Stages
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. autoclass:: pulpcore.plugin.stages.RemoveDuplicates
+
+.. autoclass:: pulpcore.plugin.stages.ContentAssociation
+
+.. autoclass:: pulpcore.plugin.stages.ContentUnassociation
+
diff --git a/docs2/api-reference/storage.rst b/docs2/api-reference/storage.rst
new file mode 100644
index 0000000000..8dc3c01a58
--- /dev/null
+++ b/docs2/api-reference/storage.rst
@@ -0,0 +1,6 @@
+.. _storage-docs:
+
+pulpcore.plugin.storage
+=======================
+
+.. automodule:: pulpcore.plugin.storage
\ No newline at end of file
diff --git a/docs2/api-reference/tasking.rst b/docs2/api-reference/tasking.rst
new file mode 100644
index 0000000000..0f781d0d2d
--- /dev/null
+++ b/docs2/api-reference/tasking.rst
@@ -0,0 +1,7 @@
+pulpcore.plugin.tasking
+=======================
+
+Everything documented here should be imported directly from the ``pulpcore.plugin.tasking``
+namespace.
+
+.. automodule:: pulpcore.plugin.tasking
+    :imported-members:
diff --git a/docs2/api-reference/viewsets.rst b/docs2/api-reference/viewsets.rst
new file mode 100644
index 0000000000..ed3f03f66d
--- /dev/null
+++ b/docs2/api-reference/viewsets.rst
@@ -0,0 +1,8 @@
+pulpcore.plugin.viewsets
+========================
+
+All viewsets documented here should be imported directly from the ``pulpcore.plugin.viewsets``
+namespace.
+
+.. 
automodule:: pulpcore.plugin.viewsets + :imported-members: diff --git a/docs2/changes.rst b/docs2/changes.rst new file mode 100644 index 0000000000..6d9bc065dd --- /dev/null +++ b/docs2/changes.rst @@ -0,0 +1,5 @@ +.. _pulpcore-plugin-changes: + +.. include:: ../CHANGES.rst + +.. include:: ../HISTORY.rst diff --git a/docs2/conf.py b/docs2/conf.py new file mode 100644 index 0000000000..7fe59989bb --- /dev/null +++ b/docs2/conf.py @@ -0,0 +1,317 @@ +# This file is execfile()d with the current directory set to its containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +import pkg_resources +import sys +import os +from datetime import date + +try: + import sphinx_rtd_theme +except ImportError: + sphinx_rtd_theme = False + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +sys.path.insert(0, os.path.abspath('./extensions')) # noqa + +sys.path.insert(0, os.path.abspath('..')) # noqa + +import pulpcore.plugin + +# Set environment variable so Sphinx can bootstrap the Django app +os.environ["DJANGO_SETTINGS_MODULE"] = "pulpcore.app.settings" + +import django +django.setup() + +# -- General configuration ----------------------------------------------------- + +# If your documentation needs a minimal Sphinx version, state it here. +#needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be extensions +# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. +extensions = ['sphinx.ext.extlinks', 'sphinx.ext.autodoc', 'sphinx.ext.autosummary', + 'napoleon_django', 'sphinx.ext.napoleon', 'sphinxcontrib.openapi'] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix of source filenames. +source_suffix = '.rst' + +# The encoding of source files. +#source_encoding = 'utf-8-sig' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = u'Pulp Plugin API' + +# Set copyright to current year +copyright = u'2012-{0}, Pulp Team'.format(date.today().year) + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = pulpcore.plugin.__version__ +# The full version, including alpha/beta/rc tags. +release = pulpcore.plugin.__version__ + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +#language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +#today = '' +# Else, today_fmt is used as the format for a strftime call. +#today_fmt = '%B %d, %Y' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +exclude_patterns = ['_build'] + +# The reST default role (used for this markup: `text`) to use for all documents. +#default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +#add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. 
function::). +#add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +#show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# A list of ignored prefixes for module index sorting. +#modindex_common_prefix = [] + +# Set autodoc default options +# Document all module/class/etc members, even if they have no docstring. +# Show class inheritance, and group class members together by type (attr, method, etc) +autodoc_default_flags = ['members', 'undoc-members'] +autodoc_member_order = 'groupwise' +autoclass_content = 'both' + +# -- Options for HTML output --------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +html_theme = 'sphinx_rtd_theme' if sphinx_rtd_theme else 'default' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +#html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] if sphinx_rtd_theme else [] + +# The name for this set of Sphinx documents. If None, it defaults to +# " v documentation". +#html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. +#html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +#html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +#html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['static'] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +#html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +#html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +#html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +#html_additional_pages = {} + +# If false, no module index is generated. +#html_domain_indices = True + +# If false, no index is generated. +#html_use_index = True + +# If true, the index is split into individual pages for each letter. +#html_split_index = False + +# If true, links to the reST sources are added to the pages. +#html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +#html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. +#html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +#html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +#html_file_suffix = None + +# Output file base name for HTML help builder. 
+htmlhelp_basename = 'PulpDocs' + + +# -- Options for LaTeX output -------------------------------------------------- + +latex_elements = { +# The paper size ('letterpaper' or 'a4paper'). +#'papersize': 'letterpaper', + +# The font size ('10pt', '11pt' or '12pt'). +#'pointsize': '10pt', + +# Additional stuff for the LaTeX preamble. +#'preamble': '', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, author, documentclass [howto/manual]). +latex_documents = [ + ('index', 'PulpDocs.tex', u'Pulp Documentation', + u'Pulp Team', 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +#latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +#latex_use_parts = False + +# If true, show page references after internal links. +#latex_show_pagerefs = False + +# If true, show URL addresses after external links. +#latex_show_urls = False + +# Documents to append as an appendix to all manuals. +#latex_appendices = [] + +# If false, no module index is generated. +#latex_domain_indices = True + + +# -- Options for manual page output -------------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + ('user-guide/admin-client/index', 'pulp-admin', u'Pulp Documentation', [u'Pulp Team'], 1), + ('user-guide/consumer-client/index', 'pulp-consumer', u'Pulp Documentation', [u'Pulp Team'], 1) +] + +# If true, show URL addresses after external links. +#man_show_urls = False + + +# -- Options for Texinfo output ------------------------------------------------ + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + ('index', 'PulpDocs', u'Pulp Documentation', + u'Pulp Team', 'PulpDocs', 'One line description of project.', + 'Miscellaneous'), +] + +# Documents to append as an appendix to all manuals. +#texinfo_appendices = [] + +# If false, no module index is generated. +#texinfo_domain_indices = True + +# How to display URL addresses: 'footnote', 'no', or 'inline'. 
+#texinfo_show_urls = 'footnote' + +extlinks = {'redmine': ('https://pulp.plan.io/issues/%s', '#'), + 'fixedbugs_pulp': ('https://pulp.plan.io/projects/pulp/issues?c%%5B%%5D=tracker&c%%5B%' + '%5D=status&c%%5B%%5D=priority&c%%5B%%5D=cf_5&c%%5B%%5D=subject&c%%' + '5B%%5D=author&c%%5B%%5D=assigned_to&c%%5B%%5D=cf_3&f%%5B%%5D=cf_4&' + 'f%%5B%%5D=tracker_id&f%%5B%%5D=&group_by=&op%%5Bcf_4%%5D=%%3D&op%%' + '5Btracker_id%%5D=%%3D&set_filter=1&sort=priority%%3Adesc%%2Ccf_5%%' + '3Adesc%%2Cid%%3Adesc&utf8=%%E2%%9C%%93&v%%5Bcf_4%%5D%%5B%%5D=%s&v%' + '%5Btracker_id%%5D%%5B%%5D=1', 'bugs fixed in '), + 'fixedbugs_pulp_rpm': ('https://pulp.plan.io/projects/pulp_rpm/issues?c%%5B%%5D=tracke' + 'r&c%%5B%%5D=status&c%%5B%%5D=priority&c%%5B%%5D=cf_5&c%%5B%%5D' + '=subject&c%%5B%%5D=author&c%%5B%%5D=assigned_to&c%%5B%%5D=cf_3' + '&f%%5B%%5D=cf_4&f%%5B%%5D=tracker_id&f%%5B%%5D=&group_by=&op%%' + '5Bcf_4%%5D=%%3D&op%%5Btracker_id%%5D=%%3D&set_filter=1&sort=pr' + 'iority%%3Adesc%%2Ccf_5%%3Adesc%%2Cstatus&utf8=%%E2%%9C%%93&v%%' + '5Bcf_4%%5D%%5B%%5D=%s&v%%5Btracker_id%%5D%%5B%%5D=1', + 'bugs fixed in '), + 'fixedbugs_pulp_puppet': ('https://pulp.plan.io/projects/pulp_puppet/issues?utf8=%%E2%' + '%9C%%93&set_filter=1&f%%5B%%5D=cf_4&op%%5Bcf_4%%5D=%%3D&v%%' + '5Bcf_4%%5D%%5B%%5D=%s&f%%5B%%5D=tracker_id&op%%5Btracker_id' + '%%5D=%%3D&v%%5Btracker_id%%5D%%5B%%5D=1&f%%5B%%5D=&c%%5B%%5' + 'D=tracker&c%%5B%%5D=status&c%%5B%%5D=priority&c%%5B%%5D=cf_' + '5&c%%5B%%5D=subject&c%%5B%%5D=author&c%%5B%%5D=assigned_to&' + 'c%%5B%%5D=cf_3&group_by=', 'bugs fixed in '), + 'fixedbugs_pulp_python': ('https://pulp.plan.io/projects/pulp_python/issues?c%%5B%%5D=' + 'tracker&c%%5B%%5D=status&c%%5B%%5D=priority&c%%5B%%5D=cf_5&' + 'c%%5B%%5D=subject&c%%5B%%5D=author&c%%5B%%5D=assigned_to&c%' + '%5B%%5D=cf_3&f%%5B%%5D=cf_11&f%%5B%%5D=tracker_id&f%%5B%%5D' + '=&group_by=&op%%5Bcf_11%%5D=%%3D&op%%5Btracker_id%%5D=%%3D&' + 'set_filter=1&sort=priority%%3Adesc%%2Ccf_5%%3Adesc%%2Cstatu' + 's&utf8=%%E2%%9C%%93&v%%5Bcf_11%%5D%%5B%%5D=%s&v%%5Btracker_' + 'id%%5D%%5B%%5D=1', 'bugs fixed in '), + 'fixedbugs_pulp_docker': ('https://pulp.plan.io/projects/pulp_docker/issues?utf8=%%E2%' + '%9C%%93&set_filter=1&f%%5B%%5D=cf_12&op%%5Bcf_12%%5D=%%3D&v' + '%%5Bcf_12%%5D%%5B%%5D=%s&f%%5B%%5D=tracker_id&op%%5Btracker' + '_id%%5D=%%3D&v%%5Btracker_id%%5D%%5B%%5D=1&f%%5B%%5D=&c%%5B' + '%%5D=tracker&c%%5B%%5D=status&c%%5B%%5D=priority&c%%5B%%5D=' + 'cf_5&c%%5B%%5D=subject&c%%5B%%5D=author&c%%5B%%5D=assigned_' + 'to&c%%5B%%5D=cf_3&group_by=', 'bugs fixed in '), + 'fixedbugs_pulp_ostree': ('https://pulp.plan.io/projects/pulp_ostree/issues?utf8=%%E2%' + '%9C%%93&set_filter=1&f%%5B%%5D=cf_17&op%%5Bcf_17%%5D=%%3D&v' + '%%5Bcf_17%%5D%%5B%%5D=%s&f%%5B%%5D=tracker_id&op%%5Btracker' + '_id%%5D=%%3D&v%%5Btracker_id%%5D%%5B%%5D=1&f%%5B%%5D=&c%%5B' + '%%5D=tracker&c%%5B%%5D=status&c%%5B%%5D=priority&c%%5B%%5D=' + 'cf_5&c%%5B%%5D=subject&c%%5B%%5D=author&c%%5B%%5D=assigned_' + 'to&c%%5B%%5D=cf_3&group_by=', 'bugs fixed in '),} + +# napoleon uses .. 
attribute by default, but :ivar: is more succinct and looks better,
+# particularly on classes with a lot of attributes, like django models and related objects
+napoleon_use_ivar = True
+
+# set primary domain to python so we don't have to include :py: in xref links
+default_domain = 'py'
diff --git a/docs2/contributing.rst b/docs2/contributing.rst
new file mode 100644
index 0000000000..5ca221434e
--- /dev/null
+++ b/docs2/contributing.rst
@@ -0,0 +1,35 @@
+Contributing
+============
+
+To contribute to the ``pulpcore-plugin`` package, follow this process:
+
+1. Clone the GitHub repo
+2. Make a change
+3. Make sure all tests pass
+4. Add a file to the CHANGES folder (see :ref:`changelog-update`)
+5. Commit changes to your own ``pulpcore-plugin`` clone
+6. Make a pull request from the GitHub page for your clone against the master branch
+
+
+.. _changelog-update:
+
+Changelog update
+****************
+
+The CHANGES.rst file is managed using the `towncrier tool `_
+and all non-trivial changes must be accompanied by a news entry.
+
+To add an entry to the news file, you first need an issue in pulp.plan.io describing the change
+you want to make. Once you have an issue, take its number and create a file inside of the
+``CHANGES/`` directory named after that issue number with an extension of .feature, .bugfix,
+.doc, .removal, or .misc. So if your issue is 3543 and it fixes a bug, you would create the file
+``CHANGES/3543.bugfix``.
+
+PRs can span multiple categories by creating multiple files (for instance, if you added a feature
+and deprecated an old feature at the same time, you would create CHANGES/NNNN.feature and
+CHANGES/NNNN.removal). Likewise if a PR touches multiple issues/PRs you may create a file for each
+of them with the exact same contents and Towncrier will deduplicate them.
+
+The contents of this file are reStructuredText formatted text that will be used as the content of
+the news file entry. You do not need to reference the issue or PR numbers here as towncrier will
+automatically add a reference to all of the affected issues when rendering the news file.
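+
+For example, a one-line bugfix entry for the issue 3543 mentioned above could be created like
+this (the wording of the entry is illustrative only)::
+
+    $ echo "Fixed the bug described in the issue title." > CHANGES/3543.bugfix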
diff --git a/docs2/extensions/napoleon_django/__init__.py b/docs2/extensions/napoleon_django/__init__.py
new file mode 100644
index 0000000000..8d8c493895
--- /dev/null
+++ b/docs2/extensions/napoleon_django/__init__.py
@@ -0,0 +1,85 @@
+# -*- coding: utf-8 -*-
+"""
+    napoleon-django
+    ~~~~~~~~~~~~~~~
+
+    An extension to sphinx.ext.napoleon's Google-style Docstring
+    with support for custom django blocks "Fields" and "Relations".
+
+    :copyright: Copyright 2007-2016 by the Sphinx team, see AUTHORS.
+    :license: BSD, see LICENSE for details.
+"""
+
+import sphinx
+from napoleon_django.docstring import DjangoGoogleDocstring
+
+
+class Config(object):
+    pass
+
+
+def setup(app):
+    """Sphinx extension setup function.
+
+    When the extension is loaded, Sphinx imports this module and executes
+    the ``setup()`` function, which in turn notifies Sphinx of everything
+    the extension offers.
+
+    Parameters
+    ----------
+    app : sphinx.application.Sphinx
+        Application object representing the Sphinx process
+
+    See Also
+    --------
+    `The Sphinx documentation on Extensions
+    `_
+
+    `The Extension Tutorial `_
+
+    `The Extension API `_
+
+    """
+    from sphinx.application import Sphinx
+    if not isinstance(app, Sphinx):
+        return  # probably called by tests
+
+    app.connect('autodoc-process-docstring', _process_docstring)
+
+    return {'version': sphinx.__display_version__, 'parallel_read_safe': True}
+
+
+def _process_docstring(app, what, name, obj, options, lines):
+    """Process the docstring for a given python object.
+
+    Called when autodoc has read and processed a docstring. `lines` is a list
+    of docstring lines that `_process_docstring` modifies in place to change
+    what Sphinx outputs.
+
+    Parameters
+    ----------
+    app : sphinx.application.Sphinx
+        Application object representing the Sphinx process.
+    what : str
+        A string specifying the type of the object to which the docstring
+        belongs. Valid values: "module", "class", "exception", "function",
+        "method", "attribute".
+    name : str
+        The fully qualified name of the object.
+    obj : module, class, exception, function, method, or attribute
+        The object to which the docstring belongs.
+    options : sphinx.ext.autodoc.Options
+        The options given to the directive: an object with attributes
+        inherited_members, undoc_members, show_inheritance and noindex that
+        are True if the flag option of same name was given to the auto
+        directive.
+    lines : list of str
+        The lines of the docstring, see above.
+
+    .. note:: `lines` is modified *in place*
+
+    """
+    result_lines = lines
+    docstring = DjangoGoogleDocstring(result_lines, app.config, app, what, name, obj, options)
+    result_lines = docstring.lines()
+    lines[:] = result_lines[:]
diff --git a/docs2/extensions/napoleon_django/docstring.py b/docs2/extensions/napoleon_django/docstring.py
new file mode 100644
index 0000000000..85da9628ba
--- /dev/null
+++ b/docs2/extensions/napoleon_django/docstring.py
@@ -0,0 +1,116 @@
+# -*- coding: utf-8 -*-
+"""
+    sphinx.ext.napoleon.docstring
+    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+
+    Classes for docstring parsing and formatting.
+
+
+    :copyright: Copyright 2007-2016 by the Sphinx team, see AUTHORS.
+    :license: BSD, see LICENSE for details.
+"""
+
+from sphinx.domains.python import PyObject, PyTypedField
+from sphinx.ext.napoleon.docstring import GoogleDocstring
+
+# Extend the python sphinx domain with support for :field: and :relation: directives,
+# as well as their related type directives. These then get used by DjangoGoogleDocstring.
+# Using the 'data' role for the :field: and :relation: directives prevents sphinx from trying
+# to cross-reference them. This role is intended to be used at the module level, but renders
+# correctly when used in Model definitions and prevents warnings from sphinx about duplicate
+# cross-reference targets on something that shouldn't be cross-referenced.
+PyObject.doc_field_types.extend([
+    PyTypedField('field', label=('Fields'), rolename='data',
+                 names=('field',), typerolename='obj', typenames=('fieldtype',),
+                 can_collapse=True),
+    PyTypedField('relation', label=('Relations'), rolename='data',
+                 names=('relation',), typerolename='obj', typenames=('reltype',),
+                 can_collapse=True),
+])
+
+# Similar to the extensions above, but this rewrites the 'variable' type used for class attrs to
+# use the data rolename, which prevents sphinx from attempting to cross-reference class attrs.
+for field in PyObject.doc_field_types:
+    if field.name == 'variable':
+        field.rolename = 'data'
+
+
+class DjangoGoogleDocstring(GoogleDocstring):
+    """Add support for Django-specific sections to napoleon's GoogleDocstring parser.
+
+    Parameters
+    ----------
+    docstring : str or List[str]
+        The docstring to parse, given either as a string or split into
+        individual lines.
+    config : Optional[sphinx.ext.napoleon.Config or sphinx.config.Config]
+        The configuration settings to use. If not given, defaults to the
+        config object on `app`; or if `app` is not given, defaults to a new
+        `sphinx.ext.napoleon.Config` object.
+
+    See Also
+    --------
+    :class:`sphinx.ext.napoleon.Config`
+
+    Other Parameters
+    ----------------
+    app : Optional[sphinx.application.Sphinx]
+        Application object representing the Sphinx process.
+    what : Optional[str]
+        A string specifying the type of the object to which the docstring
+        belongs. Valid values: "module", "class", "exception", "function",
+        "method", "attribute".
+    name : Optional[str]
+        The fully qualified name of the object.
+    obj : module, class, exception, function, method, or attribute
+        The object to which the docstring belongs.
+    options : Optional[sphinx.ext.autodoc.Options]
+        The options given to the directive: an object with attributes
+        inherited_members, undoc_members, show_inheritance and noindex that
+        are True if the flag option of same name was given to the auto
+        directive.
+
+    """
+    def __init__(self, docstring, config=None, app=None, what='', name='',
+                 obj=None, options=None):
+        # super's __init__ calls _parse, so we need to wrap it to make sure the custom
+        # django-ness is added to the class before _parse runs. Thus, self._initialized.
+        # See _parse below for how this attr gets used to delay parsing.
+        self._initialized = False
+        super().__init__(docstring, config, app, what, name, obj, options)
+        self._sections.update({
+            'fields': self._parse_fields_section,
+            'relations': self._parse_relations_section,
+        })
+        self._initialized = True
+        self._parse()
+
+    def _parse(self):
+        if self._initialized:
+            return super()._parse()
+
+    def _parse_fields_section(self, section):
+        return self._parse_django_section(section, 'field')
+
+    def _parse_relations_section(self, section):
+        return self._parse_django_section(section, 'relation')
+
+    def _parse_django_section(self, section, directive):
+        # a "django" directive is either field or relation. Use the correct type definition
+        # based on the value of 'directive' to generate a correctly cross-referenced type link.
+        # directive and typedirective need to match the name and typename of the custom
+        # PyTypedFields added to the python sphinx domain above.
+        if directive == 'field':
+            typedirective = 'fieldtype'
+        else:
+            typedirective = 'reltype'
+
+        lines = []
+        for _name, _type, _desc in self._consume_fields():
+            field = ':%s %s: ' % (directive, _name)
+            lines.extend(self._format_block(field, _desc))
+            if _type:
+                lines.append(':%s %s: %s' % (typedirective, _name, _type))
+        lines.append('')
+        return lines
diff --git a/docs2/index.rst b/docs2/index.rst
new file mode 100644
index 0000000000..2f015da6f4
--- /dev/null
+++ b/docs2/index.rst
@@ -0,0 +1,28 @@
+Plugin API
+==========
+
+The Pulp Plugin API is versioned separately from Pulp Core. It is governed by `semantic
+versioning `_. Backwards incompatible changes may be made until the
+Plugin API reaches stability with v1.0.
+
+Plugin Writer's Guide
+---------------------
+.. toctree::
+
+    plugin-writer/index
+
+Plugin Writer's Reference
+-------------------------
+.. toctree::
+    :maxdepth: 1
+
+    reference/index
+
+Plugin API Reference
+--------------------
+.. toctree::
+
+    api-reference/index
+
+
+.. toctree::
+
+    changes
+    contributing
diff --git a/docs2/plugin-writer/concepts/index.rst b/docs2/plugin-writer/concepts/index.rst
new file mode 100644
index 0000000000..ec3d99e6ba
--- /dev/null
+++ b/docs2/plugin-writer/concepts/index.rst
@@ -0,0 +1,115 @@
+.. _plugin-concepts:
+
+Plugin Concepts
+===============
+
+Like the Pulp Core itself, all Pulp Plugins are Django Applications, and could be created like any
+other Django app with ``django-admin startapp <your_plugin>``. However, instead of writing all of
+the boilerplate yourself, it is recommended that you start your plugin by utilizing the `Plugin
+Template `_. This guide will assume that you have used
+the plugin_template, but if you are interested in the details of what it provides you, please see
+:ref:`plugin-django-application` for more information on how plugins are "discovered" and
+connected to the ``pulpcore`` Django app. Additional information is given as inline comments in
+the template.
+
+
+Plugin API Usage
+----------------
+Plugin Applications interact with pulpcore through two high level interfaces, **subclassing** and
+adding **tasks**. Additionally, plugins that need to implement dynamic web APIs can
+optionally provide their own Django views. See our :ref:`live-apis` page for more information.
+
+
+.. _subclassing-general:
+
+Subclassing
+-----------
+
+Pulp Core and each plugin utilize `Django `_ and the `Django Rest
+Framework `_. Each plugin provides
+:ref:`subclassing-models`, :ref:`subclassing-serializers`, and :ref:`subclassing-viewsets`. For
+each object that a plugin writer needs to make, the ``pulpcore-plugin`` API provides base classes.
+These base classes handle most of the boilerplate code, resulting in CRUD for each object out of
+the box.
+
+.. toctree::
+    :maxdepth: 2
+
+    subclassing/models
+    subclassing/serializers
+    subclassing/viewsets
+
+
+.. _writing-tasks:
+
+Tasks
+-----
+
+Any action that can run for a long time should be an asynchronous task. Plugin writers do not
+need to understand the internals of the pulpcore tasking system; workers automatically execute
+tasks from RQ, including tasks deployed by plugins.
+
+**Reservations**
+
+The tasking system adds a concept called **reservations** which ensures that actions that act on
+the same resources are not run at the same time. To ensure data correctness, any action that
+alters the content of a repository (thus creating a new version) must be run asynchronously,
+locking on the repository and any other models which cannot change during the action. For
+example, sync tasks must be asynchronous and lock on the repository and the remote. Publish
+should lock on the repository version being published as well as the publisher.
+
+**Deploying Tasks**
+
+Tasks are deployed from Views or Viewsets, please see :ref:`kick-off-tasks`.
+
+.. toctree::
+    :maxdepth: 2
+
+    tasks/add-remove
+    tasks/publish
+    tasks/export
+
+
+Sync Pipeline
+-------------
+
+.. toctree::
+    :maxdepth: 2
+
+    sync_pipeline/sync_pipeline
+
+
+Content Protection
+------------------
+
+Users can configure a ``ContentGuard`` to protect a ``Distribution`` on their own, but some
+plugins want to offer built-in content protection features. For example, pulp_docker may want to
+allow a user to download only the docker images they have rights to, based on some permissions
+system pulp_docker could provide.
+
+For more information, see the :ref:`ContentGuard Usage by Plugin Writers
+` documentation.
+
+
+Plugin Settings
+---------------
+
+Plugins can define settings by creating a ``<your plugin>.app.settings`` module containing
+settings as you would define in the Django Settings File itself. ``pulpcore`` ships the actual
+settings.py file, so settings cannot be added directly as with most Django deployments. Instead,
+as each plugin is loaded, pulpcore looks for the ``<your plugin>.app.settings`` module and uses
+``dynaconf`` to overlay the settings on top of ``pulpcore``'s settings and user provided
+settings.
+
+Settings are parsed in the following order with later settings overwriting earlier ones:
+
+1. Settings from ``/etc/pulp/settings.py``.
+2. Settings from ``pulpcore.app.settings`` (the pulpcore provided settings defaults).
+3. Plugin settings from ``<your plugin>.app.settings``.
+
+In some cases a setting should not overwrite an existing setting, but instead add to it. For
+example, consider adding a custom log handler or logger to the `LOGGING `_
+settings. You don't want to fully overwrite it, but instead add or overwrite only a sub-portion.
+``dynaconf`` provides the `dynaconf_merge feature `_ which is for merging settings instead of
+overwriting them. For example, pulp_ansible makes use of this `here `_.
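+
+As a minimal sketch (the module path, setting name, and logger name are illustrative; substitute
+your plugin's own), a plugin settings module might look like this:
+
+.. code-block:: python
+
+    # my_plugin/app/settings.py -- hypothetical plugin settings module
+
+    # A plain setting, overlaid on top of pulpcore's settings by dynaconf.
+    MY_PLUGIN_SYNC_RETRIES = 3
+
+    # Add a logger without overwriting pulpcore's LOGGING setting by asking
+    # dynaconf to merge this dict into the existing one.
+    LOGGING = {
+        "loggers": {"my_plugin": {"level": "INFO"}},
+        "dynaconf_merge": True,
+    }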
diff --git a/docs2/plugin-writer/concepts/subclassing/models.rst b/docs2/plugin-writer/concepts/subclassing/models.rst
new file mode 100644
index 0000000000..3537d5f2c7
--- /dev/null
+++ b/docs2/plugin-writer/concepts/subclassing/models.rst
@@ -0,0 +1,132 @@
+.. _subclassing-models:
+
+Models
+======
+
+For the most part, models provided by plugin writers are just regular `Django Models
+`_.
+
+.. note::
+    One slight variation is that the validation is primarily handled in the Django Rest Framework
+    Serializer. ``.clean()`` is not called.
+
+Most plugins will implement:
+ * model(s) for the specific content type(s) used in the plugin, should be subclassed from Content model
+ * model(s) for the plugin specific remote(s), should be subclassed from Remote model
+ * model(s) for the plugin specific publisher(s), should be subclassed from Publisher model
+
+
+Adding Model Fields
+~~~~~~~~~~~~~~~~~~~
+
+Each subclassed Model will typically store attributes that are specific to the content type.
+These attributes need to be added to the model as ``fields``. You can use any of Django's field
+types for your fields. See the `Django field documentation
+`_ for more in-depth information on
+using these fields.
+
+.. note::
+    One of Pulp's goals is to work correctly on multiple databases. It is probably best to avoid
+    fields that are not database agnostic. See Database Gotchas below.
+
+.. note::
+    It is required to declare the ``default_related_name``.
+
+The TYPE class attribute is used for filtering purposes.
+
+.. code-block:: python
+
+    class FileContent(Content):
+        """
+        The "file" content type.
+
+        Fields:
+            digest (str): The SHA256 HEX digest.
+        """
+        TYPE = 'file'
+        digest = models.TextField(null=False)
+
+        class Meta:
+            default_related_name = "%(app_label)s_%(model_name)s"
+
+
+Here we create a new field using Django's ``TextField``. After adding/modifying a model, you
+can make and run database migrations with:
+
+.. code-block:: bash
+
+    django-admin makemigrations
+    django-admin migrate
+
+If you recognize this syntax, it is because ``django-admin`` is used with the same interface as
+in any Django project, but it has additional commands.
+
+
+Uniqueness
+~~~~~~~~~~
+
+Model uniqueness (which will also be used as the natural key) is defined by an inner ``class
+Meta``. Pulp Core enforces uniqueness constraints at the database level.
+
+Adding to the simplified ``FileContent`` above:
+
+.. code-block:: python
+
+    class FileContent(Content):
+        """
+        The "file" content type.
+
+        Content of this type represents a single file uniquely
+        identified by path and SHA256 digest.
+
+        Fields:
+            digest (str): The SHA256 HEX digest.
+        """
+
+        TYPE = 'file'
+
+        digest = models.TextField(null=False)
+
+        class Meta:
+            # Note the comma, this must be a tuple.
+            unique_together = ('digest',)
+            default_related_name = "%(app_label)s_%(model_name)s"
+
+In this example the Content's uniqueness is enforced on a single field, ``digest``. For
+multi-field uniqueness, simply add other fields.
+
+.. code-block:: python
+
+    class FileContent(Content):
+        """
+        The "file" content type.
+
+        Content of this type represents a single file uniquely
+        identified by path and SHA256 digest.
+
+        Fields:
+            relative_path (str): The file relative path.
+            digest (str): The SHA256 HEX digest.
+        """
+
+        TYPE = 'file'
+
+        relative_path = models.TextField(null=False)
+        digest = models.TextField(null=False)
+
+        class Meta:
+            default_related_name = "%(app_label)s_%(model_name)s"
+            unique_together = (
+                'relative_path',
+                'digest',
+            )
+
+
+The example above ensures that content is unique on ``relative_path`` and ``digest`` together.
+
+ForeignKey Gotchas
+~~~~~~~~~~~~~~~~~~
+
+The orphan cleanup operation performs mass-deletion of Content units that are not associated with
+any repository. Any ForeignKey relationships that refer to Content with a deletion relationship
+of ``PROTECT`` will cause Orphan cleanup errors like::
+
+    django.db.models.deletion.ProtectedError: ("Cannot delete some instances of model 'MyContent'
+    because they are referenced through a protected foreign key: 'MyOtherContent.mycontent'"
diff --git a/docs2/plugin-writer/concepts/subclassing/serializers.rst b/docs2/plugin-writer/concepts/subclassing/serializers.rst
new file mode 100644
index 0000000000..a3829c66b9
--- /dev/null
+++ b/docs2/plugin-writer/concepts/subclassing/serializers.rst
@@ -0,0 +1,47 @@
+.. _subclassing-serializers:
+
+Serializers
+===========
+
+`Django Rest Framework Serializers `_
+work "both ways", translating user input to Python objects, and translating Python objects to
+user-facing responses. Generally, plugins will create a serializer field for each field on their
+model that should be user-facing.
+
+Most plugins will implement:
+ * serializer(s) for plugin specific content type(s), should be subclassed from one of
+   NoArtifactContentSerializer, SingleArtifactContentSerializer, or
+   MultipleArtifactContentSerializer, depending on the properties of the content type(s)
+ * serializer(s) for plugin specific remote(s), should be subclassed from RemoteSerializer
+ * serializer(s) for plugin specific publisher(s), should be subclassed from PublisherSerializer
+
+Adding Fields
+-------------
+
+For each field on the corresponding model that should be readable or writable by the user, the
+serializer needs to add that field as well.
+
+
+.. code-block:: python
+
+    class FileContentSerializer(SingleArtifactContentSerializer):
+        """
+        Serializer for File Content.
+ """ + + relative_path = serializers.CharField( + help_text="Relative location of the file within the repository" + ) + + class Meta: + fields = SingleArtifactContentSerializer.Meta.fields + ('relative_path',) + model = FileContent + +Help Text +^^^^^^^^^ + +The REST APIs of Pulp Core and each plugin are automatically documented using swagger. Each field's +documentation is generated using the ``help_text`` set on the serializer field, so please be sure +to set this for every field. + + diff --git a/docs2/plugin-writer/concepts/subclassing/viewsets.rst b/docs2/plugin-writer/concepts/subclassing/viewsets.rst new file mode 100644 index 0000000000..9006cf64af --- /dev/null +++ b/docs2/plugin-writer/concepts/subclassing/viewsets.rst @@ -0,0 +1,118 @@ +.. _subclassing-viewsets: + +Viewsets +======== + +Each `Django Rest Framework Viewset `_ +is a collection of views that provides ``create``, ``update``, ``retrieve``, ``list``, and +``delete``, which coresponds to http ``POST``, ``PATCH``, ``GET``, ``GET``, ``DELETE``, +respectively. Some base classes will not include all of the views if they are inappropriate. For +instance, the ``ContentViewset`` does not include ``update`` because Content Units are immutable in +Pulp 3 (to support Repository Versions). + +Most Plugins will implement: + * viewset(s) for plugin specific content type(s), should be subclassed from ``ContentViewSet``, + ``ReadOnlyContentViewSet`` or ``SingleArtifactContentUploadViewSet`` + * viewset(s) for plugin specific remote(s), should be subclassed from ``RemoteViewSet`` + * viewset(s) for plugin specific publisher(s), should be subclassed from ``PublisherViewSet`` + + +Endpoint Namespacing +-------------------- + +Automatically, each "Detail" class is namespaced by the ``app_label`` set in the +``PulpPluginAppConfig`` (this is set by the ``plugin_template``). + +For example, a ContentViewSet for ``app_label`` "foobar" like this: + +.. code-block:: python + + class PackageViewSet(ContentViewSet): + endpoint_name = 'packages' + +The above example will create set of CRUD endpoints for Packages at +``pulp/api/v3/content/foobar/packages/`` and +``pulp/api/v3/content/foobar/packages//`` + + +Detail Routes (Extra Endpoints) +------------------------------- + +In addition to the CRUD endpoints, a Viewset can also add a custom endpoint. For example: + + +.. code-block:: python + + class PackageViewSet(ContentViewSet): + endpoint_name = 'packages' + + @decorators.detail_route(methods=('get',)) + def hello(self, request): + return Response("Hey!") + +The above example will create a simple nested endpoint at +``pulp/api/v3/content/foobar/packages/hello/`` + + +.. _kick-off-tasks: + +Kick off Tasks +^^^^^^^^^^^^^^ + +Some endpoints may need to deploy tasks to the tasking system. The following is an example of how +this is accomplished. + +.. code-block:: python + + # We recommend using POST for any endpoints that kick off task. + @detail_route(methods=('post',), serializer_class=RepositorySyncURLSerializer) + # `pk` is a part of the URL + def sync(self, request, pk): + """ + Synchronizes a repository. + The ``repository`` field has to be provided. + """ + remote = self.get_object() + serializer = RepositorySyncURLSerializer(data=request.data, context={'request': request}) + # This is how non-crud validation is accomplished + serializer.is_valid(raise_exception=True) + repository = serializer.validated_data.get('repository') + mirror = serializer.validated_data.get('mirror', False) + + # This is how tasks are kicked off. 
+        result = enqueue_with_reservation(
+            tasks.synchronize,
+            [repository, remote],
+            kwargs={
+                'remote_pk': remote.pk,
+                'repository_pk': repository.pk,
+                'mirror': mirror
+            }
+        )
+        # Since tasks are asynchronous, we return a 202
+        return OperationPostponedResponse(result, request)
+
+See :func:`~pulpcore.plugin.tasking.enqueue_with_reservation` for more details.
+
+
+Content Upload ViewSet
+^^^^^^^^^^^^^^^^^^^^^^
+
+For single-file content types, there is the special ``SingleArtifactContentUploadViewSet`` to
+derive from, which allows file uploads in the create method instead of referencing an existing
+Artifact. It also allows specifying a ``Repository``, to create a new ``RepositoryVersion``
+containing the newly created content. Content creation is then offloaded into a task.
+To use that ViewSet, the serializer for the content type should inherit from
+``SingleArtifactContentUploadSerializer``. By overwriting the ``deferred_validate`` method
+instead of ``validate``, this serializer can do detailed analysis of the given or uploaded Artifact
+in order to fill database fields of the content type like "name", "version", etc. This part of
+validation is only called in the task context.
+
+If any additional context needs to be passed from the ViewSet to the creation task, the
+``get_deferred_context`` method of the ViewSet may be overwritten. Its return value will then be
+available as ``self.context`` in the Serializer.
+
+.. note::
+
+   Context passed from the ViewSet to the Task must be easily serializable, i.e. one cannot
+   return the request from ``get_deferred_context``.
diff --git a/docs2/plugin-writer/concepts/sync_pipeline/sync_pipeline.rst b/docs2/plugin-writer/concepts/sync_pipeline/sync_pipeline.rst new file mode 100644 index 0000000000..fe27bd6490 --- /dev/null +++ b/docs2/plugin-writer/concepts/sync_pipeline/sync_pipeline.rst @@ -0,0 +1,67 @@
+.. _stages-concept-docs:
+
+Synchronizing Repositories with the async-Pipeline
+==================================================
+
+To accomplish the steps outlined in :ref:`sync-docs` in an efficient way, Pulp provides a
+high-level API to construct a pipeline of stages. Those stages work in parallel, like an assembly
+line, using Python's `async` feature in combination with the `asyncio` library. Each stage takes
+designated content units from an incoming queue of type :class:`asyncio.Queue` and performs an
+individual task on them before passing them to the outgoing queue that is connected to the next
+stage.
+
+The anatomy of a stage is that it inherits from :class:`pulpcore.plugin.stages.Stage` and
+overwrites its asynchronous callback :meth:`run`.
+In :meth:`run` it can retrieve incoming declarative content individually via the asynchronous
+iterator :meth:`self.items` or in batches via :meth:`self.batches`.
+It can pass on declarative content with :meth:`self.put`.
+
+The sync pipeline is headed by a `first_stage`, which is supposed to download upstream metadata
+and iterate over all upstream content references. For each such reference, it creates a
+:class:`pulpcore.plugin.stages.DeclarativeContent` that contains a prefilled but unsaved instance
+of a subclass of :class:`pulpcore.plugin.content.Content`, as well as a list of
+:class:`pulpcore.plugin.stages.DeclarativeArtifact`. The latter combine an unsaved instance of
+:class:`pulpcore.plugin.content.Artifact` with a URL to retrieve it.
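+
+A minimal sketch of such a `first_stage` follows. The names ``MyFirstStage``, ``MyContent``,
+``fetch_metadata()``, and the metadata keys are hypothetical placeholders, not part of the plugin
+API:
+
+.. code-block:: python
+
+    from pulpcore.plugin.models import Artifact
+    from pulpcore.plugin.stages import DeclarativeArtifact, DeclarativeContent, Stage
+
+    class MyFirstStage(Stage):
+
+        def __init__(self, remote):
+            super().__init__()
+            self.remote = remote
+
+        async def run(self):
+            # fetch_metadata() stands in for plugin-specific metadata download and parsing.
+            for entry in await fetch_metadata(self.remote):
+                content = MyContent(relative_path=entry['path'], digest=entry['sha256'])
+                # The Artifact is unsaved; known digests allow deduplication later on.
+                artifact = Artifact(sha256=entry['sha256'])
+                d_artifact = DeclarativeArtifact(
+                    artifact=artifact,
+                    url=entry['url'],
+                    relative_path=entry['path'],
+                    remote=self.remote,
+                )
+                await self.put(DeclarativeContent(content=content, d_artifacts=[d_artifact]))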
+The :class:`pulpcore.plugin.stages.DeclarativeContent` objects, which describe what a content unit
+will look like when properly downloaded and saved to the database, are passed one by one to the
+next pipeline stage.
+The responsibility of providing this `first_stage` lies entirely with the plugin, since
+this is the part of the pipeline specific to the repository type.
+
+The Pulp plugin API provides the following stages, which also comprise the default pipeline, in
+the following order:
+
+ 1. :class:`pulpcore.plugin.stages.QueryExistingContents`
+ 2. :class:`pulpcore.plugin.stages.QueryExistingArtifacts`
+ 3. :class:`pulpcore.plugin.stages.ArtifactDownloader`
+ 4. :class:`pulpcore.plugin.stages.ArtifactSaver`
+ 5. :class:`pulpcore.plugin.stages.ContentSaver`
+ 6. :class:`pulpcore.plugin.stages.RemoveDuplicates`
+ 7. :class:`pulpcore.plugin.stages.ResolveContentFutures`
+
+On-demand synchronizing
+-----------------------
+
+See :ref:`on-demand-support`.
+
+.. _multi-level-discovery:
+
+Multiple level discovery
+------------------------
+
+Plugins like `pulp_deb` and `pulp_docker` use content artifacts to enumerate more content.
+To support this pattern, a declarative content unit can be associated with an
+:class:`asyncio.Future` that is resolved when the content reaches the
+:class:`pulpcore.plugin.stages.ResolveContentFutures` stage.
+By awaiting this Future, one can implement an informational feedback loop into earlier stages.
+
+.. warning::
+
+   In order to prevent deadlocks, be sure that you mark the declarative content with
+   `does_batch=False`, and that you do not drop it without resolving the future.
+
+.. hint::
+
+   If you need downloaded artifacts of this content for further discovery, make sure to
+   provide `deferred_download=False` to the
+   :class:`pulpcore.plugin.stages.DeclarativeArtifact`.
diff --git a/docs2/plugin-writer/concepts/tasks/add-remove.rst b/docs2/plugin-writer/concepts/tasks/add-remove.rst new file mode 100644 index 0000000000..6bdba9a14c --- /dev/null +++ b/docs2/plugin-writer/concepts/tasks/add-remove.rst @@ -0,0 +1,80 @@
+Adding and Removing Content
+===========================
+
+For adding and removing content, Pulp 3 provides a layered plugin API. The docs below explain our
+lower-level API; this information is helpful for understanding how a synchronize task works under
+the hood.
+
+Repository Versions
+-------------------
+
+Starting with Pulp 3, repositories are versioned. A new immutable repository version is created
+whenever the repository's set of content units changes.
+
+To facilitate the creation of repository versions, a
+`pulpcore.plugin.models.RepositoryVersion` context manager is provided. Plugin writers are
+strongly encouraged to use RepositoryVersion as a context manager to provide transactional safety,
+working directory setup, and database cleanup after encountering failures.
+
+.. code-block:: python
+
+    with RepositoryVersion.create(repository) as new_version:
+
+        # add content manually
+        new_version.add_content(content)
+        new_version.remove_content(content)
+
+.. warning::
+
+   Any action that adds/removes content to a repository *must* create a new RepositoryVersion.
+   Every action that creates a new RepositoryVersion *must* be asynchronous (defined as a task).
+   Task reservations are necessary to prevent race conditions.
+
+.. _sync-docs:
+
+Synchronizing
+-------------
+
+.. tip::
+
+   Please consider using the high-level :ref:`stages-concept-docs` for actual implementations.
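+
+When following that tip, a complete synchronize task can be as small as the sketch below;
+``MyRemote`` is hypothetical, and ``MyFirstStage`` is the placeholder first stage sketched in
+:ref:`stages-concept-docs`:
+
+.. code-block:: python
+
+    from pulpcore.plugin.models import Repository
+    from pulpcore.plugin.stages import DeclarativeVersion
+
+    def synchronize(remote_pk, repository_pk, mirror=False):
+        remote = MyRemote.objects.get(pk=remote_pk)
+        repository = Repository.objects.get(pk=repository_pk)
+        first_stage = MyFirstStage(remote)
+        DeclarativeVersion(first_stage, repository, mirror=mirror).create()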
+
+Most plugins will define a synchronize task, which fetches content from a remote repository and
+adds it to a Pulp repository.
+
+A typical synchronization task will follow this pattern:
+
+* Download and analyze repository metadata from a remote source.
+* Decide what needs to be added to the repository or removed from it.
+* Associate already-existing content with a repository by creating an instance of
+  :class:`~pulpcore.plugin.models.RepositoryContent` and saving it.
+* Remove :class:`~pulpcore.plugin.models.RepositoryContent` objects which were identified for
+  removal.
+* For every content unit which should be added to Pulp, create but do not save yet:
+
+  * an instance of ``ExampleContent``, which will later be associated with a repository.
+  * an instance of :class:`~pulpcore.plugin.models.ContentArtifact` to be able to create relations
+    with the artifact models.
+  * an instance of :class:`~pulpcore.plugin.models.RemoteArtifact` to store information about the
+    artifact from the remote source and to make a relation with the
+    :class:`~pulpcore.plugin.models.ContentArtifact` created before.
+
+* If remote content should be downloaded right away (aka the ``immediate`` download policy), use
+  the suggested :ref:`downloading ` solution. If content should be downloaded
+  later (aka the ``on_demand`` or ``background`` download policy), feel free to skip this step.
+* Save all artifact and content data in one transaction:
+
+  * in case of downloaded content, create an instance of
+    :class:`~pulpcore.plugin.models.Artifact`. Set the `file` field to the
+    absolute path of the downloaded file. Pulp will move the file into place
+    when the Artifact is saved. The Artifact refers to a downloaded file on a
+    filesystem and contains calculated checksums for it.
+  * in case of downloaded content, update the :class:`~pulpcore.plugin.models.ContentArtifact` with
+    a reference to the created :class:`~pulpcore.plugin.models.Artifact`.
+  * create and save an instance of the :class:`~pulpcore.plugin.models.RepositoryContent` to
+    associate the content to a repository.
+  * save all created artifacts and content: ``ExampleContent``,
+    :class:`~pulpcore.plugin.models.ContentArtifact`,
+    :class:`~pulpcore.plugin.models.RemoteArtifact`.
+
+* Use :class:`~pulpcore.plugin.models.ProgressReport` to report the progress of some steps if
+  needed.
diff --git a/docs2/plugin-writer/concepts/tasks/export.rst b/docs2/plugin-writer/concepts/tasks/export.rst new file mode 100644 index 0000000000..db62e5617e --- /dev/null +++ b/docs2/plugin-writer/concepts/tasks/export.rst @@ -0,0 +1,5 @@
+Export Task
+===========
+
+
+STUB
diff --git a/docs2/plugin-writer/concepts/tasks/publish.rst b/docs2/plugin-writer/concepts/tasks/publish.rst new file mode 100644 index 0000000000..3630d01cb4 --- /dev/null +++ b/docs2/plugin-writer/concepts/tasks/publish.rst @@ -0,0 +1,28 @@
+Publish
+=======
+
+In order to make content files available to clients, users must publish these files. Typically,
+users will publish a repository, which will make the content in the repository available.
+
+When publishing a repository, your plugin needs to mimic the layout of both data and metadata. In
+the simplest case, for content types that don't have metadata, only the content unit data itself
+needs to be published.
+
+In most cases, both metadata and content unit data are required to make a usable publication. It's
+important to understand what the required metadata is for your content type.
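+
+A skeletal publish task might look like the sketch below; ``MyPublisher`` and ``write_metadata()``
+are hypothetical placeholders, and the points that follow explain the individual steps:
+
+.. code-block:: python
+
+    from pulpcore.plugin.models import (
+        ContentArtifact, Publication, PublishedArtifact, PublishedMetadata, RepositoryVersion,
+    )
+
+    def publish(publisher_pk, repository_version_pk):
+        publisher = MyPublisher.objects.get(pk=publisher_pk)
+        version = RepositoryVersion.objects.get(pk=repository_version_pk)
+
+        with Publication.create(version, publisher) as publication:
+            # Relate every published file to the publication.
+            for content_artifact in ContentArtifact.objects.filter(content__in=version.content):
+                PublishedArtifact(
+                    content_artifact=content_artifact,
+                    relative_path=content_artifact.relative_path,
+                    publication=publication,
+                ).save()
+            # write_metadata() stands in for plugin-specific metadata generation; it is assumed
+            # to return a file with the generated repository metadata.
+            PublishedMetadata.create_from_file(
+                file=write_metadata(publication), publication=publication
+            )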
+
+**Using a** :class:`~pulpcore.plugin.models.Publication` **context manager is highly encouraged.**
+On context exit, the ``complete`` attribute is set to ``True``, provided that an exception has not
+been raised.
+In the event an exception has been raised, the publication is deleted.
+
+One way to perform publishing:
+
+* Find the :class:`~pulpcore.plugin.models.ContentArtifact` objects which should be published.
+* For each of them, create and save an instance of :class:`~pulpcore.plugin.models.PublishedArtifact`,
+  which refers to the :class:`~pulpcore.plugin.models.ContentArtifact` and the
+  :class:`~pulpcore.app.models.Publication` to which this artifact belongs.
+* Generate the repository metadata and write it to disk.
+* For each of the metadata files, create an instance of
+  :class:`~pulpcore.plugin.models.PublishedMetadata` using the `create_from_file` constructor. Each
+  instance relates a metadata file to a :class:`~pulpcore.app.models.Publication`.
+* Use :class:`~pulpcore.plugin.models.ProgressReport` to report progress of some steps if needed.
diff --git a/docs2/plugin-writer/custom-installation-tasks.rst b/docs2/plugin-writer/custom-installation-tasks.rst new file mode 100644 index 0000000000..c8a3f36f05 --- /dev/null +++ b/docs2/plugin-writer/custom-installation-tasks.rst @@ -0,0 +1,13 @@
+Custom Installation Tasks
+=========================
+
+If your plugin requires any custom installation steps, we recommend using an
+Ansible role prior to Pulp installation.
+
+The easiest way to add custom installation tasks is to follow the
+`Ansible Galaxy guide `_
+to create a new role with the tasks that need to be done and publish it on Ansible Galaxy.
+
+Documentation will need to be added to the plugin installation instructions. See the
+`RPM Plugin Documentation `_
+as an example.
\ No newline at end of file
diff --git a/docs2/plugin-writer/index.rst b/docs2/plugin-writer/index.rst new file mode 100644 index 0000000000..99bf2c31e2 --- /dev/null +++ b/docs2/plugin-writer/index.rst @@ -0,0 +1,34 @@
+Plugin Writer's Guide
+=====================
+
+.. note::
+   This documentation is for Pulp Plugin developers. For Pulp Core development, see
+   `our contributor docs `_.
+
+Pulp Core does not manage content by itself, but instead relies on plugins to add support for one
+content type or another. Examples of content types include RPM packages, Ansible roles, and Docker
+images.
+
+This documentation outlines how to create a Pulp plugin that provides features like:
+
+* Define a new content type and its attributes
+* Download and save the new type of content into Pulp Core
+* Publish the new type of content, allowing Pulp Core to serve it at a ``distribution``
+* Export content to remote servers or CDNs
+* Add custom web application views
+* Implement custom features, e.g. dependency solving, retention/deletion policies, etc.
+
+Along with this guide, it may be useful to refer to our simplest plugin, `pulp_file
+`_.
+
+Additionally, we provide a `Plugin Template `_ which will
+take care of a majority of the boilerplate.
+
+.. toctree::
+   :maxdepth: 2
+
+   planning-guide
+   concepts/index
+   plugin-walkthrough
+   custom-installation-tasks
+
diff --git a/docs2/plugin-writer/planning-guide.rst b/docs2/plugin-writer/planning-guide.rst new file mode 100644 index 0000000000..ec89bb17e0 --- /dev/null +++ b/docs2/plugin-writer/planning-guide.rst @@ -0,0 +1,48 @@
+.. _planning-guide:
+
+Plugin Planning Guide
+=====================
+
+This guide assumes that you are familiar with `general pulp concepts
+`_.
Usually, the most difficult part
+of writing a new plugin is understanding the ecosystem surrounding the content type(s) that you
+want to support.
+
+This page outlines some of the questions a plugin writer should consider while planning and
+writing a new plugin.
+
+What APIs are available from remote repositories?
+-------------------------------------------------
+
+Since remote repositories typically exist to serve content to a client, they usually implement a
+web API. It is very helpful to become familiar with this interface in order to understand how
+to fetch content into Pulp and subsequently distribute it to the client.
+
+Some ecosystems have extensive APIs, so it is helpful to understand a general flow to narrow the
+research scope. For synchronization, Pulp mimics the behavior of the client, and for
+publishing/distributing, Pulp mimics the behavior of the server.
+
+1. Discover content in a remote repository
+2. Retrieve metadata about the content
+3. Retrieve files
+
+What does the metadata look like?
+---------------------------------
+
+Understanding the structure and content of a content type's metadata is crucial to the design and
+function of a plugin.
+
+**Example:**
+When the Docker plugin was in the planning phase, engineers became familiar with the `manifest spec
+files `_ to understand how to properly design
+the workflow of Docker content management within the plugin.
+
+
+Which data should be modeled as Content Units?
+----------------------------------------------
+
+Will this data be added to/removed from a repository individually? If yes, this data could be a
+Content Unit.
+
+Should it be possible to add/remove a subset of this data to/from a repository? If yes, you should
+consider managing this as a smaller unit.
diff --git a/docs2/plugin-writer/plugin-walkthrough.rst b/docs2/plugin-writer/plugin-walkthrough.rst new file mode 100644 index 0000000000..3ee2bfbc85 --- /dev/null +++ b/docs2/plugin-writer/plugin-walkthrough.rst @@ -0,0 +1,133 @@
+Plugin Walkthrough
+==================
+
+This guide assumes that you are familiar with `general pulp concepts
+`_ as well as the :ref:`planning-guide`.
+It will be helpful to skim the :ref:`plugin-concepts` pages, and refer back to them as you go
+through the process.
+
+Bootstrap your plugin
+---------------------
+
+Start your new plugin by using the `Plugin Template `_.
+Follow the documentation in the README to get a working stub plugin.
+
+.. _define-content-type:
+
+Define your plugin Content type
+-------------------------------
+
+To define a new content type, e.g. ``ExampleContent``:
+
+* :class:`pulpcore.plugin.models.Content` should be subclassed and extended with the additional
+  attributes the plugin needs,
+* define the ``TYPE`` class attribute, which is used for filtering purposes,
+* uniqueness should be specified in the ``Meta`` class of the newly defined ``ExampleContent``
+  model,
+* ``unique_together`` should be specified for the ``Meta`` class of the ``ExampleContent`` model,
+* create a serializer for your new Content type as a subclass of
+  :class:`pulpcore.plugin.serializers.NoArtifactContentSerializer`,
+  :class:`pulpcore.plugin.serializers.SingleArtifactContentSerializer`, or
+  :class:`pulpcore.plugin.serializers.MultipleArtifactContentSerializer`
+* create a viewset for your new Content type. It can be a subclass of
+  :class:`pulpcore.plugin.viewsets.ContentViewSet`, and you can define your ``create()`` method
+  based on the serializer you chose.
If you need a read-only viewset, subclass
+  :class:`pulpcore.plugin.viewsets.ReadOnlyContentViewSet` instead. It's also convenient to
+  subclass :class:`pulpcore.plugin.viewsets.SingleArtifactContentUploadViewSet` if you need
+  upload support.
+
+The :class:`~pulpcore.plugin.models.Content` model should not be used directly anywhere in plugin
+code. Only plugin-defined Content classes are expected to be used.
+
+Check the ``pulp_file`` implementation of `the FileContent
+`_ and its
+`serializer `_
+and `viewset `_.
+For a general reference for serializers and viewsets, check the `DRF documentation
+`_.
+
+Add any fields that correspond to the metadata of your content, which could be the project name,
+the author name, or any other type of metadata.
+
+
+.. _define-remote:
+
+Define your plugin Remote
+-------------------------
+
+To define a new remote, e.g. ``ExampleRemote``:
+
+* :class:`pulpcore.plugin.models.Remote` should be subclassed and extended with the additional
+  attributes the plugin needs,
+* define the ``TYPE`` class attribute, which is used for filtering purposes,
+* create a serializer for your new remote as a subclass of
+  :class:`pulpcore.plugin.serializers.RemoteSerializer`,
+* create a viewset for your new remote as a subclass of
+  :class:`pulpcore.plugin.viewsets.RemoteViewSet`.
+
+The :class:`~pulpcore.plugin.models.Remote` model should not be used directly anywhere in plugin
+code. Only plugin-defined Remote classes are expected to be used.
+
+
+There are several important aspects relevant to remote implementation which are briefly mentioned
+in the :ref:`object-relationships` section:
+
+* due to deduplication of :class:`~pulpcore.plugin.models.Content` and
+  :class:`~pulpcore.plugin.models.Artifact` data, they may already exist and the remote needs to
+  fetch and use them when they do.
+* :class:`~pulpcore.plugin.models.ContentArtifact` associates
+  :class:`~pulpcore.plugin.models.Content` and :class:`~pulpcore.plugin.models.Artifact`. If
+  :class:`~pulpcore.plugin.models.Artifact` is not downloaded yet,
+  :class:`~pulpcore.plugin.models.ContentArtifact` contains ``NULL`` value for
+  :attr:`~pulpcore.plugin.models.ContentArtifact.artifact`. It should be updated whenever the
+  corresponding :class:`~pulpcore.plugin.models.Artifact` is downloaded.
+
+.. note::
+
+   Some of these steps may need to behave differently for other download policies.
+
+The remote implementation suggestion above allows the plugin writer to understand and control the
+process at a low level.
+
+.. _define-publisher:
+
+Define your plugin Publisher
+----------------------------
+
+To define a new publisher, e.g. ``ExamplePublisher``:
+
+* :class:`pulpcore.plugin.models.Publisher` should be subclassed and extended with the additional
+  attributes the plugin needs,
+* define the ``TYPE`` class attribute, which is used for filtering purposes,
+* create a serializer for your new publisher as a subclass of
+  :class:`pulpcore.plugin.serializers.PublisherSerializer`,
+* create a viewset for your new publisher as a subclass of
+  :class:`pulpcore.plugin.viewsets.PublisherViewSet`.
+
+The :class:`~pulpcore.plugin.models.Publisher` model should not be used directly anywhere in
+plugin code. Only plugin-defined Publisher classes are expected to be used.
+
+Check the ``pulp_file`` implementation of `the FilePublisher
+`_.
+
+
+
+Define your Tasks
+-----------------
+
+See :ref:`writing-tasks`. Almost all plugins must implement a `sync` task; most implement a
+`publish` task as well.
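+
+Putting the pieces above together: the model/serializer/viewset triple for a remote, for example,
+is typically only a few lines each. All ``My*`` names below are illustrative:
+
+.. code-block:: python
+
+    from pulpcore.plugin.models import Remote
+    from pulpcore.plugin.serializers import RemoteSerializer
+    from pulpcore.plugin.viewsets import RemoteViewSet
+
+    class MyRemote(Remote):
+        TYPE = 'my-plugin'
+
+        class Meta:
+            default_related_name = "%(app_label)s_%(model_name)s"
+
+    class MyRemoteSerializer(RemoteSerializer):
+        class Meta:
+            fields = RemoteSerializer.Meta.fields
+            model = MyRemote
+
+    class MyRemoteViewSet(RemoteViewSet):
+        endpoint_name = 'my-plugin'
+        queryset = MyRemote.objects.all()
+        serializer_class = MyRemoteSerializer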
+
+
+Plugin Completeness Checklist
+------------------------------
+
+ * :ref:`Plugin django app is defined using PulpAppConfig as a parent `
+ * :ref:`Plugin entry point is defined `
+ * `pulpcore-plugin is specified as a requirement `_
+ * Necessary models/serializers/viewsets are :ref:`defined ` and :ref:`discoverable `. At a minimum:
+
+   * models for plugin content type, remote, publisher
+   * serializers for plugin content type, remote, publisher
+   * viewsets for plugin content type, remote, publisher
+
+ * :ref:`Errors are handled according to Pulp conventions `
+ * Docs for plugin are available (any location and format preferred and provided by plugin writer)
diff --git a/docs2/reference/content-protection.rst b/docs2/reference/content-protection.rst new file mode 100644 index 0000000000..d20004bd28 --- /dev/null +++ b/docs2/reference/content-protection.rst @@ -0,0 +1,90 @@
+.. _content-protection:
+
+Content Protection
+------------------
+
+By default, the Content app will serve all content, but some deployments want to serve content
+only to some users and not others. For example, pulp_rpm only wants to give RPMs to users who have
+valid certificates declaring their paid access to content. To allow total customization of how
+content is protected, a plugin writer can define a ``ContentGuard``.
+
+
+Defining a ContentGuard
+^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``ContentGuard`` is a Master/Detail object importable via
+``from pulpcore.plugin.models import ContentGuard``, which provides `these base fields `_.
+
+In your plugin code, subclass ``ContentGuard`` and optionally add additional fields as necessary to
+perform the authentication and authorization. As with all Master/Detail objects, a ``TYPE`` class
+attribute is needed, which is then used in the URL. For ``ContentGuard`` detail objects the URL
+structure is::
+
+    ``/pulp/api/v3/contentguards///``
+
+
+.. note::
+
+   The `pulp-certguard `_ plugin ships various
+   ``ContentGuard`` types for users and plugin writers to use together. Plugins can ship their own
+   content guards too, but look at the existing ones first.
+
+
+Simple Example
+^^^^^^^^^^^^^^
+
+Here's a trivial example where the client needs to send a header named SECRET_STRING; if its value
+matches the recorded value for that ContentGuard instance, the content is served to the user. The
+secret both authenticates the user and authorizes them for this Content.
+
+.. code-block:: python
+
+    from django.db import models
+    from pulpcore.plugin.models import ContentGuard
+
+    class SecretStringContentGuard(ContentGuard):
+
+        TYPE = 'secret_string'
+
+        secret_string = models.CharField(max_length=255)
+
+        def permit(self, request):
+            """
+            Authorize the specified web request.
+
+            Args:
+                request (aiohttp.web.Request): A request for a published file.
+
+            Raises:
+                PermissionError: When the request cannot be authorized.
+            """
+            if request.headers.get('SECRET_STRING') != self.secret_string:
+                raise PermissionError('Access denied.')
+
+
+End-User use of ContentGuard
+############################
+
+Users create an instance of a ``SecretStringContentGuard`` and give it a secret string with
+``httpie``::
+
+    http POST http://localhost:24817/pulp/api/v3/contentguards//secret_string/ \
+        secret_string='2xlSFgJwOhbLrtIlmYszqHQy7ivzdQo9'
+
+
+Then the user can protect one or more Distributions by specifying ``content_guard``. See the
+`ContentGuard creation API `_ for more information.
+
+
+.. _plugin-writers-use-content-protection:
+
+Plugin Writer use of ContentGuard
+#################################
+
+Plugin writers can also programmatically create detail ``ContentGuard`` instances and have the
+detail Distribution their plugin defines enforce its use. This allows plugin writers to offer
+content protection features to users with fewer required steps.
diff --git a/docs2/reference/error-handling.rst b/docs2/reference/error-handling.rst new file mode 100644 index 0000000000..5c71a4733f --- /dev/null +++ b/docs2/reference/error-handling.rst @@ -0,0 +1,8 @@
+.. _error-handling-basics:
+
+Error Handling
+--------------
+
+Please see the `error-handling
+`_ section in the
+code guidelines.
diff --git a/docs2/reference/how-plugins-work.rst b/docs2/reference/how-plugins-work.rst new file mode 100644 index 0000000000..eeda9d349f --- /dev/null +++ b/docs2/reference/how-plugins-work.rst @@ -0,0 +1,74 @@
+How Plugins Work
+================
+
+.. _plugin-django-application:
+
+Plugin Django Application
+-------------------------
+
+Like the Pulp Core itself, all Pulp Plugins begin as Django Applications, started like any other
+with `django-admin startapp `. However, instead of subclassing Django's
+`django.apps.AppConfig` as seen `in the Django documentation
+`_, Pulp Plugins
+identify themselves as plugins to the Pulp Core by subclassing
+:class:`pulpcore.plugin.PulpPluginAppConfig`.
+
+:class:`pulpcore.plugin.PulpPluginAppConfig` also provides the application autoloading behaviors,
+such as automatic registration of viewsets with the API router, which adds plugin endpoints.
+
+The :class:`pulpcore.plugin.PulpPluginAppConfig` subclass for any plugin must set its `name`
+attribute to the importable dotted Python location of the plugin application (the Python namespace
+that contains at least models and viewsets). It should also set its `label` attribute to something
+that unambiguously labels which plugin is represented by that subclass. See `how it is done
+`_ in the ``pulp_file`` plugin.
+
+
+.. _plugin-entry-point:
+
+pulpcore.plugin Entry Point
+---------------------------
+
+The Pulp Core discovers available plugins by inspecting the pulpcore.plugin entry point.
+
+Once a plugin has defined its :class:`pulpcore.plugin.PulpPluginAppConfig` subclass, it should add
+a pointer to that subclass using the Django ``default_app_config`` convention, e.g.
+``default_app_config = 'pulp_myplugin.app.MyPulpPluginAppConfig'`` somewhere in the module that
+contains your Django application. The Pulp Core can then be told to use this value to discover
+your plugin, by pointing the pulpcore.plugin entry point at it. If, for example, we set
+``default_app_config`` in ``pulp_myplugin/__init__.py``, the setup.py ``entry_points`` would look
+like this:
+
+.. code-block:: python
+
+    entry_points={
+        'pulpcore.plugin': [
+            'pulp_myplugin = pulp_myplugin:default_app_config',
+        ]
+    }
+
+If you do not wish to use Django's ``default_app_config`` convention, the name given to the
+``pulpcore.plugin`` entry point must be an importable identifier with a string value containing
+the importable dotted path to your plugin's application config class, just as
+``default_app_config`` does.
+
+Check out the ``pulp_file`` plugin: `default_app_config
+`_ and `setup.py example
+`_.
+
+
+.. _mvs-discovery:
+
+Model, Serializer, Viewset Discovery
+------------------------------------
+
+The structure of plugins should, where possible, mimic the layout of the Pulp Core Plugin API.
For example, model classes should be based on platform classes imported from
+:mod:`pulpcore.plugin.models` and be defined in the `models` module or directory of a plugin app.
+ViewSets should be imported from :mod:`pulpcore.plugin.viewsets`, and be defined in the `viewsets`
+module of a plugin app, and so on.
+
+This matching of module names is required for the Pulp Core to be able to auto-discover plugin
+components, particularly models and viewsets.
+
+Take a look at `the structure `_ of
+the ``pulp_file`` plugin.
diff --git a/docs2/reference/how-to-doc-api.rst b/docs2/reference/how-to-doc-api.rst new file mode 100644 index 0000000000..e337f2d5d1 --- /dev/null +++ b/docs2/reference/how-to-doc-api.rst @@ -0,0 +1,55 @@
+Documenting your API
+--------------------
+
+Each instance of Pulp hosts dynamically generated REST API documentation located at
+`http://pulpserver/pulp/api/v3/docs/`.
+
+The documentation is generated using `ReDoc `_ based on the
+`OpenAPI 2.0 `_ schema
+generated by Pulp. The schema generator iterates over all the Views and Viewsets in every plugin
+and generates the schema based on the information provided by Viewset docstrings, Viewset method
+docstrings, associated Model names, View docstrings, and the help text from serializers.
+
+Individual parameters and responses are documented automatically based on the Serializer field
+type. A field's description is generated from the "help_text" kwarg when defining serializer
+fields.
+
+Response status codes can be generated through the `Meta` class on the serializer:
+
+.. code-block:: python
+
+    from rest_framework.status import HTTP_400_BAD_REQUEST
+
+    class SnippetSerializerV1(serializers.Serializer):
+        title = serializers.CharField(required=False, allow_blank=True, max_length=100)
+
+        class Meta:
+            error_status_codes = {
+                HTTP_400_BAD_REQUEST: 'Bad Request'
+            }
+
+
+.. note::
+   ref_name - a string that is used as the model definition name for this serializer class.
+   If this option is not specified, all serializers have an implicit name derived from their
+   class name. In order to avoid possible collisions, it is better to explicitly define ref_name
+   on the Meta class.
+   Suggested format: ref_name = f'{model._meta.app_label}_{model._meta.model_name}'
+
+.. note::
+
+   Plugin authors can provide manual overrides using the `@swagger_auto_schema decorator
+   `_.
+
+The OpenAPI schema for pulpcore and all installed plugins can be downloaded from the ``pulp-api``
+server:
+
+.. code-block:: bash
+
+    curl -o api.json http://localhost:24817/pulp/api/v3/docs/api.json
+
+The OpenAPI schema for a specific plugin can be downloaded by specifying the plugin's module name
+as a GET parameter. For example, for pulp_rpm-only endpoints, use a query like this:
+
+.. code-block:: bash
+
+    curl -o api.json http://localhost:24817/pulp/api/v3/docs/api.json?plugin=pulp_rpm
diff --git a/docs2/reference/index.rst b/docs2/reference/index.rst new file mode 100644 index 0000000000..6aca58012d --- /dev/null +++ b/docs2/reference/index.rst @@ -0,0 +1,16 @@
+Plugin Writing Reference Material
+=================================
+
+This section includes in-depth material that is topic-specific.
+
+.. toctree::
+   :maxdepth: 2
+
+   object-relationships
+   how-plugins-work
+   error-handling
+   how-to-doc-api
+   live-api
+   on-demand-support
+   releasing
+   content-protection
diff --git a/docs2/reference/live-api.rst b/docs2/reference/live-api.rst new file mode 100644 index 0000000000..8439c6e83a --- /dev/null +++ b/docs2/reference/live-api.rst @@ -0,0 +1,19 @@
+.. _live-apis:
+
+Live APIs
+---------
+
+The Pulp 3 Plugin API allows plugin writers to add web views that can respond to client requests
+and facilitate content discovery. Conceptually, this is called a "Live API". Not many content
+types require this, but if they do, it's important to understand what the requirements are.
+
+Typically only published content needs to be discovered, so the "Live API" requirement is thought
+of as a publishing requirement.
+
+.. note::
+   Write down any requirements for a webserver to interact with a client to facilitate content
+   discovery.
+
+
+
+
diff --git a/docs2/reference/object-relationships.rst b/docs2/reference/object-relationships.rst new file mode 100644 index 0000000000..b87b84ce8b --- /dev/null +++ b/docs2/reference/object-relationships.rst @@ -0,0 +1,55 @@
+.. _object-relationships:
+
+Object Relationships
+====================
+
+There are models which are expected to be used in plugin implementation, so understanding what
+they are designed for is useful for a plugin writer. Each model below has a link to its
+documentation, where its purpose, all attributes, and relations are listed.
+
+Here is the gist of how the models are related to each other and what each model is responsible
+for.
+
+* :class:`~pulpcore.app.models.Repository` contains :class:`~pulpcore.plugin.models.Content`.
+  :class:`~pulpcore.plugin.models.RepositoryContent` is used to represent this relation.
+* :class:`~pulpcore.plugin.models.Content` can have :class:`~pulpcore.plugin.models.Artifact`
+  associated with it. :class:`~pulpcore.plugin.models.ContentArtifact` is used to represent this
+  relation.
+* :class:`~pulpcore.plugin.models.ContentArtifact` can have
+  :class:`~pulpcore.plugin.models.RemoteArtifact` associated with it.
+* :class:`~pulpcore.plugin.models.Artifact` is a file.
+* :class:`~pulpcore.plugin.models.RemoteArtifact` contains information about
+  :class:`~pulpcore.plugin.models.Artifact` from a remote source, including a URL to perform
+  the download later at any point.
+* :class:`~pulpcore.plugin.models.Remote` knows the specifics of the plugin
+  :class:`~pulpcore.plugin.models.Content` needed to get it into Pulp.
+  :class:`~pulpcore.plugin.models.Remote` defines how to synchronize remote content. The Pulp
+  Platform provides support for concurrent :ref:`downloading ` of remote content.
+  The plugin writer is encouraged to use one of the provided downloaders but is not required to.
+* :class:`~pulpcore.plugin.models.PublishedArtifact` refers to a
+  :class:`~pulpcore.plugin.models.ContentArtifact` which is published and belongs to a certain
+  :class:`~pulpcore.app.models.Publication`.
+* :class:`~pulpcore.plugin.models.PublishedMetadata` is a file generated while publishing and
+  belongs to a certain :class:`~pulpcore.app.models.Publication`.
+* :class:`~pulpcore.plugin.models.Publisher` knows the specifics of the plugin
+  :class:`~pulpcore.plugin.models.Content` needed to make it available outside of Pulp.
+  :class:`~pulpcore.plugin.models.Publisher` defines how to publish content available in Pulp.
+* :class:`~pulpcore.app.models.Publication` is the result of a publish operation of a specific
+  :class:`~pulpcore.plugin.models.Publisher`.
+* :class:`~pulpcore.app.models.Distribution` defines how a publication is distributed for a
+  specific :class:`~pulpcore.plugin.models.Publisher`.
+* :class:`~pulpcore.plugin.models.ProgressReport` is used to report the progress of a task.
+
+
+An important feature of the current design is deduplication of
+:class:`~pulpcore.plugin.models.Content` and :class:`~pulpcore.plugin.models.Artifact` data.
+:class:`~pulpcore.plugin.models.Content` units are shared between
+:class:`~pulpcore.app.models.Repository` objects, and :class:`~pulpcore.plugin.models.Artifact`
+objects are shared between :class:`~pulpcore.plugin.models.Content` units.
+See more details on how this affects remote implementation in the :ref:`define-remote` section.
+
+
+Check the ``pulp_file`` `implementation `_ to see how all
+those models are used in practice.
+A more detailed explanation of model usage, with references to ``pulp_file`` code, is below.
+
+
diff --git a/docs2/reference/on-demand-support.rst b/docs2/reference/on-demand-support.rst new file mode 100644 index 0000000000..c95fe086a1 --- /dev/null +++ b/docs2/reference/on-demand-support.rst @@ -0,0 +1,114 @@
+.. _on-demand-support:
+
+On-Demand Support
+-----------------
+
+"On-Demand support" refers to a plugin's ability to support downloading and creating Content units
+without downloading their associated Artifacts. By convention, users expect the `Remote.policy`
+attribute to determine when Artifacts will be downloaded. See the user docs for specifics on the
+user expectations there.
+
+.. _on-demand-support-with-da:
+
+Adding Support when using DeclarativeVersion
+============================================
+
+Plugins like `pulp-file` sync content using `DeclarativeVersion`.
+On-demand support can be added by specifying `deferred_download=True` at instantiation of
+:class:`pulpcore.plugin.stages.DeclarativeArtifact`.
+
+`Remote.policy` can take several values. To easily translate them, consider a snippet like this
+one taken from `pulp-file`::
+
+    async def run(self):
+        # Interpret download policy
+        deferred_download = (self.remote.policy != Remote.IMMEDIATE)
+        <...>
+        da = DeclarativeArtifact(
+            artifact=artifact,
+            url=url,
+            relative_path=relative_path,
+            remote=self.remote,
+            deferred_download=deferred_download,
+        )
+        <...>
+
+.. hint::
+
+   The `deferred_download` flag is used at the artifact level, to support on-demand concepts for
+   plugins that need some artifacts downloaded immediately in all cases.
+   See also :ref:`multi-level-discovery`.
+
+
+Adding Support when using a Custom Stages API Pipeline
+======================================================
+
+Plugins like `pulp-rpm` that sync content using a custom pipeline can enable on-demand support by
+excluding the `QueryExistingArtifacts`, `ArtifactDownloader` and `ArtifactSaver` stages. Without
+these stages included, no Artifact downloading will occur. Content unit saving will occur, which
+will correctly create the on-demand content units.
+
+`Remote.policy` can take several values. To easily maintain the pipeline, consider a snippet like
+this one inspired by `pulp-rpm`::
+
+    download = (remote.policy == Remote.IMMEDIATE)  # Interpret policy to download Artifacts or not
+    stages = [first_stage]
+    if download:
+        stages.extend([QueryExistingArtifacts(), ArtifactDownloader(), ArtifactSaver()])
+    stages.extend(the_rest_of_the_pipeline)  # This adds the Content and Association Stages
+
+.. warning::
+
+   Skipping those stages does not work with :ref:`multi-level-discovery`.
+   If you need some artifacts downloaded anyway, follow the example in
+   :ref:`on-demand-support-with-da` and include the artifact stages in the custom pipeline.
+
+.. hint::
+
+   Consider also excluding the `ResolveContentFutures` stage.
+
+
+What if the Custom Pipeline Needs Artifact Downloading?
+=======================================================
+
+For example, `pulp-docker` uses a custom Stages API Pipeline, and relies on Artifact downloading
+to download metadata that is saved and stored as a Content unit. This metadata defines more
+Content units to be created without downloading their corresponding Artifacts. The on-demand
+support for this type needs to download Artifacts for those content types, but not others.
+
+.. warning::
+   TODO: https://pulp.plan.io/issues/4209
+
+
+How Does This Work at the Model Layer?
+======================================
+
+The presence of a `RemoteArtifact` is what allows the Pulp content app to fetch and serve that
+Artifact on-demand. So a Content unit is on-demand if and only if:
+
+1. The Content unit itself is saved.
+
+2. A `ContentArtifact` is saved for each `Artifact` the Content unit would have referenced.
+   Note: the `ContentArtifact` is created in both the on-demand and not on-demand cases.
+
+3. Instead of creating and saving an `Artifact`, a `RemoteArtifact` is created. This contains any
+   known digest or size information, allowing for automatic validation when the `Artifact` is
+   fetched.
+
+
+How does the Content App work with this Model Layer?
+====================================================
+
+When a request for content arrives, it is matched against a `Distribution` and eventually against
+a specific Artifact path, which actually matches against a `ContentArtifact`, not an `Artifact`.
+If an `Artifact` exists, it is served to the client. Otherwise, a `RemoteArtifact` allows the
+`Artifact` to be downloaded on-demand and served to the client.
+
+If `remote.policy == Remote.ON_DEMAND`, the Artifact is saved on the first download. This causes
+future requests to serve the already-downloaded and validated Artifact.
+
+.. note::
+   In situations where multiple Remotes synced and provided the same `Content` unit, only one
+   `Content` unit is created but many `RemoteArtifact` objects may be created. The Pulp Content
+   app will try all `RemoteArtifact` objects that correspond with a `ContentArtifact`. It's
+   possible an unexpected `Remote` could be used when fetching that equivalent `Content` unit.
+   Similar warnings are in the user documentation on on-demand.
diff --git a/docs2/reference/releasing.rst b/docs2/reference/releasing.rst new file mode 100644 index 0000000000..398451c1bd --- /dev/null +++ b/docs2/reference/releasing.rst @@ -0,0 +1,10 @@
+Releasing Your Plugin
+=====================
+
+Packaging
+---------
+
+The Plugin API is available from PyPI as pulpcore-plugin. A plugin writer needs to specify the
+minimum version of pulpcore-plugin their plugin depends on. A plugin writer does not need to
+specify which version of pulpcore would work with their plugin, since pulpcore-plugin will
+resolve the pulpcore dependency.
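+
+For example, a plugin's ``setup.py`` might express this pin as follows; the project name and
+version numbers are illustrative:
+
+.. code-block:: python
+
+    from setuptools import find_packages, setup
+
+    setup(
+        name='pulp-myplugin',
+        version='0.1.0',
+        packages=find_packages(),
+        install_requires=['pulpcore-plugin>=0.1.0rc8'],
+        entry_points={
+            'pulpcore.plugin': [
+                'pulp_myplugin = pulp_myplugin:default_app_config',
+            ]
+        },
+    )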
diff --git a/docs2/static/.gitignore b/docs2/static/.gitignore new file mode 100644 index 0000000000..e69de29bb2 diff --git a/pulpcore/app/views/status.py b/pulpcore/app/views/status.py index aa8498cf72..90b99a4b9d 100644 --- a/pulpcore/app/views/status.py +++ b/pulpcore/app/views/status.py @@ -32,7 +32,7 @@ def get(self, request, format=None): Returns app information including the version of pulpcore and loaded pulp plugins, known workers, database connection status, and messaging connection status """ - components = ['pulpcore', 'pulpcore-plugin'] + INSTALLED_PULP_PLUGINS + components = ['pulpcore'] + INSTALLED_PULP_PLUGINS versions = [{ 'component': component, 'version': get_distribution(component).version diff --git a/pulpcore/plugin/__init__.py b/pulpcore/plugin/__init__.py new file mode 100644 index 0000000000..720c2ef704 --- /dev/null +++ b/pulpcore/plugin/__init__.py @@ -0,0 +1,7 @@ +__version__ = '0.1.0rc8.dev' + +# plugins declare that they are a pulp plugin by subclassing PulpPluginAppConfig +from pulpcore.app.apps import PulpPluginAppConfig # noqa + +# Allow plugin writers to subclass PulpException +from pulpcore.exceptions import PulpException # noqa diff --git a/pulpcore/plugin/content.py b/pulpcore/plugin/content.py new file mode 100644 index 0000000000..66fde4da89 --- /dev/null +++ b/pulpcore/plugin/content.py @@ -0,0 +1,2 @@ +from pulpcore.content import app # noqa +from pulpcore.content.handler import Handler, PathNotResolved # noqa diff --git a/pulpcore/plugin/download/__init__.py b/pulpcore/plugin/download/__init__.py new file mode 100644 index 0000000000..72ca5af280 --- /dev/null +++ b/pulpcore/plugin/download/__init__.py @@ -0,0 +1,8 @@ +from pulpcore.download import ( # noqa + BaseDownloader, + DownloadResult, + DownloaderFactory, + FileDownloader, + http_giveup, + HttpDownloader, +) diff --git a/pulpcore/plugin/exceptions.py b/pulpcore/plugin/exceptions.py new file mode 100644 index 0000000000..55233dce8d --- /dev/null +++ b/pulpcore/plugin/exceptions.py @@ -0,0 +1 @@ +from pulpcore.exceptions import DigestValidationError, PulpException, SizeValidationError # noqa diff --git a/pulpcore/plugin/models/__init__.py b/pulpcore/plugin/models/__init__.py new file mode 100644 index 0000000000..f9afe74cfb --- /dev/null +++ b/pulpcore/plugin/models/__init__.py @@ -0,0 +1,26 @@ +# Models are exposed selectively in the versioned plugin API. +# Any models defined in the pulpcore.plugin namespace should probably be proxy models. 
+
+from pulpcore.app.models import ( # noqa
+    Artifact,
+    BaseDistribution,
+    Content,
+    ContentArtifact,
+    ContentGuard,
+    CreatedResource,
+    MasterModel,
+    Model,
+    ProgressReport,
+    Publication,
+    PublicationDistribution,
+    PublishedArtifact,
+    PublishedMetadata,
+    Publisher,
+    Repository,
+    Remote,
+    RemoteArtifact,
+    RepositoryContent,
+    RepositoryVersion,
+    RepositoryVersionDistribution,
+    Task,
+)
diff --git a/pulpcore/plugin/serializers/__init__.py b/pulpcore/plugin/serializers/__init__.py new file mode 100644 index 0000000000..70e74796f4 --- /dev/null +++ b/pulpcore/plugin/serializers/__init__.py @@ -0,0 +1,28 @@
+# Import Serializers in platform that are potentially useful to plugin writers
+from pulpcore.app.serializers import ( # noqa
+    ArtifactSerializer,
+    AsyncOperationResponseSerializer,
+    BaseDistributionSerializer,
+    ContentChecksumSerializer,
+    ContentGuardSerializer,
+    NoArtifactContentSerializer,
+    SingleArtifactContentSerializer,
+    MultipleArtifactContentSerializer,
+    DetailRelatedField,
+    IdentityField,
+    ModelSerializer,
+    NestedIdentityField,
+    NestedRelatedField,
+    RemoteSerializer,
+    PublicationDistributionSerializer,
+    PublicationSerializer,
+    PublisherSerializer,
+    RelatedField,
+    RepositorySyncURLSerializer,
+    RepositoryVersionDistributionSerializer,
+    SingleContentArtifactField,
+    relative_path_validator,
+    validate_unknown_fields,
+)
+
+from .content import SingleArtifactContentUploadSerializer # noqa
diff --git a/pulpcore/plugin/serializers/content.py b/pulpcore/plugin/serializers/content.py new file mode 100644 index 0000000000..f2bc5d47e3 --- /dev/null +++ b/pulpcore/plugin/serializers/content.py @@ -0,0 +1,97 @@
+from gettext import gettext as _
+
+from logging import getLogger
+
+from rest_framework.serializers import (
+    FileField,
+    ValidationError,
+    HyperlinkedRelatedField,
+)
+from pulpcore.plugin.models import Artifact, Repository, RepositoryVersion
+from pulpcore.plugin.serializers import SingleArtifactContentSerializer
+
+
+log = getLogger(__name__)
+
+
+class SingleArtifactContentUploadSerializer(SingleArtifactContentSerializer):
+    """
+    A serializer for content_types with a single Artifact.
+
+    The Artifact can either be specified via its URL, or a new file can be uploaded.
+    Additionally, a repository can be specified, to which the content unit will be added.
+
+    When using this serializer, the creation of the real content must be wrapped in a task that
+    locks on the Artifact and, when specified, the repository.
+    """
+
+    file = FileField(
+        help_text=_(
+            "An uploaded file that should be turned into the artifact of the content unit."
+        ),
+        required=False,
+        write_only=True,
+    )
+    repository = HyperlinkedRelatedField(
+        help_text=_(
+            "A URI of a repository the new content unit should be associated with."
+ ), + required=False, + write_only=True, + queryset=Repository.objects.all(), + view_name="repositories-detail", + ) + + def __init__(self, *args, **kwargs): + """Initializer for SingleArtifactContentUploadSerializer.""" + super().__init__(*args, **kwargs) + if self.fields.get("artifact"): + self.fields["artifact"].required = False + + def validate(self, data): + """Validate that we have an Artifact or can create one.""" + + data = super().validate(data) + + if "file" in data: + if "artifact" in data: + raise ValidationError( + _("Only one of 'file' and 'artifact' may be specified.") + ) + data["artifact"] = Artifact.init_and_validate(data.pop("file")) + elif "artifact" not in data: + raise ValidationError(_("One of 'file' and 'artifact' must be specified.")) + + if "request" not in self.context: + data = self.deferred_validate(data) + + return data + + def deferred_validate(self, data): + """Validate the content unit by deeply analyzing the specified Artifact. + + This is only called when validating without a request context to prevent stalling + an ongoing http request. + It should be overwritten by plugins to extract metadata from the actual content in + much the same way as `validate`. + """ + return data + + def create(self, validated_data): + """Save the GenericContent unit. + This must be used inside a task that locks on the Artifact and if given, the repository. + """ + + repository = validated_data.pop("repository", None) + content = super().create(validated_data) + + if repository: + content_to_add = self.Meta.model.objects.filter(pk=content.pk) + + # create new repo version with uploaded package + with RepositoryVersion.create(repository) as new_version: + new_version.add_content(content_to_add) + return content + + class Meta(SingleArtifactContentSerializer.Meta): + fields = SingleArtifactContentSerializer.Meta.fields + ("file", "repository") diff --git a/pulpcore/plugin/stages/__init__.py b/pulpcore/plugin/stages/__init__.py new file mode 100644 index 0000000000..4ca7038e83 --- /dev/null +++ b/pulpcore/plugin/stages/__init__.py @@ -0,0 +1,16 @@ +from .api import create_pipeline, EndStage, Stage # noqa +from .artifact_stages import ( # noqa + ArtifactDownloader, + ArtifactSaver, + QueryExistingArtifacts, + RemoteArtifactSaver, +) +from .association_stages import ( # noqa + ContentAssociation, + ContentUnassociation, + RemoveDuplicates +) +from .content_stages import ContentSaver, QueryExistingContents, ResolveContentFutures # noqa +from .declarative_version import DeclarativeVersion # noqa +from .models import DeclarativeArtifact, DeclarativeContent # noqa +from .profiler import ProfilingQueue, create_profile_db_and_connection # noqa diff --git a/pulpcore/plugin/stages/api.py b/pulpcore/plugin/stages/api.py new file mode 100644 index 0000000000..12e2b88f19 --- /dev/null +++ b/pulpcore/plugin/stages/api.py @@ -0,0 +1,240 @@ +import asyncio +import logging + +from gettext import gettext as _ + +from django.conf import settings + +from .profiler import ProfilingQueue + + +log = logging.getLogger(__name__) + + +class Stage: + """ + The base class for all Stages API stages. + + To make a stage, inherit from this class and implement :meth:`run` on the subclass. + """ + + def __init__(self): + self._in_q = None + self._out_q = None + + def _connect(self, in_q, out_q): + """ + Connect to queues within a pipeline. + + Args: + in_q (asyncio.Queue): The stage input queue. + out_q (asyncio.Queue): The stage output queue. 
+ """ + self._in_q = in_q + self._out_q = out_q + + async def __call__(self): + """ + This coroutine makes the stage callable. + + It calls :meth:`run` and signals the next stage that its work is finished. + """ + log.debug(_('%(name)s - begin.'), {'name': self}) + await self.run() + await self._out_q.put(None) + log.debug(_('%(name)s - put end-marker.'), {'name': self}) + + async def run(self): + """ + The coroutine that is run as part of this stage. + + Returns: + The coroutine that runs this stage. + + """ + raise NotImplementedError(_('A plugin writer must implement this method')) + + async def items(self): + """ + Asynchronous iterator yielding items of :class:`DeclarativeContent` from `self._in_q`. + + The iterator will get instances of :class:`DeclarativeContent` one by one as they get + available. + + Yields: + An instance of :class:`DeclarativeContent` + + Examples: + Used in stages to get d_content instances one by one from `self._in_q`:: + + class MyStage(Stage): + async def run(self): + async for d_content in self.items(): + # process declarative content + await self.put(d_content) + + """ + while True: + content = await self._in_q.get() + if content is None: + break + log.debug(_('%(name)s - next: %(content)s.'), {'name': self, 'content': content}) + yield content + + async def batches(self, minsize=50): + """ + Asynchronous iterator yielding batches of :class:`DeclarativeContent` from `self._in_q`. + + The iterator will try to get as many instances of + :class:`DeclarativeContent` as possible without blocking, but + at least `minsize` instances. + + Args: + minsize (int): The minimum batch size to yield (unless it is the final batch) + + Yields: + A list of :class:`DeclarativeContent` instances + + Examples: + Used in stages to get large chunks of d_content instances from `self._in_q`:: + + class MyStage(Stage): + async def run(self): + async for batch in self.batches(): + for d_content in batch: + # process declarative content + await self.put(d_content) + + """ + batch = [] + shutdown = False + no_block = False + + def add_to_batch(content): + nonlocal batch + nonlocal shutdown + nonlocal no_block + if content is None: + shutdown = True + log.debug(_('%(name)s - shutdown.'), {'name': self}) + else: + if not content.does_batch: + no_block = True + batch.append(content) + + while not shutdown: + content = await self._in_q.get() + add_to_batch(content) + while not shutdown: + try: + content = self._in_q.get_nowait() + except asyncio.QueueEmpty: + break + else: + add_to_batch(content) + + if batch and (len(batch) >= minsize or shutdown or no_block): + log.debug( + _('%(name)s - next batch[%(length)d].'), + { + 'name': self, + 'length': len(batch), + }) + yield batch + batch = [] + no_block = False + + async def put(self, item): + """ + Coroutine to pass items to the next stage. + + Args: + item: A handled instance of :class:`pulpcore.plugin.stages.DeclarativeContent` + + Raises: + ValueError: When `item` is None. + """ + if item is None: + raise ValueError(_('(None) not permitted.')) + await self._out_q.put(item) + log.debug(_('%(name)s - put: %(content)s'), {'name': self, 'content': item}) + + def __str__(self): + return '[{id}] {name}'.format(id=id(self), name=self.__class__.__name__) + + +async def create_pipeline(stages, maxsize=100): + """ + A coroutine that builds a Stages API linear pipeline from the list `stages` and runs it. + + Each stage is an instance of a class derived from :class:`pulpcore.plugin.stages.Stage` that + implements the :meth:`run` coroutine. 
This coroutine reads asynchronously either from the
+    `items()` iterator or the `batches()` iterator and outputs the items with `put()`. Here is an
+    example of the simplest stage that only passes data.
+
+    >>> class MyStage(Stage):
+    >>>     async def run(self):
+    >>>         async for d_content in self.items():  # Fetch items from the previous stage
+    >>>             await self.put(d_content)  # Hand them over to the next stage
+
+    Args:
+        stages (list of coroutines): A list of Stages API compatible coroutines.
+        maxsize (int): The maximum number of items a queue between two stages should hold. Optional
+            and defaults to 100.
+
+    Returns:
+        A single coroutine that can be used to run, await, or cancel the entire pipeline.
+    Raises:
+        ValueError: When a stage instance is specified more than once.
+    """
+    futures = []
+    history = set()
+    in_q = None
+    for i, stage in enumerate(stages):
+        if stage in history:
+            raise ValueError(_('Each stage instance must be unique.'))
+        history.add(stage)
+        if i < len(stages) - 1:
+            if settings.PROFILE_STAGES_API:
+                out_q = ProfilingQueue.make_and_record_queue(stages[i + 1], i + 1, maxsize)
+            else:
+                out_q = asyncio.Queue(maxsize=maxsize)
+        else:
+            out_q = None
+        stage._connect(in_q, out_q)
+        futures.append(asyncio.ensure_future(stage()))
+        in_q = out_q
+
+    try:
+        await asyncio.gather(*futures)
+    except Exception:
+        # One of the stages raised an exception; cancel all stages...
+        pending = []
+        for task in futures:
+            if not task.done():
+                task.cancel()
+                pending.append(task)
+        # ...and run until all Exceptions show up
+        if pending:
+            await asyncio.wait(pending, timeout=60)
+        raise
+
+
+class EndStage(Stage):
+    """
+    A Stages API stage that drains incoming items and does nothing with the items. This is
+    required at the end of all pipelines.
+
+    Without this stage, the `maxsize` of the last stage's `_out_q` could fill up and block the
+    entire pipeline.
+    """
+
+    async def __call__(self):
+        """
+        This method drains items from the last queue and drops them.
+
+        Importantly, it does not try to put items into the nonexistent next queue.
+        """
+        # We overwrite __call__ here to avoid trying to put None in `self._out_q`.
+        async for _ in self.items():  # noqa
+            pass
diff --git a/pulpcore/plugin/stages/artifact_stages.py b/pulpcore/plugin/stages/artifact_stages.py new file mode 100644 index 0000000000..b854c8017e --- /dev/null +++ b/pulpcore/plugin/stages/artifact_stages.py @@ -0,0 +1,286 @@
+import asyncio
+from gettext import gettext as _
+import logging
+
+from django.db.models import Q, Prefetch, prefetch_related_objects
+
+from pulpcore.plugin.models import Artifact, ContentArtifact, ProgressReport, RemoteArtifact
+
+from .api import Stage
+
+log = logging.getLogger(__name__)
+
+
+class QueryExistingArtifacts(Stage):
+    """
+    A Stages API stage that replaces :attr:`DeclarativeContent.content` objects with already-saved
+    :class:`~pulpcore.plugin.models.Artifact` objects.
+
+    This stage expects :class:`~pulpcore.plugin.stages.DeclarativeContent` units from `self._in_q`
+    and inspects their associated :class:`~pulpcore.plugin.stages.DeclarativeArtifact` objects. Each
+    :class:`~pulpcore.plugin.stages.DeclarativeArtifact` object stores one
+    :class:`~pulpcore.plugin.models.Artifact`.
+
+    This stage inspects any unsaved :class:`~pulpcore.plugin.models.Artifact` objects and searches
+    using their metadata for existing saved :class:`~pulpcore.plugin.models.Artifact` objects inside
+    Pulp with the same digest value(s).
+    Any existing :class:`~pulpcore.plugin.models.Artifact` objects found will replace their
+    unsaved counterpart in the :class:`~pulpcore.plugin.stages.DeclarativeArtifact` object.
+
+    Each :class:`~pulpcore.plugin.stages.DeclarativeContent` is sent to `self._out_q` after all of
+    its :class:`~pulpcore.plugin.stages.DeclarativeArtifact` objects have been handled.
+
+    This stage drains all available items from `self._in_q` and batches everything into one large
+    call to the db for efficiency.
+    """
+
+    async def run(self):
+        """
+        The coroutine for this stage.
+
+        Returns:
+            The coroutine for this stage.
+        """
+        async for batch in self.batches():
+            all_artifacts_q = Q(pulp_created=None)
+            for d_content in batch:
+                for d_artifact in d_content.d_artifacts:
+                    one_artifact_q = d_artifact.artifact.q()
+                    if one_artifact_q:
+                        all_artifacts_q |= one_artifact_q
+
+            for artifact in Artifact.objects.filter(all_artifacts_q):
+                for d_content in batch:
+                    for d_artifact in d_content.d_artifacts:
+                        for digest_name in artifact.DIGEST_FIELDS:
+                            digest_value = getattr(d_artifact.artifact, digest_name)
+                            if digest_value and digest_value == getattr(artifact, digest_name):
+                                d_artifact.artifact = artifact
+                                break
+            for d_content in batch:
+                await self.put(d_content)
+
+
+class ArtifactDownloader(Stage):
+    """
+    A Stages API stage to download :class:`~pulpcore.plugin.models.Artifact` files, without saving
+    the :class:`~pulpcore.plugin.models.Artifact` in the db.
+
+    This stage downloads the file for any :class:`~pulpcore.plugin.models.Artifact` objects missing
+    files and creates a new :class:`~pulpcore.plugin.models.Artifact` object from the downloaded
+    file and its digest data. The new :class:`~pulpcore.plugin.models.Artifact` is not saved but
+    added to the :class:`~pulpcore.plugin.stages.DeclarativeArtifact` object, replacing the likely
+    incomplete :class:`~pulpcore.plugin.models.Artifact`.
+
+    Each :class:`~pulpcore.plugin.stages.DeclarativeContent` is sent to `self._out_q` after all of
+    its :class:`~pulpcore.plugin.stages.DeclarativeArtifact` objects have been handled.
+
+    This stage creates a ProgressReport named 'Downloading Artifacts' that counts the number of
+    downloads completed. Since it's a stream, the total count isn't known until it's finished.
+
+    This stage drains all available items from `self._in_q` and starts as many downloaders as
+    possible (up to `download_concurrency` set on a Remote).
+
+    Args:
+        max_concurrent_content (int): The maximum number of
+            :class:`~pulpcore.plugin.stages.DeclarativeContent` instances to handle simultaneously.
+            Default is 200.
+        args: unused positional arguments passed along to :class:`~pulpcore.plugin.stages.Stage`.
+        kwargs: unused keyword arguments passed along to :class:`~pulpcore.plugin.stages.Stage`.
+    """
+
+    def __init__(self, max_concurrent_content=200, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.max_concurrent_content = max_concurrent_content
+
+    async def run(self):
+        """
+        The coroutine for this stage.
+
+        Returns:
+            The coroutine for this stage.
+        """
+        def _add_to_pending(coro):
+            nonlocal pending
+            task = asyncio.ensure_future(coro)
+            pending.add(task)
+            return task
+
+        #: (set): The set of unfinished tasks. Contains the content
+        # handler tasks and may contain `content_get_task`.
+        pending = set()
+
+        content_iterator = self.items()
+
+        #: (:class:`asyncio.Task`): The task that gets new content from `self._in_q`.
+        # Set to None when the stage shuts down.
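+        # The loop below keeps at most `max_concurrent_content` handler tasks in
+        # `pending`; whenever one finishes, a new content unit is fetched from
+        # `self._in_q` to fill the freed slot.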
+        content_get_task = _add_to_pending(content_iterator.__anext__())
+
+        with ProgressReport(message='Downloading Artifacts', code='downloading.artifacts') as pb:
+            try:
+                while pending:
+                    done, pending = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
+                    for task in done:
+                        if task is content_get_task:
+                            try:
+                                _add_to_pending(self._handle_content_unit(task.result()))
+                            except StopAsyncIteration:
+                                # previous stage is finished and we retrieved all
+                                # content instances: shutdown
+                                content_get_task = None
+                        else:
+                            pb.done += task.result()  # download_count
+                            pb.save()
+
+                    if content_get_task and content_get_task not in pending:  # not yet shutdown
+                        if len(pending) < self.max_concurrent_content:
+                            content_get_task = _add_to_pending(content_iterator.__anext__())
+            except asyncio.CancelledError:
+                # asyncio.wait does not cancel its tasks when cancelled, we need to do this
+                for future in pending:
+                    future.cancel()
+                raise
+
+    async def _handle_content_unit(self, d_content):
+        """Handle one content unit.
+
+        Returns:
+            The number of downloads
+        """
+        downloaders_for_content = [
+            d_artifact.download() for d_artifact in d_content.d_artifacts
+            if d_artifact.artifact._state.adding and
+            not d_artifact.deferred_download and
+            not d_artifact.artifact.file
+        ]
+        if downloaders_for_content:
+            await asyncio.gather(*downloaders_for_content)
+        await self.put(d_content)
+        return len(downloaders_for_content)
+
+
+class ArtifactSaver(Stage):
+    """
+    A Stages API stage that saves any unsaved :attr:`DeclarativeArtifact.artifact` objects.
+
+    This stage expects :class:`~pulpcore.plugin.stages.DeclarativeContent` units from `self._in_q`
+    and inspects their associated :class:`~pulpcore.plugin.stages.DeclarativeArtifact` objects. Each
+    :class:`~pulpcore.plugin.stages.DeclarativeArtifact` object stores one
+    :class:`~pulpcore.plugin.models.Artifact`.
+
+    Any unsaved :class:`~pulpcore.plugin.models.Artifact` objects are saved. Each
+    :class:`~pulpcore.plugin.stages.DeclarativeContent` is sent to `self._out_q` after all of its
+    :class:`~pulpcore.plugin.stages.DeclarativeArtifact` objects have been handled.
+
+    This stage drains all available items from `self._in_q` and batches everything into one large
+    call to the db for efficiency.
+    """
+
+    async def run(self):
+        """
+        The coroutine for this stage.
+
+        Returns:
+            The coroutine for this stage.
+        """
+        async for batch in self.batches():
+            da_to_save = []
+            for d_content in batch:
+                for d_artifact in d_content.d_artifacts:
+                    if d_artifact.artifact._state.adding and not d_artifact.deferred_download:
+                        d_artifact.artifact.file = str(d_artifact.artifact.file)
+                        da_to_save.append(d_artifact)
+
+            if da_to_save:
+                for d_artifact, artifact in zip(da_to_save, Artifact.objects.bulk_get_or_create(
+                        d_artifact.artifact for d_artifact in da_to_save)):
+                    d_artifact.artifact = artifact
+
+            for d_content in batch:
+                await self.put(d_content)
+
+
+class RemoteArtifactSaver(Stage):
+    """
+    A Stage that saves :class:`~pulpcore.plugin.models.RemoteArtifact` objects.
+
+    A :class:`~pulpcore.plugin.models.RemoteArtifact` object is saved for each
+    :class:`~pulpcore.plugin.stages.DeclarativeArtifact`.
+    """
+
+    async def run(self):
+        """
+        The coroutine for this stage.
+
+        Returns:
+            The coroutine for this stage.
+ """ + async for batch in self.batches(): + RemoteArtifact.objects.bulk_get_or_create(self._needed_remote_artifacts(batch)) + for d_content in batch: + await self.put(d_content) + + def _needed_remote_artifacts(self, batch): + """ + Build a list of only :class:`~pulpcore.plugin.models.RemoteArtifact` that need + to be created for the batch. + + Args: + batch (list): List of :class:`~pulpcore.plugin.stages.DeclarativeContent`. + + Returns: + List: Of :class:`~pulpcore.plugin.models.RemoteArtifact`. + """ + remotes_present = set() + for d_content in batch: + for d_artifact in d_content.d_artifacts: + if d_artifact.remote: + remotes_present.add(d_artifact.remote) + + prefetch_related_objects( + [d_c.content for d_c in batch], + Prefetch( + 'contentartifact_set', + queryset=ContentArtifact.objects.prefetch_related( + Prefetch( + 'remoteartifact_set', + queryset=RemoteArtifact.objects.filter(remote__in=remotes_present), + to_attr='_remote_artifact_saver_ras', + ) + ), + to_attr='_remote_artifact_saver_cas', + ), + ) + needed_ras = [] + for d_content in batch: + for content_artifact in d_content.content._remote_artifact_saver_cas: + for d_artifact in d_content.d_artifacts: + if d_artifact.relative_path == content_artifact.relative_path: + break + else: + msg = _('No declared artifact with relative path "{rp}" for content "{c}"') + raise ValueError(msg.format(rp=content_artifact.relative_path, + c=d_content.content)) + for remote_artifact in content_artifact._remote_artifact_saver_ras: + if remote_artifact.remote_id == d_artifact.remote.pk: + break + else: + if d_artifact.remote: + remote_artifact = self._create_remote_artifact(d_artifact, content_artifact) + needed_ras.append(remote_artifact) + return needed_ras + + @staticmethod + def _create_remote_artifact(d_artifact, content_artifact): + return RemoteArtifact( + url=d_artifact.url, + size=d_artifact.artifact.size, + md5=d_artifact.artifact.md5, + sha1=d_artifact.artifact.sha1, + sha224=d_artifact.artifact.sha224, + sha256=d_artifact.artifact.sha256, + sha384=d_artifact.artifact.sha384, + sha512=d_artifact.artifact.sha512, + content_artifact=content_artifact, + remote=d_artifact.remote, + ) diff --git a/pulpcore/plugin/stages/association_stages.py b/pulpcore/plugin/stages/association_stages.py new file mode 100644 index 0000000000..116c7c071b --- /dev/null +++ b/pulpcore/plugin/stages/association_stages.py @@ -0,0 +1,135 @@ +from django.db.models import Q + +from pulpcore.plugin.models import Content, ProgressReport + +from .api import Stage + + +class ContentAssociation(Stage): + """ + A Stages API stage that associates content units with `new_version`. + + This stage stores all content unit primary keys in memory before running. This is done to + compute the units already associated but not received from `self._in_q`. These units are passed + via `self._out_q` to the next stage as a :class:`django.db.models.query.QuerySet`. + + This stage creates a ProgressReport named 'Associating Content' that counts the number of units + associated. Since it's a stream the total count isn't known until it's finished. + + Args: + new_version (:class:`~pulpcore.plugin.models.RepositoryVersion`): The repo version this + stage associates content with. + args: unused positional arguments passed along to :class:`~pulpcore.plugin.stages.Stage`. + kwargs: unused keyword arguments passed along to :class:`~pulpcore.plugin.stages.Stage`. 
+ """ + + def __init__(self, new_version, *args, **kwargs): + super().__init__(*args, **kwargs) + self.new_version = new_version + + async def run(self): + """ + The coroutine for this stage. + + Returns: + The coroutine for this stage. + """ + with ProgressReport(message='Associating Content', code='associating.content') as pb: + to_delete = set(self.new_version.content.values_list('pk', flat=True)) + async for batch in self.batches(): + to_add = set() + for d_content in batch: + try: + to_delete.remove(d_content.content.pk) + except KeyError: + to_add.add(d_content.content.pk) + + if to_add: + self.new_version.add_content(Content.objects.filter(pk__in=to_add)) + pb.done = pb.done + len(to_add) + pb.save() + + if to_delete: + await self.put(Content.objects.filter(pk__in=to_delete)) + + +class ContentUnassociation(Stage): + """ + A Stages API stage that unassociates content units from `new_version`. + + This stage creates a ProgressReport named 'Un-Associating Content' that counts the number of + units un-associated. Since it's a stream the total count isn't known until it's finished. + + Args: + new_version (:class:`~pulpcore.plugin.models.RepositoryVersion`): The repo version this + stage unassociates content from. + args: unused positional arguments passed along to :class:`~pulpcore.plugin.stages.Stage`. + kwargs: unused keyword arguments passed along to :class:`~pulpcore.plugin.stages.Stage`. + """ + + def __init__(self, new_version, *args, **kwargs): + super().__init__(*args, **kwargs) + self.new_version = new_version + + async def run(self): + """ + The coroutine for this stage. + + Returns: + The coroutine for this stage. + """ + with ProgressReport(message='Un-Associating Content', code='unassociating.content') as pb: + async for queryset_to_unassociate in self.items(): + self.new_version.remove_content(queryset_to_unassociate) + pb.done = pb.done + queryset_to_unassociate.count() + pb.save() + + await self.put(queryset_to_unassociate) + + +class RemoveDuplicates(Stage): + """ + Stage allows plugins to remove content that would break repository uniqueness constraints. + + This stage is expected to be added by the + :class:`~pulpcore.plugin.stages.DeclarativeVersion`. See that class for example usage. + """ + + def __init__(self, new_version, model, field_names): + """ + Args: + new_version (:class:`~pulpcore.plugin.models.RepositoryVersion`): The repo version this + stage unassociates content from. + model (:class:`pulpcore.plugin.models.Content`): Subclass of a Content model to + indicate which content type to operate on. + field_names (list): List of field names to ensure uniqueness within a repository + version. + """ + self.new_version = new_version + self.model = model + self.field_names = field_names + + async def run(self): + """ + The coroutine for this stage. + + Returns: + The coroutine for this stage. + """ + async for batch in self.batches(): + rm_q = Q() + for d_content in batch: + if isinstance(d_content.content, self.model): + unit_q_dict = { + field: getattr(d_content.content, field) for field in self.field_names + } + # Don't remove *this* object if it is already in the repository version. 
+                    not_this = ~Q(pk=d_content.content.pk)
+                    dupe = Q(**unit_q_dict)
+                    rm_q |= Q(dupe & not_this)
+            if rm_q:
+                queryset_to_unassociate = self.model.objects.filter(rm_q)
+                self.new_version.remove_content(queryset_to_unassociate)
+
+            for d_content in batch:
+                await self.put(d_content)
diff --git a/pulpcore/plugin/stages/content_stages.py b/pulpcore/plugin/stages/content_stages.py
new file mode 100644
index 0000000000..3d3137cd25
--- /dev/null
+++ b/pulpcore/plugin/stages/content_stages.py
@@ -0,0 +1,184 @@
+from collections import defaultdict
+
+from django.db import IntegrityError, transaction
+from django.db.models import Q
+
+from pulpcore.plugin.models import ContentArtifact
+
+from .api import Stage
+
+
+class QueryExistingContents(Stage):
+    """
+    A Stages API stage that replaces unsaved :attr:`DeclarativeContent.content` objects with
+    matching Content units already saved in Pulp.
+
+    This stage expects :class:`~pulpcore.plugin.stages.DeclarativeContent` units from `self._in_q`
+    and inspects their associated :class:`~pulpcore.plugin.stages.DeclarativeArtifact` objects. Each
+    :class:`~pulpcore.plugin.stages.DeclarativeArtifact` object stores one
+    :class:`~pulpcore.plugin.models.Artifact`.
+
+    This stage inspects any "unsaved" Content unit objects and searches for existing saved Content
+    units inside Pulp with the same unit key. Any existing Content objects found replace their
+    "unsaved" counterpart in the :class:`~pulpcore.plugin.stages.DeclarativeContent` object.
+
+    Each :class:`~pulpcore.plugin.stages.DeclarativeContent` is sent to `self._out_q` after it has
+    been handled.
+
+    This stage drains all available items from `self._in_q` and batches everything into one large
+    call to the db for efficiency.
+    """
+
+    async def run(self):
+        """
+        The coroutine for this stage.
+
+        Returns:
+            The coroutine for this stage.
+        """
+        async for batch in self.batches():
+            content_q_by_type = defaultdict(lambda: Q(pulp_created=None))
+            for d_content in batch:
+                if d_content.content._state.adding:
+                    model_type = type(d_content.content)
+                    unit_q = d_content.content.q()
+                    content_q_by_type[model_type] = content_q_by_type[model_type] | unit_q
+
+            for model_type in content_q_by_type.keys():
+                for result in model_type.objects.filter(content_q_by_type[model_type]):
+                    for d_content in batch:
+                        if type(d_content.content) is not model_type:
+                            continue
+                        not_same_unit = False
+                        for field in result.natural_key_fields():
+                            in_memory_digest_value = getattr(d_content.content, field)
+                            if in_memory_digest_value != getattr(result, field):
+                                not_same_unit = True
+                                break
+                        if not_same_unit:
+                            continue
+                        d_content.content = result
+            for d_content in batch:
+                await self.put(d_content)
+
+
+class ContentSaver(Stage):
+    """
+    A Stages API stage that saves :attr:`DeclarativeContent.content` objects and their related
+    :class:`~pulpcore.plugin.models.ContentArtifact` objects too.
+
+    This stage expects :class:`~pulpcore.plugin.stages.DeclarativeContent` units from `self._in_q`
+    and inspects their associated :class:`~pulpcore.plugin.stages.DeclarativeArtifact` objects. Each
+    :class:`~pulpcore.plugin.stages.DeclarativeArtifact` object stores one
+    :class:`~pulpcore.plugin.models.Artifact`.
+
+    Each "unsaved" Content object is saved, and a :class:`~pulpcore.plugin.models.ContentArtifact`
+    object is saved for each of its declared artifacts too.
+
+    Each :class:`~pulpcore.plugin.stages.DeclarativeContent` is sent to `self._out_q` after it has
+    been handled.
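+
+    Example:
+        A hypothetical subclass using the :meth:`_pre_save` hook (a sketch only; `MyContentSaver`
+        and `adjust_related()` are illustrative names, not part of this API)::
+
+            class MyContentSaver(ContentSaver):
+                async def _pre_save(self, batch):
+                    # Runs inside the same transaction that saves the content units
+                    for d_content in batch:
+                        d_content.content.adjust_related()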
+
+    This stage drains all available items from `self._in_q` and batches everything into one large
+    call to the db for efficiency.
+    """
+
+    async def run(self):
+        """
+        The coroutine for this stage.
+
+        Returns:
+            The coroutine for this stage.
+        """
+        async for batch in self.batches():
+            content_artifact_bulk = []
+            with transaction.atomic():
+                await self._pre_save(batch)
+                for d_content in batch:
+                    # Are we saving to the database for the first time?
+                    content_already_saved = not d_content.content._state.adding
+                    if not content_already_saved:
+                        try:
+                            with transaction.atomic():
+                                d_content.content.save()
+                        except IntegrityError:
+                            d_content.content = \
+                                d_content.content.__class__.objects.get(
+                                    d_content.content.q())
+                            continue
+                        for d_artifact in d_content.d_artifacts:
+                            if not d_artifact.artifact._state.adding:
+                                artifact = d_artifact.artifact
+                            else:
+                                # set to None for on-demand synced artifacts
+                                artifact = None
+                            content_artifact = ContentArtifact(
+                                content=d_content.content,
+                                artifact=artifact,
+                                relative_path=d_artifact.relative_path
+                            )
+                            content_artifact_bulk.append(content_artifact)
+                ContentArtifact.objects.bulk_get_or_create(content_artifact_bulk)
+                await self._post_save(batch)
+            for declarative_content in batch:
+                await self.put(declarative_content)
+
+    async def _pre_save(self, batch):
+        """
+        A hook plugin-writers can override to save related objects prior to content unit saving.
+
+        This is run within the same transaction as the content unit saving.
+
+        Args:
+            batch (list of :class:`~pulpcore.plugin.stages.DeclarativeContent`): The batch of
+                :class:`~pulpcore.plugin.stages.DeclarativeContent` objects to be saved.
+
+        """
+        pass
+
+    async def _post_save(self, batch):
+        """
+        A hook plugin-writers can override to save related objects after content unit saving.
+
+        This is run within the same transaction as the content unit saving.
+
+        Args:
+            batch (list of :class:`~pulpcore.plugin.stages.DeclarativeContent`): The batch of
+                :class:`~pulpcore.plugin.stages.DeclarativeContent` objects to be saved.
+
+        """
+        pass
+
+
+class ResolveContentFutures(Stage):
+    """
+    This stage resolves the futures in :class:`~pulpcore.plugin.stages.DeclarativeContent`.
+
+    Each future is resolved with the found/created :class:`~pulpcore.plugin.models.Content`.
+
+    This is useful when data downloaded from the plugin API needs to be parsed by FirstStage to
+    create additional :class:`~pulpcore.plugin.stages.DeclarativeContent` objects to be sent down
+    the pipeline. Consider an example where content type `Foo` references additional instances of a
+    different content type `Bar`. Consider this code in FirstStage::
+
+        # Create d_content and d_artifact for `foo_a`
+        foo_a = DeclarativeContent(..., does_batch=False)
+        foo_a_future = foo_a.get_or_create_future()  # This is awaitable
+
+        ...
+
+        foo_a_content = await foo_a_future  # awaits until foo_a reaches this stage
+
+    This creates a "looping" pattern, of sorts, where downloaded content at the end of the pipeline
+    can introduce additional to-be-downloaded content at the beginning of the pipeline.
+    """
+
+    async def run(self):
+        """
+        The coroutine for this stage.
+
+        Returns:
+            The coroutine for this stage.
+ """ + async for d_content in self.items(): + if d_content.future is not None: + d_content.future.set_result(d_content.content) + await self.put(d_content) diff --git a/pulpcore/plugin/stages/declarative_version.py b/pulpcore/plugin/stages/declarative_version.py new file mode 100644 index 0000000000..2c61e72dfe --- /dev/null +++ b/pulpcore/plugin/stages/declarative_version.py @@ -0,0 +1,169 @@ +import asyncio + +from pulpcore.plugin.models import RepositoryVersion +from pulpcore.plugin.tasking import WorkingDirectory + +from .api import create_pipeline, EndStage +from .artifact_stages import ( + ArtifactDownloader, + ArtifactSaver, + QueryExistingArtifacts, + RemoteArtifactSaver, +) +from .association_stages import ContentAssociation, ContentUnassociation, RemoveDuplicates +from .content_stages import ContentSaver, QueryExistingContents, ResolveContentFutures + + +class DeclarativeVersion: + + def __init__(self, first_stage, repository, mirror=False, remove_duplicates=None): + """ + A pipeline that creates a new :class:`~pulpcore.plugin.models.RepositoryVersion` from a + stream of :class:`~pulpcore.plugin.stages.DeclarativeContent` objects. + + The plugin writer needs to specify a first_stage that will create a + :class:`~pulpcore.plugin.stages.DeclarativeContent` object for each Content unit that should + exist in the new :class:`~pulpcore.plugin.models.RepositoryVersion`. + + The pipeline stages perform the following steps by default: + + 1. Create the new :class:`~pulpcore.plugin.models.RepositoryVersion` + 2. Use the provided `first_stage` to construct + :class:`~pulpcore.plugin.stages.DeclarativeContent` + 3. Query existing artifacts to determine which are already local to Pulp with + :class:`~pulpcore.plugin.stages.QueryExistingArtifacts` + 4. Download any undownloaded :class:`~pulpcore.plugin.models.Artifact` objects with + :class:`~pulpcore.plugin.stages.ArtifactDownloader` + 5. Save the newly downloaded :class:`~pulpcore.plugin.models.Artifact` objects with + :class:`~pulpcore.plugin.stages.ArtifactSaver` + 6. Query for Content units already present in Pulp with + :class:`~pulpcore.plugin.stages.QueryExistingContents` + 7. Save new Content units not yet present in Pulp with + :class:`~pulpcore.plugin.stages.ContentSaver` + 8. Attach :class:`~pulpcore.plugin.models.RemoteArtifact` to the + :class:`~pulpcore.plugin.models.Content` via + :class:`~pulpcore.plugin.stages.RemoteArtifactSaver` + 9. Resolve the attached :class:`~asyncio.Future` of + :class:`~pulpcore.plugin.stages.DeclarativeContent` with + :class:`~pulpcore.plugin.stages.ResolveContentFutures` + 10. Remove duplicate content in the repository version if `remove_duplicates` is given by + :class:`~pulpcore.plugin.stages.RemoveDuplicates` + 11. Associate all content units with the new + :class:`~pulpcore.plugin.models.RepositoryVersion` with + :class:`~pulpcore.plugin.stages.ContentAssociation` + 12. Unassociate any content units not declared in the stream (only when mirror=True) + with :class:`~pulpcore.plugin.stages.ContentUnassociation` + + To do this, the plugin writer should subclass the + :class:`~pulpcore.plugin.stages.Stage` class and define its + :meth:`run()` interface which returns a coroutine. This coroutine should + download metadata, create the corresponding + :class:`~pulpcore.plugin.stages.DeclarativeContent` objects, and put them into the + :class:`asyncio.Queue` via :meth:`put()` to send them down the pipeline. 
For example:
+
+        >>> class MyFirstStage(Stage):
+        >>>
+        >>>     def __init__(self, remote):
+        >>>         self.remote = remote
+        >>>
+        >>>     async def run(self):
+        >>>         downloader = self.remote.get_downloader(url=self.remote.url)
+        >>>         result = await downloader.run()
+        >>>         for entry in read_my_metadata_file_somehow(result.path):
+        >>>             unit = MyContent(entry)  # make the content unit in memory-only
+        >>>             artifact = Artifact(entry)  # make Artifact in memory-only
+        >>>             da = DeclarativeArtifact(artifact, entry.url, entry.relative_path, self.remote)
+        >>>             dc = DeclarativeContent(content=unit, d_artifacts=[da])
+        >>>             await self.put(dc)
+
+        To use your first stage with the pipeline you have to instantiate the subclass and pass it
+        to :class:`~pulpcore.plugin.stages.DeclarativeVersion`.
+
+        1. Create the instance of the subclassed :class:`~pulpcore.plugin.stages.Stage` object.
+        2. Create the :class:`~pulpcore.plugin.stages.DeclarativeVersion` instance, passing the
+           :class:`~pulpcore.plugin.stages.Stage` subclass instance to it.
+        3. Call the :meth:`~pulpcore.plugin.stages.DeclarativeVersion.create` method on your
+           :class:`~pulpcore.plugin.stages.DeclarativeVersion` instance.
+
+        Here is an example:
+
+        >>> first_stage = MyFirstStage(remote)
+        >>> DeclarativeVersion(first_stage, repository).create()
+
+        Example using remove_duplicates:
+
+        # This will enforce that within a repository version, `FileContent.relative_path` is
+        # unique.
+        >>> remove_dupes = [{'model': FileContent, 'field_names': ['relative_path']}]
+        >>> DeclarativeVersion(first_stage, repository, remove_duplicates=remove_dupes).create()
+
+        Args:
+            first_stage (:class:`~pulpcore.plugin.stages.Stage`): The first stage to receive
+                :class:`~pulpcore.plugin.stages.DeclarativeContent` from.
+            repository (:class:`~pulpcore.plugin.models.Repository`): The repository receiving the
+                new version.
+            mirror (bool): 'True' removes content units from the
+                :class:`~pulpcore.plugin.models.RepositoryVersion` that are not
+                requested in the :class:`~pulpcore.plugin.stages.DeclarativeVersion` stream.
+                'False' (additive) only adds content units observed in the
+                :class:`~pulpcore.plugin.stages.DeclarativeVersion` stream, and does not remove any
+                pre-existing units in the :class:`~pulpcore.plugin.models.RepositoryVersion`.
+                'False' is the default.
+            remove_duplicates (list): A list of dictionaries that indicate objects which are
+                considered duplicates within a single repository version. These objects will be
+                removed from the new version, making room for the new objects passing through the
+                pipeline. Each dict should have 2 keys, `model`, which is a subclass of
+                :class:`pulpcore.plugin.models.Content` and `field_names` which is a list of
+                strings corresponding to fields on the provided model.
+
+        """
+        self.first_stage = first_stage
+        self.repository = repository
+        self.mirror = mirror
+        self.remove_duplicates = remove_duplicates or []
+
+    def pipeline_stages(self, new_version):
+        """
+        Build the list of pipeline stages feeding into the ContentAssociation stage.
+
+        Plugin-writers may override this method to build a custom pipeline. This
+        can be achieved by returning a list with different stages or by extending
+        the list returned by this method.
+
+        Args:
+            new_version (:class:`~pulpcore.plugin.models.RepositoryVersion`): The
+                new repository version that is going to be built.
+ + Returns: + list: List of :class:`~pulpcore.plugin.stages.Stage` instances + + """ + pipeline = [ + self.first_stage, + QueryExistingArtifacts(), + ArtifactDownloader(), + ArtifactSaver(), + QueryExistingContents(), + ContentSaver(), + RemoteArtifactSaver(), + ResolveContentFutures(), + ] + for dupe_query_dict in self.remove_duplicates: + pipeline.extend([RemoveDuplicates(new_version, **dupe_query_dict)]) + + return pipeline + + def create(self): + """ + Perform the work. This is the long-blocking call where all syncing occurs. + """ + with WorkingDirectory(): + with RepositoryVersion.create(self.repository) as new_version: + loop = asyncio.get_event_loop() + stages = self.pipeline_stages(new_version) + stages.append(ContentAssociation(new_version)) + if self.mirror: + stages.append(ContentUnassociation(new_version)) + stages.append(EndStage()) + pipeline = create_pipeline(stages) + loop.run_until_complete(pipeline) diff --git a/pulpcore/plugin/stages/models.py b/pulpcore/plugin/stages/models.py new file mode 100644 index 0000000000..057a23dcc8 --- /dev/null +++ b/pulpcore/plugin/stages/models.py @@ -0,0 +1,144 @@ +from gettext import gettext as _ + +import asyncio + +from pulpcore.plugin.models import Artifact + + +class DeclarativeArtifact: + """ + Relates an :class:`~pulpcore.plugin.models.Artifact`, how to download it, and its + `relative_path` used later during publishing. + + This is used by the Stages API stages to determine if an + :class:`~pulpcore.plugin.models.Artifact` is already present and ensure Pulp can download it in + the future. The `artifact` can be either saved or unsaved. If unsaved, the `artifact` attributes + may be incomplete because not all digest information can be computed until the + :class:`~pulpcore.plugin.models.Artifact` is downloaded. + + Attributes: + artifact (:class:`~pulpcore.plugin.models.Artifact`): An + :class:`~pulpcore.plugin.models.Artifact` either saved or unsaved. If unsaved, it + may have partial digest information attached to it. + url (str): the url to fetch the :class:`~pulpcore.plugin.models.Artifact` from. + relative_path (str): the relative_path this :class:`~pulpcore.plugin.models.Artifact` + should be published at for any Publication. + remote (:class:`~pulpcore.plugin.models.Remote`): The remote used to fetch this + :class:`~pulpcore.plugin.models.Artifact`. + extra_data (dict): A dictionary available for additional data to be stored in. + deferred_download (bool): Whether this artifact should be downloaded and saved + in the artifact stages. Defaults to `False`. See :ref:`on-demand-support`. + + Raises: + ValueError: If `artifact`, `url`, or `relative_path` are not specified. If `remote` is not + specified and `artifact` doesn't have a file. 
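+
+    Example:
+        A minimal sketch of constructing one in a first stage; `entry` and `remote` here are
+        assumed to come from the plugin's own metadata parsing, not from this API::
+
+            artifact = Artifact(size=entry.size, sha256=entry.sha256)  # unsaved, digests partial
+            da = DeclarativeArtifact(
+                artifact=artifact,
+                url=entry.url,
+                relative_path=entry.relative_path,
+                remote=remote,
+            )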
+ """ + + __slots__ = ('artifact', 'url', 'relative_path', 'remote', + 'extra_data', 'deferred_download') + + def __init__(self, artifact=None, url=None, relative_path=None, remote=None, extra_data=None, + deferred_download=False): + if not url: + raise ValueError(_("DeclarativeArtifact must have a 'url'")) + if not relative_path: + raise ValueError(_("DeclarativeArtifact must have a 'relative_path'")) + if not artifact: + raise ValueError(_("DeclarativeArtifact must have a 'artifact'")) + if not remote and not artifact.file: + raise ValueError(_("DeclarativeArtifact must have a 'remote' if the Artifact doesn't " + "have a file backing it.")) + self.artifact = artifact + self.url = url + self.relative_path = relative_path + self.remote = remote + self.extra_data = extra_data or {} + self.deferred_download = deferred_download + + async def download(self): + """ + Download content and update the associated Artifact. + + Returns: + Returns the :class:`~pulpcore.plugin.download.DownloadResult` of the Artifact. + """ + expected_digests = {} + validation_kwargs = {} + for digest_name in self.artifact.DIGEST_FIELDS: + digest_value = getattr(self.artifact, digest_name) + if digest_value: + expected_digests[digest_name] = digest_value + if expected_digests: + validation_kwargs['expected_digests'] = expected_digests + if self.artifact.size: + expected_size = self.artifact.size + validation_kwargs['expected_size'] = expected_size + downloader = self.remote.get_downloader( + url=self.url, + **validation_kwargs + ) + # Custom downloaders may need extra information to complete the request. + download_result = await downloader.run(extra_data=self.extra_data) + self.artifact = Artifact( + **download_result.artifact_attributes, + file=download_result.path + ) + return download_result + + +class DeclarativeContent: + """ + Relates a Content unit and zero or more :class:`~pulpcore.plugin.stages.DeclarativeArtifact` + objects. + + This is used by the Stages API stages to determine if a Content unit is already present and + ensure all of its associated :class:`~pulpcore.plugin.stages.DeclarativeArtifact` objects are + related correctly. The `content` can be either saved or unsaved depending on where in the Stages + API pipeline this is used. + + Attributes: + content (subclass of :class:`~pulpcore.plugin.models.Content`): A Content unit, possibly + unsaved + d_artifacts (list): A list of zero or more + :class:`~pulpcore.plugin.stages.DeclarativeArtifact` objects associated with `content`. + extra_data (dict): A dictionary available for additional data to be stored in. + does_batch (bool): If `False`, prevent batching mechanism to block this item. + Defaults to `True`. + future (:class:`~asyncio.Future`): A future that gets resolved to the + :class:`~pulpcore.plugin.models.Content` in the + :class:`~pulpcore.plugin.stages.ResolveContentFutures` stage. See the + :class:`~pulpcore.plugin.stages.ResolveContentFutures` stage for example usage. + + Raises: + ValueError: If `content` is not specified. + """ + + __slots__ = ('content', 'd_artifacts', 'extra_data', 'does_batch', 'future') + + def __init__(self, content=None, d_artifacts=None, extra_data=None, does_batch=True): + if not content: + raise ValueError(_("DeclarativeContent must have a 'content'")) + self.content = content + self.d_artifacts = d_artifacts or [] + self.extra_data = extra_data or {} + self.does_batch = does_batch + self.future = None + + def get_or_create_future(self): + """ + Return the existing or a new future. 
+
+        If you rely on this future in the course of the pipeline, consider clearing the
+        `does_batch` attribute to prevent deadlocks.
+        See the :class:`~pulpcore.plugin.stages.ResolveContentFutures` stage for example usage.
+
+        Returns:
+            An existing :class:`asyncio.Future` or a newly created one.
+        """
+        if self.future is None:
+            # If on 3.7, we could preferably use get_running_loop()
+            self.future = asyncio.get_event_loop().create_future()
+        return self.future
+
+    def __str__(self):
+        return str(self.content.__class__.__name__)
diff --git a/pulpcore/plugin/stages/profiler.py b/pulpcore/plugin/stages/profiler.py
new file mode 100644
index 0000000000..93ae41d9d1
--- /dev/null
+++ b/pulpcore/plugin/stages/profiler.py
@@ -0,0 +1,166 @@
+from asyncio import Queue
+import pathlib
+import time
+import uuid
+
+from rq.job import get_current_job
+
+from pulpcore.tasking import connection
+
+
+CONN = None
+
+
+class ProfilingQueue(Queue):
+    """
+    A customized subclass of asyncio.Queue that records time in the queue and between queues.
+
+    This Profiler records some data on items that are inserted and removed from Queues. This data is
+    stored on items in a dictionary attribute called 'extra_data'. If this attribute does not exist
+    on an item, the ProfilingQueue adds it.
+
+    The following statistics are computed for each Queue and the stage that it feeds into:
+
+    * waiting time - The number of seconds an item waited in the Queue for this stage.
+    * service time - The number of seconds an item received service in this stage.
+    * queue_length - The number of waiting items in the queue, measured before each new arrival.
+    * interarrival_time - The number of seconds since the previous arrival to this Queue.
+
+    See the :meth:`create_profile_db_and_connection()` docs for more info on the database tables and
+    layout.
+
+    Args:
+        stage_uuid (uuid.UUID): The uuid of the stage this ProfilingQueue delivers work into.
+        args (tuple): unused positional arguments
+        kwargs (dict): unused keyword arguments
+    """
+
+    def __init__(self, stage_uuid, *args, **kwargs):
+        self.last_arrival_time = time.time()
+        self.stage_uuid = stage_uuid
+        return super().__init__(*args, **kwargs)
+
+    def get_nowait(self):
+        """
+        Thinly wrap `asyncio.get_nowait` and record when get_nowait() operations happen.
+        """
+        item = super().get_nowait()
+        if item:
+            now = time.time()
+            item.extra_data['last_waiting_time'] = now - item.extra_data['lastput_time']
+            item.extra_data['last_get_time'] = now
+        return item
+
+    def put_nowait(self, item):
+        """
+        Thinly wrap `asyncio.put_nowait` and write statistics about put_nowait() operations to the
+        sqlite3 DB.
+
+        This method computes and writes the following statistics: waiting time, service time, queue
+        length, and interarrival time.
+ """ + if item: + now = time.time() + if not hasattr(item, 'extra_data'): + # track stages that use QuerySet items too + item.extra_data = {} + try: + last_waiting_time = item.extra_data['last_waiting_time'] + except KeyError: + pass + else: + service_time = now - item.extra_data['last_get_time'] + sql = "INSERT INTO traffic (uuid, waiting_time, service_time) VALUES (" \ + "'{uuid}','{waiting_time}','{service_time}')" + formatted_sql = sql.format( + uuid=self.stage_uuid, waiting_time=last_waiting_time, service_time=service_time + ) + CONN.cursor().execute(formatted_sql) + + interarrival_time = now - self.last_arrival_time + sql = "INSERT INTO system (uuid, length, interarrival_time) VALUES (" \ + "'{uuid}','{length}','{interarrival}')" + formatted_sql = sql.format( + uuid=self.stage_uuid, length=super().qsize(), interarrival=interarrival_time + ) + CONN.cursor().execute(formatted_sql) + CONN.commit() + + item.extra_data['lastput_time'] = now + self.last_arrival_time = now + return super().put_nowait(item) + + @staticmethod + def make_and_record_queue(stage, num, maxsize): + """ + Create a ProfileQueue that is associated with the stage it feeds and record it in sqlite3. + + Args: + stage (uuid.UUID): The uuid of this stage for correlation with other table data. + num: (int): The number in the pipeline this stage is at, starting from 0, 1, etc. + maxsize: The `maxsize` parameter being used to configure the ProfilingQueue with. + + Returns: + ProfilingQueue: The configured ProfilingQueue that was also recorded in the db. + """ + if CONN is None: + create_profile_db_and_connection() + stage_id = uuid.uuid4() + stage_name = '.'.join([stage.__class__.__module__, stage.__class__.__name__]) + sql = "INSERT INTO stages (uuid, name, num) VALUES (" \ + "'{uuid}','{stage}','{num}')" + formatted_sql = sql.format( + uuid=stage_id, stage=stage_name, num=num) + CONN.cursor().execute(formatted_sql) + in_q = ProfilingQueue(stage_id, maxsize=maxsize) + CONN.commit() + return in_q + + +def create_profile_db_and_connection(): + """ + Create a profile db from this tasks UUID and a sqlite3 connection to that databases. + + The database produced has three tables with the following SQL format: + + The `stages` table stores info about the pipeline itself and stores 3 fields + * uuid - the uuid of the stage + * name - the name of the stage + * num - the number of the stage starting at 0 + + The `traffic` table stores 3 fields: + * uuid - the uuid of the stage this queue feeds into + * waiting_time - the amount of time the item is waiting in the queue before it enters the stage. + * service_time - the service time the item spent in the stage. + + The `system` table stores 3 fields: + * uuid - The uuid of stage this queue feeds into + * length - The length of items in this queue, measured just before each arrival. + * interarrival_time - The amount of time since the last arrival. 
+ """ + debug_data_dir = "/var/lib/pulp/debug/" + pathlib.Path(debug_data_dir).mkdir(parents=True, exist_ok=True) + redis_conn = connection.get_redis_connection() + current_job = get_current_job(connection=redis_conn) + if current_job: + db_path = debug_data_dir + current_job.id + else: + db_path = debug_data_dir + uuid.uuid4() + + import sqlite3 + global CONN + CONN = sqlite3.connect(db_path) + c = CONN.cursor() + + # Create table + c.execute('''CREATE TABLE stages + (uuid varchar(36), name text, num int)''') + + # Create table + c.execute('''CREATE TABLE traffic + (uuid varchar(36), waiting_time real, service_time real)''') + + # Create table + c.execute('''CREATE TABLE system + (uuid varchar(36), length int, interarrival_time real)''') + + return CONN diff --git a/pulpcore/plugin/storage.py b/pulpcore/plugin/storage.py new file mode 100644 index 0000000000..3517d0b503 --- /dev/null +++ b/pulpcore/plugin/storage.py @@ -0,0 +1,28 @@ +import os + +from pulpcore.app.apps import get_plugin_config + +# Include in the API +from pulpcore.app.models.storage import get_tls_path # noqa + + +def get_plugin_storage_path(plugin_app_label): + """ + Returns the path to the plugin's storage + + An interface for finding the path to a plugin's persistent storage location. It is + designed to be used by plugins that need to store more than just + :class:`~pulpcore.plugin.models.Artifact` models. + + Args: + plugin_app_label (str): Django app label of the pulp plugin + + Returns: + String containing the absolute path to the plugin's storage on the filesystem. + + Raises: + :class:`~pulpcore.exceptions.plugin.MissingPlugin`: When plugin with the requested app + label is not installed. + """ + get_plugin_config(plugin_app_label) + return os.path.join('/var/lib/pulp/shared', plugin_app_label, '') diff --git a/pulpcore/plugin/tasking.py b/pulpcore/plugin/tasking.py new file mode 100644 index 0000000000..0c4cda3699 --- /dev/null +++ b/pulpcore/plugin/tasking.py @@ -0,0 +1,5 @@ +# Support plugins dispatching tasks +from pulpcore.tasking.tasks import enqueue_with_reservation # noqa + +# Support plugins working with the working directory. 
+from pulpcore.tasking.services.storage import WorkingDirectory  # noqa
diff --git a/pulpcore/plugin/viewsets/__init__.py b/pulpcore/plugin/viewsets/__init__.py
new file mode 100644
index 0000000000..87cc57d0d4
--- /dev/null
+++ b/pulpcore/plugin/viewsets/__init__.py
@@ -0,0 +1,23 @@
+# Allow plugin viewsets to return 202s
+from pulpcore.app.response import OperationPostponedResponse  # noqa
+
+# Import Viewsets in platform that are potentially useful to plugin writers
+from pulpcore.app.viewsets import (  # noqa
+    BaseDistributionViewSet,
+    BaseFilterSet,
+    ContentFilter,
+    ContentGuardFilter,
+    ContentGuardViewSet,
+    ContentViewSet,
+    NamedModelViewSet,
+    PublicationViewSet,
+    PublisherViewSet,
+    ReadOnlyContentViewSet,
+    RemoteFilter,
+    RemoteViewSet,
+)
+
+# Import custom filters that are potentially useful to plugin writers
+from pulpcore.app.viewsets.custom_filters import CharInFilter, HyperlinkRelatedFilter  # noqa
+
+from .content import SingleArtifactContentUploadViewSet  # noqa
diff --git a/pulpcore/plugin/viewsets/content.py b/pulpcore/plugin/viewsets/content.py
new file mode 100644
index 0000000000..6e7868d4b5
--- /dev/null
+++ b/pulpcore/plugin/viewsets/content.py
@@ -0,0 +1,70 @@
+from drf_yasg.utils import swagger_auto_schema
+
+from django.db.utils import IntegrityError
+
+from pulpcore.app import tasks
+from pulpcore.plugin.serializers import (
+    ArtifactSerializer,
+    AsyncOperationResponseSerializer,
+)
+from pulpcore.plugin.models import Artifact
+from pulpcore.plugin.tasking import enqueue_with_reservation
+from pulpcore.plugin.viewsets import (
+    ContentViewSet,
+    OperationPostponedResponse,
+)
+
+
+class SingleArtifactContentUploadViewSet(ContentViewSet):
+    """A base ContentViewSet with added upload functionality."""
+
+    def get_deferred_context(self, request):
+        """Supply context for deferred validation.
+
+        When overriding this method, it must return a dict that is serializable by rq
+        and does _not_ contain 'request' as a key.
+        """
+        return {}
+
+    @swagger_auto_schema(
+        operation_description="Trigger an asynchronous task to create content, "
+        "optionally creating a new repository version.",
+        responses={202: AsyncOperationResponseSerializer},
+    )
+    def create(self, request):
+        """
+        Create a content unit.
+ """ + serializer = self.get_serializer(data=request.data) + serializer.is_valid(raise_exception=True) + artifact = serializer.validated_data["artifact"] + repository = serializer.validated_data.get("repository") + + task_payload = {k: v for k, v in request.data.items()} + if task_payload.pop("file", None): + # in the upload code path make sure, the artifact exists, and the 'file' + # parameter is replaced by 'artifact' + try: + artifact.save() + except IntegrityError: + # if artifact already exists, let's use it + artifact = Artifact.objects.get(sha256=artifact.sha256) + task_payload["artifact"] = ArtifactSerializer( + artifact, context={"request": request} + ).data["pulp_href"] + + shared_resources = [artifact] + if repository: + shared_resources.append(repository) + + app_label = self.queryset.model._meta.app_label + async_result = enqueue_with_reservation( + tasks.base.general_create, + shared_resources, + args=(app_label, serializer.__class__.__name__), + kwargs={ + "data": task_payload, + "context": self.get_deferred_context(request), + }, + ) + return OperationPostponedResponse(async_result, request) diff --git a/pulpcore/stages/__init__.py b/pulpcore/stages/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/pulpcore/stages/test_artifactdownloader.py b/pulpcore/stages/test_artifactdownloader.py new file mode 100644 index 0000000000..ddd464e3b4 --- /dev/null +++ b/pulpcore/stages/test_artifactdownloader.py @@ -0,0 +1,364 @@ +import asyncio + +import asynctest +from unittest import mock +from uuid import uuid4 + +from pulpcore.plugin.stages import DeclarativeContent, DeclarativeArtifact +from pulpcore.plugin.stages.artifact_stages import ArtifactDownloader + + +class MockException(Exception): + """ + A tracer exception. + """ + pass + + +class DownloaderMock: + """Mock for a Downloader. + + URLs are expected to be the delay to wait to simulate downloading, + e.g `url='5'` will wait for 5 seconds. Negative numbers will raise + an exception after waiting for the absolute value, e.g. `url=-5` fails + after 5 seconds. `DownloaderMock` manages _global_ statistics about the + downloads. + """ + running = 0 + downloads = 0 + canceled = 0 + + def __init__(self, url, **kwargs): + self.url = url + + @classmethod + def reset(cls): + cls.running = 0 + cls.downloads = 0 + cls.canceled = 0 + + async def run(self, extra_data=None): + DownloaderMock.running += 1 + try: + duration = int(self.url) + await asyncio.sleep(abs(duration)) + if duration < 0: + raise MockException("Download Failed") + except asyncio.CancelledError: + DownloaderMock.running -= 1 + DownloaderMock.canceled += 1 + raise + DownloaderMock.running -= 1 + DownloaderMock.downloads += 1 + result = mock.Mock() + result.url = self.url + result.artifact_attributes = {} + return result + + +class TestArtifactDownloader(asynctest.ClockedTestCase): + + def setUp(self): + super().setUp() + DownloaderMock.reset() + self.now = 0 + self.in_q = asyncio.Queue() + self.out_q = asyncio.Queue() + + async def advance_to(self, now): + delta = now - self.now + assert delta >= 0 + await self.advance(delta) + self.now = now + + async def advance(self, delta): + await super().advance(delta) + self.now += delta + + def queue_dc(self, delays=[], artifact_path=None): + """Put a DeclarativeContent instance into `in_q` + + For each `delay` in `delays`, associate a DeclarativeArtifact + with download duration `delay` to the content unit. 
+        `delay == None` means that the artifact is already present (pk is set)
+        and no download is required. `artifact_path != None` means that the
+        Artifact already has a file associated with it and a download does not
+        need to be scheduled.
+        """
+        das = []
+        for delay in delays:
+            artifact = mock.Mock()
+            artifact.pk = uuid4()
+            artifact._state.adding = delay is not None
+            artifact.DIGEST_FIELDS = []
+            artifact.file = artifact_path
+            remote = mock.Mock()
+            remote.get_downloader = DownloaderMock
+            das.append(DeclarativeArtifact(artifact=artifact, url=str(delay),
+                                           relative_path='path', remote=remote))
+        dc = DeclarativeContent(content=mock.Mock(), d_artifacts=das)
+        self.in_q.put_nowait(dc)
+
+    async def download_task(self, max_concurrent_content=3):
+        """
+        A coroutine running the downloader stage with a mocked ProgressReport.
+
+        Returns:
+            The done count of the ProgressReport.
+        """
+        with mock.patch('pulpcore.plugin.stages.artifact_stages.ProgressReport') as pb:
+            pb.return_value.__enter__.return_value.done = 0
+            ad = ArtifactDownloader(max_concurrent_content=max_concurrent_content)
+            ad._connect(self.in_q, self.out_q)
+            await ad()
+            return pb.return_value.__enter__.return_value.done
+
+    def assertQueued(self, num):
+        self.assertEqual(self.in_q.qsize(), num)
+
+    def assertHandled(self, num):
+        self.assertEqual(self.out_q.qsize(), num)
+
+    async def test_downloads(self):
+        download_task = self.loop.create_task(self.download_task())
+
+        # Create 28 content units, every third one must be downloaded.
+        # The downloads take 0, 3, 6,..., 27 seconds; content units
+        # 1, 2, 4, 5, ..., 26 do not need downloads.
+        for i in range(28):
+            self.queue_dc(delays=[i if not i % 3 else None])
+        self.in_q.put_nowait(None)
+
+        # At 0.5 seconds
+        await self.advance_to(0.5)
+        # 3, 6 and 9 are running. 0 is finished
+        self.assertEqual(DownloaderMock.running, 3)
+        # non-downloads 1, 2, 4, 5, 7, 8 are forwarded
+        self.assertHandled(7)
+        # 10 - 27 + None are waiting to be picked up
+        self.assertQueued(19)
+
+        # Three downloads run in parallel. The most asymmetric way
+        # to schedule the remaining downloads is:
+        # 3 + 12 + 21: finished after 36 seconds
+        # 6 + 15 + 24: finished after 45 seconds
+        # 9 + 18 + 27: finished after 54 seconds
+        for t in range(1, 36):  # until 35.5 seconds three downloads must run
+            await self.advance_to(t + 0.5)
+            self.assertEqual(DownloaderMock.running, 3)
+
+        # At 54.5 seconds, the stage is done at the latest
+        await self.advance_to(54.5)
+        self.assertEqual(DownloaderMock.running, 0)
+        self.assertEqual(DownloaderMock.downloads, 10)
+        self.assertEqual(download_task.result(), DownloaderMock.downloads)
+        self.assertQueued(0)
+        self.assertHandled(29)
+
+    async def test_multi_artifact_downloads(self):
+        # Content units should fill the slots like
+        #
+        #  0   1   2   3  s
+        #  .   .   .   .
+        #  +---+-------+
+        #  | 1 |   4   |
+        #  +---+---+---+
+        #  |   2   |
+        #  +-------+
+        #  |   3   |
+        #  +-------+
+        #
+        download_task = self.loop.create_task(self.download_task())
+        self.queue_dc(delays=[])  # must be forwarded to next stage immediately
+        self.queue_dc(delays=[1])
+        self.queue_dc(delays=[2, 2])
+        self.queue_dc(delays=[2])
+        self.queue_dc(delays=[2, None])  # schedules only one download
+        self.in_q.put_nowait(None)
+        # At 0.5 seconds, three content units are downloading with four
+        # downloads overall
+        await self.advance_to(0.5)
+        self.assertEqual(DownloaderMock.running, 4)
+        self.assertHandled(1)
+        # At 1.5 seconds, the download for the first content unit has completed.
+        # At 1 second, the download of the fourth content unit is started
+        await self.advance_to(1.5)
+        self.assertEqual(DownloaderMock.running, 4)
+        self.assertHandled(2)
+        # At 2.5 seconds, the downloads for the second and the third content unit
+        # have completed
+        await self.advance_to(2.5)
+        self.assertEqual(DownloaderMock.running, 1)
+        self.assertHandled(4)
+
+        # At 3.5 seconds, the stage must be done
+        await self.advance_to(3.5)
+        self.assertEqual(DownloaderMock.running, 0)
+        self.assertEqual(DownloaderMock.downloads, 5)
+        self.assertEqual(download_task.result(), DownloaderMock.downloads)
+        self.assertQueued(0)
+        self.assertHandled(6)
+
+    async def test_sparse_batches_dont_block_stage(self):
+        """Regression test for issue https://pulp.plan.io/issues/4018."""
+
+        def queue_content_with_a_single_download(batchsize=100, delay=100):
+            """
+            Queue a batch of `batchsize` declarative_content instances. Only the
+            first one triggers a download of duration `delay`.
+            """
+            self.queue_dc(delays=[delay])
+            for i in range(batchsize - 1):
+                self.queue_dc([None])
+
+        download_task = self.loop.create_task(self.download_task())
+
+        queue_content_with_a_single_download()
+
+        # At 0.5 seconds, the first content unit is downloading
+        await self.advance_to(0.5)
+        self.assertEqual(DownloaderMock.running, 1)
+        self.assertHandled(99)
+
+        # at 0.5 seconds next batch arrives (last batch)
+        queue_content_with_a_single_download()
+        self.in_q.put_nowait(None)
+
+        # at 1.0 seconds, two downloads are running
+        await self.advance_to(1)
+        self.assertEqual(DownloaderMock.running, 2)
+        self.assertHandled(2 * 99)
+
+        # at 101 seconds, the stage should have completed
+        await self.advance_to(101)
+
+        self.assertEqual(DownloaderMock.running, 0)
+        self.assertEqual(DownloaderMock.downloads, 2)
+        self.assertEqual(download_task.result(), DownloaderMock.downloads)
+        self.assertQueued(0)
+        self.assertHandled(201)
+
+    async def test_cancel(self):
+        download_task = self.loop.create_task(self.download_task())
+        for i in range(4):
+            self.queue_dc(delays=[100])
+        self.in_q.put_nowait(None)
+
+        # After 0.5 seconds, the three downloads must have started
+        await self.advance_to(0.5)
+        self.assertEqual(DownloaderMock.running, 3)
+
+        download_task.cancel()
+
+        await self.advance_to(1.0)
+
+        with self.assertRaises(asyncio.CancelledError):
+            download_task.result()
+        self.assertEqual(DownloaderMock.running, 0)
+        self.assertEqual(DownloaderMock.canceled, 3)
+
+    async def test_exception_with_empty_in_q(self):
+        download_task = self.loop.create_task(self.download_task())
+
+        # Create three content units with one download each, followed by one throwing an exception.
+        self.queue_dc(delays=[1])
+        self.queue_dc(delays=[2])
+        self.queue_dc(delays=[2])
+        self.queue_dc(delays=[-1])
+
+        # At 0.5 seconds
+        await self.advance_to(0.5)
+        # 3 downloads are running. No unit is finished
+        self.assertEqual(DownloaderMock.running, 3)
+        self.assertHandled(0)
+        self.assertQueued(1)
+
+        # At 1.5 seconds
+        await self.advance_to(1.5)
+        # 3 downloads are running. One unit is finished.
+        self.assertEqual(DownloaderMock.running, 3)
+        self.assertHandled(1)
+        self.assertQueued(0)
+
+        # At 2.5 seconds, the exception must have been triggered
+        await self.advance_to(2.5)
+        self.assertTrue(download_task.done())
+        self.assertIsInstance(download_task.exception(), MockException)
+
+    async def test_exception_finished_in_q(self):
+        download_task = self.loop.create_task(self.download_task())
+
+        # Create three content units with one download each, followed by one throwing an exception.
+        self.queue_dc(delays=[1])
+        self.queue_dc(delays=[2])
+        self.queue_dc(delays=[2])
+        self.queue_dc(delays=[-1])
+        await self.in_q.put(None)
+
+        # At 0.5 seconds
+        await self.advance_to(0.5)
+        # 3 downloads are running. No unit is finished
+        self.assertEqual(DownloaderMock.running, 3)
+        self.assertHandled(0)
+        self.assertQueued(2)
+
+        # At 1.5 seconds
+        await self.advance_to(1.5)
+        # 3 downloads are running. One unit is finished.
+        self.assertEqual(DownloaderMock.running, 3)
+        self.assertHandled(1)
+        self.assertQueued(1)
+
+        # At 2.5 seconds, the exception must have been triggered
+        await self.advance_to(2.5)
+        self.assertTrue(download_task.done())
+        self.assertIsInstance(download_task.exception(), MockException)
+
+    async def test_exception_with_saturated_content_slots(self):
+        download_task = self.loop.create_task(self.download_task())
+
+        # Create three content units with one download each, followed by one throwing an exception.
+        self.queue_dc(delays=[1])
+        self.queue_dc(delays=[3])
+        self.queue_dc(delays=[3])
+        self.queue_dc(delays=[-1])
+        self.queue_dc(delays=[1])  # This unit will be waiting for a free slot
+
+        # At 0.5 seconds
+        await self.advance_to(0.5)
+        # 3 downloads are running. No unit is finished
+        self.assertEqual(DownloaderMock.running, 3)
+        self.assertHandled(0)
+        self.assertQueued(2)
+
+        # At 1.5 seconds
+        await self.advance_to(1.5)
+        # 3 downloads are running. One unit is finished.
+        self.assertEqual(DownloaderMock.running, 3)
+        self.assertHandled(1)
+        self.assertQueued(1)
+
+        # At 2.5 seconds, the exception must have been triggered
+        await self.advance_to(2.5)
+        self.assertTrue(download_task.done())
+        self.assertIsInstance(download_task.exception(), MockException)
+
+    async def test_download_artifact_with_file(self):
+        download_task = self.loop.create_task(self.download_task())
+
+        # Queue a content unit whose 3 Artifacts already have a file
+        self.queue_dc(delays=[1, 2, 3], artifact_path='/foo/bar')
+        # Queue a content unit whose 3 Artifacts don't have a file
+        self.queue_dc(delays=[1, 2, 3], artifact_path=None)
+        self.in_q.put_nowait(None)
+
+        # At 0.5 seconds only 3 should be running and 0 done
+        await self.advance_to(0.5)
+        self.assertEqual(DownloaderMock.downloads, 0)
+        self.assertEqual(DownloaderMock.running, 3)
+
+        # At 3 seconds all 3 should be done
+        await self.advance_to(3)
+        self.assertEqual(DownloaderMock.downloads, 3)
+        self.assertEqual(DownloaderMock.running, 0)
+        self.assertEqual(download_task.result(), DownloaderMock.downloads)
diff --git a/pulpcore/stages/test_stages.py b/pulpcore/stages/test_stages.py
new file mode 100644
index 0000000000..c1705cf42f
--- /dev/null
+++ b/pulpcore/stages/test_stages.py
@@ -0,0 +1,124 @@
+import asyncio
+
+import asynctest
+import mock
+
+from pulpcore.plugin.stages import Stage, EndStage
+
+
+class TestStage(asynctest.TestCase):
+
+    def setUp(self):
+        self.in_q = asyncio.Queue()
+        self.stage = Stage()
+        self.stage._connect(self.in_q, None)
+
+    async def test_none_only(self):
+        self.in_q.put_nowait(None)
+        batch_it = self.stage.batches(minsize=1)
+        with self.assertRaises(StopAsyncIteration):
+            await batch_it.__anext__()
+
+    async def test_single_batch_and_none(self):
+        c1 = mock.Mock(does_batch=True)
+        c2 = mock.Mock(does_batch=True)
+        self.in_q.put_nowait(c1)
+        self.in_q.put_nowait(c2)
+        self.in_q.put_nowait(None)
+        batch_it = self.stage.batches(minsize=1)
+        self.assertEqual([c1, c2], await batch_it.__anext__())
+        with self.assertRaises(StopAsyncIteration):
+            await batch_it.__anext__()
+
+    async def
+    async def test_batch_and_single_none(self):
+        c1 = mock.Mock(does_batch=True)
+        c2 = mock.Mock(does_batch=True)
+        self.in_q.put_nowait(c1)
+        self.in_q.put_nowait(c2)
+        batch_it = self.stage.batches(minsize=1)
+        self.assertEqual([c1, c2], await batch_it.__anext__())
+        self.in_q.put_nowait(None)
+        with self.assertRaises(StopAsyncIteration):
+            await batch_it.__anext__()
+
+    async def test_two_batches(self):
+        c1 = mock.Mock(does_batch=True)
+        c2 = mock.Mock(does_batch=True)
+        c3 = mock.Mock(does_batch=True)
+        c4 = mock.Mock(does_batch=True)
+        self.in_q.put_nowait(c1)
+        self.in_q.put_nowait(c2)
+        batch_it = self.stage.batches(minsize=1)
+        self.assertEqual([c1, c2], await batch_it.__anext__())
+        self.in_q.put_nowait(c3)
+        self.in_q.put_nowait(c4)
+        self.in_q.put_nowait(None)
+        self.assertEqual([c3, c4], await batch_it.__anext__())
+        with self.assertRaises(StopAsyncIteration):
+            await batch_it.__anext__()
+
+
+class TestMultipleStages(asynctest.TestCase):
+
+    class FirstStage(Stage):
+        def __init__(self, num, minsize, test_case, *args, **kwargs):
+            super().__init__(*args, **kwargs)
+            self.num = num
+            self.minsize = minsize
+            self.test_case = test_case
+
+        async def run(self):
+            for i in range(self.num):
+                await asyncio.sleep(0)  # Force reschedule
+                await self.put(mock.Mock(does_batch=True))
+
+    class MiddleStage(Stage):
+        def __init__(self, num, minsize, test_case, *args, **kwargs):
+            super().__init__(*args, **kwargs)
+            self.num = num
+            self.minsize = minsize
+            self.test_case = test_case
+
+        async def run(self):
+            async for batch in self.batches(self.minsize):
+                self.test_case.assertTrue(batch)
+                self.test_case.assertGreaterEqual(len(batch), min(self.minsize, self.num))
+                self.num -= len(batch)
+                for b in batch:
+                    await self.put(b)
+            self.test_case.assertEqual(self.num, 0)
+
+    class LastStage(Stage):
+        def __init__(self, num, minsize, test_case, *args, **kwargs):
+            super().__init__(*args, **kwargs)
+            self.num = num
+            self.minsize = minsize
+            self.test_case = test_case
+
+        async def run(self):
+            async for batch in self.batches(self.minsize):
+                self.test_case.assertTrue(batch)
+                self.test_case.assertGreaterEqual(len(batch), min(self.minsize, self.num))
+                self.num -= len(batch)
+            self.test_case.assertEqual(self.num, 0)
+
+    async def test_batch_queue_and_min_sizes(self):
+        """Test the batches iterator in a small stages pipeline with various batch, queue and content counts."""
+        for num in range(10):
+            for minsize in range(1, 5):
+                for qsize in range(1, num + 1):
+                    queues = [asyncio.Queue(maxsize=qsize) for i in range(3)]
+                    first_stage = self.FirstStage(num, minsize, self)
+                    middle_stage = self.MiddleStage(num, minsize, self)
+                    last_stage = self.LastStage(num, minsize, self)
+                    end_stage = EndStage()
+                    first_stage._connect(None, queues[0])
+                    middle_stage._connect(queues[0], queues[1])
+                    last_stage._connect(queues[1], queues[2])
+                    end_stage._connect(queues[2], None)
+                    await asyncio.gather(
+                        last_stage(),
+                        middle_stage(),
+                        first_stage(),
+                        end_stage(),
+                    )
diff --git a/pulpcore/tests/unit/models/test_content.py b/pulpcore/tests/unit/models/test_content.py
new file mode 100644
index 0000000000..8cbd1ccc4d
--- /dev/null
+++ b/pulpcore/tests/unit/models/test_content.py
@@ -0,0 +1,43 @@
+import os
+import tempfile
+
+from django.test import TestCase
+from pulpcore.plugin.models import Artifact, Content, ContentArtifact
+
+
+class ContentCRUDTestCase(TestCase):
+
+    artifact01_path = os.path.join(tempfile.gettempdir(), 'artifact01-tmp')
+    artifact02_path = os.path.join(tempfile.gettempdir(), 'artifact02-tmp')
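+
+    # setUp recreates the two temporary files and wraps each of them in a
+    # saved Artifact, so every test starts from a fresh pair of artifacts.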
+    def setUp(self):
+        with open(self.artifact01_path, 'w') as f:
+            f.write('Temp Artifact File 01')
+        with open(self.artifact02_path, 'w') as f:
+            f.write('Temp Artifact File 02')
+        self.artifact01 = Artifact.init_and_validate(self.artifact01_path)
+        self.artifact01.save()
+        self.artifact02 = Artifact.init_and_validate(self.artifact02_path)
+        self.artifact02.save()
+
+    def test_create_and_read_content(self):
+        content = Content.objects.create()
+        content.save()
+        content_artifact = ContentArtifact.objects.create(
+            artifact=self.artifact01,
+            content=content,
+            relative_path=self.artifact01.file.path)
+        content_artifact.save()
+        self.assertTrue(
+            Content.objects.filter(pk=content.pk).exists()
+            and ContentArtifact.objects.get(
+                pk=content_artifact.pk
+            ).content.pk == Content.objects.get(pk=content.pk).pk
+        )
+
+    def test_remove_content(self):
+        content = Content.objects.create()
+        content.save()
+        # Assumes creation is covered by the test_create_and_read_content test
+        Content.objects.filter(pk=content.pk).delete()
+        self.assertFalse(Content.objects.filter(pk=content.pk).exists())
diff --git a/pulpcore/tests/unit/models/test_repo_version.py b/pulpcore/tests/unit/models/test_repo_version.py
new file mode 100644
index 0000000000..887b021e1f
--- /dev/null
+++ b/pulpcore/tests/unit/models/test_repo_version.py
@@ -0,0 +1,47 @@
+from unittest.mock import patch
+from django.core.files.uploadedfile import SimpleUploadedFile
+
+from django.test import TestCase
+from pulpcore.plugin.models import Artifact, Content, ContentArtifact, Repository, RepositoryVersion
+from pulpcore.app.models.task import Task
+
+
+class RepositoryVersionCRUDTestCase(TestCase):
+
+    def setUp(self):
+        artifact = Artifact.objects.create(
+            md5="ec0df26316b1deb465d2d18af7b600f5",
+            sha1="cf6121b0425c2f2e3a2fcfe6f402d59730eb5661",
+            sha224="9a6297eb28d91fad5277c0833856031d0e940432ad807658bd2b60f4",
+            sha256="c8ddb3dcf8da48278d57b0b94486832c66a8835316ccf7ca39e143cbfeb9184f",
+            sha384=("53a8a0cebcb7780ed7624790c9d9a4d09ba74b47270d397f5ed7bc1c46777a0f"
+                    "be362aaf2bbe7f0966a350a12d76e28d"),  # noqa
+            sha512=("a94a65f19b864d184a2a5e07fa29766f08c6d49b6f624b3dd3a36a9826"
+                    "7b9137d9c35040b3e105448a869c23c2aec04c9e064e3555295c1b8de6515eed4da27d"),  # noqa
+            size=1024,
+            file=SimpleUploadedFile('test_filename', b'test content')
+        )
+        artifact.save()
+        self.content = Content.objects.create()
+        self.content.save()
+        self.content_artifact = ContentArtifact.objects.create(artifact=artifact,
+                                                               content=self.content,
+                                                               relative_path=artifact.file.path)
+        self.content_artifact.save()
+        self.repository = Repository.objects.create(name='foo')
+        self.repository.save()
+        self.task = Task.objects.create(state='Completed', name='test-task')
+        self.task.save()
+
+    @patch('pulpcore.app.models.task.get_current_job')
+    def test_create_repository_version(self, mock_task):
+        mock_task.return_value.id = self.task.pk
+        with RepositoryVersion.create(self.repository) as new_version:
+            new_version.add_content(Content.objects.filter(pk=self.content.pk))
+        self.assertTrue(RepositoryVersion.objects.filter().exists())
+
+    def test_remove_repository_version(self):
+        RepositoryVersion.objects.filter().delete()
+        self.assertFalse(RepositoryVersion.objects.filter().exists())