Skip to content

Commit

Permalink
Merge fcc8bfd into 6d3b6a4
Browse files: browse the repository at this point in the history
  • Loading branch information
jpmckinney committed Dec 31, 2019
2 parents 6d3b6a4 + fcc8bfd commit 61cd7e4
Show file tree
Hide file tree
Showing 43 changed files with 96,350 additions and 982 deletions.
21 changes: 21 additions & 0 deletions docs/changelog.rst
@@ -1,6 +1,27 @@
Changelog
=========

0.2.0 (Unreleased)
------------------

Changed
~~~~~~~

CLI:

- ``compile`` accepts either release packages or individual releases
- ``compile`` is memory-efficient if given a long list of inputs

Library:

- Rename ``compile_release_packages`` to ``merge``
- Add ``packager`` module with ``Packager`` class

Fixed
~~~~~

- ``--linked-releases`` no longer uses the same linked releases for all records

0.1.5 (2019-12-18)
------------------

Expand Down
4 changes: 4 additions & 0 deletions docs/cli/examples.rst
Expand Up @@ -127,4 +127,8 @@ Add newlines to ends of files (Fish shell)::

for i in *.json; echo >> $i; end

Read line 10,000 of a file::

sed -n '10000 p' < filename.json

On Windows, you may need to install `Cygwin <http://cygwin.com>`__ to use some command-line tools. PowerShell has `some corresponding tools <http://xahlee.info/powershell/PowerShell_for_unixer.html>`__.
10 changes: 5 additions & 5 deletions docs/cli/ocds.rst
Expand Up @@ -31,15 +31,13 @@ Reads OCDS files, and reports whether each is:
compile
-------

Reads release packages from standard input, merges the releases by OCID, and prints the compiled releases.

If ``--package`` is set, and if the ``--publisher-*`` options aren't used, the output package will have the same publisher as the last input package.
Reads release packages and individual releases from standard input, merges the releases by OCID, and prints the compiled releases.

Optional arguments:

* ``--schema SCHEMA`` the URL or path of the release schema to use
* ``--package`` wrap the compiled releases in a record package
* ``--linked-releases`` if ``--package`` is set, use linked releases instead of full releases
* ``--linked-releases`` if ``--package`` is set and the input is a release package, use linked releases instead of full releases
* ``--versioned`` if ``--package`` is set, include versioned releases in the record package; otherwise, print versioned releases instead of compiled releases
* ``--uri URI`` if ``--package`` is set, set the record package's ``uri`` to this value
* ``--published-date PUBLISHED_DATE`` if ``--package`` is set, set the record package's ``publishedDate`` to this value
Expand All @@ -49,11 +47,13 @@ Optional arguments:
* ``--publisher-uid PUBLISHER_UID`` if ``--package`` is set, set the record package's ``publisher``'s ``uid`` to this value
* ``--fake`` if ``--package`` is set, set the record package's required metadata to dummy values

If ``--package`` is set, and if the ``--publisher-*`` options aren't used, the output package will have the same publisher as the last input package.

::

cat tests/fixtures/realdata/release-package-1.json | ocdskit compile > out.json

For the Python API, see :meth:`ocdskit.combine.compile_release_packages`.
For the Python API, see :meth:`ocdskit.combine.merge`.

upgrade
-------
Expand Down
16 changes: 11 additions & 5 deletions ocdskit/cli/commands/compile.py
@@ -1,16 +1,18 @@
import logging
import sys

import ocdskit.packager
from ocdskit.cli.commands.base import OCDSCommand
from ocdskit.combine import compile_release_packages
from ocdskit.combine import merge
from ocdskit.exceptions import CommandError, InconsistentVersionError

logger = logging.getLogger('ocdskit')


class Command(OCDSCommand):
name = 'compile'
help = 'reads release packages from standard input, merges the releases by OCID, and prints the compiled releases'
help = 'reads release packages and individual releases from standard input, merges the releases by OCID, and ' \
'prints the compiled releases'

def add_arguments(self):
self.add_argument('--schema', help='the URL or path of the release schema to use')
Expand All @@ -30,13 +32,17 @@ def handle(self):
kwargs['use_linked_releases'] = self.args.linked_releases
kwargs['return_versioned_release'] = self.args.versioned

if not ocdskit.packager.using_sqlite:
logger.warning('sqlite3 is unavailable, so the command will run in memory. If input files are too large, '
'the command might exceed available memory.')

try:
for output in compile_release_packages(self.items(), **kwargs):
for output in merge(self.items(), **kwargs):
self.print(output)
except InconsistentVersionError as e:
versions = [e.earlier_version, e.current_version]
if versions[1] < versions[0]:
versions.reverse()

raise CommandError('{}\nTry first upgrading packages to the same version:\n cat file [file ...] | ocdskit'
' upgrade {}:{} | ocdskit compile {}'.format(str(e), *versions, ' '.join(sys.argv[2:])))
raise CommandError('{}\nTry first upgrading items to the same version:\n cat file [file ...] | ocdskit '
'upgrade {}:{} | ocdskit compile {}'.format(str(e), *versions, ' '.join(sys.argv[2:])))
198 changes: 43 additions & 155 deletions ocdskit/combine.py
@@ -1,10 +1,10 @@
from collections import defaultdict

from ocdsextensionregistry import ProfileBuilder
from ocdsmerge.merge import get_release_schema_url, get_tags, merge, merge_versioned
from ocdsmerge import Merger
from ocdsmerge.util import get_release_schema_url, get_tags

from ocdskit.exceptions import InconsistentVersionError
from ocdskit.util import get_ocds_minor_version
from ocdskit.packager import Packager
from ocdskit.util import (_empty_record_package, _empty_release_package, _remove_empty_optional_metadata,
_set_extensions_metadata, _update_package_metadata)


def _package(key, items, uri, publisher, published_date, extensions):
Expand All @@ -19,7 +19,7 @@ def _package(key, items, uri, publisher, published_date, extensions):
'uri': uri,
'publisher': publisher,
'publishedDate': published_date,
'version': '1.1',
'version': '1.1', # fields might be deprecated
'extensions': extensions,
key: items,
}
Expand Down Expand Up @@ -57,28 +57,15 @@ def combine_record_packages(packages, uri='', publisher=None, published_date='')
"""
Collects the packages and records from the record packages into one record package.
:param list packages: a list of record packages
:param packages: an iterable of record packages
:param str uri: the record package's ``uri``
:param dict publisher: the record package's ``publisher``
:param str published_date: the record package's ``publishedDate``
"""
if publisher is None:
publisher = {}

output = {
'uri': uri,
'publisher': publisher,
'publishedDate': published_date,
'license': None,
'publicationPolicy': None,
'version': None,
'extensions': {},
'packages': [],
'records': [],
}
output = _empty_record_package(uri, publisher, published_date)

for package in packages:
_update_package_metadata(output, package, publisher)
_update_package_metadata(output, package)

output['records'].extend(package['records'])

Expand All @@ -88,6 +75,9 @@ def combine_record_packages(packages, uri='', publisher=None, published_date='')
if not output['packages']:
del output['packages']

if publisher:
output['publisher'] = publisher

_set_extensions_metadata(output)
_remove_empty_optional_metadata(output)

Expand All @@ -98,173 +88,71 @@ def combine_release_packages(packages, uri='', publisher=None, published_date=''
"""
Collects the releases from the release packages into one release package.
:param list packages: a list of release packages
:param packages: an iterable of release packages
:param str uri: the release package's ``uri``
:param dict publisher: the release package's ``publisher``
:param str published_date: the release package's ``publishedDate``
"""
if publisher is None:
publisher = {}

output = {
'uri': uri,
'publisher': publisher,
'publishedDate': published_date,
'license': None,
'publicationPolicy': None,
'version': None,
'extensions': {},
'releases': [],
}
output = _empty_release_package(uri, publisher, published_date)

for package in packages:
_update_package_metadata(output, package, publisher)
_update_package_metadata(output, package)

output['releases'].extend(package['releases'])

if publisher:
output['publisher'] = publisher

_set_extensions_metadata(output)
_remove_empty_optional_metadata(output)

return output


def compile_release_packages(packages, uri='', publisher=None, published_date='', schema=None,
return_versioned_release=False, return_package=False, use_linked_releases=False):
def merge(data, uri='', publisher=None, published_date='', schema=None, return_versioned_release=False,
return_package=False, use_linked_releases=False):
"""
Merges releases by OCID and yields compiled releases.
Merges release packages and individual releases.
If ``return_versioned_release`` is ``True``, yields the versioned release. If ``return_package`` is ``True``, wraps
the compiled releases (and versioned releases if ``return_versioned_release`` is ``True``) in a record package.
By default, yields compiled releases. If ``return_versioned_release`` is ``True``, yields versioned releases. If
``return_package`` is ``True``, wraps the compiled releases (and versioned releases if ``return_versioned_release``
is ``True``) in a record package.
If ``return_package`` is set and ``publisher`` isn't set, the output record package will have the same publisher as
the last input release package.
:param list packages: a list of release packages
:param data: an iterable of release packages and individual releases
:param str uri: if ``return_package`` is ``True``, the record package's ``uri``
:param dict publisher: if ``return_package`` is ``True``, the record package's ``publisher``
:param str published_date: if ``return_package`` is ``True``, the record package's ``publishedDate``
:param dict schema: the URL or path of the release schema to use
:param dict schema: the URL, path or dict of the patched release schema to use
:param bool return_package: wrap the compiled releases in a record package
:param bool use_linked_releases: if ``return_package`` is ``True``, use linked releases instead of full releases
:param bool use_linked_releases: if ``return_package`` is ``True``, use linked releases instead of full releases,
if the input is a release package
:param bool return_versioned_release: if ``return_package`` is ``True``, include versioned releases in the record
package; otherwise, yield versioned releases instead of compiled releases
"""
if return_package:
output = {
'uri': uri,
'publisher': publisher,
'publishedDate': published_date,
'license': None,
'publicationPolicy': None,
'version': None,
'extensions': {},
'packages': [],
'records': [],
}
# To avoid duplicating code, we track extensions in the same place even if ``return_package`` is false.
else:
output = {
'extensions': {},
}

version = None
releases_by_ocid = defaultdict(list)
linked_releases = []

for i, package in enumerate(packages):
if not version:
version = get_ocds_minor_version(package)
else:
v = get_ocds_minor_version(package)
if v != version:
raise InconsistentVersionError('item {}: version error: this package uses version {}, but earlier '
'packages used version {}'.format(i, v, version), version, v)
with Packager() as packager:
packager.add(data)

if not schema:
prefix = version.replace('.', '__') + '__'
if not schema and packager.version:
prefix = packager.version.replace('.', '__') + '__'
tag = next(tag for tag in reversed(get_tags()) if tag.startswith(prefix))
schema = get_release_schema_url(tag)

for release in package['releases']:
releases_by_ocid[release['ocid']].append(release)
if packager.package['extensions']:
builder = ProfileBuilder(tag, list(packager.package['extensions']))
schema = builder.patched_release_schema()

if return_package and use_linked_releases:
linked_releases.append({
'url': package['uri'] + '#' + release['id'],
'date': release['date'],
'tag': release['tag'],
})
merger = Merger(schema)

if return_package:
_update_package_metadata(output, package, publisher)
packager.package['uri'] = uri
packager.package['publishedDate'] = published_date
if publisher:
packager.package['publisher'] = publisher

output['packages'].append(package['uri'])
yield from packager.output_package(merger, return_versioned_release=return_versioned_release,
use_linked_releases=use_linked_releases)
else:
_update_extensions_metadata(output, package)

if output['extensions']:
builder = ProfileBuilder(tag, list(output['extensions']))
schema = builder.patched_release_schema()

if return_package:
for ocid, releases in releases_by_ocid.items():
record = {
'ocid': ocid,
'releases': [],
'compiledRelease': merge(releases, schema),
}

if use_linked_releases:
record['releases'] = linked_releases
else:
record['releases'] = releases

if return_versioned_release:
record['versionedRelease'] = merge_versioned(releases, schema)

output['records'].append(record)

_set_extensions_metadata(output)
_remove_empty_optional_metadata(output)

yield output
else:
for releases in releases_by_ocid.values():
if return_versioned_release:
merge_method = merge_versioned
else:
merge_method = merge

merged_release = merge_method(releases, schema)

yield merged_release


def _update_package_metadata(output, package, publisher):
_update_extensions_metadata(output, package)

if not publisher and 'publisher' in package:
output['publisher'] = package['publisher']

for field in ('license', 'publicationPolicy', 'version'):
if field in package:
output[field] = package[field]


def _update_extensions_metadata(output, package):
if 'extensions' in package:
# We use an insertion-ordered dict to keep extensions in order without duplication.
output['extensions'].update(dict.fromkeys(package['extensions']))


def _set_extensions_metadata(output):
if output['extensions']:
output['extensions'] = list(output['extensions'])
else:
del output['extensions']


def _remove_empty_optional_metadata(output):
for field in ('license', 'publicationPolicy', 'version'):
if output[field] is None:
del output[field]
yield from packager.output_releases(merger, return_versioned_release=return_versioned_release)

0 comments on commit 61cd7e4

Please sign in to comment.