Skip to content

Commit

Permalink
Merge pull request #261 from open-contracting/236_stage_5
Browse files Browse the repository at this point in the history
Add collection_id to release, record, compiled_release - Stage 5
  • Loading branch information
odscjames committed Jan 14, 2020
2 parents f4a239c + 3551472 commit bfcf4b5
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 32 deletions.
Binary file modified docs/_static/database-tables.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
22 changes: 12 additions & 10 deletions docs/database-structure.rst
Expand Up @@ -75,28 +75,30 @@ This situation might arise when:
release, record and compiled_release tables
-------------------------------------------

Each row is linked to `collection_file_item` and thus to collections. Each row is also linked to the `data` and `package_data` tables that actually hold the data.
Each row is linked to `collection_file_item` and thus to collections. However, we also include a `collection_id` column so it's easy to select all data in one collection.

Each row is also linked to the `data` and `package_data` tables that actually hold the data.

Note that the ``compiled_release`` table is only populated by the compile-releases transform, and not by loading records from a data source.

.. _with-collection-views:

release, record and compiled_release views with added collection information
-----------------------------------------------------------------------------

Filtering records, releases or compiled releases on a specific collection involves several joins.
release, record and compiled_release views with added collection information [deprecated]
-----------------------------------------------------------------------------------------

You need to join `release`/`record`/`compiled_release` to `collection_file_item`, which needs to be joined to `collection_file`, which then has a `collection_id` column you can filter on.

To make this easier, three views are provided.
In the past, three views were provided to make it easier to filter records, releases or compiled releases by collection:

* `release_with_collection`
* `record_with_collection`
* `compiled_release_with_collection`

These contain the normal columns each table has, but also have the additional column `collection_id`.
The data in these views is now exactly the same as in the corresponding tables:

* `release`
* `record`
* `compiled_release`

You should use these views where possible to avoid having to write several joins yourself.
If you find any queries that use these views, please switch to querying the tables directly - you will get better performance.

release_check, record_check, release_check_error and record_check_error tables
------------------------------------------------------------------------------
Expand Down
40 changes: 20 additions & 20 deletions ocdskingfisherprocess/database.py
Expand Up @@ -637,55 +637,55 @@ def delete_collection(self, collection_id):
self._delete_collection_run_sql("release_check_error", """DELETE FROM release_check_error
WHERE release_id IN
(
SELECT id FROM release_with_collection
SELECT id FROM release
WHERE collection_id = :collection_id
);""", collection_id)
self._delete_collection_run_sql("record_check_error", """DELETE FROM record_check_error
WHERE record_id IN
(
SELECT id FROM record_with_collection
SELECT id FROM record
WHERE collection_id = :collection_id
);""", collection_id)
self._delete_collection_run_sql("record_check", """DELETE FROM record_check
WHERE record_id IN
(
SELECT id FROM record_with_collection
SELECT id FROM record
WHERE collection_id = :collection_id
);""", collection_id)
self._delete_collection_run_sql("release_check", """DELETE FROM release_check
WHERE release_id IN
(
SELECT id FROM release_with_collection
SELECT id FROM release
WHERE collection_id = :collection_id
);""", collection_id)
self._delete_collection_run_sql("compiled_release", """DELETE FROM compiled_release
WHERE id IN
(
SELECT id FROM compiled_release_with_collection
SELECT id FROM compiled_release
WHERE collection_id = :collection_id
);""", collection_id)
self._delete_collection_run_sql("transform_upgrade_1_0_to_1_1_status_record", """DELETE FROM transform_upgrade_1_0_to_1_1_status_record
WHERE source_record_id IN
(
SELECT id FROM record_with_collection
SELECT id FROM record
WHERE collection_id = :collection_id
);""", collection_id)
self._delete_collection_run_sql("record", """DELETE FROM record
WHERE id IN
(
SELECT id FROM record_with_collection
SELECT id FROM record
WHERE collection_id = :collection_id
);""", collection_id)
self._delete_collection_run_sql("transform_upgrade_1_0_to_1_1_status_release", """DELETE FROM transform_upgrade_1_0_to_1_1_status_release
WHERE source_release_id IN
(
SELECT id FROM release_with_collection
SELECT id FROM release
WHERE collection_id = :collection_id
);""", collection_id)
self._delete_collection_run_sql("release", """DELETE FROM release
WHERE id IN
(
SELECT id FROM release_with_collection
SELECT id FROM release
WHERE collection_id = :collection_id
);""", collection_id)
self._delete_collection_run_sql_in_blocks(
Expand Down Expand Up @@ -773,26 +773,26 @@ def _delete_orphan_data_package_data(self):
def _get_check_query(self, obj_type, collection_id, override_schema_version):
data = {'collection_id': collection_id}
sql = """ SELECT
release_with_collection.id,
release_with_collection.data_id,
release_with_collection.package_data_id
FROM release_with_collection"""
release.id,
release.data_id,
release.package_data_id
FROM release"""
if override_schema_version:
sql += """ LEFT JOIN release_check ON release_check.release_id = release_with_collection.id
sql += """ LEFT JOIN release_check ON release_check.release_id = release.id
AND release_check.override_schema_version = :override_schema_version
LEFT JOIN release_check_error ON release_check_error.release_id = release_check_error.id
AND release_check_error.override_schema_version = :override_schema_version
LEFT JOIN package_data on package_data.id = package_data_id
WHERE release_with_collection.collection_id = :collection_id
WHERE release.collection_id = :collection_id
AND release_check.id IS NULL AND release_check_error.id IS NULL
AND coalesce(data ->> 'version', '1.0') <> :override_schema_version"""
data['override_schema_version'] = override_schema_version
else:
sql += """ LEFT JOIN release_check ON release_check.release_id = release_with_collection.id
sql += """ LEFT JOIN release_check ON release_check.release_id = release.id
AND release_check.override_schema_version IS NULL
LEFT JOIN release_check_error ON release_check_error.release_id = release_check_error.id
AND release_check_error.override_schema_version IS NULL
WHERE release_with_collection.collection_id = :collection_id
WHERE release.collection_id = :collection_id
AND release_check.id IS NULL AND release_check_error.id IS NULL """

return sql.replace('release', obj_type), data
Expand Down Expand Up @@ -845,7 +845,7 @@ def mark_collection_check_older_data_with_schema_version_1_1(self, collection_id
def update_collection_cached_columns(self, collection_id):
with self.get_engine().begin() as connection:
s = sa.sql.expression.text(
"SELECT count(*) as release_count FROM release_with_collection WHERE collection_id = :collection_id")
"SELECT count(*) as release_count FROM release WHERE collection_id = :collection_id")
result = connection.execute(s, {"collection_id": collection_id})
data = result.fetchone()

Expand All @@ -857,7 +857,7 @@ def update_collection_cached_columns(self, collection_id):

with self.get_engine().begin() as connection:
s = sa.sql.expression.text(
"SELECT count(*) as record_count FROM record_with_collection WHERE collection_id = :collection_id")
"SELECT count(*) as record_count FROM record WHERE collection_id = :collection_id")
result = connection.execute(s, {"collection_id": collection_id})
data = result.fetchone()

Expand All @@ -869,7 +869,7 @@ def update_collection_cached_columns(self, collection_id):

with self.get_engine().begin() as connection:
s = sa.sql.expression.text(
"SELECT count(*) as compiled_release_count FROM compiled_release_with_collection " +
"SELECT count(*) as compiled_release_count FROM compiled_release " +
"WHERE collection_id = :collection_id")
result = connection.execute(s, {"collection_id": collection_id})
data = result.fetchone()
Expand Down
4 changes: 2 additions & 2 deletions ocdskingfisherprocess/transform/compile_releases.py
Expand Up @@ -40,8 +40,8 @@ def get_ocids(self):
with self.database.get_engine().begin() as engine:
query = engine.execute(
sa.text(
" SELECT r.ocid FROM release_with_collection AS r" +
" LEFT JOIN compiled_release_with_collection AS cr ON " +
" SELECT r.ocid FROM release AS r" +
" LEFT JOIN compiled_release AS cr ON " +
" cr.ocid = r.ocid and cr.collection_id = :destination_collection_id" +
" WHERE r.collection_id = :collection_id and cr.ocid is NULL" +
" GROUP BY r.ocid "
Expand Down

0 comments on commit bfcf4b5

Please sign in to comment.