From a40719c16d07978c92614d58ea12cb06e770a9a7 Mon Sep 17 00:00:00 2001 From: Javier Arias Date: Mon, 25 Nov 2024 17:46:25 +0000 Subject: [PATCH 1/5] Update changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a3beb19d4..f5cd73865 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +### Added + - Add database indexes to common attributes to improve performance ## [[0.13.1]](https://github.com/thoth-pub/thoth/releases/tag/v0.13.1) - 2024-11-25 ### Added From 4c81f6cdda741c7a554315b8d2c5133c9075d5e7 Mon Sep 17 00:00:00 2001 From: Javier Arias Date: Mon, 25 Nov 2024 17:48:15 +0000 Subject: [PATCH 2/5] Add indexes --- thoth-api/migrations/v0.13.1/down.sql | 100 ++++++++++++++++++++++++++ thoth-api/migrations/v0.13.1/up.sql | 100 ++++++++++++++++++++++++++ 2 files changed, 200 insertions(+) create mode 100644 thoth-api/migrations/v0.13.1/down.sql create mode 100644 thoth-api/migrations/v0.13.1/up.sql diff --git a/thoth-api/migrations/v0.13.1/down.sql b/thoth-api/migrations/v0.13.1/down.sql new file mode 100644 index 000000000..4484ef28b --- /dev/null +++ b/thoth-api/migrations/v0.13.1/down.sql @@ -0,0 +1,100 @@ +-- Remove indexes from account table +DROP INDEX IF EXISTS idx_account_email; + +-- Remove indexes from publisher_account table +DROP INDEX IF EXISTS idx_publisher_account_account_id; + +-- Remove indexes from work table +DROP INDEX IF EXISTS idx_work_work_id; +DROP INDEX IF EXISTS idx_work_doi; +DROP INDEX IF EXISTS idx_work_reference; +DROP INDEX IF EXISTS idx_work_short_abstract_substr; +DROP INDEX IF EXISTS idx_work_long_abstract_substr; +DROP INDEX IF EXISTS idx_work_landing_page; +DROP INDEX IF EXISTS idx_work_imprint_id; +DROP INDEX IF EXISTS idx_work_updated_at_with_relations_desc; +DROP INDEX IF EXISTS 
idx_work_full_title_asc; +DROP INDEX IF EXISTS idx_work_publication_date_asc; +DROP INDEX IF EXISTS idx_work_publication_date_desc; + +-- Remove indexes from work_relation table +DROP INDEX IF EXISTS idx_work_relation_relation_ordinal_relator_asc; +DROP INDEX IF EXISTS idx_work_relation_relation_ordinal_related_asc; + +-- Remove indexes from publisher table +DROP INDEX IF EXISTS idx_publisher_publisher_id; +DROP INDEX IF EXISTS idx_publisher_publisher_name; +DROP INDEX IF EXISTS idx_publisher_publisher_shortname; + +-- Remove indexes from imprint table +DROP INDEX IF EXISTS idx_imprint_id; +DROP INDEX IF EXISTS idx_imprint_imprint_name; +DROP INDEX IF EXISTS idx_imprint_imprint_url; +DROP INDEX IF EXISTS idx_imprint_publisher_id; + +-- Remove indexes from subject table +DROP INDEX IF EXISTS idx_subject_subject_code_asc; + +-- Remove indexes from publication table +DROP INDEX IF EXISTS idx_publication_work_id; +DROP INDEX IF EXISTS idx_publication_isbn; +DROP INDEX IF EXISTS idx_publication_publication_type; + +-- Remove indexes from location table +DROP INDEX IF EXISTS idx_location_location_platform_asc; + +-- Remove indexes from price table +DROP INDEX IF EXISTS idx_price_currency_code_asc; + +-- Remove indexes from contributor table +DROP INDEX IF EXISTS idx_contributor_full_name; +DROP INDEX IF EXISTS idx_contributor_last_name; +DROP INDEX IF EXISTS idx_contributor_orcid; + +-- Remove indexes from contribution table +DROP INDEX IF EXISTS idx_contribution_work_id; +DROP INDEX IF EXISTS idx_contribution_contributor_id; +DROP INDEX IF EXISTS idx_contribution_ordinal_asc; + +-- Remove indexes from affiliation table +DROP INDEX IF EXISTS idx_affiliation_contribution_id; +DROP INDEX IF EXISTS idx_affiliation_ordinal_asc; + +-- Remove indexes from institution table +DROP INDEX IF EXISTS idx_institution_institution_name; +DROP INDEX IF EXISTS idx_institution_ror; +DROP INDEX IF EXISTS idx_institution_institution_doi; + +-- Remove indexes from funding table +DROP INDEX 
IF EXISTS idx_funding_work_id; +DROP INDEX IF EXISTS idx_funding_program; + +-- Remove indexes from series table +DROP INDEX IF EXISTS idx_series_series_name; +DROP INDEX IF EXISTS idx_series_issn_print; +DROP INDEX IF EXISTS idx_series_issn_digital; +DROP INDEX IF EXISTS idx_series_series_url; +DROP INDEX IF EXISTS idx_series_series_description; +DROP INDEX IF EXISTS idx_series_imprint_id; + +-- Remove indexes from issue table +DROP INDEX IF EXISTS idx_issue_ordinal_work_id_asc; +DROP INDEX IF EXISTS idx_issue_ordinal_series_id_asc; + +-- Remove indexes from language table +DROP INDEX IF EXISTS idx_language_language_code_asc; + +-- Remove indexes from reference table +DROP INDEX IF EXISTS idx_reference_work_id; +DROP INDEX IF EXISTS idx_reference_doi; +DROP INDEX IF EXISTS idx_reference_unstructured_citation; +DROP INDEX IF EXISTS idx_reference_issn; +DROP INDEX IF EXISTS idx_reference_isbn; +DROP INDEX IF EXISTS idx_reference_journal_title; +DROP INDEX IF EXISTS idx_reference_article_title; +DROP INDEX IF EXISTS idx_reference_series_title; +DROP INDEX IF EXISTS idx_reference_volume_title; +DROP INDEX IF EXISTS idx_reference_author_substr; +DROP INDEX IF EXISTS idx_reference_standard_designator; +DROP INDEX IF EXISTS idx_reference_standards_body_name; +DROP INDEX IF EXISTS idx_reference_standards_body_acronym; diff --git a/thoth-api/migrations/v0.13.1/up.sql b/thoth-api/migrations/v0.13.1/up.sql new file mode 100644 index 000000000..06a9d06fe --- /dev/null +++ b/thoth-api/migrations/v0.13.1/up.sql @@ -0,0 +1,100 @@ +-- Indexes account table +CREATE INDEX idx_account_email ON account (email); + +-- Indexes publisher_account table +CREATE INDEX idx_publisher_account_account_id ON publisher_account (account_id); + +-- Indexes work table +CREATE INDEX idx_work_work_id ON work (work_id); +CREATE INDEX idx_work_doi ON work (doi); +CREATE INDEX idx_work_reference ON work (reference); +CREATE INDEX idx_work_short_abstract_substr ON work (substring(short_abstract FROM 1 
FOR 255)); +CREATE INDEX idx_work_long_abstract_substr ON work (substring(long_abstract FROM 1 FOR 255)); +CREATE INDEX idx_work_landing_page ON work (landing_page); +CREATE INDEX idx_work_imprint_id ON work (imprint_id); +CREATE INDEX idx_work_updated_at_with_relations_desc ON work (updated_at_with_relations DESC, work_id); +CREATE INDEX idx_work_full_title_asc ON work (full_title ASC, work_id); +CREATE INDEX idx_work_publication_date_asc ON work (publication_date ASC, work_id); +CREATE INDEX idx_work_publication_date_desc ON work (publication_date DESC, work_id); + +-- Indexes work_relation table +CREATE INDEX idx_work_relation_relation_ordinal_relator_asc ON work_relation (relation_ordinal ASC, relator_work_id); +CREATE INDEX idx_work_relation_relation_ordinal_related_asc ON work_relation (relation_ordinal ASC, related_work_id); + +-- Indexes publisher table +CREATE INDEX idx_publisher_publisher_id ON publisher (publisher_id); +CREATE INDEX idx_publisher_publisher_name ON publisher (publisher_name); +CREATE INDEX idx_publisher_publisher_shortname ON publisher (publisher_shortname); + +-- Indexes imprint table +CREATE INDEX idx_imprint_id ON imprint (imprint_id); +CREATE INDEX idx_imprint_imprint_name ON imprint (imprint_name); +CREATE INDEX idx_imprint_imprint_url ON imprint (imprint_url); +CREATE INDEX idx_imprint_publisher_id ON imprint (publisher_id); + +-- Indexes subject table +CREATE INDEX idx_subject_subject_code_asc ON subject (subject_code ASC, work_id); + +-- Indexes publication table +CREATE INDEX idx_publication_work_id ON publication (work_id); +CREATE INDEX idx_publication_isbn ON publication (isbn); +CREATE INDEX idx_publication_publication_type ON publication (publication_type); + +-- Indexes location table +CREATE INDEX idx_location_location_platform_asc ON location (location_platform ASC, publication_id); + +-- Indexes price table +CREATE INDEX idx_price_currency_code_asc ON price (currency_code ASC, publication_id); + +-- Indexes contributor 
table +CREATE INDEX idx_contributor_full_name ON contributor (full_name); +CREATE INDEX idx_contributor_last_name ON contributor (last_name); +CREATE INDEX idx_contributor_orcid ON contributor (orcid); + +-- Indexes contribution table +CREATE INDEX idx_contribution_work_id ON contribution (work_id); +CREATE INDEX idx_contribution_contributor_id ON contribution (contributor_id); +CREATE INDEX idx_contribution_ordinal_asc ON contribution (contribution_ordinal ASC, work_id); + +-- Indexes affiliation table +CREATE INDEX idx_affiliation_contribution_id ON affiliation (contribution_id); +CREATE INDEX idx_affiliation_ordinal_asc ON affiliation (affiliation_ordinal ASC, contribution_id); + +-- Indexes institution table +CREATE INDEX idx_institution_institution_name ON institution (institution_name); +CREATE INDEX idx_institution_ror ON institution (ror); +CREATE INDEX idx_institution_institution_doi ON institution (institution_doi); + +-- Indexes funding table +CREATE INDEX idx_funding_work_id ON funding (work_id); +CREATE INDEX idx_funding_program ON funding (program); + +-- Indexes series table +CREATE INDEX idx_series_series_name ON series (series_name); +CREATE INDEX idx_series_issn_print ON series (issn_print); +CREATE INDEX idx_series_issn_digital ON series (issn_digital); +CREATE INDEX idx_series_series_url ON series (series_url); +CREATE INDEX idx_series_series_description ON series (series_description); +CREATE INDEX idx_series_imprint_id ON series (imprint_id); + +-- Indexes issue table +CREATE INDEX idx_issue_ordinal_work_id_asc ON issue (issue_ordinal ASC, work_id); +CREATE INDEX idx_issue_ordinal_series_id_asc ON issue (issue_ordinal ASC, series_id); + +-- Indexes language table +CREATE INDEX idx_language_language_code_asc ON language (language_code ASC, work_id); + +-- Indexes reference table +CREATE INDEX idx_reference_work_id ON reference (work_id); +CREATE INDEX idx_reference_doi ON reference (doi); +CREATE INDEX idx_reference_unstructured_citation ON 
reference (unstructured_citation); +CREATE INDEX idx_reference_issn ON reference (issn); +CREATE INDEX idx_reference_isbn ON reference (isbn); +CREATE INDEX idx_reference_journal_title ON reference (journal_title); +CREATE INDEX idx_reference_article_title ON reference (article_title); +CREATE INDEX idx_reference_series_title ON reference (series_title); +CREATE INDEX idx_reference_volume_title ON reference (volume_title); +CREATE INDEX idx_reference_author_substr ON reference ((substring(author FROM 1 FOR 255))); +CREATE INDEX idx_reference_standard_designator ON reference (standard_designator); +CREATE INDEX idx_reference_standards_body_name ON reference (standards_body_name); +CREATE INDEX idx_reference_standards_body_acronym ON reference (standards_body_acronym); From d79e4ff0baa66345c15de2242e4d7fb96dbd9024 Mon Sep 17 00:00:00 2001 From: Javier Arias Date: Mon, 25 Nov 2024 17:49:13 +0000 Subject: [PATCH 3/5] Update changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f5cd73865..d9e7cb802 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added - - Add database indexes to common attributes to improve performance + - [656](https://github.com/thoth-pub/thoth/pull/656) - Add database indexes to common attributes to improve performance ## [[0.13.1]](https://github.com/thoth-pub/thoth/releases/tag/v0.13.1) - 2024-11-25 ### Added From 9859271dda512980a1d4084a3d8b9fe2c4d3609e Mon Sep 17 00:00:00 2001 From: Javier Arias Date: Tue, 26 Nov 2024 11:15:36 +0000 Subject: [PATCH 4/5] Optimise indexes --- thoth-api/migrations/v0.13.1/down.sql | 10 +++++----- thoth-api/migrations/v0.13.1/up.sql | 17 +++++++++++------ 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/thoth-api/migrations/v0.13.1/down.sql b/thoth-api/migrations/v0.13.1/down.sql index 4484ef28b..1ce4f65e7 100644 --- 
a/thoth-api/migrations/v0.13.1/down.sql +++ b/thoth-api/migrations/v0.13.1/down.sql @@ -5,7 +5,6 @@ DROP INDEX IF EXISTS idx_account_email; DROP INDEX IF EXISTS idx_publisher_account_account_id; -- Remove indexes from work table -DROP INDEX IF EXISTS idx_work_work_id; DROP INDEX IF EXISTS idx_work_doi; DROP INDEX IF EXISTS idx_work_reference; DROP INDEX IF EXISTS idx_work_short_abstract_substr; @@ -16,24 +15,25 @@ DROP INDEX IF EXISTS idx_work_updated_at_with_relations_desc; DROP INDEX IF EXISTS idx_work_full_title_asc; DROP INDEX IF EXISTS idx_work_publication_date_asc; DROP INDEX IF EXISTS idx_work_publication_date_desc; +DROP INDEX IF EXISTS idx_work_type_status_pub_date_desc; +DROP INDEX IF EXISTS idx_work_books_pub_date_desc; -- Remove indexes from work_relation table -DROP INDEX IF EXISTS idx_work_relation_relation_ordinal_relator_asc; -DROP INDEX IF EXISTS idx_work_relation_relation_ordinal_related_asc; +DROP INDEX IF EXISTS idx_work_relation_relation_ordinal_relator_relation_type_asc; +DROP INDEX IF EXISTS idx_work_relation_relation_ordinal_related_relation_type_asc; -- Remove indexes from publisher table -DROP INDEX IF EXISTS idx_publisher_publisher_id; DROP INDEX IF EXISTS idx_publisher_publisher_name; DROP INDEX IF EXISTS idx_publisher_publisher_shortname; -- Remove indexes from imprint table -DROP INDEX IF EXISTS idx_imprint_id; DROP INDEX IF EXISTS idx_imprint_imprint_name; DROP INDEX IF EXISTS idx_imprint_imprint_url; DROP INDEX IF EXISTS idx_imprint_publisher_id; -- Remove indexes from subject table DROP INDEX IF EXISTS idx_subject_subject_code_asc; +DROP INDEX IF EXISTS idx_subject_subject_ordinal_asc; -- Remove indexes from publication table DROP INDEX IF EXISTS idx_publication_work_id; diff --git a/thoth-api/migrations/v0.13.1/up.sql b/thoth-api/migrations/v0.13.1/up.sql index 06a9d06fe..a06a5d518 100644 --- a/thoth-api/migrations/v0.13.1/up.sql +++ b/thoth-api/migrations/v0.13.1/up.sql @@ -2,10 +2,9 @@ CREATE INDEX idx_account_email ON account 
(email); -- Indexes publisher_account table -CREATE INDEX idx_publisher_account_account_id ON publisher_account (account_id); +CREATE INDEX ON publisher_account (account_id); -- Indexes work table -CREATE INDEX idx_work_work_id ON work (work_id); CREATE INDEX idx_work_doi ON work (doi); CREATE INDEX idx_work_reference ON work (reference); CREATE INDEX idx_work_short_abstract_substr ON work (substring(short_abstract FROM 1 FOR 255)); @@ -16,24 +15,30 @@ CREATE INDEX idx_work_updated_at_with_relations_desc ON work (updated_at_with_re CREATE INDEX idx_work_full_title_asc ON work (full_title ASC, work_id); CREATE INDEX idx_work_publication_date_asc ON work (publication_date ASC, work_id); CREATE INDEX idx_work_publication_date_desc ON work (publication_date DESC, work_id); +CREATE INDEX idx_work_type_status_pub_date_desc + ON work (work_type, work_status, publication_date DESC); +CREATE INDEX idx_work_books_pub_date_desc + ON work (publication_date DESC) + WHERE work_type IN ('monograph', 'edited-book', 'textbook') AND work_status = 'active'; -- Indexes work_relation table -CREATE INDEX idx_work_relation_relation_ordinal_relator_asc ON work_relation (relation_ordinal ASC, relator_work_id); -CREATE INDEX idx_work_relation_relation_ordinal_related_asc ON work_relation (relation_ordinal ASC, related_work_id); +CREATE INDEX idx_work_relation_relation_ordinal_relator_relation_type_asc + ON work_relation (relation_ordinal ASC, relator_work_id, relation_type); +CREATE INDEX idx_work_relation_relation_ordinal_related_relation_type_asc + ON work_relation (relation_ordinal ASC, related_work_id, relation_type); -- Indexes publisher table -CREATE INDEX idx_publisher_publisher_id ON publisher (publisher_id); CREATE INDEX idx_publisher_publisher_name ON publisher (publisher_name); CREATE INDEX idx_publisher_publisher_shortname ON publisher (publisher_shortname); -- Indexes imprint table -CREATE INDEX idx_imprint_id ON imprint (imprint_id); CREATE INDEX idx_imprint_imprint_name ON 
imprint (imprint_name); CREATE INDEX idx_imprint_imprint_url ON imprint (imprint_url); CREATE INDEX idx_imprint_publisher_id ON imprint (publisher_id); -- Indexes subject table CREATE INDEX idx_subject_subject_code_asc ON subject (subject_code ASC, work_id); +CREATE INDEX idx_subject_subject_ordinal_asc ON subject (subject_ordinal ASC, work_id); -- Indexes publication table CREATE INDEX idx_publication_work_id ON publication (work_id); From 6bcc6f5be2fcc93c919253709780039626695e22 Mon Sep 17 00:00:00 2001 From: Javier Arias Date: Tue, 26 Nov 2024 15:34:13 +0000 Subject: [PATCH 5/5] Restore index name --- thoth-api/migrations/v0.13.1/up.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thoth-api/migrations/v0.13.1/up.sql b/thoth-api/migrations/v0.13.1/up.sql index a06a5d518..c8e408b13 100644 --- a/thoth-api/migrations/v0.13.1/up.sql +++ b/thoth-api/migrations/v0.13.1/up.sql @@ -2,7 +2,7 @@ CREATE INDEX idx_account_email ON account (email); -- Indexes publisher_account table -CREATE INDEX ON publisher_account (account_id); +CREATE INDEX idx_publisher_account_account_id ON publisher_account (account_id); -- Indexes work table CREATE INDEX idx_work_doi ON work (doi);