Skip to content

Commit

Permalink
files: inject files information in the record index.
Browse files Browse the repository at this point in the history
* Adds custom mappings and jsonschema.
* Reindexes the record file when a file is added, updated or removed.
* Adds the total size taken by the files and the number of main files related to a given record file.

Co-Authored-by: Johnny Mariéthoz <Johnny.Mariethoz@rero.ch>
  • Loading branch information
jma committed Apr 8, 2024
1 parent d68c06f commit 8a70e5b
Show file tree
Hide file tree
Showing 13 changed files with 418 additions and 15 deletions.
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,7 @@ stats_cfg = "rero_ils.modules.stats_cfg.jsonschemas"
templates = "rero_ils.modules.templates.jsonschemas"
users = "rero_ils.modules.users.jsonschemas"
vendors = "rero_ils.modules.vendors.jsonschemas"
files = "rero_ils.modules.files.jsonschemas"

[tool.poetry.plugins."invenio_oauth2server.scopes"]
birthdate = "rero_ils.oauth.scopes:birthdate"
Expand Down Expand Up @@ -440,6 +441,7 @@ stats = "rero_ils.modules.stats.mappings"
stats_cfg = "rero_ils.modules.stats_cfg.mappings"
templates = "rero_ils.modules.templates.mappings"
vendors = "rero_ils.modules.vendors.mappings"
files = "rero_ils.modules.files.mappings"

[tool.poetry.plugins."invenio_search.templates"]
operation_logs = "rero_ils.modules.operation_logs.es_templates:list_es_templates"
Expand Down
36 changes: 36 additions & 0 deletions rero_ils/modules/files/api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# -*- coding: utf-8 -*-
#
# RERO ILS
# Copyright (C) 2019-2024 RERO
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""API for manipulating record file."""

from invenio_records.systemfields import ConstantField
from invenio_records_resources.records.systemfields import IndexField
from rero_invenio_files.records.api import FileRecord
from rero_invenio_files.records.api import RecordWithFile as RecordWithFileBase


class RecordWithFile(RecordWithFileBase):
    """Record with files, bound to the RERO ILS schema and search index."""

    # JSON schema reference held by the ``$schema`` constant field.
    schema = ConstantField(
        "$schema",
        "local://files/record-v1.0.0.json",
    )

    # Search index field: index name plus the ``files`` search alias.
    index = IndexField(
        "files-record-v1.0.0",
        search_alias="files",
    )


# Module-level side effect: when this module is imported, the ``record_cls``
# attribute of the imported ``FileRecord`` class is overridden so that it
# refers to the ``RecordWithFile`` subclass defined above.
FileRecord.record_cls = RecordWithFile
12 changes: 6 additions & 6 deletions rero_ils/modules/files/components.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from rero_ils.modules.operation_logs.logs.api import SpecificOperationLog
from rero_ils.modules.operation_logs.models import OperationLogOperation

from .operations import ReindexDoc
from .operations import ReindexDoc, ReindexRecordFile


class OperationLogRecordFactory(OperationLogFactory):
Expand Down Expand Up @@ -179,18 +179,18 @@ def delete_file(self, identity, id_, file_key, record, deleted_file):
deleted_file=deleted_file)


class DocumentReindexComponent(FileServiceComponent):
"""Component to reindex document linked to the file record."""
class ReindexComponent(FileServiceComponent):
"""Component to reindex linked resources to the file record."""

def _register(self, record):
    """Register the reindex operations triggered by a file change.

    Two operations are prepared: one for the document whose pid is read
    from the first entry of ``record["metadata"]["links"]`` (with the
    ``doc_`` text removed) and one for the record file itself. Each one is
    registered in the unit of work only if an equal operation is not
    already present.

    :param record: obj - record instance.
    """
    first_link = record["metadata"]["links"][0]
    doc_pid = first_link.replace("doc_", "")
    candidates = (ReindexDoc(doc_pid), ReindexRecordFile(record.id))
    for candidate in candidates:
        # an equal operation is already scheduled: nothing to add
        if candidate in self.uow._operations:
            continue
        self.uow.register(candidate)

def commit_file(self, identity, id_, file_key, record):
"""Commit file handler.
Expand Down
46 changes: 46 additions & 0 deletions rero_ils/modules/files/dumpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# -*- coding: utf-8 -*-
#
# RERO ILS
# Copyright (C) 2019-2024 RERO
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.


"""Files indexer dumpers."""

from invenio_records.dumpers import SearchDumperExt


class FileInformationDumperExt(SearchDumperExt):
    """Search dumper extension adding file statistics to the dump."""

    def dump(self, record, data):
        """Add the number of main files and the total file size to the dump.

        Each entry of ``record.files`` is classified by its ``type`` value:
        entries typed ``fulltext`` or ``thumbnail`` are not counted as main
        files, and only ``thumbnail`` entries are left out of the size total.

        :param record: The record to dump.
        :param data: The initial dump data passed in by ``record.dumps()``;
            ``data["metadata"]`` receives the ``n_files`` and ``file_size``
            values.
        """
        main_file_count = 0
        total_size = 0
        for key in record.files:
            entry = record.files[key]
            kind = entry.get("type")
            if kind == "thumbnail":
                # thumbnails are neither main files nor part of the size
                continue
            # main files and extracted text both contribute to the size
            total_size += entry.file.size
            if kind != "fulltext":
                main_file_count += 1
        metadata = data["metadata"]
        metadata["n_files"] = main_file_count
        metadata["file_size"] = total_size
18 changes: 18 additions & 0 deletions rero_ils/modules/files/jsonschemas/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# -*- coding: utf-8 -*-
#
# RERO ILS
# Copyright (C) 2019-2024 RERO
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Record File jsonschemas."""
59 changes: 59 additions & 0 deletions rero_ils/modules/files/jsonschemas/files/record-v1.0.0.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"description": "Record file.",
"type": "object",
"additionalProperties": false,
"properties": {
"$schema": {
"title": "Schema",
"description": "Schema to validate file records against.",
"type": "string",
"minLength": 9
},
"metadata": {
"type": "object",
"additionalProperties": false,
"properties": {
"collections": {
"title": "Collections",
"type": "array",
"minItems": 1,
"items": {
"title": "Collection Name",
"type": "string",
"minLength": 1
}
},
"owners": {
"title": "Owners",
"type": "array",
"minItems": 1,
"items": {
"title": "Owner",
"type": "string",
"minLength": 1
}
},
"links": {
"title": "Links",
"type": "array",
"minItems": 1,
"items": {
"title": "Link",
"type": "string",
"minLength": 1
}
}
}
},
"id": {
"$ref": "local://definitions-v1.0.0.json#/identifier"
},
"pid": {
"$ref": "local://definitions-v2.0.0.json#/internal-pid"
},
"files": {
"$ref": "local://definitions-v2.0.0.json#/files"
}
}
}
18 changes: 18 additions & 0 deletions rero_ils/modules/files/mappings/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# -*- coding: utf-8 -*-
#
# RERO ILS
# Copyright (C) 2019-2024 RERO
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Record File Elasticsearch mappings."""
18 changes: 18 additions & 0 deletions rero_ils/modules/files/mappings/v7/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# -*- coding: utf-8 -*-
#
# RERO ILS
# Copyright (C) 2019-2024 RERO
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Record File Elasticsearch mappings."""
74 changes: 74 additions & 0 deletions rero_ils/modules/files/mappings/v7/files/record-v1.0.0.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
{
"mappings": {
"properties": {
"$schema": {
"type": "keyword",
"index": "false"
},
"created": {
"type": "date"
},
"updated": {
"type": "date"
},
"files": {
"properties": {
"enabled": {
"type": "boolean"
}
}
},
"bucket_id": {
"type": "text"
},
"uuid": {
"type": "keyword"
},
"version_id": {
"type": "integer"
},
"indexed_at": {
"type": "date"
},
"pid": {
"properties": {
"obj_type": {
"type": "keyword"
},
"pid_type": {
"type": "keyword"
},
"pk": {
"type": "long"
},
"status": {
"type": "keyword"
}
}
},
"metadata": {
"type": "object",
"properties": {
"owners": {
"type": "keyword"
},
"collections": {
"type": "keyword"
},
"links": {
"type": "keyword"
},
"n_files": {
"type": "long"
},
"file_size": {
"type": "long"
}
}
},
"id": {
"type": "keyword"
}
}
}
}
45 changes: 39 additions & 6 deletions rero_ils/modules/files/operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,31 +17,64 @@

"""Files Operations."""

from flask import current_app
from invenio_records_resources.services.uow import Operation

from rero_ils.modules.documents.tasks import reindex_document


class ReindexOperationBase(Operation):
    """Base class for reindex operations.

    An instance only stores the identifier of the resource to reindex;
    subclasses implement ``on_post_commit`` to trigger the reindexing.
    """

    def __init__(self, id):
        """Store the identifier of the resource to reindex.

        :param id: identifier of the resource to reindex.
        """
        self.id = id

    def __eq__(self, other):
        """Compare two operations on their class and identifier.

        :param other: obj - instance to compare with.
        :returns: ``False`` when ``other`` is not an instance of this
            operation's class, else the result of comparing both ``id``
            values.
        """
        if not isinstance(other, self.__class__):
            return False
        return self.id == other.id

    def on_post_commit(self, uow):
        """Run the post task operation; subclasses must override it.

        :param uow: obj - UnitOfWork instance.
        :raises NotImplementedError: always, in this base class.
        """
        raise NotImplementedError


class ReindexDoc(ReindexOperationBase):
    """Reindex a given document."""

    def on_post_commit(self, uow):
        """Hand the stored identifier over to ``reindex_document.delay``.

        :param uow: obj - UnitOfWork instance.
        """
        document_pid = self.id
        reindex_document.delay(document_pid)


class ReindexRecordFile(ReindexOperationBase):
    """Reindex a given record file."""

    def __init__(self, id):
        """Keep the records service and the record file identifier.

        The service is read from the ``rero-invenio-files`` extension of
        the current Flask application when the operation is created.

        :param id: record file id value.
        """
        # get the service from the application extension
        self.record_service = current_app.extensions[
            "rero-invenio-files"
        ].records_service
        super().__init__(id)

    def on_post_commit(self, uow):
        """Index the record file through the indexer of the records service.

        :param uow: obj - UnitOfWork instance.
        """
        indexer = self.record_service.indexer
        indexer.index_by_id(self.id)

0 comments on commit 8a70e5b

Please sign in to comment.