Skip to content

Commit

Permalink
files: add file preview
Browse files Browse the repository at this point in the history
* Adds and configures invenio-previewer.
* Fixes the reindexing of the related document when a record file's collections have been changed.

Co-Authored-by: Johnny Mariéthoz <Johnny.Mariethoz@rero.ch>
  • Loading branch information
jma committed Apr 24, 2024
1 parent eb452d1 commit 71b1c47
Show file tree
Hide file tree
Showing 15 changed files with 1,981 additions and 1,251 deletions.
12 changes: 12 additions & 0 deletions docker/nginx/conf.d/default.conf
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,18 @@ server {
client_max_body_size 50G;
}

location /api/records {
uwsgi_pass api_server;
include uwsgi_params;
uwsgi_buffering off;
uwsgi_request_buffering off;
uwsgi_param Host $host;
uwsgi_param X-Forwarded-For $proxy_add_x_forwarded_for;
uwsgi_param X-Forwarded-Proto $scheme;
# Max request body size for the records API is set to 500MB (configure as needed).
client_max_body_size 500M;
}

# Static content is served directly by nginx and not the application server.
location /static {
alias /invenio/instance/static;
Expand Down
2 changes: 1 addition & 1 deletion docker/uwsgi/uwsgi_rest.ini
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.

[uwsgi]
socket = 0.0.0.0:5001
socket = localhost:5001
module = invenio_app.wsgi_rest:application
master = true
die-on-term = true
Expand Down
2 changes: 1 addition & 1 deletion docker/uwsgi/uwsgi_ui.ini
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.

[uwsgi]
socket = 0.0.0.0:5000
socket = localhost:5000
module = invenio_app.wsgi_ui:application
master = true
die-on-term = true
Expand Down
1,954 changes: 1,216 additions & 738 deletions poetry.lock

Large diffs are not rendered by default.

23 changes: 18 additions & 5 deletions rero_ils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -533,7 +533,8 @@ def _(x):
'img-src': [
'*',
"'self'",
'data:'
'data:',
'blob:'
],
'style-src': [
'*',
Expand Down Expand Up @@ -565,11 +566,17 @@ def _(x):
#: route correct hosts to the application.
APP_ALLOWED_HOSTS = ['localhost', '127.0.0.1']

# TODO: Check if needed one day
# Previewers
# ==========
#: Include IIIF preview for images.
# PREVIEWER_PREFERENCE = ['iiif_image'] + BASE_PREFERENCE
PREVIEWER_RECORD_FILE_FACOTRY = (
"rero_invenio_files.records.previewer.record_file_factory"
)

PREVIEWER_MAX_IMAGE_SIZE_BYTES = 5 * 1024 * 1024
"""Maximum file size in bytes for image files."""

PREVIEWER_MAX_FILE_SIZE_BYTES = 5 * 1024 * 1024
"""Maximum file size in bytes for JSON/XML files."""

# Debug
# =====
Expand Down Expand Up @@ -2897,7 +2904,6 @@ def _(x):
'rero_ils.modules.permissions:can_use_debug_mode',
'rero_ils.modules.items.permissions:late_issue_management'
]

# Detailed View Configuration
# ===========================
RECORDS_UI_ENDPOINTS = {
Expand Down Expand Up @@ -2930,6 +2936,12 @@ def _(x):
record_class='rero_ils.modules.stats.api.api:Stat',
view_imp='rero_ils.modules.stats.views.stats_view_method',
permission_factory_imp='rero_ils.modules.stats.permissions:stats_ui_permission_factory',
),
"recid_preview": dict(
pid_type="recid",
route="/records/preview/<pid_value>/<path:filename>",
view_imp="rero_invenio_files.records.previewer.preview",
record_class="rero_invenio_files.records.api:RecordWithFile",
)
}

Expand All @@ -2948,6 +2960,7 @@ def _(x):
}
}


RERO_ILS_DEFAULT_JSON_SCHEMA = {
'acac': '/acq_accounts/acq_account-v0.0.1.json',
'acol': '/acq_order_lines/acq_order_line-v0.0.1.json',
Expand Down
6 changes: 0 additions & 6 deletions rero_ils/modules/documents/dumpers/indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
"""Indexing dumper."""

from flask import current_app
from invenio_access.permissions import system_identity
from invenio_records.dumpers import Dumper

from ..extensions import TitleExtension
Expand Down Expand Up @@ -232,11 +231,6 @@ def _process_files(self, record, data):
"""Add full text from files."""
ext = current_app.extensions['rero-invenio-files']
sfr = ext.records_service
search = sfr.search_request(
system_identity, dict(size=1), sfr.record_cls, sfr.config.search
)
search = search.source('uuid')\
.filter('term', metadata__links=f'doc_{record.pid}')
files = {}
for record_file in record.get_records_files():
collections = record_file.get('metadata', {}).get('collections')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -171,4 +171,4 @@
}
}
}
}
}
44 changes: 42 additions & 2 deletions rero_ils/modules/files/components.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ def delete_file(self, identity, id_, file_key, record, deleted_file):
deleted_file=deleted_file)


class ReindexComponent(FileServiceComponent):
class ReindexFileComponent(FileServiceComponent):
"""Component to reindex linked resources to the file record."""

def _register(self, record):
Expand All @@ -188,7 +188,7 @@ def _register(self, record):
:param record: obj - record instance.
"""
doc_pid = record["metadata"]["links"][0].replace("doc_", "")
for operation in [ReindexDoc(doc_pid), ReindexRecordFile(record.id)]:
for operation in [ReindexRecordFile(record.id), ReindexDoc(doc_pid)]:
if operation not in self.uow._operations:
self.uow.register(operation)

Expand Down Expand Up @@ -231,3 +231,43 @@ def delete_all_files(self, identity, id_, record, results):
:param record: obj - record instance.
"""
self._register(record)


class ReindexRecordComponent(FileServiceComponent):
    """Schedule a reindex of the document linked to a file record.

    Each create, update and delete of the record registers a ``ReindexDoc``
    operation on the component's unit of work.
    """

    def _register(self, record):
        """Register a reindex operation for the linked document.

        The document pid is read from the first entry of the record's
        ``metadata.links`` with the ``doc_`` marker stripped.

        :param record: obj - record instance.
        """
        first_link = record["metadata"]["links"][0]
        reindex_doc = ReindexDoc(first_link.replace("doc_", ""))
        # nothing to do when an equal operation is already registered
        if reindex_doc in self.uow._operations:
            return
        self.uow.register(reindex_doc)

    def create(self, identity, data, record, errors=None, **kwargs):
        """Schedule the document reindex after a record creation.

        :param identity: flask principal Identity
        :param data: dict - creation data
        :param record: obj - the created record
        """
        self._register(record)

    def update(self, identity, data, record, **kwargs):
        """Schedule the document reindex after a record update.

        :param identity: flask principal Identity
        :param data: dict - data to update the record
        :param record: obj - the updated record
        """
        self._register(record)

    def delete(self, identity, record, **kwargs):
        """Schedule the document reindex after a record deletion.

        :param identity: flask principal Identity
        :param record: obj - the deleted record
        """
        self._register(record)
3 changes: 1 addition & 2 deletions rero_ils/modules/files/dumpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.


"""Files indexer dumpers."""

from invenio_records.dumpers import SearchDumperExt
Expand All @@ -40,7 +39,7 @@ def dump(self, record, data):
if f_type not in ["fulltext", "thumbnail"]:
n_main_files += 1
# main files or extracted text
if f_type != "thumbnail":
if f_type != "thumbnail" and record.files[f].file:
size += record.files[f].file.size
data["metadata"]["n_files"] = n_main_files
data["metadata"]["file_size"] = size
43 changes: 43 additions & 0 deletions rero_ils/modules/files/results.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# -*- coding: utf-8 -*-
#
# RERO ILS
# Copyright (C) 2019-2024 RERO
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Files results classes."""

from invenio_records_resources.services.files.results import FileList


class MainFileList(FileList):
    """File list result restricted to the main files of a record."""

    @property
    def entries(self):
        """Yield the serialized main files.

        Files whose metadata ``type`` is ``fulltext`` or ``thumbnail`` are
        left out; every other file is dumped through the service file schema
        and, when a link template is set, completed with its links.
        """
        for file_entry in self._results:
            file_type = file_entry.metadata.get('type')
            if file_type not in ('fulltext', 'thumbnail'):
                dumped = self._service.file_schema.dump(
                    file_entry,
                    context={'identity': self._identity},
                )
                if self._links_item_tpl:
                    dumped["links"] = self._links_item_tpl.expand(
                        self._identity, file_entry)
                yield dumped
13 changes: 12 additions & 1 deletion rero_ils/modules/files/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
"""Files support for the RERO invenio instances."""

from invenio_records_resources.services.records.schema import BaseRecordSchema
from marshmallow import Schema, fields
from marshmallow import Schema, fields, pre_load
from marshmallow_utils.fields import SanitizedUnicode


Expand All @@ -32,6 +32,17 @@ class MetadataSchema(Schema):
file_size = fields.Int(dump_only=True)
files = fields.Dict()

@pre_load
def remove_fields(self, data, **kwargs):
    """Drop the computed fields from the incoming data before loading.

    The ``n_files`` and ``file_size`` keys are removed in place when present.

    :param data: Dict of record data.
    :returns: The same dict without the computed fields.
    """
    for computed_field in ('n_files', 'file_size'):
        data.pop(computed_field, None)
    return data


class RecordSchema(BaseRecordSchema):
"""Service schema for subjects."""
Expand Down
12 changes: 8 additions & 4 deletions rero_ils/modules/files/services.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,10 @@

from .api import RecordWithFile
from .components import OperationLogsComponent, OperationLogsFileComponent, \
ReindexComponent
ReindexFileComponent, ReindexRecordComponent
from .dumpers import FileInformationDumperExt
from .permissions import FilePermissionPolicy
from .results import MainFileList
from .schemas import RecordSchema


Expand All @@ -41,7 +42,8 @@ class RecordServiceConfig(RecordServiceConfig):
permission_policy_cls = FilePermissionPolicy

# Service components
components = RecordServiceConfig.components + [OperationLogsComponent]
components = RecordServiceConfig.components \
+ [OperationLogsComponent, ReindexRecordComponent]

# Dumper for the indexer
index_dumper = SearchDumper(
Expand All @@ -58,14 +60,16 @@ class RecordFileServiceConfig(FileServiceConfig):
# Record class
record_cls = RecordWithFile

file_result_list_cls = MainFileList

# Common configuration
permission_policy_cls = FilePermissionPolicy

# maximum files per buckets
max_files_count = 1000
max_files_count = 1700

# Service components
components = FileServiceConfig.components + [
ReindexComponent,
ReindexFileComponent,
OperationLogsFileComponent,
]

0 comments on commit 71b1c47

Please sign in to comment.