Skip to content

Commit

Permalink
mef: corrects deleted
Browse files Browse the repository at this point in the history
* Corrects `deleted` creation for MEF records
  during updates.

Co-Authored-by: Peter Weber <peter.weber@rero.ch>
  • Loading branch information
rerowep committed May 15, 2024
1 parent 2ffc021 commit 033e214
Show file tree
Hide file tree
Showing 14 changed files with 499 additions and 504 deletions.
2 changes: 0 additions & 2 deletions classes.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,6 @@ class ReroMefRecord{
persistent_identifier(self)
get_metadata_identifier_names(cls)
deleted(self)
mark_as_deleted(self, dbcommit=False, reindex=False)
}
class EntityMefRecord{
Expand Down
809 changes: 396 additions & 413 deletions poetry.lock

Large diffs are not rendered by default.

9 changes: 8 additions & 1 deletion rero_mef/agents/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,14 +125,21 @@ def create_or_update_mef(self, dbcommit=False, reindex=False,
reindex=reindex
)
mef_actions[mef_record.pid] = Action.UPDATE
elif mef_record.set_deleted():
mef_record = mef_record.update(
data=mef_record,
dbcommit=dbcommit,
reindex=reindex
)
mef_actions[mef_record.pid] = Action.UPDATE
else:
if reindex:
mef_record.reindex()
mef_actions[mef_record.pid] = Action.UPTODATE
else:
# No MEF record create one.
mef_data = {self.name: {'$ref': ref_string}}
if self.deleted:
if self.deleted and not mef_data.get('deleted'):
mef_data['deleted'] = self.deleted
if viaf_records:
mef_data['viaf_pid'] = viaf_records[0].pid
Expand Down
4 changes: 1 addition & 3 deletions rero_mef/agents/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,4 @@ def get_agent_endpoints():
def get_agent_classes():
"""Get all agents classes."""
endpoints = get_agent_endpoints()
return [
get_entity_class(endpoint) for endpoint, _ in endpoints.items()
]
return [get_entity_class(endpoint) for endpoint, _ in endpoints.items()]
12 changes: 0 additions & 12 deletions rero_mef/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""API for manipulating records."""
from copy import deepcopy
from datetime import datetime, timezone
from enum import Enum
from uuid import uuid4

Expand Down Expand Up @@ -457,17 +456,6 @@ def deleted(self):
"""Get record deleted value."""
return self.get('deleted')

def mark_as_deleted(self, dbcommit=False, reindex=False):
    """Flag the record as deleted and persist the change.

    Stores the current UTC time, formatted with ``isoformat()``, under
    the ``deleted`` key and then passes the record to ``self.update``.

    :param dbcommit: Commit changes to DB.
    :param reindex: Reindex record.
    :returns: Modified record.
    """
    deletion_time = datetime.now(timezone.utc)
    self['deleted'] = deletion_time.isoformat()
    # The record itself is handed over as the update payload.
    self.update(data=self, dbcommit=dbcommit, reindex=reindex)
    return self


class ReroIndexer(RecordIndexer):
"""Indexing class for mef."""
Expand Down
44 changes: 41 additions & 3 deletions rero_mef/api_mef.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@

"""API for manipulating MEF records."""

from datetime import datetime

from copy import deepcopy
from datetime import datetime, timezone

import click
from dateutil import parser
Expand All @@ -40,6 +42,42 @@ class EntityMefRecord(ReroMefRecord):
search = None
mef_type = ''

def set_deleted(self):
    """Synchronise the MEF ``deleted`` value with its source records.

    The record is resolved with ``replace_refs()`` and the first source
    (in ``sources`` order) carrying a truthy ``deleted`` value supplies
    the MEF value. When no source is deleted, an existing ``deleted``
    value on the MEF record is removed.

    :returns: True if ``self['deleted']`` was set, replaced or removed,
        False if the record was left untouched.
    """
    source_data = deepcopy(self).replace_refs()
    source_deleted = None
    for source in source_data['sources'] or ():
        # Read from the resolved copy, not from ``self``.
        # NOTE(review): assumes ``self[source]`` is still an unresolved
        # ``{'$ref': ...}`` stub that never carries ``deleted`` - confirm.
        if source_deleted := source_data[source].get('deleted'):
            break

    current = self.get('deleted')
    if source_deleted:
        if current == source_deleted:
            # Already in sync: do not report a change, so callers can
            # skip a needless update.
            return False
        self['deleted'] = source_deleted
        return True
    if current:
        # No source is deleted any more: drop the old deleted data.
        self.pop('deleted')
        return True
    return False

def update(self, data, commit=False, dbcommit=False, reindex=False):
    """Refresh the ``deleted`` value, then update the record.

    :param data: a dict data to update the record.
    :param commit: if True push the db transaction.
    :param dbcommit: make the change effective in db.
    :param reindex: reindex the record.
    :returns: the result of the parent ``update`` (the modified record)
    """
    # Set the MEF deleted value from the sources before delegating.
    self.set_deleted()
    options = {'commit': commit, 'dbcommit': dbcommit, 'reindex': reindex}
    return super().update(data=data, **options)

@classmethod
def get_mef(cls, agent_pid, agent_name, pid_only=False):
"""Get MEF record by agent pid value.
Expand Down Expand Up @@ -89,7 +127,7 @@ def get_all_pids_without_viaf(cls):
:returns: Generator of MEF pids without VIAF pid.
"""
query = cls.search() \
.filter('bool', must_not=[Q('exists', field="viaf_pid")])
.exclude('exists', field="viaf_pid")
for pid_type in current_app.config.get(cls.mef_type, []):
query = query \
.filter('bool', should=[Q('exists', field=pid_type)])
Expand Down Expand Up @@ -128,7 +166,7 @@ def get_multiple_missing_pids(cls, record_types=None, verbose=False):
f'Record type not found: {record_type}')

# Get all pids from MEF
date = datetime.utcnow()
date = datetime.now(timezone.utc)
click.echo('Get mef')
progress = progressbar(
items=cls.search()
Expand Down
5 changes: 2 additions & 3 deletions rero_mef/concepts/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,6 @@ def create_or_update_mef(self, dbcommit=False, reindex=False):
if mef_records := ConceptMefRecord.get_mef(self.pid, self.name):
mef_data = mef_records[0]

if self.deleted and not mef_data.get('deleted'):
mef_data['deleted'] = self.deleted

ref_string = build_ref_string(
concept=self.name,
concept_pid=self.pid
Expand All @@ -82,6 +79,8 @@ def create_or_update_mef(self, dbcommit=False, reindex=False):
reindex=reindex
)
else:
if self.deleted and not mef_data.get('deleted'):
mef_data['deleted'] = self.deleted
mef_action = Action.CREATE
mef_record = ConceptMefRecord.create(
data=mef_data,
Expand Down
5 changes: 3 additions & 2 deletions rero_mef/monitoring/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@

"""Monitoring utilities."""

from datetime import datetime, timedelta

from datetime import datetime, timedelta, timezone

import click
from elasticsearch.exceptions import NotFoundError
Expand Down Expand Up @@ -110,7 +111,7 @@ def get_es_db_missing_pids(self, doc_type, with_deleted=False,
pids_es = []
pids_db = []
if index:
date = datetime.utcnow()
date = datetime.now(timezone.utc)
pids_es = {}
query = RecordsSearch(index=index) \
.filter('range', _created={'lte': date})
Expand Down
5 changes: 2 additions & 3 deletions rero_mef/places/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,6 @@ def create_or_update_mef(self, dbcommit=False, reindex=False):
if mef_records := PlaceMefRecord.get_mef(self.pid, self.name):
mef_data = mef_records[0]

if self.deleted and not mef_data.get('deleted'):
mef_data['deleted'] = self.deleted

ref_string = build_ref_string(
place=self.name,
place_pid=self.pid
Expand All @@ -82,6 +79,8 @@ def create_or_update_mef(self, dbcommit=False, reindex=False):
reindex=reindex
)
else:
if self.deleted and not mef_data.get('deleted'):
mef_data['deleted'] = self.deleted
mef_action = Action.CREATE
mef_record = PlaceMefRecord.create(
data=mef_data,
Expand Down
7 changes: 3 additions & 4 deletions rero_mef/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,8 @@ def _default_parser(qstr=None):
type=lambda v: v.lower() in ['true', '1']
)
if not with_deleted:
search = search.filter('bool', must_not=[
Q('exists', field='deleted'), # no deleted MEF's
Q('exists', field='*.deleted') # no deleted entities
])
search = search \
.exclude('exists', field='deleted') \
.exclude('exists', field='*.deleted')

return search, urlkwargs
49 changes: 23 additions & 26 deletions scripts/test
Original file line number Diff line number Diff line change
Expand Up @@ -63,44 +63,41 @@ if [[ -z "${VIRTUAL_ENV}" ]]; then
fi

set -e
# -> Vulnerability found in flask-caching version 2.0.1
# Vulnerability ID: 40459
# -> Vulnerability found in flask-security version 3.0.0
# Vulnerability ID: 45183
# -> Vulnerability found in flask-security version 3.0.0
# Vulnerability ID: 44501
# CVE-2021-23385, an open redirect vulnerability: When using the...
# -> Vulnerability found in sqlalchemy version 1.4.50
# Vulnerability ID: 51668
# -> Vulnerability found in sqlalchemy-utils version 0.38.3
# Vulnerability ID: 42194
# -> Vulnerability found in wtforms version 2.3.3
# Vulnerability ID: 42852
# -> Vulnerability found in werkzeug version 2.2.3
# Vulnerability ID: 62019
# -> Vulnerability found in pip version 23.2.1
# Vulnerability ID: 62044
# -> Vulnerability found in py version 1.11.0
# Vulnerability ID: 51457
# For more information about this vulnerability, visit
# To ignore this vulnerability, use PyUp vulnerability id 51457 in safety’s
# -> Vulnerability found in pip version 24.0
# Vulnerability ID: 67599
# For more information about this vulnerability, visit
# To ignore this vulnerability, use PyUp vulnerability id 67599 in safety’s
# -> Vulnerability found in werkzeug version 2.2.3
# Vulnerability ID: 62019
# For more information about this vulnerability, visit
# To ignore this vulnerability, use PyUp vulnerability id 62019 in safety’s
# -> Vulnerability found in wtforms version 2.3.3
# Vulnerability ID: 42852
# For more information about this vulnerability, visit
# To ignore this vulnerability, use PyUp vulnerability id 42852 in safety’s
# -> Vulnerability found in sqlalchemy-utils version 0.38.3
# Vulnerability ID: 42194
# For more information about this vulnerability, visit
# To ignore this vulnerability, use PyUp vulnerability id 42194 in safety’s
# -> Vulnerability found in sqlalchemy version 1.4.52
# Vulnerability ID: 51668
info_msg "Check vulnerabilities:"
safety_exceptions="-i 40459 -i 45183 -i 44501 -i 51668 -i 42194 -i 42852 -i 62019 -i 62044 -i 51457"
safety_exceptions="-i 51457 -i 67599 -i 62019 -i 42852 -i 42194 -i 51668"
msg=$(safety check -o text ${safety_exceptions}) || {
echo "Safety vulnerabilites found for packages:" $(safety check -o bare ${safety_exceptions})
echo "Run:" "safety check -o screen ${safety_exceptions} | grep -i vulnerability" "for more details"
echo "Run: \"safety check -o screen ${safety_exceptions} | grep -i vulnerability\" for more details"
exit 1
}
info_msg "Test pydocstyle:"
pydocstyle rero_mef tests docs
info_msg "Test isort:"
isort --check-only --diff "${SCRIPT_PATH}"
info_msg "Test useless imports:"
autoflake --quiet --check --recursive --remove-all-unused-imports --ignore-init-module-imports . &> /dev/null || {
autoflake --recursive --remove-all-unused-imports --ignore-init-module-imports .
exit 1
}
# info_msg "Check-manifest:"
# TODO: check if this is required when rero-ils will be published
# check-manifest --ignore ".travis-*,docs/_build*"
autoflake --recursive --remove-all-unused-imports --ignore-init-module-imports --check-diff --quiet .
info_msg "Sphinx-build:"
sphinx-build -qnNW docs docs/_build/html
info_msg "Tests:"
Expand Down
22 changes: 20 additions & 2 deletions tests/api/test_agents_gnd_rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

from flask import url_for

from rero_mef.agents import Action, AgentGndRecord
from rero_mef.agents import Action, AgentGndRecord, AgentMefRecord


def test_view_agents_gnd(client, agent_gnd_record):
Expand Down Expand Up @@ -63,7 +63,7 @@ def test_save_deleted_data(client, agent_gnd_record, agent_gnd_data):
data = deepcopy(agent_gnd_data)
data = {
'deleted': '2022-01-31T10:44:22.552001+00:00',
'pid': agent_gnd_record.pid,
'pid': pid,
'relation_pid': {
'type': 'redirect_to',
'value': '1134995709'
Expand All @@ -79,3 +79,21 @@ def test_save_deleted_data(client, agent_gnd_record, agent_gnd_data):
assert action == Action.UPDATE
assert record['deleted'] == data['deleted']
assert record['relation_pid'] == data['relation_pid']

mef_record, mef_actions = record.create_or_update_mef(
dbcommit=True,
reindex=True
)
assert mef_actions == {'1': Action.CREATE}
assert mef_record.deleted
assert mef_record['deleted'] == record['deleted']

record = AgentGndRecord.get_record_by_pid(pid)
record.pop('deleted')
record.update(data=record, dbcommit=True, reindex=True)
mef_record, mef_actions = record.create_or_update_mef(
dbcommit=True,
reindex=True
)
mef_record = AgentMefRecord.get_record_by_pid(mef_record.pid)
assert 'deleted' not in mef_record
27 changes: 0 additions & 27 deletions tests/api/test_agents_mef_rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,30 +255,3 @@ def utf_isoformat(date):
}, {
'pid': '4'
}]

mef_pid2 = agent_mef_idref_redirect_record.mark_as_deleted(
dbcommit=True,
reindex=True
)
agent_mef_gnd_redirect_record.delete(dbcommit=True, delindex=True)
AgentMefRecord.flush_indexes()
res, data = postdata(
client,
'api_blueprint.agent_mef_get_updated',
{"pids": ['1', '2', '3', '4']}
)
assert res.status_code == 200
assert data == [{
'pid': agent_mef_record.pid,
'_created': utf_isoformat(agent_mef_record.created),
'_updated': utf_isoformat(agent_mef_record.updated)
}, {
'pid': agent_mef_idref_redirect_record.pid,
'_created': utf_isoformat(agent_mef_idref_redirect_record.created),
'_updated': utf_isoformat(agent_mef_idref_redirect_record.updated),
'deleted': agent_mef_idref_redirect_record.get('deleted')
}, {
'pid': agent_mef_gnd_redirect_record.pid
}, {
'pid': '4'
}]
3 changes: 0 additions & 3 deletions tests/ui/agents/test_agents_mef_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,3 @@ def test_get_multiple_missing_pids(app, agent_mef_data, agent_viaf_record):
}
assert missing_pids == {'aggnd': [], 'agrero': [], 'aidref': []}
assert none_pids == {'aggnd': [], 'agrero': [], 'aidref': []}

m_record_2.mark_as_deleted(dbcommit=True, reindex=True)
assert m_record_2.deleted is not None

0 comments on commit 033e214

Please sign in to comment.