Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added feature to synchronize particular records #18

Merged
merged 2 commits into from
Dec 7, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 27 additions & 12 deletions oarepo_oai_pmh_harvester/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,21 +26,36 @@ def oai():
@click.option("-i", "--start_id", default=0, type=int,
help="The serial number from which the synchronization starts. This is useful if "
"for some reason the previous synchronization was interrupted at some point.")
@click.option("-a", "--oai", default=None, type=str, multiple=True,
help="OAI identifier that will be fetched and synchronized. The field is "
"repeatable. If this option is used, the provider and synchronizer must be "
"specified and "
"star_id or start_oai must not be used")
@cli.with_appcontext
def run(provider, synchronizer, break_on_error, start_oai, start_id):
def run(provider, synchronizer, break_on_error, start_oai, start_id, oai):
"""
Starts harvesting the resources set in invenio.cfg through the OAREPO_OAI_PROVIDERS
environment variable.
"""
if not provider:
provider = None
l = len(oai)
if l > 0 and provider and synchronizer and not start_oai and not start_id:
assert len(provider) <= 1, "OAI option is only for one provider and synchronizer"
assert len(synchronizer) <= 1, "OAI option is only for one provider and synchronizer"
provider = provider[0]
synchronizer = synchronizer[0]
current_oai_client.run_synchronizer_by_ids(list(oai), provider, synchronizer,
break_on_error=break_on_error)
else:
provider = list(provider)
if not synchronizer:
synchronizer = None
else:
synchronizer = list(synchronizer)
current_oai_client.run(providers_codes=provider, synchronizers_codes=synchronizer,
break_on_error=break_on_error, start_oai=start_oai, start_id=start_id)

# TODO: use minter / do not use minter
assert l == 0, " If OAI option is used, the provider and synchronizer must be " \
"specified and star_id or start_oai must not be used"
if not provider:
provider = None
else:
provider = list(provider)
if not synchronizer:
synchronizer = None
else:
synchronizer = list(synchronizer)
current_oai_client.run(providers_codes=provider, synchronizers_codes=synchronizer,
break_on_error=break_on_error, start_oai=start_oai,
start_id=start_id)
8 changes: 7 additions & 1 deletion oarepo_oai_pmh_harvester/ext.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from collections import defaultdict
from typing import List
from typing import List, Union

from pkg_resources import iter_entry_points

Expand Down Expand Up @@ -216,6 +216,12 @@ def _run_synchronizer(self, provider: str, synchronizer: str, start_oai: str = N
synchronizer = provider.synchronizers[synchronizer]
synchronizer.run(start_oai=start_oai, start_id=start_id, break_on_error=break_on_error)

def run_synchronizer_by_ids(self, oai_id: Union[str, List[str]], provider: str,
                            synchronizer: str, break_on_error: bool = True):
    """
    Synchronize only the given OAI identifier(s) with one synchronizer.

    :param oai_id: A single OAI identifier or a list of identifiers; forwarded
        unchanged as ``oai_id`` to the synchronizer's ``run``.
    :param provider: Key used to look the provider up in ``self.providers``.
    :param synchronizer: Key used to look the synchronizer up in the selected
        provider's ``synchronizers``.
    :param break_on_error: Forwarded unchanged to the synchronizer's ``run``.
    """
    # Resolve the two codes to objects under separate names so the string
    # parameters are not rebound to values of a different type.
    selected_provider = self.providers[provider]
    selected_synchronizer = selected_provider.synchronizers[synchronizer]
    selected_synchronizer.run(break_on_error=break_on_error, oai_id=oai_id)


class OArepoOAIClient:

Expand Down
22 changes: 17 additions & 5 deletions oarepo_oai_pmh_harvester/synchronization.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import logging
import traceback
import uuid
from itertools import islice
from typing import Callable, List
from typing import Callable, List, Union

import arrow
from arrow import Arrow
Expand Down Expand Up @@ -101,7 +100,8 @@ def from_(self, value):
else:
self._from = None

def run(self, start_oai: str = None, start_id: int = 0, break_on_error: bool = True):
def run(self, start_oai: str = None, start_id: int = 0, break_on_error: bool = True,
oai_id: Union[str, List[str]] = None):
"""

:return:
Expand All @@ -116,8 +116,20 @@ def run(self, start_oai: str = None, start_id: int = 0, break_on_error: bool = T
db.session.add(self.oai_sync)
db.session.commit()
try:
self.synchronize(start_oai=start_oai, start_id=start_id, break_on_error=break_on_error)
self.update_oai_sync("ok")
if oai_id:
if isinstance(oai_id, str):
oai_ids = [oai_id]
elif isinstance(oai_id, list):
oai_ids = oai_id
else:
raise Exception("OAI identifier must be string or list of strings")
identifiers = self._get_oai_identifiers(identifiers_list=oai_ids)
for idx, identifier in enumerate(identifiers, start=start_id):
self.record_handling(idx, start_oai, break_on_error, identifier)
self.update_oai_sync("ok")
else:
self.synchronize(start_oai=start_oai, start_id=start_id, break_on_error=break_on_error)
self.update_oai_sync("ok")
except:
self.update_oai_sync("failed")
raise
Expand Down
2 changes: 1 addition & 1 deletion oarepo_oai_pmh_harvester/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@

from __future__ import absolute_import, print_function

# Package version string.
# NOTE(review): two consecutive assignments; only the second value is in
# effect after import. Presumably these are the old/new sides of a diff
# hunk rather than intentional code; confirm.
__version__ = '2.0.0a15'
__version__ = '2.0.0a16'
9 changes: 9 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,12 @@ def test_run(load_entry_points, app, db):
result = runner.invoke(run, ["-p", "uk"])
assert result.exit_code == 0
patch.stop()


def test_run_2(load_entry_points, app, db):
    """Invoke ``run`` with one provider, one synchronizer and a single ``-a`` OAI identifier."""
    # Use the patch as a context manager so it is undone even when the
    # invocation raises; with a bare start()/stop() pair a failure before
    # stop() would leave Sickle.harvest patched for every later test.
    with mock.patch('sickle.app.Sickle.harvest', mock_harvest):
        runner = app.test_cli_runner()
        result = runner.invoke(
            run, ["-p", "uk", "-s", "xoai", "-a", "oai:test.example.com:1996652"])
    # Include the captured CLI output so a non-zero exit code is diagnosable.
    assert result.exit_code == 0, result.output
16 changes: 16 additions & 0 deletions tests/test_synchronization.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,3 +334,19 @@ def test_run_2(self, load_entry_points, app, db, record_xml):
assert oai_rec.pid == "1"
record = Record.get_record(id_=oai_rec.id)
assert record["title"] == "Testovací záznam"

def test_run_by_id(self, load_entry_points, app, db, record_xml):
    """Run the "xoai" synchronizer of provider "uk" for one OAI identifier and check the result."""
    synchronizer = current_oai_client.providers["uk"].synchronizers["xoai"]
    synchronizer.bulk = False
    # Use the patch as a context manager so it is undone even when run()
    # raises; with a bare start()/stop() pair an exception would leave
    # Sickle.harvest patched for every later test.
    with mock.patch('sickle.app.Sickle.harvest', mock_harvest):
        synchronizer.run(oai_id=["oai:test.example.com:1996652"])

    # The synchronization run itself must be recorded as successful.
    oai_sync = OAISync.query.get(1)
    assert oai_sync.status == "ok"
    assert oai_sync.records_created == 1

    # The most recently stored OAI record must point at the created record.
    oai_rec = OAIRecord.query.all()[-1]
    assert oai_rec.pid == "1"
    record = Record.get_record(id_=oai_rec.id)
    assert record["title"] == "Testovací záznam"