Skip to content

Commit

Permalink
Merge pull request #18 from oarepo/develop
Browse files Browse the repository at this point in the history
Added feature to synchronize particular records
  • Loading branch information
Semtexcz committed Dec 7, 2020
2 parents e433214 + b107f9c commit 2b18fd2
Show file tree
Hide file tree
Showing 6 changed files with 77 additions and 19 deletions.
39 changes: 27 additions & 12 deletions oarepo_oai_pmh_harvester/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,21 +26,36 @@ def oai():
@click.option("-i", "--start_id", default=0, type=int,
help="The serial number from which the synchronization starts. This is useful if "
"for some reason the previous synchronization was interrupted at some point.")
@click.option("-a", "--oai", default=None, type=str, multiple=True,
help="OAI identifier that will be fetched and synchronized. The field is "
"repeatable. If this option is used, the provider and synchronizer must be "
"specified and "
"star_id or start_oai must not be used")
@cli.with_appcontext
def run(provider, synchronizer, break_on_error, start_oai, start_id):
def run(provider, synchronizer, break_on_error, start_oai, start_id, oai):
"""
Starts harvesting the resources set in invenio.cfg through the OAREPO_OAI_PROVIDERS
environment variable.
"""
if not provider:
provider = None
l = len(oai)
if l > 0 and provider and synchronizer and not start_oai and not start_id:
assert len(provider) <= 1, "OAI option is only for one provider and synchronizer"
assert len(synchronizer) <= 1, "OAI option is only for one provider and synchronizer"
provider = provider[0]
synchronizer = synchronizer[0]
current_oai_client.run_synchronizer_by_ids(list(oai), provider, synchronizer,
break_on_error=break_on_error)
else:
provider = list(provider)
if not synchronizer:
synchronizer = None
else:
synchronizer = list(synchronizer)
current_oai_client.run(providers_codes=provider, synchronizers_codes=synchronizer,
break_on_error=break_on_error, start_oai=start_oai, start_id=start_id)

# TODO: decide whether to use the minter or not
assert l == 0, " If OAI option is used, the provider and synchronizer must be " \
"specified and star_id or start_oai must not be used"
if not provider:
provider = None
else:
provider = list(provider)
if not synchronizer:
synchronizer = None
else:
synchronizer = list(synchronizer)
current_oai_client.run(providers_codes=provider, synchronizers_codes=synchronizer,
break_on_error=break_on_error, start_oai=start_oai,
start_id=start_id)
8 changes: 7 additions & 1 deletion oarepo_oai_pmh_harvester/ext.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from collections import defaultdict
from typing import List
from typing import List, Union

from pkg_resources import iter_entry_points

Expand Down Expand Up @@ -216,6 +216,12 @@ def _run_synchronizer(self, provider: str, synchronizer: str, start_oai: str = N
synchronizer = provider.synchronizers[synchronizer]
synchronizer.run(start_oai=start_oai, start_id=start_id, break_on_error=break_on_error)

def run_synchronizer_by_ids(self, oai_id: Union[str, List[str]], provider: str,
                            synchronizer: str, break_on_error: bool = True):
    """
    Run one synchronizer for the given OAI identifier(s) only.

    :param oai_id: a single OAI identifier or a list of them; handed to the
        synchronizer's ``run`` unchanged as ``oai_id``
    :param provider: key used to look up the provider in ``self.providers``
    :param synchronizer: key used to look up the synchronizer in the provider's
        ``synchronizers``
    :param break_on_error: forwarded unchanged to the synchronizer's ``run``
    """
    selected_provider = self.providers[provider]
    selected_synchronizer = selected_provider.synchronizers[synchronizer]
    selected_synchronizer.run(break_on_error=break_on_error, oai_id=oai_id)


class OArepoOAIClient:

Expand Down
22 changes: 17 additions & 5 deletions oarepo_oai_pmh_harvester/synchronization.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import logging
import traceback
import uuid
from itertools import islice
from typing import Callable, List
from typing import Callable, List, Union

import arrow
from arrow import Arrow
Expand Down Expand Up @@ -101,7 +100,8 @@ def from_(self, value):
else:
self._from = None

def run(self, start_oai: str = None, start_id: int = 0, break_on_error: bool = True):
def run(self, start_oai: str = None, start_id: int = 0, break_on_error: bool = True,
oai_id: Union[str, List[str]] = None):
"""
:return:
Expand All @@ -116,8 +116,20 @@ def run(self, start_oai: str = None, start_id: int = 0, break_on_error: bool = T
db.session.add(self.oai_sync)
db.session.commit()
try:
self.synchronize(start_oai=start_oai, start_id=start_id, break_on_error=break_on_error)
self.update_oai_sync("ok")
if oai_id:
if isinstance(oai_id, str):
oai_ids = [oai_id]
elif isinstance(oai_id, list):
oai_ids = oai_id
else:
raise Exception("OAI identifier must be string or list of strings")
identifiers = self._get_oai_identifiers(identifiers_list=oai_ids)
for idx, identifier in enumerate(identifiers, start=start_id):
self.record_handling(idx, start_oai, break_on_error, identifier)
self.update_oai_sync("ok")
else:
self.synchronize(start_oai=start_oai, start_id=start_id, break_on_error=break_on_error)
self.update_oai_sync("ok")
except:
self.update_oai_sync("failed")
raise
Expand Down
2 changes: 1 addition & 1 deletion oarepo_oai_pmh_harvester/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@

from __future__ import absolute_import, print_function

__version__ = '2.0.0a15'
__version__ = '2.0.0a16'
9 changes: 9 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,12 @@ def test_run(load_entry_points, app, db):
result = runner.invoke(run, ["-p", "uk"])
assert result.exit_code == 0
patch.stop()


def test_run_2(load_entry_points, app, db):
    """Invoke the ``run`` CLI command for one provider, one synchronizer and one OAI id."""
    cli_args = ["-p", "uk", "-s", "xoai", "-a", "oai:test.example.com:1996652"]
    # The context manager undoes the patch even when invoke() raises; a bare
    # start()/stop() pair would leave Sickle.harvest patched for later tests.
    with mock.patch('sickle.app.Sickle.harvest', mock_harvest):
        runner = app.test_cli_runner()
        result = runner.invoke(run, cli_args)
    # Include the command output so a failure shows why the exit code was non-zero.
    assert result.exit_code == 0, result.output
16 changes: 16 additions & 0 deletions tests/test_synchronization.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,3 +334,19 @@ def test_run_2(self, load_entry_points, app, db, record_xml):
assert oai_rec.pid == "1"
record = Record.get_record(id_=oai_rec.id)
assert record["title"] == "Testovací záznam"

def test_run_by_id(self, load_entry_points, app, db, record_xml):
    """Synchronize one record selected by its OAI identifier and check the stored result."""
    synchronizer = current_oai_client.providers["uk"].synchronizers["xoai"]
    synchronizer.bulk = False
    # The context manager undoes the patch even when run() raises; a bare
    # start()/stop() pair would leave Sickle.harvest patched for later tests.
    with mock.patch('sickle.app.Sickle.harvest', mock_harvest):
        synchronizer.run(oai_id=["oai:test.example.com:1996652"])

    oai_sync = OAISync.query.get(1)
    assert oai_sync.status == "ok"
    assert oai_sync.records_created == 1
    oai_rec = OAIRecord.query.all()[-1]
    assert oai_rec.pid == "1"
    record = Record.get_record(id_=oai_rec.id)
    assert record["title"] == "Testovací záznam"

0 comments on commit 2b18fd2

Please sign in to comment.