Skip to content

Commit

Permalink
Merge be6a3c5 into 3e1cc6b
Browse files Browse the repository at this point in the history
  • Loading branch information
Semtexcz committed Nov 10, 2020
2 parents 3e1cc6b + be6a3c5 commit 86b2d2d
Show file tree
Hide file tree
Showing 12 changed files with 184 additions and 167 deletions.
1 change: 0 additions & 1 deletion oarepo_oai_pmh_harvester/cli.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# TODO: dotesat uživatelské rozhraní
import click
from flask import cli

Expand Down
41 changes: 18 additions & 23 deletions oarepo_oai_pmh_harvester/ext.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
from collections import defaultdict
from typing import List

from invenio_db import db
from pkg_resources import iter_entry_points
from sqlalchemy import inspect

from oarepo_oai_pmh_harvester.transformer import OAITransformer
from oarepo_oai_pmh_harvester.utils import infinite_dd
from . import config
from .models import OAIProvider
from .provider import OAIProvider
from .synchronization import OAISynchronizer
from .views import oai_client_blueprint


class Singleton(type):
Expand Down Expand Up @@ -37,13 +36,8 @@ def __init__(self, app, _rules: defaultdict = None, _parsers: defaultdict = None

@property
def providers(self):
# db.session.refresh()
if self._providers is None:
self.create_providers()
# TODO: Codereview - udělat lépe kontrolu jestli je v db.
state = [inspect(provider).persistent for provider in self._providers.values()]
if not all(state):
self.create_providers()
return self._providers

@property
Expand Down Expand Up @@ -79,23 +73,17 @@ def create_providers(self):
providers = self.app.config.get("OAREPO_OAI_PROVIDERS")
if providers:
for k, v in providers.items():
provider = OAIProvider.query.filter_by(code=k).one_or_none()
if provider:
continue
provider = OAIProvider(
code=k,
description=v.get("description"),
)
db.session.add(provider)
db.session.commit()
provider._synchronizers = {}
) # vytvořím providera
provider.synchronizers = {}
for sync_config in v.get("synchronizers", []):
synchronizer = self.create_synchronizer(provider.code, sync_config, provider.id)
provider._synchronizers[sync_config["name"]] = synchronizer
synchronizer = self.create_synchronizer(provider.code, sync_config)
provider.synchronizers[sync_config["name"]] = synchronizer
if not self._providers:
self._providers = {}
self._providers.setdefault(k, provider)
db.session.commit()

def rule(self, provider, parser, path, phase=OAITransformer.PHASE_PRE):
def wrapper(func):
Expand All @@ -119,9 +107,9 @@ def add_parser(self, func, name):
self._parsers = infinite_dd()
self._parsers[name] = func

def create_synchronizer(self, provider_code, config, provider_id):
def create_synchronizer(self, provider_code, config):
return OAISynchronizer(
provider_id=provider_id,
provider_code=provider_code,
metadata_prefix=config["metadata_prefix"],
set_=config["set"],
constant_fields=config.get("constant_field", {}),
Expand All @@ -146,7 +134,7 @@ def run(self, providers_codes: List[str] = None, synchronizers_codes: List[str]
elif len(providers_codes) == 1:
if not synchronizers_codes:
synchronizers_codes = [_ for _ in
self.providers[providers_codes[0]]._synchronizers.keys()]
self.providers[providers_codes[0]].synchronizers.keys()]
if len(synchronizers_codes) > 1:
for code in synchronizers_codes:
self._run_synchronizer(providers_codes[0], code, break_on_error=break_on_error)
Expand All @@ -169,13 +157,13 @@ def run(self, providers_codes: List[str] = None, synchronizers_codes: List[str]

def _run_provider(self, provider: str, break_on_error: bool = True):
provider_ = self.providers[provider]
for synchronizer in provider_._synchronizers.keys():
for synchronizer in provider_.synchronizers.keys():
self._run_synchronizer(provider, synchronizer, break_on_error=break_on_error)

def _run_synchronizer(self, provider: str, synchronizer: str, start_oai: str = None,
start_id: int = 0, break_on_error: bool = True):
provider = self.providers[provider]
synchronizer = provider._synchronizers[synchronizer]
synchronizer = provider.synchronizers[synchronizer]
synchronizer.run(start_oai=start_oai, start_id=start_id, break_on_error=break_on_error)


Expand All @@ -188,6 +176,13 @@ def __init__(self, app=None):

def init_app(self, app):
self.init_config(app)

# register blueprint
prefix = app.config.get('OAREPO_OAI_CLIENT_URL_PREFIX', "/oai-client")
if prefix.startswith('/api'):
prefix = prefix[4:]
app.register_blueprint(oai_client_blueprint, url_prefix=prefix)

app.extensions['oarepo-oai-client'] = OArepoOAIClientState(app)

def init_config(self, app):
Expand Down
129 changes: 58 additions & 71 deletions oarepo_oai_pmh_harvester/models.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
import uuid

from invenio_db import db
from sqlalchemy.dialects import postgresql
from sqlalchemy.orm import backref, relationship
from sqlalchemy.sql.schema import ForeignKey
from sqlalchemy_utils import UUIDType, JSONType
from sqlalchemy_utils import UUIDType


class OAIRecord(db.Model):
Expand All @@ -26,17 +25,17 @@ class OAIRecord(db.Model):
nullable=False
)
last_sync_id = db.Column(
db.INTEGER(),
db.Integer(),
ForeignKey('oarepo_oai_sync.id'),
nullable=True
)
modification_sync_id = db.Column(
db.INTEGER(),
db.Integer(),
ForeignKey('oarepo_oai_sync.id'),
nullable=True
)
creation_sync_id = db.Column(
db.INTEGER(),
db.Integer(),
ForeignKey('oarepo_oai_sync.id'),
nullable=True
)
Expand All @@ -60,80 +59,68 @@ def get_record(cls, oai_identifier):
class OAISync(db.Model):
__tablename__ = "oarepo_oai_sync"
id = db.Column(db.Integer, primary_key=True)
provider_id = db.Column(db.Integer, ForeignKey('oarepo_oai_provider.id'))
provider_code = db.Column(db.String, nullable=False)
sync_start = db.Column(db.TIMESTAMP)
sync_end = db.Column(db.TIMESTAMP)
status = db.Column(db.String(32))
logs = db.Column(db.Text())

# number of created, modified and deleted records for statistics
rec_created = db.Column(db.Integer)
rec_modified = db.Column(db.Integer)
rec_deleted = db.Column(db.Integer)
provider = relationship(
"OAIProvider",
backref=backref("synchronizations")
)
traceback = relationship("OAIRecordExc", backref=backref("synchronizations"))


class OAIProvider(db.Model):
__tablename__ = "oarepo_oai_provider"
id = db.Column(db.Integer, primary_key=True)
code = db.Column(db.String(16), nullable=False, unique=True)
description = db.Column(db.String(2048), nullable=True)
synchronizers = relationship("OAISynchronizers", backref=backref("provider"))
records_created = db.Column(db.Integer)
records_modified = db.Column(db.Integer)
records_deleted = db.Column(db.Integer)
tracebacks = relationship("OAIRecordExc", backref=backref("synchronization"))


# TODO: spojit s OAISynchronizer v synchronization.py
class OAISynchronizers(db.Model):
__tablename__ = "oarepo_oai_synchronizers"
id = db.Column(db.Integer, primary_key=True)
provider_id = db.Column(db.Integer, ForeignKey('oarepo_oai_provider.id'))
oai_endpoint = db.Column(db.String(2048), nullable=False)
set_ = db.Column(db.String(256), name="set")
metadata_prefix = db.Column(db.String(32), default="oai_dc")
constant_fields = db.Column(
db.JSON().with_variant(
postgresql.JSONB(none_as_null=True),
'postgresql',
).with_variant(
JSONType(),
'sqlite',
).with_variant(
JSONType(),
'mysql',
),
default=lambda: dict(),
nullable=True
)
unhandled_paths = db.Column(
db.JSON().with_variant(
postgresql.JSONB(none_as_null=True),
'postgresql',
).with_variant(
JSONType(),
'sqlite',
).with_variant(
JSONType(),
'mysql',
),
nullable=True
)
default_endpoint = db.Column(db.String(), nullable=False)
endpoint_mapping = db.Column(
db.JSON().with_variant(
postgresql.JSONB(none_as_null=True),
'postgresql',
).with_variant(
JSONType(),
'sqlite',
).with_variant(
JSONType(),
'mysql',
),
nullable=True
)
# TODO: odstranit a udělat rest api z configu
# class OAISynchronizers(db.Model):
# __tablename__ = "oarepo_oai_synchronizers"
# id = db.Column(db.Integer, primary_key=True)
# provider_id = db.Column(db.Integer, ForeignKey('oarepo_oai_provider.id'))
# oai_endpoint = db.Column(db.String(2048), nullable=False)
# set_ = db.Column(db.String(256), name="set")
# metadata_prefix = db.Column(db.String(32), default="oai_dc")
# constant_fields = db.Column(
# db.JSON().with_variant(
# postgresql.JSONB(none_as_null=True),
# 'postgresql',
# ).with_variant(
# JSONType(),
# 'sqlite',
# ).with_variant(
# JSONType(),
# 'mysql',
# ),
# default=lambda: dict(),
# nullable=True
# )
# unhandled_paths = db.Column(
# db.JSON().with_variant(
# postgresql.JSONB(none_as_null=True),
# 'postgresql',
# ).with_variant(
# JSONType(),
# 'sqlite',
# ).with_variant(
# JSONType(),
# 'mysql',
# ),
# nullable=True
# )
# default_endpoint = db.Column(db.String(), nullable=False)
# endpoint_mapping = db.Column(
# db.JSON().with_variant(
# postgresql.JSONB(none_as_null=True),
# 'postgresql',
# ).with_variant(
# JSONType(),
# 'sqlite',
# ).with_variant(
# JSONType(),
# 'mysql',
# ),
# nullable=True
# )


class OAIRecordExc(db.Model):
Expand Down
17 changes: 17 additions & 0 deletions oarepo_oai_pmh_harvester/provider.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from typing import Dict

from oarepo_oai_pmh_harvester.synchronization import OAISynchronizer


class OAIProvider:
def __init__(self, code, description: str = None, synchronizers: Dict[str, OAISynchronizer] = None):
self.code = code
self.description = description
self.synchronizers = synchronizers

def __repr__(self):
return {
"code": self.code,
"description": self.description,
"synchronizers": self.synchronizers
}
12 changes: 6 additions & 6 deletions oarepo_oai_pmh_harvester/synchronization.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ class OAISynchronizer:

def __init__(
self,
provider_id,
provider_code,
oai_endpoint,
metadata_prefix,
set_,
Expand All @@ -56,7 +56,7 @@ def __init__(
self.pid_field = current_app.config.get('PIDSTORE_RECID_FIELD', "recid")
else:
self.pid_field = pid_field
self.provider_id = provider_id
self.provider_code = provider_code
self.metadata_prefix = metadata_prefix
self.oai_endpoint = oai_endpoint
self.oai_sync = None
Expand All @@ -82,7 +82,7 @@ def run(self, start_oai: str = None, start_id: int = 0, break_on_error: bool = T
self.restart_counters()
with db.session.begin_nested():
self.oai_sync = OAISync(
provider_id=self.provider_id,
provider_code=self.provider_code, # TODO: nahradit provider.code
sync_start=arrow.utcnow().datetime, # datetime.datetime.utcnow(),
status="active")
db.session.add(self.oai_sync)
Expand All @@ -101,9 +101,9 @@ def update_oai_sync(self, status):
# self.oai_sync = db.session.merge(self.oai_sync)
self.oai_sync.status = status
self.oai_sync.sync_end = arrow.utcnow().datetime # datetime.datetime.utcnow()
self.oai_sync.rec_modified = self.modified
self.oai_sync.rec_created = self.created
self.oai_sync.rec_deleted = self.deleted
self.oai_sync.records_modified = self.modified
self.oai_sync.records_created = self.created
self.oai_sync.records_deleted = self.deleted
if status == "failed":
self.oai_sync.logs = traceback.format_exc()
db.session.add(self.oai_sync)
Expand Down
12 changes: 12 additions & 0 deletions oarepo_oai_pmh_harvester/views.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from flask import Blueprint, abort, current_app

oai_client_blueprint = Blueprint('oai_client', __name__)


@oai_client_blueprint.route("/providers", methods=["GET"])
def get_providers():
config = current_app.config.get("OAREPO_OAI_PROVIDERS")
if config:
return config
else:
abort(404, description="Resource not found")
3 changes: 3 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@
'flask.commands': [
'oai = oarepo_oai_pmh_harvester.cli:oai'
],
'invenio_base.api_apps': [
'oarepo_oai_pmh_harvester = oarepo_oai_pmh_harvester.ext:OArepoOAIClient',
],
},
extras_require=extras_require,
install_requires=install_requires,
Expand Down
3 changes: 3 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from sqlalchemy_utils import database_exists, create_database

from oarepo_oai_pmh_harvester.ext import OArepoOAIClient
from oarepo_oai_pmh_harvester.views import oai_client_blueprint


class TestSchema(Schema):
Expand Down Expand Up @@ -129,6 +130,8 @@ def app():
InvenioPIDStore(app)
app.url_map.converters['pid'] = PIDConverter
OArepoOAIClient(app)
# app.register_blueprint(oai_client_blueprint, url_prefix="/oai-client")
print("\n\nURL MAP", app.url_map)

app_loaded.send(app, app=app)

Expand Down
Loading

0 comments on commit 86b2d2d

Please sign in to comment.