Skip to content

Commit

Permalink
Add report output for check_refint.
Browse files (browse the repository at this point in the history)
  • Loading branch information
Stefano Cossu committed Apr 14, 2018
1 parent ea4f4fd commit 84fa563
Show file tree
Hide file tree
Showing 5 changed files with 65 additions and 42 deletions.
45 changes: 25 additions & 20 deletions conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,43 +4,48 @@
from shutil import rmtree
from tempfile import gettempdir

from lakesuperior import env_setup, env
from lakesuperior.app import create_app
from lakesuperior import env
from lakesuperior.config_parser import parse_config
from lakesuperior.globals import AppGlobals
from lakesuperior.util.generators import random_image


# Override data directory locations.
config = parse_config()
data_dir = path.join(gettempdir(), 'lsup_test', 'data')
config['application']['data_dir'] = data_dir
config['application']['store']['ldp_nr']['location'] = (
path.join(data_dir, 'ldpnr_store'))
config['application']['store']['ldp_rs']['location'] = (
path.join(data_dir, 'ldprs_store'))

env.app_globals = AppGlobals(config)
from lakesuperior.app import create_app


@pytest.fixture(scope='module')
def app():
# Override data directory locations.
data_dir = path.join(gettempdir(), 'lsup_test', 'data')
makedirs(data_dir, exist_ok=True)
env.app_globals.config['application']['data_dir'] = data_dir
env.app_globals.config['application']['store']['ldp_nr']['location'] = (
path.join(data_dir, 'ldpnr_store'))
env.app_globals.config['application']['store']['ldp_rs']['location'] = (
path.join(data_dir, 'ldprs_store'))
app = create_app(env.app_globals.config['application'])

yield app

# TODO improve this by using tempfile.TemporaryDirectory as a context
# manager.
print('Removing fixture data directory.')
rmtree(data_dir)


@pytest.fixture(scope='module')
def db(app):
'''
Set up and tear down test triplestore.
'''
rdfly = env.app_globals.rdfly
rdfly.bootstrap()
makedirs(data_dir, exist_ok=True)
env.app_globals.rdfly.bootstrap()
env.app_globals.nonrdfly.bootstrap()
print('Initialized data store.')

yield rdfly
yield env.app_globals.rdfly

print('Tearing down fixture graph store.')
rdfly.store.destroy(rdfly.store.path)
# TODO improve this by using tempfile.TemporaryDirectory as a context
# manager.
print('Removing fixture data directory.')
rmtree(data_dir)


@pytest.fixture
Expand Down
9 changes: 2 additions & 7 deletions lakesuperior/api/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

from lakesuperior import env
from lakesuperior.config_parser import parse_config
from lakesuperior.globals import AppGlobals
from lakesuperior.migrator import Migrator
from lakesuperior.store.ldp_nr.default_layout import DefaultLayout as FileLayout
from lakesuperior.store.ldp_rs.lmdb_store import TxnManager
Expand Down Expand Up @@ -47,16 +46,12 @@ def migrate(src, dest, start_pts=None, list_file=None, **kwargs):
return Migrator(src, dest, **kwargs).migrate(start_pts, list_file)


def integrity_check(config_dir=None):
def integrity_check():
"""
Check integrity of the data set.
At the moment this is limited to referential integrity. Other checks can
be added and triggered by different argument flags.
"""
if config_dir:
env.app_globals = AppGlobals(parse_config(config_dir))
else:
import lakesuperior.env_setup
with TxnManager(env.app_globals.rdfly.store):
return { t for t in env.app_globals.rdfly.find_refint_violations()}
return set(env.app_globals.rdfly.find_refint_violations())
2 changes: 1 addition & 1 deletion lakesuperior/env_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
Import this module to initialize the configuration for a production setup::
>>> from lakesuperior import env_setup
>>> import lakesuperior.env_setup
Will load the default configuration.
"""
Expand Down
50 changes: 36 additions & 14 deletions lakesuperior/lsup_admin.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,18 @@
import click
import click_log
import csv
import json
import logging
import os
import sys

from os import getcwd, path

import arrow

from lakesuperior import env
from lakesuperior.api import admin as admin_api
from lakesuperior.config_parser import config
from lakesuperior.globals import AppGlobals
from lakesuperior.store.ldp_rs.lmdb_store import TxnManager

__doc__="""
Expand All @@ -23,12 +28,6 @@
logger = logging.getLogger(__name__)
click_log.basic_config(logger)

#report = logging.getLogger('report')
#report_formatter = logging.Formatter('"%(asctime)s",%(message)s')
#report_fpath = '{}/lsup-report-{}'.format(
# env.config['application']['data_dir'],
# arrow.utcnow().format('YYYY-MM-DDTHH:mm:ss.S'))
#report_handler = logging.FileHandler(report_fpath)

@click.group()
def admin():
Expand Down Expand Up @@ -69,7 +68,8 @@ def bootstrap():
click.echo('Initializing binary store at {}'.format(nonrdfly.root))
nonrdfly.bootstrap()
click.echo('Binary store initialized.')
click.echo('Repository successfully set up. Go to town.')
click.echo('\nRepository successfully set up. Go to town.')
click.echo('If the HTTP server is running, it must be restarted.')


@click.command()
Expand Down Expand Up @@ -117,19 +117,41 @@ def check_refint(config_folder=None, output=None):
resources. For repositories set up with the `referential_integrity` option
(the default), this is a pre-condition for a consistent data set.
Note: this check is run regardless of whether the repository enforces
If inconsistencies are found, a report is generated in CSV format with the
following columns: `s`, `p`, `o` (respectively the terms of the
triple containing the dangling relationship) and `missing` which
indicates which term is the missing URI (currently always set to `o`).
Note: this check can be run regardless of whether the repository enforces
referential integrity.
"""
check_results = admin_api.integrity_check(config_folder)
if config_folder:
env.app_globals = AppGlobals(parse_config(config_dir))
else:
import lakesuperior.env_setup

check_results = admin_api.integrity_check()

click.echo('Integrity check results:')
if len(check_results):
click.echo(click.style('Inconsistencies found!', fg='red', bold=True))
click.echo('Missing object in the following triples:')
for trp in check_results:
click.echo(' '.join([str(t) for t in trp[0]]))
if not output:
output = path.join(getcwd(), 'refint_report-{}.csv'.format(
arrow.utcnow().format('YYYY-MM-DDTHH:mm:ss.S')))
elif not output.endswith('.csv'):
output += '.csv'

with open(output, 'w', newline='') as fh:
writer = csv.writer(fh)
writer.writerow(('s', 'p', 'o', 'missing'))
for trp in check_results:
# ``o`` is always hardcoded for now.
writer.writerow([t.n3() for t in trp[0]] + ['o'])

click.echo('Report generated at {}'.format(output))
else:
click.echo(click.style('Clean. ', fg='green', bold=True)
+ 'No inconsistency found.')
+ 'No inconsistency found. No report generated.')


@click.command()
Expand Down
1 change: 1 addition & 0 deletions lakesuperior/store/ldp_rs/rsrc_centric_layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -557,6 +557,7 @@ def find_refint_violations(self):
:rtype: set
:return: Triples referencing a repository URI that is not a resource.
"""
#import pdb; pdb.set_trace()
for i, obj in enumerate(self.store.all_terms('o'), start=1):
if (
isinstance(obj, URIRef)
Expand Down

0 comments on commit 84fa563

Please sign in to comment.