diff --git a/.github/release_template.md b/.github/release_template.md new file mode 100644 index 0000000..b3093a8 --- /dev/null +++ b/.github/release_template.md @@ -0,0 +1,25 @@ +# What Changed In This Release + +[High-level, colloquial summary of changes—mandatory] + +## New Features + +- + +## Enhancements + +- + +## Bug Fixes + +- + +## Other Changes + +- + +## Notes & Caveats + +- + +## Acknowledgments diff --git a/.gitignore b/.gitignore index 089e1b7..869598a 100644 --- a/.gitignore +++ b/.gitignore @@ -104,5 +104,4 @@ venv.bak/ .mypy_cache/ # Default LAKEsuperior data directories -data/ldpnr_store -data/ldprs_store +/data diff --git a/.travis.yml b/.travis.yml index 8f19450..e48f397 100644 --- a/.travis.yml +++ b/.travis.yml @@ -15,5 +15,6 @@ deploy: on: tags: true branch: master + python: "3.5" distributions: "bdist_wheel" diff --git a/MANIFEST.in b/MANIFEST.in index 4d39a1f..f5e85be 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,4 +1,5 @@ include README.rst include LICENSE +graft lakesuperior/data/bootstrap graft lakesuperior/endpoints/templates graft lakesuperior/etc.defaults diff --git a/VERSION b/VERSION new file mode 100644 index 0000000..df8a612 --- /dev/null +++ b/VERSION @@ -0,0 +1 @@ +1.0.0a13 diff --git a/conftest.py b/conftest.py index cade7b7..44ccc5d 100644 --- a/conftest.py +++ b/conftest.py @@ -1,22 +1,31 @@ -import sys - import pytest -sys.path.append('.') -from lakesuperior.config_parser import test_config +from os import makedirs, path +from shutil import rmtree +from tempfile import gettempdir + +from lakesuperior import env +from lakesuperior.config_parser import parse_config from lakesuperior.globals import AppGlobals -from lakesuperior.env import env +from lakesuperior.util.generators import random_image -env.config = test_config -env.app_globals = AppGlobals(test_config) + +# Override data directory locations. 
+config = parse_config() +data_dir = path.join(gettempdir(), 'lsup_test', 'data') +config['application']['data_dir'] = data_dir +config['application']['store']['ldp_nr']['location'] = ( + path.join(data_dir, 'ldpnr_store')) +config['application']['store']['ldp_rs']['location'] = ( + path.join(data_dir, 'ldprs_store')) + +env.app_globals = AppGlobals(config) from lakesuperior.app import create_app -from lakesuperior.util.generators import random_image -env.config = test_config @pytest.fixture(scope='module') def app(): - app = create_app(env.config['application']) + app = create_app(env.app_globals.config['application']) yield app @@ -26,14 +35,17 @@ def db(app): ''' Set up and tear down test triplestore. ''' - rdfly = env.app_globals.rdfly - rdfly.bootstrap() + makedirs(data_dir, exist_ok=True) + env.app_globals.rdfly.bootstrap() env.app_globals.nonrdfly.bootstrap() + print('Initialized data store.') - yield rdfly + yield env.app_globals.rdfly - print('Tearing down fixture graph store.') - rdfly.store.destroy(rdfly.store.path) + # TODO improve this by using tempfile.TemporaryDirectory as a context + # manager. + print('Removing fixture data directory.') + rmtree(data_dir) @pytest.fixture diff --git a/data/log/.keep b/data/log/.keep deleted file mode 100644 index e69de29..0000000 diff --git a/data/run/.keep b/data/run/.keep deleted file mode 100644 index e69de29..0000000 diff --git a/docs/apidoc/lakesuperior.rst b/docs/apidoc/lakesuperior.rst index 5cc0e32..0cbfcee 100644 --- a/docs/apidoc/lakesuperior.rst +++ b/docs/apidoc/lakesuperior.rst @@ -34,7 +34,7 @@ lakesuperior\.config\_parser module lakesuperior\.env module ------------------------ -.. automodule:: lakesuperior.env +.. 
automodule:: lakesuperior :members: :undoc-members: :show-inheritance: diff --git a/docs/cli.rst b/docs/cli.rst index 92b69ad..f1c799b 100644 --- a/docs/cli.rst +++ b/docs/cli.rst @@ -1,13 +1,39 @@ -LAKEsuperior Command Line Reference -=================================== +Command Line Reference +====================== -The LAKEsuperior command line tool is used for maintenance and -administration purposes. +LAKEsuperior comes with some command-line tools aimed at several purposes. -The script should be in your executable path if you install LAKEsuperior with -``pip``. The tool is self-documented, so this is just a redundant overview:: +If LAKEsuperior is installed via ``pip``, all tools can be invoked as normal +commands (i.e. they are in the virtualenv ``PATH``). - $ lsup_admin +The tools are currently not directly available on Docker instances (*TODO add +instructions and/or code changes to access them*). + +``fcrepo`` +---------- + +This is the main server command. It has no parameters. The command spawns +Gunicorn workers (as many as set up in the configuration) and can be sent in +the background, or started via init script. + +The tool must be run in the same virtual environment LAKEsuperior +was installed in (if it was)—i.e.:: + + source /bin/activate + +must be run before running the server. + +In the case an init script is used, ``coilmq`` (belonging to a 3rd party +package) needs to be launched as well; unless a message broker is already set +up, or if messaging is disabled in the configuration. + +``lsup-admin`` +-------------- + +``lsup-admin`` is the principal repository management tool. It is +self-documented, so this is just a redundant overview:: + + $ lsup-admin Usage: lsup-admin [OPTIONS] COMMAND [ARGS]... Options: @@ -21,8 +47,6 @@ The script should be in your executable path if you install LAKEsuperior with migrate Migrate an LDP repository to LAKEsuperior. stats Print repository statistics. 
-*TODO: Add instructions to access from Docker.* - All entries marked ``[STUB]`` are not yet implemented, however the ``lsup_admin --help`` command will issue a description of what the command is meant to do. Check the @@ -31,3 +55,37 @@ the radar. All of the above commands are also available via, and based upon, the native Python API. + +``lsup-benchmark`` +------------------ + +``lsup-benchmark`` is used to run performance tests in a predictable way. + +The command has no options but prompts the user for a few settings +interactively (N.B. this may change in favor of parameters). + +The benchmark tool is able to create RDF sources, or non-RDF, or an equal mix +of them, via POST or PUT, in the currently running LAKEsuperior server. It +runs single-threaded. + +The RDF sources are randomly generated graphs of consistent size and +complexity. They include a mix of in-repository references, literals, and +external URIs. Each graph has 200 triples. + +The non-RDF sources are randomly generated 1024x1024 pixel PNG images. + +You are warmly encouraged to run the script and share the performance results ( +*TODO add template for posting results*). + +``profiler`` +------------ + +This command launches a single-threaded HTTP server (Flask) on port 5000 that +logs profiling information. This is useful for analyzing application +performance. + +For more information, consult the `Python profilers guide +`__. + +Do not launch this while a WSGI server (``fcrepo``) is already running, because +that also launches a Flask server on port 5000. diff --git a/docs/conf.py b/docs/conf.py index b4a2766..96edffe 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -17,13 +17,11 @@ # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. 
# -import os import sys +from os import path from unittest.mock import MagicMock -#sys.path.append(os.path.abspath('../')) - class MockModule(MagicMock): @classmethod def __getattr__(cls, name): @@ -37,6 +35,8 @@ def __getattr__(cls, name): import lakesuperior.env_setup +here = path.abspath(path.dirname(__file__)) + # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. @@ -74,10 +74,9 @@ def __getattr__(cls, name): # |version| and |release|, also used in various other places throughout the # built documents. # -# The short X.Y version. -version = '1.0-alpha' -# The full version, including alpha/beta/rc tags. -release = '1.0.0-alpha.8' +# Version and release are the same. +with open(path.realpath(path.join(here, '..', 'VERSION'))) as fh: + version = release = fh.readlines()[0] # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/docs/setup.rst b/docs/setup.rst index e0d9a0a..9d83519 100644 --- a/docs/setup.rst +++ b/docs/setup.rst @@ -79,9 +79,14 @@ Configuration The app should run for testing and evaluation purposes without any further configuration. All the application data are stored by default in -the ``data`` directory. +the ``data`` directory of the Python package. -To change the default configuration you should: +This setup is not recommended for anything more than a quick look at the +application. If more complex interaction is needed, or upgrades to the package +are foreseen, it is strongly advised to set up proper locations for +configuration and data. + +To change the default configuration you need to: #. Copy the ``etc.default`` folder to a separate location #. Set the configuration folder location in the environment: @@ -94,10 +99,13 @@ To change the default configuration you should: The configuration options are documented in the files. 
-**Note:** ``test.yml`` must specify a different location for the graph -and for the binary stores than the default one, otherwise running a test -suite will destroy your main data store. The application will issue an -error message and refuse to start if these locations overlap. +One thing worth noting is that some locations can be specified as relative +paths. These paths will be relative to the ``data_dir`` location specified in +the ``application.yml`` file. + +If ``data_dir`` is empty, as it is in the default configuration, it defaults +to the ``data`` directory inside the Python package. This is the option that +one may want to change before anything else. Production deployment --------------------- diff --git a/docs/usage.rst b/docs/usage.rst index decb0d8..635b0b0 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -123,11 +123,12 @@ Before using the API, either do:: Or, to specify an alternative configuration:: + >>> from lakesuperior import env >>> from lakesuperior.config_parser import parse_config >>> from lakesuperior.globals import AppGlobals - >>> env.config, test_config = parse_config('/my/custom/config_dir') + >>> config = parse_config('/my/custom/config_dir') Reading configuration at /my/custom/config_dir - >>> env.app_globals = AppGlobals(env.config) + >>> env.app_globals = AppGlobals(config) Create and replace resources ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/lakesuperior/__init__.py b/lakesuperior/__init__.py index e69de29..0885ecd 100644 --- a/lakesuperior/__init__.py +++ b/lakesuperior/__init__.py @@ -0,0 +1,49 @@ +import threading + +from os import path + +basedir = path.dirname(path.realpath(__file__)) +""" +Base directory for the module. + +This can be used by modules looking for configuration and data files to be +referenced or copied with a known path relative to the package root. + +:rtype: str +""" + +class Env: + pass + +env = Env() +""" +A pox on "globals are evil". + +All-purpose bucket for storing global variables. 
Different environments +(e.g. webapp, test suite) put the appropriate value in it. +The most important values to be stored are app_conf (either from +lakesuperior.config_parser.config or lakesuperior.config_parser.test_config) +and app_globals (obtained by an instance of lakesuperior.globals.AppGlobals). + +e.g.:: + + >>> from lakesuperior.config_parser import config + >>> from lakesuperior.globals import AppGlobals + >>> from lakesuperior import env + >>> env.app_globals = AppGlobals(config) + +This is automated in non-test environments by importing +`lakesuperior.env_setup`. + +:rtype: Object +""" + +thread_env = threading.local() +""" +Thread-local environment. + +This is used to store thread-specific variables such as start/end request +timestamps. + +:rtype: threading.local +""" diff --git a/lakesuperior/api/admin.py b/lakesuperior/api/admin.py index d873db7..22b2094 100644 --- a/lakesuperior/api/admin.py +++ b/lakesuperior/api/admin.py @@ -1,8 +1,7 @@ import logging +from lakesuperior import env from lakesuperior.config_parser import parse_config -from lakesuperior.env import env -from lakesuperior.globals import AppGlobals from lakesuperior.migrator import Migrator from lakesuperior.store.ldp_nr.default_layout import DefaultLayout as FileLayout from lakesuperior.store.ldp_rs.lmdb_store import TxnManager @@ -47,17 +46,12 @@ def migrate(src, dest, start_pts=None, list_file=None, **kwargs): return Migrator(src, dest, **kwargs).migrate(start_pts, list_file) -def integrity_check(config_dir=None): +def integrity_check(): """ Check integrity of the data set. At the moment this is limited to referential integrity. Other checks can be added and triggered by different argument flags. 
""" - if config_dir: - env.config = parse_config(config_dir)[0] - env.app_globals = AppGlobals(env.config) - else: - import lakesuperior.env_setup with TxnManager(env.app_globals.rdfly.store): - return { t for t in env.app_globals.rdfly.find_refint_violations()} + return set(env.app_globals.rdfly.find_refint_violations()) diff --git a/lakesuperior/api/query.py b/lakesuperior/api/query.py index f7c9623..d972d2b 100644 --- a/lakesuperior/api/query.py +++ b/lakesuperior/api/query.py @@ -2,9 +2,9 @@ from io import BytesIO +from lakesuperior import env from lakesuperior.dictionaries.namespaces import ns_collection as nsc from lakesuperior.dictionaries.namespaces import ns_mgr as nsm -from lakesuperior.env import env from lakesuperior.store.ldp_rs.lmdb_store import LmdbStore, TxnManager diff --git a/lakesuperior/api/resource.py b/lakesuperior/api/resource.py index 10c7b4e..6beff8d 100644 --- a/lakesuperior/api/resource.py +++ b/lakesuperior/api/resource.py @@ -13,7 +13,7 @@ from lakesuperior.config_parser import config from lakesuperior.exceptions import ( InvalidResourceError, ResourceNotExistsError, TombstoneError) -from lakesuperior.env import env +from lakesuperior import env, thread_env from lakesuperior.globals import RES_DELETED, RES_UPDATED from lakesuperior.model.ldp_factory import LDP_NR_TYPE, LdpFactory from lakesuperior.store.ldp_rs.lmdb_store import TxnManager @@ -72,15 +72,16 @@ def _transaction_deco(fn): def _wrapper(*args, **kwargs): # Mark transaction begin timestamp. This is used for create and # update timestamps on resources. 
- env.timestamp = arrow.utcnow() - env.timestamp_term = Literal(env.timestamp, datatype=XSD.dateTime) + thread_env.timestamp = arrow.utcnow() + thread_env.timestamp_term = Literal( + thread_env.timestamp, datatype=XSD.dateTime) with TxnManager(env.app_globals.rdf_store, write=write) as txn: ret = fn(*args, **kwargs) if len(env.app_globals.changelog): job = Thread(target=_process_queue) job.start() - delattr(env, 'timestamp') - delattr(env, 'timestamp_term') + delattr(thread_env, 'timestamp') + delattr(thread_env, 'timestamp_term') return ret return _wrapper return _transaction_deco diff --git a/lakesuperior/config_parser.py b/lakesuperior/config_parser.py index 1b733f6..eff41b0 100644 --- a/lakesuperior/config_parser.py +++ b/lakesuperior/config_parser.py @@ -1,6 +1,6 @@ import sys -from os import path, environ +from os import chdir, environ, getcwd, path import hiyapyco import yaml @@ -8,8 +8,10 @@ import lakesuperior -default_config_dir = environ.get('FCREPO_CONFIG_DIR', path.dirname( - path.abspath(lakesuperior.__file__)) + '/etc.defaults') +default_config_dir = environ.get( + 'FCREPO_CONFIG_DIR', + path.join( + path.dirname(path.abspath(lakesuperior.__file__)), 'etc.defaults')) """ Default configuration directory. @@ -53,38 +55,30 @@ def parse_config(config_dir=None): print('Reading configuration at {}'.format(config_dir)) for cname in configs: - file = '{}/{}.yml'.format(config_dir , cname) + file = path.join(config_dir, '{}.yml'.format(cname)) with open(file, 'r') as stream: _config[cname] = yaml.load(stream, yaml.SafeLoader) - error_msg = ''' - ************** - ** WARNING! ** - ************** + if not _config['application']['data_dir']: + _config['application']['data_dir'] = path.join( + lakesuperior.basedir, 'data') - Your test {} store location is set to be the same as the production - location. This means that if you run a test suite, your live data may be - wiped clean! 
+ data_dir = _config['application']['data_dir'] + _config['application']['store']['ldp_nr']['location'] = path.join( + data_dir, 'ldpnr_store') + _config['application']['store']['ldp_rs']['location'] = path.join( + data_dir, 'ldprs_store') + # If log handler file names are relative, they will be relative to the + # data dir. + oldwd = getcwd() + chdir(data_dir) + for handler in _config['logging']['handlers'].values(): + if 'filename' in handler: + handler['filename'] = path.realpath(handler['filename']) + chdir(oldwd) - Please review your configuration before starting. - ''' - - # Merge default and test configurations. - _test_config = {'application': hiyapyco.load( - config_dir + '/application.yml', - config_dir + '/test.yml', method=hiyapyco.METHOD_MERGE)} - - if _config['application']['store']['ldp_rs']['location'] \ - == _test_config['application']['store']['ldp_rs']['location']: - raise RuntimeError(error_msg.format('RDF')) - sys.exit() - - if _config['application']['store']['ldp_nr']['path'] \ - == _test_config['application']['store']['ldp_nr']['path']: - raise RuntimeError(error_msg.format('binary')) - sys.exit() - return _config, _test_config + return _config # Load default configuration. 
-config, test_config = parse_config() +config = parse_config() diff --git a/data/bootstrap/rsrc_centric_layout.sparql b/lakesuperior/data/bootstrap/rsrc_centric_layout.sparql similarity index 100% rename from data/bootstrap/rsrc_centric_layout.sparql rename to lakesuperior/data/bootstrap/rsrc_centric_layout.sparql diff --git a/lakesuperior/endpoints/ldp.py b/lakesuperior/endpoints/ldp.py index f3cdff4..1cebf03 100644 --- a/lakesuperior/endpoints/ldp.py +++ b/lakesuperior/endpoints/ldp.py @@ -251,7 +251,7 @@ def post_resource(parent_uid): hdr = {'Location' : uri} if mimetype and not is_rdf: - hdr['Link'] = '<{0}/fcr:metadata>; rel="describedby"; anchor="<{0}>"'\ + hdr['Link'] = '<{0}/fcr:metadata>; rel="describedby"; anchor="{0}"'\ .format(uri) out_headers.update(hdr) @@ -312,6 +312,8 @@ def put_resource(uid): @ldp.route('/', methods=['PATCH'], strict_slashes=False) +@ldp.route('/', defaults={'uid': '/'}, methods=['PATCH'], + strict_slashes=False) def patch_resource(uid, is_metadata=False): """ https://www.w3.org/TR/ldp/#ldpr-HTTP_PATCH diff --git a/lakesuperior/endpoints/main.py b/lakesuperior/endpoints/main.py index 158412b..e883397 100644 --- a/lakesuperior/endpoints/main.py +++ b/lakesuperior/endpoints/main.py @@ -1,7 +1,11 @@ import logging +from os import path + from flask import Blueprint, render_template +from lakesuperior import basedir + logger = logging.getLogger(__name__) # Blueprint for main pages. Not much here. 
@@ -14,7 +18,11 @@ @main.route('/', methods=['GET']) def index(): """Homepage.""" - return render_template('index.html') + version_fname = path.abspath( + path.join(path.dirname(basedir), 'VERSION')) + with open(version_fname) as fh: + version = fh.readlines()[0] + return render_template('index.html', version=version) @main.route('/debug', methods=['GET']) diff --git a/lakesuperior/endpoints/query.py b/lakesuperior/endpoints/query.py index e06e388..55143c8 100644 --- a/lakesuperior/endpoints/query.py +++ b/lakesuperior/endpoints/query.py @@ -3,7 +3,7 @@ from flask import Blueprint, current_app, request, render_template, send_file from rdflib.plugin import PluginException -from lakesuperior.env import env +from lakesuperior import env from lakesuperior.dictionaries.namespaces import ns_mgr as nsm from lakesuperior.api import query as query_api diff --git a/lakesuperior/endpoints/templates/index.html b/lakesuperior/endpoints/templates/index.html index 075539e..25f3852 100644 --- a/lakesuperior/endpoints/templates/index.html +++ b/lakesuperior/endpoints/templates/index.html @@ -1,6 +1,7 @@ {% extends 'base.html' %} {% block title %}LAKEsuperior{% endblock %} {% block content %} +

Version {{ version }}

Superior, they said, never gives up her dead
When the gales of November come early diff --git a/lakesuperior/env.py b/lakesuperior/env.py deleted file mode 100644 index b7b37d0..0000000 --- a/lakesuperior/env.py +++ /dev/null @@ -1,25 +0,0 @@ -import threading - -''' -Global bucket for switching configuration. Different environments -(e.g. webapp, test suite) put the appropriate value in it. -The most important values to be stored are app_conf (either from -lakesuperior.config_parser.config or lakesuperior.config_parser.test_config) -and app_globals (obtained by an instance of lakesuperior.globals.AppGlobals). - -e.g.: - ->>> from lakesuperior.config_parser import config ->>> from lakesuperior.globals import AppGlobals ->>> from lakesuperior.env import env ->>> env.config = config ->>> env.app_globals = AppGlobals(config) - -This is automated in non-test environments by importing -`lakesuperior.env_setup`. -''' -class Env: - pass - -env = Env() -#env = threading.local() diff --git a/lakesuperior/env_setup.py b/lakesuperior/env_setup.py index b80e32f..f630a31 100644 --- a/lakesuperior/env_setup.py +++ b/lakesuperior/env_setup.py @@ -1,16 +1,15 @@ +from lakesuperior import env from lakesuperior.config_parser import config from lakesuperior.globals import AppGlobals -from lakesuperior.env import env __doc__=""" Default configuration. Import this module to initialize the configuration for a production setup:: - >>>from lakesuperior import env_setup + >>> import lakesuperior.env_setup Will load the default configuration. """ -env.config = config env.app_globals = AppGlobals(config) diff --git a/lakesuperior/etc.defaults/application.yml b/lakesuperior/etc.defaults/application.yml index 5ccb46f..910d9f3 100644 --- a/lakesuperior/etc.defaults/application.yml +++ b/lakesuperior/etc.defaults/application.yml @@ -4,6 +4,21 @@ # settings. Individual items can be selectively overridden as long as the YAML # hierarchical structure is kept. +# Set app_mode to either 'prod', 'test' or 'dev'. +# 'prod' is normal running mode. 
'test' is used for running test suites. +# 'dev' is similar to normal mode but with reload and debug enabled. +app_mode: 'prod' + +# Base data directory. This contains both volatile files such as PID files, +# and persistent ones, such as resource data. LDP-NRs will be stored under +# /ldpnr_store and LDP-RSs under /ldprs_store. +# +# If different data files need to be running on different storage hardware, +# the individual subdirectories can be mounted on different file systems. +# +# If unset, it will default to /data. +data_dir: + # Configuration for binary path and fixity check generation. The hash is a # checksumn of the contents of the file. uuid: @@ -18,9 +33,6 @@ store: # The semantic store used for persisting LDP-RS (RDF Source) resources. # MUST support SPARQL 1.1 query and update. ldp_rs: - # Directory where the RDF data files are stored. - location: data/ldprs_store - # store layout. At the moment, only `rsrc_centric_layout`is supported. layout: rsrc_centric_layout @@ -47,9 +59,6 @@ store: # See store.ldp_rs.layout. layout: default_layout - # The filesystem path to the root of the binary store. - path: data/ldpnr_store - # How to split the balanced pairtree to generate a path. The hash # string is defined by the uuid.algo parameter value. # This parameter defines how many characters are in each branch. 2-4 is diff --git a/lakesuperior/etc.defaults/gunicorn.yml b/lakesuperior/etc.defaults/gunicorn.yml index 68467c8..5471dc8 100644 --- a/lakesuperior/etc.defaults/gunicorn.yml +++ b/lakesuperior/etc.defaults/gunicorn.yml @@ -4,12 +4,8 @@ # Commented values are the application defaults. # Directory where the WSGI server data are stored. -data_dir: 'data' - -# Set app_mode to either 'prod', 'test' or 'dev'. -# 'prod' is normal running mode. 'test' is used for running test suites. -# 'dev' is similar to normal mode but with reload and debug enabled. -app_mode: 'dev' +# Relative paths are relative to the `data_dir` value in `application.yml`. +data_dir: . 
#listen_addr: '0.0.0.0' #listen_port: 8000 diff --git a/lakesuperior/etc.defaults/logging.yml b/lakesuperior/etc.defaults/logging.yml index de96460..3c240a3 100644 --- a/lakesuperior/etc.defaults/logging.yml +++ b/lakesuperior/etc.defaults/logging.yml @@ -13,8 +13,10 @@ formatters: handlers: logfile: class: logging.handlers.RotatingFileHandler - # Change this. - filename: /tmp/lakesuperior.log + # Relative paths are relative to the `data_dir` value in `application.yml`. + # You can change this value to an absolute path or leave it alone and + # symlink the location to a different directory. + filename: log/lakesuperior.log maxBytes: 10485760 backupCount: 3 formatter: default_fmt diff --git a/lakesuperior/globals.py b/lakesuperior/globals.py index 70643d4..49943e1 100644 --- a/lakesuperior/globals.py +++ b/lakesuperior/globals.py @@ -27,7 +27,7 @@ class AppGlobals: The variables are set on initialization by passing a configuration dict. Usually this is done when starting an application. The instance with the - loaded variables is then assigned to the :data:`lakesuperior.env.env` + loaded variables is then assigned to the :data:`lakesuperior.env` global variable. You can either load the default configuration:: @@ -36,20 +36,19 @@ class AppGlobals: Or set up an environment with a custom configuration:: - >>>from lakesuperior.env import env - >>>from lakesuperior.app_globals import AppGlobals - >>>my_config = {'name': 'value', '...': '...'} - >>>env.config = my_config - >>>env.app_globals = AppGlobals(my_config) + >>> from lakesuperior import env + >>> from lakesuperior.app_globals import AppGlobals + >>> my_config = {'name': 'value', '...': '...'} + >>> env.app_globals = AppGlobals(my_config) """ - def __init__(self, conf): + def __init__(self, config): """ Generate global variables from configuration. """ from lakesuperior.messaging.messenger import Messenger - app_conf = conf['application'] + app_conf = config['application'] # Initialize RDF layout. 
rdfly_mod_name = app_conf['store']['ldp_rs']['layout'] @@ -69,11 +68,26 @@ def __init__(self, conf): self._messenger = Messenger(app_conf['messaging']) # Exposed globals. + self._config = config self._rdfly = rdfly_cls(app_conf['store']['ldp_rs']) self._nonrdfly = nonrdfly_cls(app_conf['store']['ldp_nr']) self._changelog = deque() + @property + def config(self): + """ + Global configuration. + + This is a collection of all configuration options **except** for the + WSGI configuration which is initialized at a different time and is + stored under :data:`lakesuperior.env.wsgi_options`. + + *TODO:* Update class reference when interface will be separated from + implementation. + """ + return self._config + @property def rdfly(self): """ diff --git a/lakesuperior/lsup_admin.py b/lakesuperior/lsup_admin.py index 99ab9ec..d0c4dd7 100644 --- a/lakesuperior/lsup_admin.py +++ b/lakesuperior/lsup_admin.py @@ -1,15 +1,30 @@ import click import click_log +import csv import json import logging -import os import sys +from os import getcwd, path + +import arrow + +from lakesuperior import env from lakesuperior.api import admin as admin_api from lakesuperior.config_parser import config -from lakesuperior.env import env +from lakesuperior.globals import AppGlobals from lakesuperior.store.ldp_rs.lmdb_store import TxnManager +__doc__=""" +Utility to perform core maintenance tasks via console command-line. + +The command-line tool is self-documented. Type:: + + lsup-admin --help + +for a list of tools and options. +""" + logger = logging.getLogger(__name__) click_log.basic_config(logger) @@ -53,7 +68,8 @@ def bootstrap(): click.echo('Initializing binary store at {}'.format(nonrdfly.root)) nonrdfly.bootstrap() click.echo('Binary store initialized.') - click.echo('Repository successfully set up. Go to town.') + click.echo('\nRepository successfully set up. 
Go to town.') + click.echo('If the HTTP server is running, it must be restarted.') @click.command() @@ -88,8 +104,11 @@ def check_fixity(uid): '--config-folder', '-c', default=None, help='Alternative configuration ' 'folder to look up. If not set, the location set in the environment or ' 'the default configuration is used.') +@click.option( + '--output', '-o', default=None, help='Output file. If not specified, a ' + 'timestamp-named file will be generated automatically.') @click.command() -def check_refint(config_folder=None): +def check_refint(config_folder=None, output=None): """ Check referential integrity. @@ -98,19 +117,41 @@ def check_refint(config_folder=None): resources. For repositories set up with the `referential_integrity` option (the default), this is a pre-condition for a consistent data set. - Note: this check is run regardless of whether the repository enforces + If inconsistencies are found, a report is generated in CSV format with the + following columns: `s`, `p`, `o` (respectively the terms of the + triple containing the dangling relationship) and `missing` which + indicates which term is the missing URI (currently always set to `o`). + + Note: this check can be run regardless of whether the repository enforces referential integrity. 
""" - check_results = admin_api.integrity_check(config_folder) + if config_folder: + env.app_globals = AppGlobals(parse_config(config_dir)) + else: + import lakesuperior.env_setup + + check_results = admin_api.integrity_check() + click.echo('Integrity check results:') if len(check_results): click.echo(click.style('Inconsistencies found!', fg='red', bold=True)) - click.echo('Missing object in the following triples:') - for trp in check_results: - click.echo(' '.join([str(t) for t in trp[0]])) + if not output: + output = path.join(getcwd(), 'refint_report-{}.csv'.format( + arrow.utcnow().format('YYYY-MM-DDTHH:mm:ss.S'))) + elif not output.endswith('.csv'): + output += '.csv' + + with open(output, 'w', newline='') as fh: + writer = csv.writer(fh) + writer.writerow(('s', 'p', 'o', 'missing')) + for trp in check_results: + # ``o`` is always hardcoded for now. + writer.writerow([t.n3() for t in trp[0]] + ['o']) + + click.echo('Report generated at {}'.format(output)) else: click.echo(click.style('Clean. ', fg='green', bold=True) - + 'No inconsistency found.') + + 'No inconsistency found. 
No report generated.') @click.command() diff --git a/lakesuperior/migrator.py b/lakesuperior/migrator.py index 817de0a..c0a3c49 100644 --- a/lakesuperior/migrator.py +++ b/lakesuperior/migrator.py @@ -10,8 +10,9 @@ from rdflib import Graph, URIRef +from lakesuperior import env, basedir from lakesuperior.dictionaries.namespaces import ns_collection as nsc -from lakesuperior.env import env +from lakesuperior.exceptions import InvalidResourceError from lakesuperior.globals import AppGlobals, ROOT_UID from lakesuperior.config_parser import parse_config from lakesuperior.store.ldp_rs.lmdb_store import TxnManager @@ -28,8 +29,7 @@ def __init__(self, store): self.store = store def __enter__(self): - self.store.open( - env.config['application']['store']['ldp_rs']) + self.store.open(env.app_globals.rdfly.config) def __exit__(self, *exc): self.store.close() @@ -69,8 +69,8 @@ class Migrator: def __init__( - self, src, dest, zero_binaries=False, compact_uris=False, - skip_errors=False): + self, src, dest, clear=False, zero_binaries=False, + compact_uris=False, skip_errors=False): """ Set up base paths and clean up existing directories. @@ -82,8 +82,10 @@ def __init__( it must be a writable directory. It will be deleted and recreated. If it does not exist, it will be created along with its parents if missing. - :param str binary_handling: One of ``include``, ``truncate`` or - ``split``. + :param bool clear: Whether to clear any pre-existing data at the + locations indicated. + :param bool zero_binaries: Whether to create zero-byte binary files + rather than copy the sources. :param bool compact_uris: NOT IMPLEMENTED. Whether the process should attempt to compact URIs generated with broken up path segments. If the UID matches a pattern such as ``/12/34/56/123456...`` it is @@ -95,34 +97,36 @@ def __init__( """ # Set up repo folder structure and copy default configuration to # destination file. 
- cur_dir = path.dirname(path.dirname(path.abspath(__file__))) self.dbpath = '{}/data/ldprs_store'.format(dest) self.fpath = '{}/data/ldpnr_store'.format(dest) self.config_dir = '{}/etc'.format(dest) - shutil.rmtree(dest, ignore_errors=True) - shutil.copytree( - '{}/etc.defaults'.format(cur_dir), self.config_dir) + if clear: + shutil.rmtree(dest, ignore_errors=True) + if not path.isdir(self.config_dir): + shutil.copytree( + '{}/etc.defaults'.format(basedir), self.config_dir) # Modify and overwrite destination configuration. - orig_config, _ = parse_config(self.config_dir) + orig_config = parse_config(self.config_dir) orig_config['application']['store']['ldp_rs']['location'] = self.dbpath orig_config['application']['store']['ldp_nr']['path'] = self.fpath - with open('{}/application.yml'.format(self.config_dir), 'w') \ - as config_file: - config_file.write(yaml.dump(orig_config['application'])) + if clear: + with open('{}/application.yml'.format(self.config_dir), 'w') \ + as config_file: + config_file.write(yaml.dump(orig_config['application'])) - env.config = parse_config(self.config_dir)[0] - env.app_globals = AppGlobals(env.config) + env.app_globals = AppGlobals(parse_config(self.config_dir)) self.rdfly = env.app_globals.rdfly self.nonrdfly = env.app_globals.nonrdfly - with TxnManager(env.app_globals.rdf_store, write=True) as txn: - self.rdfly.bootstrap() - self.rdfly.store.close() - env.app_globals.nonrdfly.bootstrap() + if clear: + with TxnManager(env.app_globals.rdf_store, write=True) as txn: + self.rdfly.bootstrap() + self.rdfly.store.close() + env.app_globals.nonrdfly.bootstrap() self.src = src.rstrip('/') self.zero_binaries = zero_binaries @@ -155,7 +159,7 @@ def migrate(self, start_pts=None, list_file=None): 'Starting point {} does not begin with a slash.' .format(start)) - if start != ROOT_UID: + if not rsrc_api.exists(start): # Create the full hierarchy with link to the parents. 
rsrc_api.create_or_replace(start) # Then populate the new resource and crawl for more @@ -165,8 +169,11 @@ def migrate(self, start_pts=None, list_file=None): with open(list_file, 'r') as fp: for uri in fp: uid = uri.strip().replace(self.src, '') - if uid != ROOT_UID: - rsrc_api.create_or_replace(uid) + if not rsrc_api.exists(uid): + try: + rsrc_api.create_or_replace(uid) + except InvalidResourceError: + pass self._crawl(uid) logger.info('Dumped {} resources.'.format(self._ct)) @@ -189,12 +196,17 @@ def _crawl(self, uid): # Internal URI of destination. iuri = ibase + uid - rsp = requests.head(uri) - if not self.skip_errors: - rsp.raise_for_status() - elif rsp.status_code > 399: - print('Error retrieving resource {} headers: {} {}'.format( - uri, rsp.status_code, rsp.text)) + try: + rsp = requests.head(uri) + except: + logger.warn('Error retrieving resource {}'.format(uri)) + return + if rsp: + if not self.skip_errors: + rsp.raise_for_status() + elif rsp.status_code > 399: + print('Error retrieving resource {} headers: {} {}'.format( + uri, rsp.status_code, rsp.text)) # Determine LDP type. ldp_type = 'ldp_nr' @@ -218,12 +230,17 @@ def _crawl(self, uid): # links. 
get_uri = ( uri if ldp_type == 'ldp_rs' else '{}/fcr:metadata'.format(uri)) - get_rsp = requests.get(get_uri) - if not self.skip_errors: - get_rsp.raise_for_status() - elif get_rsp.status_code > 399: - print('Error retrieving resource {} body: {} {}'.format( - uri, get_rsp.status_code, get_rsp.text)) + try: + get_rsp = requests.get(get_uri) + except: + logger.warn('Error retrieving resource {}'.format(get_uri)) + return + if get_rsp: + if not self.skip_errors: + get_rsp.raise_for_status() + elif get_rsp.status_code > 399: + print('Error retrieving resource {} body: {} {}'.format( + uri, get_rsp.status_code, get_rsp.text)) data = get_rsp.content.replace( self.src.encode('utf-8'), ibase.encode('utf-8')) diff --git a/lakesuperior/model/ldp_factory.py b/lakesuperior/model/ldp_factory.py index 5e84353..bd0d896 100644 --- a/lakesuperior/model/ldp_factory.py +++ b/lakesuperior/model/ldp_factory.py @@ -7,11 +7,11 @@ from rdflib.resource import Resource from rdflib.namespace import RDF +from lakesuperior import env from lakesuperior.model.ldpr import Ldpr from lakesuperior.model.ldp_nr import LdpNr from lakesuperior.model.ldp_rs import LdpRs, Ldpc, LdpDc, LdpIc from lakesuperior.config_parser import config -from lakesuperior.env import env from lakesuperior.dictionaries.namespaces import ns_collection as nsc from lakesuperior.exceptions import ( IncompatibleLdpTypeError, InvalidResourceError, ResourceExistsError, diff --git a/lakesuperior/model/ldp_nr.py b/lakesuperior/model/ldp_nr.py index 52c5779..7294c3a 100644 --- a/lakesuperior/model/ldp_nr.py +++ b/lakesuperior/model/ldp_nr.py @@ -6,7 +6,7 @@ from rdflib.resource import Resource from rdflib.term import URIRef, Literal, Variable -from lakesuperior.env import env +from lakesuperior import env from lakesuperior.dictionaries.namespaces import ns_collection as nsc from lakesuperior.model.ldpr import Ldpr from lakesuperior.model.ldp_rs import LdpRs diff --git a/lakesuperior/model/ldp_rs.py b/lakesuperior/model/ldp_rs.py 
index 0ee42ec..50ee46b 100644 --- a/lakesuperior/model/ldp_rs.py +++ b/lakesuperior/model/ldp_rs.py @@ -2,7 +2,7 @@ from rdflib import Graph -from lakesuperior.env import env +from lakesuperior import env from lakesuperior.globals import RES_UPDATED from lakesuperior.dictionaries.namespaces import ns_collection as nsc from lakesuperior.model.ldpr import Ldpr diff --git a/lakesuperior/model/ldpr.py b/lakesuperior/model/ldpr.py index 49d5d44..305c956 100644 --- a/lakesuperior/model/ldpr.py +++ b/lakesuperior/model/ldpr.py @@ -10,7 +10,7 @@ from rdflib import Graph, URIRef, Literal from rdflib.namespace import RDF -from lakesuperior.env import env +from lakesuperior import env, thread_env from lakesuperior.globals import ( RES_CREATED, RES_DELETED, RES_UPDATED, ROOT_UID) from lakesuperior.dictionaries.namespaces import ns_collection as nsc @@ -411,7 +411,7 @@ def bury_rsrc(self, inbound, tstone_pointer=None): else: add_trp = { (self.uri, RDF.type, nsc['fcsystem'].Tombstone), - (self.uri, nsc['fcrepo'].created, env.timestamp_term), + (self.uri, nsc['fcrepo'].created, thread_env.timestamp_term), } self.modify(RES_DELETED, remove_trp, add_trp) @@ -432,7 +432,7 @@ def forget_rsrc(self, inbound=True): Remove all traces of a resource and versions. """ logger.info('Purging resource {}'.format(self.uid)) - refint = env.config['store']['ldp_rs']['referential_integrity'] + refint = rdfly.config['referential_integrity'] inbound = True if refint else inbound rdfly.forget_rsrc(self.uid, inbound) @@ -637,8 +637,8 @@ def sparql_delta(self, q): :rtype: tuple(rdflib.Graph) :return: Remove and add graphs. These can be used - with ``BaseStoreLayout.update_resource`` and/or recorded as separate - events in a provenance tracking system. + with ``BaseStoreLayout.update_resource`` and/or recorded as separate + events in a provenance tracking system. """ logger.debug('Provided SPARQL query: {}'.format(q)) pre_gr = self.imr @@ -683,7 +683,7 @@ def modify( method. 
:param ev_type: The type of event (create, update, - delete) or None. In the latter case, no notification is sent. + delete) or None. In the latter case, no notification is sent. :type ev_type: str or None :param set remove_trp: Triples to be removed. :param set add_trp: Triples to be added. @@ -692,7 +692,7 @@ def modify( if ( ev_type is not None and - env.config['application'].get('messaging')): + env.app_globals.config['application'].get('messaging')): logger.debug('Enqueuing message for {}'.format(self.uid)) self._enqueue_msg(ev_type, remove_trp, add_trp) @@ -720,7 +720,7 @@ def _enqueue_msg(self, ev_type, remove_trp=None, add_trp=None): env.app_globals.changelog.append((set(remove_trp), set(add_trp), { 'ev_type': ev_type, - 'timestamp': env.timestamp.format(), + 'timestamp': thread_env.timestamp.format(), 'rsrc_type': rsrc_type, 'actor': actor, })) @@ -769,7 +769,7 @@ def _add_srv_mgd_triples(self, create=False): # Create and modify timestamp. if create: self.provided_imr.set(( - self.uri, nsc['fcrepo'].created, env.timestamp_term)) + self.uri, nsc['fcrepo'].created, thread_env.timestamp_term)) self.provided_imr.set(( self.uri, nsc['fcrepo'].createdBy, self.DEFAULT_USER)) else: @@ -781,12 +781,12 @@ def _add_srv_mgd_triples(self, create=False): self.uri, nsc['fcrepo'].createdBy))) self.provided_imr.set(( - self.uri, nsc['fcrepo'].lastModified, env.timestamp_term)) + self.uri, nsc['fcrepo'].lastModified, thread_env.timestamp_term)) self.provided_imr.set(( self.uri, nsc['fcrepo'].lastModifiedBy, self.DEFAULT_USER)) - def _containment_rel(self, create): + def _containment_rel(self, create, ignore_type=True): """Find the closest parent in the path indicated by the uid and establish a containment triple. @@ -805,6 +805,11 @@ def _containment_rel(self, create): :param bool create: Whether the resource is being created. If false, the parent container is not updated. 
+ :param bool ignore_type: If False, an exception is raised + if trying to create a resource under a non-container. This can be + overridden in special cases (e.g. when migrating a repository in which + a LDP-NR has "children" under ``fcr:versions``) by leaving this at its + default of True. """ from lakesuperior.model.ldp_factory import LdpFactory @@ -814,7 +819,9 @@ cnd_parent_uid = '/' + '/'.join(path_components[:-1]) if rdfly.ask_rsrc_exists(cnd_parent_uid): parent_rsrc = LdpFactory.from_stored(cnd_parent_uid) - if nsc['ldp'].Container not in parent_rsrc.types: + if ( + not ignore_type + and nsc['ldp'].Container not in parent_rsrc.types): raise InvalidResourceError( cnd_parent_uid, 'Parent {} is not a container.') diff --git a/lakesuperior/profiler.py b/lakesuperior/profiler.py index 574aa13..ad4cf91 100644 --- a/lakesuperior/profiler.py +++ b/lakesuperior/profiler.py @@ -5,9 +5,9 @@ # Environment must be set before importing the app factory function. import lakesuperior.env_setup +from lakesuperior import env from lakesuperior.config_parser import config from lakesuperior.globals import AppGlobals -from lakesuperior.env import env options = { 'restrictions': [30], diff --git a/lakesuperior/server.py b/lakesuperior/server.py index b1fbb19..98a93a8 100644 --- a/lakesuperior/server.py +++ b/lakesuperior/server.py @@ -4,21 +4,20 @@ # Environment must be set before importing the app factory function. 
import lakesuperior.env_setup +from lakesuperior import env from lakesuperior.config_parser import config from lakesuperior.globals import AppGlobals -from lakesuperior.env import env from lakesuperior.app import create_app -dictConfig(env.config['logging']) +dictConfig(env.app_globals.config['logging']) logger = logging.getLogger(__name__) logger.info('Graph store location: {}'.format( - env.config['application']['store']['ldp_rs']['location'])) -logger.info('Binary store location: {}'.format( - env.config['application']['store']['ldp_nr']['path'])) + env.app_globals.rdfly.config['location'])) +logger.info('Binary store location: {}'.format(env.app_globals.nonrdfly.root)) -fcrepo = create_app(env.config['application']) +fcrepo = create_app(env.app_globals.config['application']) if __name__ == "__main__": fcrepo.run(host='0.0.0.0') diff --git a/lakesuperior/store/ldp_nr/base_non_rdf_layout.py b/lakesuperior/store/ldp_nr/base_non_rdf_layout.py index bf0e3ca..4cf89be 100644 --- a/lakesuperior/store/ldp_nr/base_non_rdf_layout.py +++ b/lakesuperior/store/ldp_nr/base_non_rdf_layout.py @@ -20,7 +20,7 @@ def __init__(self, config): Initialize the base non-RDF store layout. """ self.config = config - self.root = config['path'] + self.root = config['location'] ## INTERFACE METHODS ## diff --git a/lakesuperior/store/ldp_rs/lmdb_store.py b/lakesuperior/store/ldp_rs/lmdb_store.py index 90a9e4e..db70c0e 100644 --- a/lakesuperior/store/ldp_rs/lmdb_store.py +++ b/lakesuperior/store/ldp_rs/lmdb_store.py @@ -14,6 +14,7 @@ from rdflib.graph import DATASET_DEFAULT_GRAPH_ID as RDFLIB_DEFAULT_GRAPH_URI from rdflib.store import Store, VALID_STORE, NO_STORE +from lakesuperior import env logger = logging.getLogger(__name__) @@ -473,7 +474,7 @@ def add(self, triple, context=None, quoted=False): pk_c = self._pickle(context) # Add new individual terms or gather keys for existing ones. 
- keys = [None, None, None, None] + keys = [None] * 4 with self.cur('th:t') as icur: for i, pk_t in enumerate((pk_s, pk_p, pk_o, pk_c)): thash = self._hash(pk_t) @@ -856,10 +857,20 @@ def _init_db_environments(self, create=True): else: return NO_STORE - self.data_env = lmdb.open(path + '/main', subdir=False, create=create, - map_size=self.MAP_SIZE, max_dbs=4, readahead=False) - self.idx_env = lmdb.open(path + '/index', subdir=False, create=create, - map_size=self.MAP_SIZE, max_dbs=6, readahead=False) + if getattr(env, 'wsgi_options', False): + self._workers = env.wsgi_options['workers'] + else: + self._workers = 1 + logger.info('Max LMDB readers: {}'.format(self._workers)) + + self.data_env = lmdb.open( + path + '/main', subdir=False, create=create, + map_size=self.MAP_SIZE, max_dbs=4, + max_spare_txns=self._workers, readahead=False) + self.idx_env = lmdb.open( + path + '/index', subdir=False, create=create, + map_size=self.MAP_SIZE, max_dbs=6, + max_spare_txns=self._workers, readahead=False) # Clear stale readers. data_stale_readers = self.data_env.reader_check() @@ -1003,7 +1014,7 @@ def _lookup(self, triple_pattern): yield from self._lookup_2bound({'s': s, 'o': o}) # s ? ? else: - yield from self._lookup_1bound('s', s) + yield from self._lookup_1bound('s:po', s) else: if p is not None: # ? p o @@ -1011,11 +1022,11 @@ def _lookup(self, triple_pattern): yield from self._lookup_2bound({'p': p, 'o': o}) # ? p ? else: - yield from self._lookup_1bound('p', p) + yield from self._lookup_1bound('p:so', p) else: # ? ? o if o is not None: - yield from self._lookup_1bound('o', o) + yield from self._lookup_1bound('o:sp', o) # ? ? ? else: # Get all triples in the database. @@ -1023,21 +1034,20 @@ def _lookup(self, triple_pattern): yield from cur.iternext_nodup() - def _lookup_1bound(self, label, term): + def _lookup_1bound(self, idx_name, term): """ Lookup triples for a pattern with one bound term. - :param str label: Which term is being searched for. 
One of `s`, - `p`, or `o`. + :param str idx_name: The index to look up as one of the keys of + ``_lookup_ordering``. :param rdflib.URIRef term: Bound term to search for. - :rtype: iterator(bytes) + :rtype: Iterator(bytes) :return: SPO keys matching the pattern. """ k = self._to_key(term) if not k: return iter(()) - idx_name = '{}:{}'.format(label, 'spo'.replace(label, '')) term_order = self._lookup_ordering[idx_name] with self.cur(idx_name) as cur: if cur.set_key(k): @@ -1045,7 +1055,7 @@ def _lookup_1bound(self, label, term): subkeys = self._split_key(match) # Compose result. - out = [None, None, None] + out = [None] * 3 out[term_order[0]] = k out[term_order[1]] = subkeys[0] out[term_order[2]] = subkeys[1] diff --git a/lakesuperior/store/ldp_rs/rsrc_centric_layout.py b/lakesuperior/store/ldp_rs/rsrc_centric_layout.py index d719d66..9833e70 100644 --- a/lakesuperior/store/ldp_rs/rsrc_centric_layout.py +++ b/lakesuperior/store/ldp_rs/rsrc_centric_layout.py @@ -2,7 +2,9 @@ from collections import defaultdict from itertools import chain +from os import path from string import Template +from urllib.parse import urldefrag import arrow @@ -12,13 +14,13 @@ from rdflib.resource import Resource from rdflib.store import Store +from lakesuperior import basedir, env from lakesuperior.dictionaries.namespaces import ns_collection as nsc from lakesuperior.dictionaries.namespaces import ns_mgr as nsm from lakesuperior.dictionaries.srv_mgd_terms import srv_mgd_subjects, \ srv_mgd_predicates, srv_mgd_types from lakesuperior.exceptions import (InvalidResourceError, ResourceNotExistsError, TombstoneError, PathSegmentError) -from lakesuperior.env import env from lakesuperior.store.ldp_rs.lmdb_store import TxnManager @@ -179,8 +181,10 @@ def bootstrap(self): logger.info('Initializing the graph store with system data.') store.open() + fname = path.join( + basedir, 'data', 'bootstrap', 'rsrc_centric_layout.sparql') with TxnManager(store, True): - with 
open('data/bootstrap/rsrc_centric_layout.sparql', 'r') as f: + with open(fname, 'r') as f: data = Template(f.read()) self.ds.update(data.substitute(timestamp=arrow.utcnow())) @@ -553,14 +557,20 @@ def find_refint_violations(self): :rtype: set :return: Triples referencing a repository URI that is not a resource. """ - for obj in self.store.all_terms('o'): + #import pdb; pdb.set_trace() + for i, obj in enumerate(self.store.all_terms('o'), start=1): if ( isinstance(obj, URIRef) - and str(obj).startswith(nsc['fcres']) - and not self.ask_rsrc_exists(self.uri_to_uid(obj))): - print('Object not found: {}'.format(obj)) + and obj.startswith(nsc['fcres']) + and not obj.endswith('fcr:fixity') + and not obj.endswith('fcr:versions') + and not self.ask_rsrc_exists(self.uri_to_uid( + urldefrag(obj).url))): + logger.warn('Object not found: {}'.format(obj)) for trp in self.store.triples((None, None, obj)): yield trp + if i % 100 == 0: + logger.info('{} terms processed.'.format(i)) ## PROTECTED MEMBERS ## diff --git a/lakesuperior/wsgi.py b/lakesuperior/wsgi.py index 57b4808..9a440dd 100644 --- a/lakesuperior/wsgi.py +++ b/lakesuperior/wsgi.py @@ -1,15 +1,15 @@ import multiprocessing import yaml -from os import environ, makedirs, path +from os import chdir, environ, makedirs, getcwd, path import gunicorn.app.base -from lakesuperior.server import fcrepo +from lakesuperior import env, env_setup from lakesuperior.config_parser import default_config_dir -config_file = '{}/gunicorn.yml'.format(default_config_dir) +config_file = path.join(default_config_dir, 'gunicorn.yml') with open(config_file, 'r') as fh: config = yaml.load(fh, yaml.SafeLoader) @@ -17,11 +17,14 @@ listen_addr = config.get('listen_addr', '0.0.0.0') listen_port = config.get('listen_port', 8000) preload_app = config.get('preload_app', True) -app_mode = config.get('app_mode', 'prod') +app_mode = env.app_globals.config['application'].get('app_mode', 'prod') +oldwd = getcwd() 
+chdir(env.app_globals.config['application']['data_dir']) data_dir = path.realpath(config.get('data_dir')) -run_dir = '{}/run'.format(data_dir) -log_dir = '{}/log'.format(data_dir) +chdir(oldwd) +run_dir = path.join(data_dir, 'run') +log_dir = path.join(data_dir, 'log') makedirs(log_dir, exist_ok=True) makedirs(run_dir, exist_ok=True) @@ -43,10 +46,11 @@ def default_workers(): 'daemon': app_mode=='prod', 'reload': app_mode=='dev' and not preload_app, - 'pidfile': '{}/fcrepo.pid'.format(run_dir), - 'accesslog': '{}/gunicorn-access.log'.format(log_dir), - 'errorlog': '{}/gunicorn-error.log'.format(log_dir), + 'pidfile': path.join(run_dir, 'fcrepo.pid'), + 'accesslog': path.join(log_dir, 'gunicorn-access.log'), + 'errorlog': path.join(log_dir, 'gunicorn-error.log'), } +env.wsgi_options = options class WsgiApp(gunicorn.app.base.BaseApplication): @@ -64,6 +68,7 @@ def load(self): def run(): + from lakesuperior.server import fcrepo WsgiApp(fcrepo, options).run() diff --git a/setup.py b/setup.py index ce23bbf..aa45989 100644 --- a/setup.py +++ b/setup.py @@ -25,9 +25,14 @@ with open(path.join(here, 'README.rst'), encoding='utf-8') as f: long_description = f.read() +# Read release number. 
+with open(path.realpath(path.join(here, 'VERSION'))) as fh: + version = fh.readlines()[0] + + setup( name='lakesuperior', - version='1.0.0a12', + version=version, description='A Linked Data Platform repository sever.', long_description=long_description, @@ -81,6 +86,7 @@ 'HiYaPyCo', 'PyYAML', 'arrow', + 'cchardet', 'click', 'click-log', 'gevent', @@ -105,9 +111,7 @@ #extras_require={}, #package_data={ #}, - data_files=[ - ('data/bootstrap', glob('data/bootstrap/*')), - ], + #data_files=[], entry_points={ 'console_scripts': [ diff --git a/tests/endpoints/test_ldp.py b/tests/endpoints/test_ldp.py index 2ff72d6..e1d4ad0 100644 --- a/tests/endpoints/test_ldp.py +++ b/tests/endpoints/test_ldp.py @@ -298,6 +298,26 @@ def test_post_409(self, rnd_img): assert self.client.post('/ldp/post_409').status_code == 409 + def test_patch_root(self): + ''' + Test patching root node. + ''' + path = '/ldp/' + self.client.get(path) + uri = g.webroot + '/' + + with open('tests/data/sparql_update/simple_insert.sparql') as data: + resp = self.client.patch(path, + data=data, + headers={'content-type' : 'application/sparql-update'}) + + assert resp.status_code == 204 + + resp = self.client.get(path) + gr = Graph().parse(data=resp.data, format='text/turtle') + assert gr[ URIRef(uri) : nsc['dc'].title : Literal('Hello') ] + + def test_patch(self): ''' Test patching a resource.