Skip to content

Commit

Permalink
Merge af6df1e into e853c95
Browse files Browse the repository at this point in the history
  • Loading branch information
mwatts15 committed Jul 17, 2019
2 parents e853c95 + af6df1e commit 5cde8db
Show file tree
Hide file tree
Showing 14 changed files with 1,049 additions and 54 deletions.
66 changes: 42 additions & 24 deletions PyOpenWorm/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ def retrieve(self, source, archive='data.tar', archive_type=None):
archive_type : str
The type of the archive to create.
'''
from PyOpenWorm.datasource import DataSource
sid = self._pow_command._den3(source)
if not archive_type:
for ext in EXT_TO_ARCHIVE_FMT:
Expand All @@ -80,7 +81,9 @@ def retrieve(self, source, archive='data.tar', archive_type=None):
archive_type = 'tar'

try:
dd = self._pow_command._dsd[sid]
sources = self._pow_command._data_ctx.stored(DataSource)(ident=sid).load()
for data_source in sources:
dd = self._pow_command._dsd[data_source]
except KeyError:
raise GenericUserError('Could not find data for {} ({})'.format(sid, source))

Expand Down Expand Up @@ -1141,11 +1144,11 @@ def _load_data_source_directories(self):
# to not worry about checking if they have loaded something before.

# XXX persist the dict
lclasses = [POWDirDataSourceDirLoader]
lclasses = [POWDirDataSourceDirLoader()]
dsd = _DSD(dict(), pth_join(self.powdir, 'data_source_data'), lclasses)
dindex = open(pth_join(self.powdir, 'data_source_directories'))
for ds_id, dname in (x.strip().split(' ', 1) for x in dindex):
dsd[ds_id] = dname
dsd.put(ds_id, dname)
self._data_source_directories = dsd

def _stage_translation_directory(self, source_directory, target_directory):
Expand Down Expand Up @@ -1577,43 +1580,48 @@ def __str__(self):


class _DSD(object):
def __init__(self, ds_dict, base_directory, loader_classes):
def __init__(self, ds_dict, base_directory, loaders):
self._dsdict = ds_dict
self._base_directory = base_directory
self._lclasses = loader_classes
self._loaders = self._init_loaders(loaders)

def __getitem__(self, dsid):
dsid = str(dsid)
def __getitem__(self, data_source):
dsid = str(data_source.identifier)
try:
return self._dsdict[dsid]
except KeyError:
res = self._load_data_source(dsid)
res = self._load_data_source(data_source)
if res:
self._dsdict[dsid] = res
return res
raise

def _loaders(self):
for cls in self._lclasses:
nd = pth_join(self._base_directory, cls.dirkey)
def put(self, data_source_ident, directory):
self._dsdict[str(data_source_ident)] = directory

def _init_loaders(self, loaders):
res = []
for loader in loaders:
nd = pth_join(self._base_directory, loader.dirkey)
if not exists(nd):
makedirs(nd)
yield cls(nd)
loader.base_directory = nd
res.append(loader)
return res

def _load_data_source(self, dsid):
for loader in self._loaders():
if loader.can_load(dsid):
return loader(dsid)
def _load_data_source(self, data_source):
for loader in self._loaders:
if loader.can_load(data_source):
return loader(data_source)


class DataSourceDirectoryProvider(FilePathProvider):
def __init__(self, dsd):
self._dsd = dsd

def __call__(self, ob):
ident = ob.identifier
try:
path = self._dsd[ident]
path = self._dsd[ob]
except KeyError:
return None

Expand All @@ -1629,13 +1637,23 @@ def file_path(self):


class POWDirDataSourceDirLoader(DataSourceDirLoader):
def __init__(self, basedir):
super(POWDirDataSourceDirLoader, self).__init__(basedir)
self._idx_fname = pth_join(self._basedir, 'index')
def __init__(self, basedir=None):
super(POWDirDataSourceDirLoader, self).__init__()
self._index = dict()
self.base_directory = basedir

@property
def base_directory(self):
return self._base_directory

@base_directory.setter
def base_directory(self, val):
if val:
self._base_directory = val
self._idx_fname = pth_join(val, 'index')

def _load_index(self):
with scandir(self._basedir) as dirents:
with scandir(self._base_directory) as dirents:
dentdict = {de.name: de for de in dirents}
with open(self._idx_fname) as f:
for l in f:
Expand All @@ -1649,12 +1667,12 @@ def _load_index(self):
def _index_dir_entry_is_bad(self, dname, de):
if not de:
msg = "There is no directory entry for {} in {}"
L.warn(msg.format(dname, self._basedir), exc_info=True)
L.warn(msg.format(dname, self._base_directory), exc_info=True)
return True

if not de.is_dir():
msg = "The directory entry for {} in {} is not a directory"
L.warn(msg.format(dname, self._basedir))
L.warn(msg.format(dname, self._base_directory))
return True

return False
Expand Down
3 changes: 2 additions & 1 deletion PyOpenWorm/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,9 +393,10 @@ def rdf_graph(self):
class ClassContextMeta(ContextMeta):

def __call__(self, ident):
res = ClassContexts.get(ident)
res = ClassContexts.get(URIRef(ident))
if not res:
res = super(ClassContextMeta, self).__call__(ident=ident)
ClassContexts[URIRef(ident)] = res
return res


Expand Down
16 changes: 16 additions & 0 deletions PyOpenWorm/context_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ def __init__(self, context=None, include_stored=False, **kwargs):
super(ContextStore, self).__init__(**kwargs)
self._memory_store = None
self._include_stored = include_stored
self._namespaces = dict()
self._namespaces_r = dict()
if context is not None:
self._init_store(context)

Expand Down Expand Up @@ -129,6 +131,20 @@ def contexts(self, triple=None):
seen.add(ctx)
yield ctx

def namespace(self, prefix):
return self._namespaces.get(prefix)

def prefix(self, uri):
return self._namespaces_r.get(uri)

def bind(self, prefix, namespace):
self._namespaces[prefix] = namespace
self._namespaces_r[namespace] = prefix

def namespaces(self):
for x in self._namespaces.items():
yield x


class RDFContextStore(Store):
# Returns triples imported by the given context
Expand Down
39 changes: 22 additions & 17 deletions PyOpenWorm/datasource_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,18 +21,23 @@ def dirkey(self):


class DataSourceDirLoader(six.with_metaclass(DataSourceDirLoaderMeta, object)):
'''
Loads data files for a DataSource
def __init__(self, base_directory):
self._basedir = realpath(base_directory)
The loader is expected to organize files for each data source within the given
base directory.
'''
def __init__(self):
self.base_directory = None

def __call__(self, ident):
def __call__(self, data_source):
'''
Load the data source
Parameters
----------
ident : str
The identifier of the data source to load data for
data_source : PyOpenWorm.datasource.DataSource
The data source to load data for
Returns
-------
Expand All @@ -44,45 +49,45 @@ def __call__(self, ident):
'''
# Call str(·) to give a more uniform interface to the sub-class ``load``
# Conventionally, types that tag or "enhance" a string have the base string representation as their __str__
s = self.load(str(ident))
s = self.load(data_source)
if not s:
raise LoadFailed(ident, self, 'Loader returned an empty string')
raise LoadFailed(data_source, self, 'Loader returned an empty string')

# N.B.: This logic is NOT intended as a security measure against directory traversal: it is only to make the
# interface both flexible and unambiguous for implementers

# Relative paths are allowed
if not isabs(s):
s = pth_join(self._basedir, s)
s = pth_join(self.base_directory, s)

# Make sure the loader isn't doing some nonsense with symlinks or non-portable paths
rpath = realpath(s)
if not rpath.startswith(self._basedir):
msg = 'Loader returned a file path outside of the base directory, {}'.format(self._basedir)
raise LoadFailed(ident, self, msg)
if not rpath.startswith(self.base_directory):
msg = 'Loader returned a file path outside of the base directory, {}'.format(self.base_directory)
raise LoadFailed(data_source, self, msg)

if not exists(rpath):
msg = 'Loader returned a non-existant file {}'.format(rpath)
raise LoadFailed(ident, self, msg)
raise LoadFailed(data_source, self, msg)

if not isdir(rpath):
msg = 'Loader did not return a directory, but returned {}'.format(rpath)
raise LoadFailed(ident, self, msg)
raise LoadFailed(data_source, self, msg)

return rpath

def load(self, ident):
def load(self, data_source):
raise NotImplementedError()

def can_load(self, ident):
def can_load(self, data_source):
return False

def __str__(self):
return FCN(type(self)) + '()'


class LoadFailed(Exception):
def __init__(self, ident, loader, *args):
def __init__(self, data_source, loader, *args):
msg = args[0]
mmsg = 'Failed to load {} data with loader {}{}'.format(ident, loader, ': ' + msg if msg else '')
mmsg = 'Failed to load {} data with loader {}{}'.format(data_source, loader, ': ' + msg if msg else '')
super(LoadFailed, self).__init__(mmsg, *args[1:])

0 comments on commit 5cde8db

Please sign in to comment.