RF+BF: minor tune ups and a bugfix triggered by tests on systems with symlinks in TMPDIR path #3

Merged
merged 2 commits on Aug 12, 2016
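The bugfix side of this PR is about path-containment checks. When TMPDIR (or any ancestor directory) is a symlink, as with /tmp resolving to /private/tmp on OS X, the same location can be spelled in a symlinked and in a resolved form, and a plain prefix comparison then rejects a path that is in fact inside the dataset. A minimal sketch of the failure mode, with illustrative paths and assuming /tmp is a symlink to /private/tmp (`+ sep` stands in for the `_with_sep` helper used in the actual code):

from os.path import realpath, sep

ds_path = '/tmp/ds'                 # dataset path as created through the symlink
sub_path = '/private/tmp/ds/sub'    # same location, but already resolved

# naive containment check (pre-PR behavior): fails although sub_path is inside ds_path
print(sub_path.startswith(ds_path + sep))                       # False
# resolving both sides first, as the patched create_subdataset now does
print(realpath(sub_path).startswith(realpath(ds_path) + sep))   # True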
11 changes: 7 additions & 4 deletions datalad/distribution/dataset.py
@@ -13,6 +13,7 @@
import git
import os
from os.path import abspath, join as opj, normpath, exists
from os.path import realpath
from six import string_types, PY2
from functools import wraps

@@ -293,7 +294,7 @@ def create_subdataset(self, path,
# get absolute path (considering explicit vs relative):
path = resolve_path(path, self)
from .install import _with_sep
if not path.startswith(_with_sep(self.path)):
if not realpath(path).startswith(_with_sep(realpath(self.path))): # realpath OK
raise ValueError("path %s outside dataset %s" % (path, self))

subds = Dataset(path)
@@ -325,9 +326,11 @@ def create_subdataset(self, path,
# or call install to add the thing inplace
from .install import _install_subds_inplace
from os.path import relpath
return _install_subds_inplace(ds=self, path=subds.path,
relativepath=relpath(subds.path, self.path),
name=name)
return _install_subds_inplace(
ds=self, path=subds.path,
relativepath=relpath(realpath(subds.path), realpath(self.path)), # realpath OK
name=name
)

# def get_file_handles(self, pattern=None, fulfilled=None):
# """Get paths to all known file_handles, optionally matching a specific
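The `relpath` change in the same file addresses the sibling problem: `os.path.relpath` works purely lexically, so mixing a resolved path with a symlinked one yields a bogus climb out of the tree instead of the subdirectory name. Roughly, again assuming /tmp -> /private/tmp:

from os.path import realpath, relpath

relpath('/private/tmp/ds/sub', '/tmp/ds')
# -> '../../private/tmp/ds/sub'   (no common prefix, lexically speaking)
relpath(realpath('/private/tmp/ds/sub'), realpath('/tmp/ds'))
# -> 'sub'                        (both sides resolved to the same form)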
2 changes: 1 addition & 1 deletion datalad/distribution/install.py
@@ -15,7 +15,7 @@
import logging
import os
from os.path import join as opj, abspath, relpath, pardir, isabs, isdir, \
exists, islink, sep, realpath
exists, islink, sep

from six.moves.urllib.parse import quote as urlquote

17 changes: 12 additions & 5 deletions datalad/metadata/__init__.py
@@ -6,7 +6,7 @@
# copyright and license terms.
#
# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""Metadata"""
"""Metadata handling (parsing, storing, querying)"""


import os
@@ -78,16 +78,23 @@ def get_dataset_identifier(ds):
return dsid


def _get_base_metadata(ds_identifier):
"""Return base metadata as dict for a given ds_identifier
"""

return {
"@context": "http://schema.org/",
"@id": ds_identifier,
}


def _get_implicit_metadata(ds, ds_identifier):
"""Convert git/git-annex info into metadata

Anything that doesn't come as metadata in dataset **content**, but is
encoded in the dataset repository itself.
"""
meta = {
"@context": "http://schema.org/",
"@id": ds_identifier,
}
meta = _get_base_metadata(ds_identifier)

# whenever we have a full dataset, give it a type
if ds.is_installed():
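For reference, the new `_get_base_metadata` helper only centralizes the JSON-LD skeleton that used to be repeated in `_get_implicit_metadata` and in each parser; for any identifier it returns just the two boilerplate keys:

_get_base_metadata('example-dataset-id')   # identifier value is illustrative
# -> {'@context': 'http://schema.org/', '@id': 'example-dataset-id'}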
4 changes: 4 additions & 0 deletions datalad/metadata/parsers/__init__.py
@@ -10,3 +10,7 @@

from . import bids
from . import frictionless_datapackage

# TODO: consider bringing common functionality together via a class hierarchy
# something along the
# BaseMetaParser -> JSONMetaParser -> {BIDS, Frictionless_DataPackage}?
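As one possible reading of that TODO (everything below is a hypothetical sketch, not part of this PR), the shared file-location and JSON-loading logic would live in base classes, leaving each concrete parser with only its field mapping and its metadata filename:

from os.path import exists, join as opj
from simplejson import load as jsonload


class BaseMetaParser(object):
    # name of the per-dataset metadata file, set by subclasses
    _metadata_fname = None

    def has_metadata(self, ds):
        return exists(opj(ds.path, self._metadata_fname))

    def get_metadata(self, ds, ds_identifier):
        raise NotImplementedError


class JSONMetaParser(BaseMetaParser):
    # shared JSON loading for BIDS and frictionless data packages
    def load_metadata(self, ds):
        with open(opj(ds.path, self._metadata_fname)) as f:
            return jsonload(f)


class BIDSParser(JSONMetaParser):
    _metadata_fname = 'dataset_description.json'


class FrictionlessDataPackageParser(JSONMetaParser):
    _metadata_fname = 'datapackage.json'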
20 changes: 13 additions & 7 deletions datalad/metadata/parsers/bids.py
@@ -11,12 +11,21 @@
from os.path import exists, join as opj
from simplejson import load as jsonload
from .. import get_dataset_identifier
from .. import _get_base_metadata

# XXX Could become a class attribute
_metadata_fname = 'dataset_description.json'


# XXX Could become a class method
def has_metadata(ds):
return exists(opj(ds.path, 'dataset_description.json'))
return exists(opj(ds.path, _metadata_fname))


def get_metadata(ds, identifier):
# XXX Could become a class method
# XXX consider RFing into get_metadata(ds) and then centrally updating base_metadata
# with returned meta, or do we foresee some meta without that base?
def get_metadata(ds, ds_identifier):
"""Extract metadata from BIDS datasets.

Parameters
@@ -32,12 +41,9 @@ def get_metadata(ds, identifier):
if not has_metadata(ds):
raise ValueError("no BIDS metadata found at {}".format(ds.path))

bids = jsonload(open(opj(ds.path, 'dataset_description.json')))
bids = jsonload(open(opj(ds.path, _metadata_fname)))

meta = {
"@context": "http://schema.org/",
"@id": identifier,
}
meta = _get_base_metadata(ds_identifier)

# TODO maybe normalize labels of standard licenses to definition URIs
# perform mapping
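Beyond renaming `identifier` to `ds_identifier` and factoring out the filename constant, the module-level API is unchanged; a sketched call site (assuming `ds` is an installed Dataset) would look like:

from datalad.metadata import get_dataset_identifier
from datalad.metadata.parsers import bids

if bids.has_metadata(ds):
    meta = bids.get_metadata(ds, get_dataset_identifier(ds))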
14 changes: 5 additions & 9 deletions datalad/metadata/parsers/frictionless_datapackage.py
@@ -12,6 +12,7 @@

from os.path import exists, join as opj
from simplejson import load as jsonload
from .. import _get_base_metadata

_metadata_fname = 'datapackage.json'

@@ -40,15 +41,13 @@ def _compact_author(obj):

def _compact_license(obj):
if isinstance(obj, dict):
if 'url' in obj:
return obj['url']
else:
return obj['type']
# With obj itself if no url or type
return obj.get('url', obj.get('type', obj))
else:
return obj


def get_metadata(ds, identifier):
def get_metadata(ds, ds_identifier):
"""Extract metadata from a frictionless data package.

Parameters
@@ -67,10 +66,7 @@ def get_metadata(ds, identifier):

foreign = jsonload(open(opj(ds.path, _metadata_fname)))

meta = {
"@context": "http://schema.org/",
"@id": identifier,
}
meta = _get_base_metadata(ds_identifier)

for term in (
'name', 'title', 'description', 'keywords', 'version',
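The rewritten `_compact_license` keeps the old preference of a license URL over a license type, but no longer raises KeyError on a dict that has neither key; with illustrative values:

_compact_license({'url': 'https://opensource.org/licenses/ODbL-1.0', 'type': 'ODbL-1.0'})
# -> 'https://opensource.org/licenses/ODbL-1.0'   (url wins)
_compact_license({'type': 'PDDL-1.0'})
# -> 'PDDL-1.0'                                   (fall back to type)
_compact_license({'name': 'in-house terms'})
# -> {'name': 'in-house terms'}                   (neither key: the dict itself)
_compact_license('ODC-BY-1.0')
# -> 'ODC-BY-1.0'                                 (non-dicts pass through unchanged)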
6 changes: 4 additions & 2 deletions datalad/support/annexrepo.py
@@ -607,7 +607,8 @@ def file_has_content(self, files, allow_quick=True, batch=False):
for f in files:
filepath = opj(self.path, f)
if islink(filepath): # if symlink
target_path = realpath(filepath) # find abspath of node pointed to by symlink
# find abspath of node pointed to by symlink
target_path = realpath(filepath) # realpath OK
# TODO: checks for being not outside of this repository
out.append(exists(target_path) and '.git/annex/objects' in target_path)
else:
@@ -645,7 +646,8 @@ def is_under_annex(self, files, allow_quick=True, batch=False):
filepath = opj(self.path, f)
# todo checks for being not outside of this repository
out.append(
islink(filepath) and '.git/annex/objects' in realpath(filepath)
islink(filepath)
and '.git/annex/objects' in realpath(filepath) # realpath OK
)
return out

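Both touched methods encode the same idea and differ only in the final test: an annexed file is a symlink into `.git/annex/objects`, and `file_has_content` additionally requires the link target to exist. A condensed restatement with a hypothetical path:

from os.path import exists, islink, join as opj, realpath

filepath = opj('/tmp/repo', 'data.dat')   # hypothetical annexed file
target = realpath(filepath)               # resolved annex symlink target, if any

is_under_annex = islink(filepath) and '.git/annex/objects' in target
file_has_content = is_under_annex and exists(target)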
6 changes: 3 additions & 3 deletions datalad/support/gitrepo.py
@@ -142,7 +142,7 @@ def _normalize_path(base_dir, path):
if not path:
return path

base_dir = realpath(base_dir)
base_dir = realpath(base_dir) # realpath OK
# path = normpath(path)
# Note: disabled normpath, because it may break paths containing symlinks;
# But we don't want to realpath relative paths, in case cwd isn't the
@@ -152,7 +152,7 @@ def _normalize_path(base_dir, path):
# path might already be a symlink pointing to annex etc,
# so realpath only its directory, to get "inline" with
# realpath(base_dir) above
path = opj(realpath(dirname(path)), basename(path))
path = opj(realpath(dirname(path)), basename(path)) # realpath OK
# Executive decision was made to not do this kind of magic!
#
# elif commonprefix([realpath(getpwd()), base_dir]) == base_dir:
@@ -162,7 +162,7 @@ def _normalize_path(base_dir, path):
# BUT with relative curdir/pardir start it would assume relative to curdir
#
elif path.startswith(_curdirsep) or path.startswith(_pardirsep):
path = normpath(opj(realpath(getpwd()), path))
path = normpath(opj(realpath(getpwd()), path)) # realpath OK
else:
# We were called from outside the repo. Therefore relative paths
# are interpreted as being relative to self.path already.
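The reason `_normalize_path` resolves only the directory part is spelled out in the comment above it: the file itself may be an annex symlink, and resolving it fully would normalize to the object store rather than to the file's own location. A sketch, assuming /tmp -> /private/tmp and an annexed data.dat:

from os.path import basename, dirname, join as opj, realpath

path = '/tmp/repo/data.dat'                       # itself a symlink into the annex
realpath(path)
# -> '/private/tmp/repo/.git/annex/objects/...'   (follows the annex link too far)
opj(realpath(dirname(path)), basename(path))
# -> '/private/tmp/repo/data.dat'                 (resolves only the directories)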
3 changes: 2 additions & 1 deletion datalad/support/vcr_.py
@@ -86,6 +86,7 @@ def externals_use_cassette(name):
but want to minimize their network traffic by using vcr.py
"""
from mock import patch
with patch.dict('os.environ', {'DATALAD_USECASSETTE': realpath(_get_cassette_path(name))}):
cassette_path = realpath(_get_cassette_path(name)) # realpath OK
with patch.dict('os.environ', {'DATALAD_USECASSETTE': cassette_path}):
yield
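Splitting out `cassette_path` is cosmetic; the resolved path still ends up in the environment for whatever runs inside the context. A rough usage sketch (cassette name and body are hypothetical):

from datalad.support.vcr_ import externals_use_cassette

with externals_use_cassette('some_download'):
    # external tools spawned here see DATALAD_USECASSETTE pointing at the cassette
    run_external_download()   # placeholder for the actual test body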